duckdb 1.1.2-dev6.0 → 1.1.4-dev11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +5 -54
- package/binding.gyp +73 -52
- package/package.json +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/avg.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/corr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/covar.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/stddev.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/approx_count.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/arg_min_max.cpp +66 -18
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitagg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitstring_agg.cpp +5 -7
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bool.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/kurtosis.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/product.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/skew.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/string_agg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/sum.cpp +13 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approx_top_k.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approximate_quantile.cpp +51 -15
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mad.cpp +25 -10
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mode.cpp +215 -71
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/quantile.cpp +58 -31
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/binned_histogram.cpp +9 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/histogram.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/list.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_avg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_count.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_intercept.cpp +6 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_r2.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_slope.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxx_syy.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxy.cpp +3 -3
- package/src/duckdb/extension/core_functions/core_functions_extension.cpp +85 -0
- package/src/duckdb/{src → extension}/core_functions/function_list.cpp +30 -51
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/corr.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/distributive_functions.hpp +16 -21
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/histogram_helpers.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/holistic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/nested_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_helpers.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_sort_tree.hpp +140 -58
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_state.hpp +50 -43
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_count.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_slope.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/array_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/bit_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/blob_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/date_functions.hpp +22 -55
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/debug_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/enum_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/generic_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/list_functions.hpp +4 -4
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/map_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/math_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/operators_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/random_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/string_functions.hpp +10 -103
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/struct_functions.hpp +1 -19
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/union_functions.hpp +1 -1
- package/src/duckdb/extension/core_functions/include/core_functions_extension.hpp +22 -0
- package/src/duckdb/{src → extension}/core_functions/lambda_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_functions.cpp +11 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_value.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/bit/bitstring.cpp +12 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/base64.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/encode.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/date/age.cpp +9 -3
- package/src/duckdb/extension/core_functions/scalar/date/current.cpp +29 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_diff.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_part.cpp +42 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_sub.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_trunc.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/epoch.cpp +19 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/date/make_date.cpp +40 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/date/time_bucket.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/to_interval.cpp +54 -28
- package/src/duckdb/{src → extension}/core_functions/scalar/debug/vector_type.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/enum/enum_functions.cpp +2 -7
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/alias.cpp +2 -2
- package/src/duckdb/{src/function → extension/core_functions}/scalar/generic/binning.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/can_implicitly_cast.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/current_setting.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/hash.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/least.cpp +30 -10
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/stats.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/system_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/typeof.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/array_slice.cpp +93 -88
- package/src/duckdb/{src → extension}/core_functions/scalar/list/flatten.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_aggregates.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_distance.cpp +8 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_filter.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_has_any_or_all.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_reduce.cpp +5 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_sort.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_transform.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_value.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/range.cpp +7 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/cardinality.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map.cpp +5 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_concat.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_entries.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_extract.cpp +13 -25
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_from_entries.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_keys_values.cpp +11 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/math/numeric.cpp +83 -37
- package/src/duckdb/{src → extension}/core_functions/scalar/operators/bitwise.cpp +19 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/random/random.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/random/setseed.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/ascii.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/bar.cpp +6 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/chr.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/damerau_levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/format_bytes.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hamming.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hex.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/instr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/jaccard.cpp +1 -1
- package/src/duckdb/extension/core_functions/scalar/string/jaro_winkler.cpp +112 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/string/left_right.cpp +6 -6
- package/src/duckdb/{src → extension}/core_functions/scalar/string/levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/pad.cpp +9 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/string/parse_path.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/printf.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/repeat.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/replace.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/reverse.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/starts_with.cpp +5 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/to_base.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/translate.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/trim.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/unicode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/url_encode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/struct/struct_insert.cpp +25 -31
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_extract.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_tag.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_value.cpp +3 -3
- package/src/duckdb/extension/icu/icu-dateadd.cpp +16 -11
- package/src/duckdb/extension/icu/icu-datefunc.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datepart.cpp +8 -5
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +8 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-makedate.cpp +18 -7
- package/src/duckdb/extension/icu/icu-strptime.cpp +9 -3
- package/src/duckdb/extension/icu/icu-table-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-timebucket.cpp +4 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +67 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +60 -5
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -1
- package/src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/normlzr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/rbbinode.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/schriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/stringtriebuilder.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uchriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/brkiter.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/bytestriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/chariter.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/dtintrv.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/locid.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/messagepattern.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/normlzr.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/parsepos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/rbbi.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/schriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/strenum.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringpiece.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringtriebuilder.h +9 -9
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/ucharstriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uchriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uniset.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/unistr.h +12 -12
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uobject.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unifiedcache.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/uniset.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustr_titlecase_brkiter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustrenum.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/alphaindex.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/coll.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/datefmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dcfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen_impl.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtrule.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/fpositer.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measunit.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measure.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +9 -9
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbtz.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/region.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rulebasedcollator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/selfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/simpletz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/sortkey.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tmutamt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames.cpp +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzrule.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/tztrans.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/alphaindex.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/calendar.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/choicfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coll.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/currpinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/datefmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dcfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/decimfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvfmt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtptngen.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fieldpos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fmtable.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fpositer.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measunit.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measure.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/msgfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/numfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbnf.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbtz.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/region.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/search.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/selfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/simpletz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/smpdtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/sortkey.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/stsearch.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tblcoll.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/timezone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutamt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tznames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzrule.h +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tztrans.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/vtzone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp +2 -2
- package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -1
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
- package/src/duckdb/extension/json/include/json_common.hpp +14 -10
- package/src/duckdb/extension/json/include/json_scan.hpp +48 -7
- package/src/duckdb/extension/json/include/json_structure.hpp +2 -1
- package/src/duckdb/extension/json/include/json_transform.hpp +5 -2
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +57 -20
- package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +7 -6
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +6 -5
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +20 -17
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +48 -17
- package/src/duckdb/extension/json/json_functions/read_json.cpp +83 -34
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -3
- package/src/duckdb/extension/json/json_functions.cpp +14 -16
- package/src/duckdb/extension/json/json_scan.cpp +36 -16
- package/src/duckdb/extension/json/json_serializer.cpp +1 -1
- package/src/duckdb/extension/json/serialize_json.cpp +2 -2
- package/src/duckdb/extension/parquet/column_reader.cpp +136 -116
- package/src/duckdb/extension/parquet/column_writer.cpp +870 -604
- package/src/duckdb/extension/parquet/geo_parquet.cpp +4 -5
- package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +0 -4
- package/src/duckdb/extension/parquet/include/column_reader.hpp +24 -19
- package/src/duckdb/extension/parquet/include/column_writer.hpp +7 -5
- package/src/duckdb/extension/parquet/include/decode_utils.hpp +138 -18
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +4 -3
- package/src/duckdb/extension/parquet/include/null_column_reader.hpp +1 -14
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +45 -0
- package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +101 -90
- package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +179 -0
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +2 -3
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +48 -0
- package/src/duckdb/extension/parquet/include/parquet_extension.hpp +8 -0
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +5 -0
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +22 -18
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -5
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +87 -3
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +30 -16
- package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +0 -8
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +1 -42
- package/src/duckdb/extension/parquet/include/thrift_tools.hpp +13 -1
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +4 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +240 -197
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +138 -6
- package/src/duckdb/extension/parquet/parquet_reader.cpp +155 -79
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +258 -38
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +17 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +65 -34
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +4 -0
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +13 -0
- package/src/duckdb/src/catalog/catalog.cpp +272 -97
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +9 -4
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +145 -95
- package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +9 -3
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +15 -0
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +40 -24
- package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry.cpp +3 -0
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +60 -5
- package/src/duckdb/src/catalog/catalog_search_path.cpp +27 -14
- package/src/duckdb/src/catalog/catalog_set.cpp +75 -31
- package/src/duckdb/src/catalog/default/default_functions.cpp +13 -8
- package/src/duckdb/src/catalog/default/default_views.cpp +1 -0
- package/src/duckdb/src/catalog/dependency_manager.cpp +133 -5
- package/src/duckdb/src/catalog/duck_catalog.cpp +17 -9
- package/src/duckdb/src/common/adbc/adbc.cpp +18 -0
- package/src/duckdb/src/common/allocator.cpp +3 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +30 -9
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +63 -82
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +4 -3
- package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +361 -0
- package/src/duckdb/src/common/arrow/arrow_util.cpp +10 -6
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +6 -2
- package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +2 -1
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +27 -14
- package/src/duckdb/src/common/assert.cpp +1 -2
- package/src/duckdb/src/common/bind_helpers.cpp +1 -1
- package/src/duckdb/src/common/box_renderer.cpp +316 -26
- package/src/duckdb/src/common/cgroups.cpp +7 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
- package/src/duckdb/src/common/enum_util.cpp +2865 -6882
- package/src/duckdb/src/common/enums/compression_type.cpp +12 -0
- package/src/duckdb/src/common/enums/metric_type.cpp +24 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +4 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/error_data.cpp +23 -6
- package/src/duckdb/src/common/exception/binder_exception.cpp +1 -1
- package/src/duckdb/src/common/exception.cpp +20 -28
- package/src/duckdb/src/common/extra_type_info.cpp +85 -20
- package/src/duckdb/src/common/file_buffer.cpp +5 -2
- package/src/duckdb/src/common/file_system.cpp +8 -3
- package/src/duckdb/src/common/fsst.cpp +3 -3
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/local_file_system.cpp +169 -60
- package/src/duckdb/src/common/multi_file_list.cpp +4 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +240 -63
- package/src/duckdb/src/common/opener_file_system.cpp +37 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +77 -11
- package/src/duckdb/src/common/operator/string_cast.cpp +6 -2
- package/src/duckdb/src/common/pipe_file_system.cpp +4 -4
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +25 -14
- package/src/duckdb/src/common/radix_partitioning.cpp +17 -16
- package/src/duckdb/src/common/random_engine.cpp +39 -3
- package/src/duckdb/src/common/render_tree.cpp +3 -19
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -58
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +2 -0
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +20 -19
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
- package/src/duckdb/src/common/serializer/memory_stream.cpp +36 -0
- package/src/duckdb/src/common/sort/comparators.cpp +7 -7
- package/src/duckdb/src/common/sort/partition_state.cpp +2 -2
- package/src/duckdb/src/common/stacktrace.cpp +127 -0
- package/src/duckdb/src/common/string_util.cpp +157 -32
- package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +15 -3
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +4 -0
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +71 -8
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +27 -6
- package/src/duckdb/src/common/types/conflict_manager.cpp +21 -7
- package/src/duckdb/src/common/types/date.cpp +39 -25
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -11
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -7
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +17 -17
- package/src/duckdb/src/common/types/timestamp.cpp +70 -33
- package/src/duckdb/src/common/types/uuid.cpp +11 -0
- package/src/duckdb/src/common/types/validity_mask.cpp +16 -5
- package/src/duckdb/src/common/types/value.cpp +357 -199
- package/src/duckdb/src/common/types/varint.cpp +64 -18
- package/src/duckdb/src/common/types/vector.cpp +78 -38
- package/src/duckdb/src/common/types.cpp +199 -92
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +2 -1
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +32 -5
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +3 -1
- package/src/duckdb/src/execution/adaptive_filter.cpp +6 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +410 -111
- package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +6 -0
- package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +4 -3
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_function.cpp +1 -0
- package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +5 -4
- package/src/duckdb/src/execution/expression_executor.cpp +5 -3
- package/src/duckdb/src/execution/index/art/art.cpp +208 -72
- package/src/duckdb/src/execution/index/art/base_leaf.cpp +1 -1
- package/src/duckdb/src/execution/index/art/leaf.cpp +12 -7
- package/src/duckdb/src/execution/index/art/node.cpp +2 -1
- package/src/duckdb/src/execution/index/art/node256_leaf.cpp +6 -6
- package/src/duckdb/src/execution/index/art/plan_art.cpp +50 -55
- package/src/duckdb/src/execution/index/art/prefix.cpp +7 -13
- package/src/duckdb/src/execution/index/bound_index.cpp +30 -5
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +3 -5
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -9
- package/src/duckdb/src/execution/join_hashtable.cpp +254 -158
- package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
- package/src/duckdb/src/execution/operator/aggregate/physical_partitioned_aggregate.cpp +226 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +77 -70
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +114 -50
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +19 -10
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +22 -15
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +95 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +6 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +75 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +40 -12
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +395 -163
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +115 -49
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +66 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +220 -46
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +43 -32
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +54 -119
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +184 -20
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +83 -21
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_validator.cpp +63 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +7 -4
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +3 -2
- package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +9 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +132 -15
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -55
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +284 -154
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +40 -55
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -3
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +298 -227
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +5 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +3 -4
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +32 -19
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +6 -0
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -19
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +11 -27
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +308 -119
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +105 -55
- package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +6 -2
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -1
- package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +15 -6
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +92 -50
- package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +0 -1
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +8 -4
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +54 -22
- package/src/duckdb/src/execution/operator/set/physical_union.cpp +5 -1
- package/src/duckdb/src/execution/physical_operator.cpp +15 -9
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +101 -12
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +11 -140
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +6 -5
- package/src/duckdb/src/execution/physical_plan/plan_export.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +16 -13
- package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_order.cpp +7 -7
- package/src/duckdb/src/execution/physical_plan/plan_prepare.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_projection.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +8 -3
- package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +3 -2
- package/src/duckdb/src/execution/physical_plan_generator.cpp +0 -22
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +136 -116
- package/src/duckdb/src/execution/sample/base_reservoir_sample.cpp +136 -0
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +930 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +6 -12
- package/src/duckdb/src/function/aggregate/distributive/{first.cpp → first_last_any.cpp} +37 -18
- package/src/duckdb/src/{core_functions → function}/aggregate/distributive/minmax.cpp +19 -12
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +72 -13
- package/src/duckdb/src/function/built_in_functions.cpp +85 -2
- package/src/duckdb/src/function/cast/decimal_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/string_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/struct_cast.cpp +81 -49
- package/src/duckdb/src/function/cast/union/from_struct.cpp +7 -5
- package/src/duckdb/src/function/compression_config.cpp +6 -0
- package/src/duckdb/src/function/encoding_function.cpp +134 -0
- package/src/duckdb/src/function/function.cpp +8 -13
- package/src/duckdb/src/function/function_binder.cpp +100 -21
- package/src/duckdb/src/function/function_list.cpp +178 -0
- package/src/duckdb/src/function/macro_function.cpp +4 -4
- package/src/duckdb/src/function/pragma/pragma_functions.cpp +0 -2
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +0 -4
- package/src/duckdb/src/{core_functions/core_functions.cpp → function/register_function_list.cpp} +12 -8
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +62 -23
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +33 -16
- package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +21 -0
- package/src/duckdb/src/{core_functions/scalar/blob → function/scalar}/create_sort_key.cpp +86 -23
- package/src/duckdb/src/{core_functions → function}/scalar/date/strftime.cpp +6 -4
- package/src/duckdb/src/function/scalar/generic/constant_or_null.cpp +5 -7
- package/src/duckdb/src/{core_functions → function}/scalar/generic/error.cpp +3 -1
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +2 -2
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +1 -7
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +27 -21
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +8 -12
- package/src/duckdb/src/function/scalar/list/list_select.cpp +1 -4
- package/src/duckdb/src/function/scalar/list/list_zip.cpp +6 -6
- package/src/duckdb/src/{core_functions → function}/scalar/map/map_contains.cpp +2 -2
- package/src/duckdb/src/function/scalar/nested_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/{operators → operator}/add.cpp +2 -1
- package/src/duckdb/src/function/scalar/{operators → operator}/arithmetic.cpp +195 -127
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +30 -21
- package/src/duckdb/src/function/scalar/strftime_format.cpp +10 -0
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +11 -41
- package/src/duckdb/src/function/scalar/string/concat.cpp +22 -20
- package/src/duckdb/src/function/scalar/string/concat_ws.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/contains.cpp +16 -19
- package/src/duckdb/src/function/scalar/string/length.cpp +38 -24
- package/src/duckdb/src/function/scalar/string/like.cpp +80 -47
- package/src/duckdb/src/{core_functions → function}/scalar/string/md5.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/prefix.cpp +0 -4
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -1
- package/src/duckdb/src/function/scalar/string/regexp.cpp +17 -7
- package/src/duckdb/src/{core_functions → function}/scalar/string/regexp_escape.cpp +2 -2
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha1.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha256.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/string_split.cpp +4 -5
- package/src/duckdb/src/function/scalar/string/strip_accents.cpp +3 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +14 -13
- package/src/duckdb/src/function/scalar/string/suffix.cpp +0 -4
- package/src/duckdb/src/function/scalar/struct/struct_concat.cpp +115 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +35 -31
- package/src/duckdb/src/{core_functions → function}/scalar/struct/struct_pack.cpp +7 -7
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -8
- package/src/duckdb/src/function/scalar/system/write_log.cpp +170 -0
- package/src/duckdb/src/function/scalar_function.cpp +5 -5
- package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +3 -2
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +287 -1
- package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +6 -6
- package/src/duckdb/src/function/table/arrow.cpp +32 -352
- package/src/duckdb/src/function/table/arrow_conversion.cpp +43 -7
- package/src/duckdb/src/function/table/copy_csv.cpp +38 -23
- package/src/duckdb/src/function/table/glob.cpp +1 -1
- package/src/duckdb/src/function/table/query_function.cpp +12 -7
- package/src/duckdb/src/function/table/read_csv.cpp +114 -46
- package/src/duckdb/src/function/table/read_file.cpp +26 -6
- package/src/duckdb/src/function/table/sniff_csv.cpp +25 -5
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +6 -7
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +141 -16
- package/src/duckdb/src/function/table/system/duckdb_log.cpp +64 -0
- package/src/duckdb/src/function/table/system/duckdb_log_contexts.cpp +65 -0
- package/src/duckdb/src/function/table/system/duckdb_memory.cpp +0 -1
- package/src/duckdb/src/function/table/system/duckdb_settings.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_tables.cpp +1 -13
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +1 -1
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +17 -0
- package/src/duckdb/src/function/table/system/pragma_table_info.cpp +6 -0
- package/src/duckdb/src/function/table/system/pragma_table_sample.cpp +95 -0
- package/src/duckdb/src/function/table/system/test_all_types.cpp +56 -46
- package/src/duckdb/src/function/table/system_functions.cpp +3 -0
- package/src/duckdb/src/function/table/table_scan.cpp +487 -289
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/table_function.cpp +10 -6
- package/src/duckdb/src/function/window/window_aggregate_function.cpp +248 -0
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +48 -0
- package/src/duckdb/src/function/window/window_aggregator.cpp +88 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +854 -0
- package/src/duckdb/src/function/window/window_collection.cpp +146 -0
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +357 -0
- package/src/duckdb/src/function/window/window_custom_aggregator.cpp +146 -0
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +758 -0
- package/src/duckdb/src/function/window/window_executor.cpp +99 -0
- package/src/duckdb/src/function/window/window_index_tree.cpp +63 -0
- package/src/duckdb/src/function/window/window_merge_sort_tree.cpp +275 -0
- package/src/duckdb/src/function/window/window_naive_aggregator.cpp +361 -0
- package/src/duckdb/src/function/window/window_rank_function.cpp +288 -0
- package/src/duckdb/src/function/window/window_rownumber_function.cpp +191 -0
- package/src/duckdb/src/function/window/window_segment_tree.cpp +594 -0
- package/src/duckdb/src/function/window/window_shared_expressions.cpp +50 -0
- package/src/duckdb/src/function/window/window_token_tree.cpp +142 -0
- package/src/duckdb/src/function/window/window_value_function.cpp +566 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +74 -17
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +9 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +4 -10
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +18 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +5 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +21 -18
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +3 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +10 -2
- package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +11 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +26 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +144 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +11 -4
- package/src/duckdb/src/include/duckdb/common/assert.hpp +12 -1
- package/src/duckdb/src/include/duckdb/common/atomic_ptr.hpp +102 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +65 -6
- package/src/duckdb/src/include/duckdb/common/chrono.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/column_index.hpp +72 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +120 -0
- package/src/duckdb/src/include/duckdb/{core_functions/core_functions.hpp → common/enums/collation_type.hpp} +2 -7
- package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/enums/function_errors.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_preservation_type.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions/aggregate → common/enums}/quantile_enum.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/error_data.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/extension_type_info.hpp +37 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +9 -3
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +19 -10
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/fsst.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +12 -2
- package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +147 -27
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +2 -7
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +16 -5
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/platform.hpp +34 -3
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +10 -13
- package/src/duckdb/src/include/duckdb/common/random_engine.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +0 -2
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/stacktrace.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +30 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +13 -2
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +21 -4
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +58 -10
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +43 -16
- package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +63 -21
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +62 -16
- package/src/duckdb/src/include/duckdb/common/types/varint.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +34 -7
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +12 -7
- package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +12 -13
- package/src/duckdb/src/include/duckdb/common/vector_operations/binary_executor.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +111 -4
- package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +48 -10
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +25 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +28 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +23 -16
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +25 -16
- package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +15 -10
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent/physical_fixed_batch_copy.hpp → aggregate/physical_partitioned_aggregate.hpp} +25 -27
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +21 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +38 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +8 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +29 -23
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +15 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +13 -5
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +36 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +21 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +52 -22
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +6 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_validator.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +6 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +16 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +9 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +8 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +55 -10
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +28 -7
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +6 -9
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +17 -16
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +7 -3
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +55 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +10 -9
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +16 -13
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +0 -4
- package/src/duckdb/src/include/duckdb/execution/partition_info.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +20 -9
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +1 -11
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/progress_data.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +160 -31
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_function_utils.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +61 -10
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/minmax_n_helpers.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/sort_key_helpers.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +47 -27
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +3 -10
- package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +13 -6
- package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +15 -0
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +29 -6
- package/src/duckdb/src/include/duckdb/{core_functions → function}/create_sort_key.hpp +4 -1
- package/src/duckdb/src/include/duckdb/function/encoding_function.hpp +78 -0
- package/src/duckdb/src/include/duckdb/function/function.hpp +22 -1
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/function_list.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/function_set.hpp +13 -7
- package/src/duckdb/src/include/duckdb/{core_functions → function}/lambda_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/partition_stats.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/register_function_list_helper.hpp +69 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +154 -23
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_utils.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/date_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_common.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +32 -23
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/list_functions.hpp +156 -0
- package/src/duckdb/src/include/duckdb/function/scalar/map_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +4 -45
- package/src/duckdb/src/include/duckdb/function/scalar/operator_functions.hpp +102 -0
- package/src/duckdb/src/include/duckdb/function/scalar/operators.hpp +2 -16
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +16 -25
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_utils.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_common.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +401 -76
- package/src/duckdb/src/include/duckdb/function/scalar/struct_functions.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/scalar/system_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +17 -8
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +59 -6
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +12 -9
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +18 -13
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -4
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +14 -0
- package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +15 -10
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +94 -18
- package/src/duckdb/src/include/duckdb/{core_functions → function}/to_interval.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +44 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_states.hpp +56 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +194 -0
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +153 -0
- package/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +146 -0
- package/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +32 -0
- package/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +122 -0
- package/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp +42 -0
- package/src/duckdb/src/include/duckdb/function/window/window_merge_sort_tree.hpp +108 -0
- package/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rank_function.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rownumber_function.hpp +43 -0
- package/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/window/window_shared_expressions.hpp +76 -0
- package/src/duckdb/src/include/duckdb/function/window/window_token_tree.hpp +46 -0
- package/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +79 -0
- package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +2 -0
- package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +81 -0
- package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +127 -0
- package/src/duckdb/src/include/duckdb/logging/logger.hpp +287 -0
- package/src/duckdb/src/include/duckdb/logging/logging.hpp +83 -0
- package/src/duckdb/src/include/duckdb/main/appender.hpp +41 -18
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +6 -3
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +317 -231
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +17 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +28 -6
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +8 -3
- package/src/duckdb/src/include/duckdb/main/config.hpp +52 -8
- package/src/duckdb/src/include/duckdb/main/connection.hpp +18 -3
- package/src/duckdb/src/include/duckdb/main/database.hpp +8 -7
- package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +5 -1
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension.hpp +8 -2
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +548 -9
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +12 -7
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +8 -4
- package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/subquery_relation.hpp +1 -4
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +7 -0
- package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation.hpp +45 -9
- package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +20 -22
- package/src/duckdb/src/include/duckdb/main/settings.hpp +613 -378
- package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -4
- package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +1 -3
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +14 -7
- package/src/duckdb/src/include/duckdb/optimizer/common_aggregate_optimizer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/empty_result_pullup.hpp +27 -0
- package/src/duckdb/src/include/duckdb/optimizer/expression_heuristics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +6 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/in_clause_rewriter.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +45 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +23 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +18 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +9 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +33 -11
- package/src/duckdb/src/include/duckdb/optimizer/rule/distinct_aggregate_optimizer.hpp +34 -0
- package/src/duckdb/src/include/duckdb/optimizer/sampling_pushdown.hpp +25 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +3 -1
- package/src/duckdb/src/include/duckdb/optimizer/sum_rewriter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +26 -8
- package/src/duckdb/src/include/duckdb/parallel/thread_context.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +51 -3
- package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +28 -44
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +11 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +12 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +22 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -4
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +16 -12
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +5 -5
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +12 -3
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +17 -57
- package/src/duckdb/src/include/duckdb/parser/qualified_name_set.hpp +19 -3
- package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +12 -9
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +45 -28
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +23 -11
- package/src/duckdb/src/include/duckdb/planner/binding_alias.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +11 -10
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +4 -4
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +9 -4
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/filter/dynamic_filter.hpp +48 -0
- package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +35 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +9 -9
- package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +16 -7
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +5 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +5 -3
- package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +14 -6
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +12 -8
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +82 -26
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +10 -3
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +4 -13
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +14 -15
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +13 -15
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/analyze.hpp +46 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/common.hpp +60 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +61 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/decompression.hpp +50 -0
- package/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp +100 -0
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/appender.hpp +150 -0
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp +618 -0
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +53 -31
- package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -1
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -4
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +16 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/storage_index.hpp +70 -0
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +5 -7
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +23 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +34 -6
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +39 -10
- package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +56 -14
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +35 -29
- package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +19 -6
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +29 -6
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +10 -10
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +26 -19
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +8 -1
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +16 -14
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +228 -61
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +14 -10
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +3 -1
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +3 -2
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +19 -17
- package/src/duckdb/src/include/duckdb/transaction/rollback_state.hpp +5 -2
- package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -2
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +13 -8
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer_allocator.hpp +79 -0
- package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +43 -13
- package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +4 -1
- package/src/duckdb/src/include/duckdb/verification/copied_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/external_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/parsed_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +7 -3
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +11 -5
- package/src/duckdb/src/include/duckdb/verification/unoptimized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb.h +424 -41
- package/src/duckdb/src/include/duckdb_extension.h +301 -195
- package/src/duckdb/src/logging/log_manager.cpp +157 -0
- package/src/duckdb/src/logging/log_storage.cpp +209 -0
- package/src/duckdb/src/logging/logger.cpp +211 -0
- package/src/duckdb/src/logging/logging.cpp +42 -0
- package/src/duckdb/src/main/appender.cpp +187 -45
- package/src/duckdb/src/main/attached_database.cpp +16 -8
- package/src/duckdb/src/main/capi/appender-c.cpp +47 -4
- package/src/duckdb/src/main/capi/arrow-c.cpp +9 -4
- package/src/duckdb/src/main/capi/config-c.cpp +17 -4
- package/src/duckdb/src/main/capi/datetime-c.cpp +15 -0
- package/src/duckdb/src/main/capi/duckdb-c.cpp +54 -13
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +212 -4
- package/src/duckdb/src/main/capi/helper-c.cpp +3 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +26 -7
- package/src/duckdb/src/main/capi/replacement_scan-c.cpp +1 -1
- package/src/duckdb/src/main/capi/result-c.cpp +3 -0
- package/src/duckdb/src/main/capi/table_description-c.cpp +43 -10
- package/src/duckdb/src/main/capi/threading-c.cpp +4 -4
- package/src/duckdb/src/main/client_context.cpp +125 -51
- package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
- package/src/duckdb/src/main/client_context_wrapper.cpp +4 -0
- package/src/duckdb/src/main/client_data.cpp +1 -1
- package/src/duckdb/src/main/client_verify.cpp +39 -20
- package/src/duckdb/src/main/config.cpp +266 -74
- package/src/duckdb/src/main/connection.cpp +53 -13
- package/src/duckdb/src/main/database.cpp +39 -18
- package/src/duckdb/src/main/database_manager.cpp +12 -11
- package/src/duckdb/src/main/db_instance_cache.cpp +14 -7
- package/src/duckdb/src/main/extension/extension_helper.cpp +24 -23
- package/src/duckdb/src/main/extension/extension_install.cpp +19 -7
- package/src/duckdb/src/main/extension/extension_load.cpp +91 -41
- package/src/duckdb/src/main/extension/extension_util.cpp +40 -19
- package/src/duckdb/src/main/extension.cpp +20 -11
- package/src/duckdb/src/main/profiling_info.cpp +19 -5
- package/src/duckdb/src/main/query_profiler.cpp +135 -36
- package/src/duckdb/src/main/query_result.cpp +2 -1
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/create_table_relation.cpp +5 -4
- package/src/duckdb/src/main/relation/create_view_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delete_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delim_get_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/distinct_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/explain_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/filter_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/insert_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation/order_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/projection_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/query_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +58 -20
- package/src/duckdb/src/main/relation/setop_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/subquery_relation.cpp +3 -8
- package/src/duckdb/src/main/relation/table_function_relation.cpp +10 -1
- package/src/duckdb/src/main/relation/table_relation.cpp +19 -3
- package/src/duckdb/src/main/relation/update_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/value_relation.cpp +42 -2
- package/src/duckdb/src/main/relation/view_relation.cpp +8 -2
- package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +49 -28
- package/src/duckdb/src/main/secret/secret_manager.cpp +1 -1
- package/src/duckdb/src/main/secret/secret_storage.cpp +6 -4
- package/src/duckdb/src/main/settings/autogenerated_settings.cpp +1102 -0
- package/src/duckdb/src/main/settings/custom_settings.cpp +1343 -0
- package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +60 -37
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +1 -1
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +126 -72
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +22 -6
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -3
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +2 -2
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +3 -3
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +7 -7
- package/src/duckdb/src/optimizer/deliminator.cpp +6 -5
- package/src/duckdb/src/optimizer/empty_result_pullup.cpp +96 -0
- package/src/duckdb/src/optimizer/expression_heuristics.cpp +11 -3
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +9 -2
- package/src/duckdb/src/optimizer/filter_combiner.cpp +190 -88
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +6 -5
- package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +25 -9
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +170 -72
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -4
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +3 -1
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +7 -7
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -6
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +37 -22
- package/src/duckdb/src/optimizer/late_materialization.cpp +414 -0
- package/src/duckdb/src/optimizer/limit_pushdown.cpp +1 -0
- package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +30 -2
- package/src/duckdb/src/optimizer/optimizer.cpp +67 -7
- package/src/duckdb/src/optimizer/pullup/pullup_filter.cpp +3 -3
- package/src/duckdb/src/optimizer/pullup/pullup_projection.cpp +2 -2
- package/src/duckdb/src/optimizer/pullup/pullup_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +3 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +5 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_unnest.cpp +52 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +2 -2
- package/src/duckdb/src/optimizer/regex_range_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +168 -38
- package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +8 -5
- package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/constant_folding.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/distinct_aggregate_optimizer.cpp +65 -0
- package/src/duckdb/src/optimizer/rule/distributivity.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -1
- package/src/duckdb/src/optimizer/rule/move_constants.cpp +9 -9
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/sampling_pushdown.cpp +24 -0
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +74 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +10 -7
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +3 -0
- package/src/duckdb/src/optimizer/sum_rewriter.cpp +174 -0
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +71 -0
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +5 -5
- package/src/duckdb/src/parallel/event.cpp +4 -0
- package/src/duckdb/src/parallel/executor.cpp +11 -29
- package/src/duckdb/src/parallel/executor_task.cpp +8 -3
- package/src/duckdb/src/parallel/pipeline.cpp +15 -8
- package/src/duckdb/src/parallel/pipeline_executor.cpp +67 -43
- package/src/duckdb/src/parallel/thread_context.cpp +12 -1
- package/src/duckdb/src/parser/column_definition.cpp +3 -3
- package/src/duckdb/src/parser/constraints/unique_constraint.cpp +72 -9
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -3
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +3 -3
- package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/star_expression.cpp +46 -2
- package/src/duckdb/src/parser/expression/window_expression.cpp +24 -1
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +26 -2
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +5 -3
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +29 -1
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +6 -6
- package/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_function_info.cpp +17 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -15
- package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
- package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +4 -4
- package/src/duckdb/src/parser/parsed_data/load_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +31 -1
- package/src/duckdb/src/parser/parsed_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +4 -1
- package/src/duckdb/src/parser/parser.cpp +129 -0
- package/src/duckdb/src/parser/qualified_name.cpp +99 -0
- package/src/duckdb/src/parser/query_error_context.cpp +35 -6
- package/src/duckdb/src/parser/query_node/select_node.cpp +4 -4
- package/src/duckdb/src/parser/statement/delete_statement.cpp +6 -1
- package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -3
- package/src/duckdb/src/parser/statement/update_statement.cpp +6 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +2 -2
- package/src/duckdb/src/parser/tableref.cpp +2 -2
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +16 -24
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +5 -5
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +61 -13
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +10 -4
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -2
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +30 -3
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +25 -6
- package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +10 -3
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +4 -3
- package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +18 -3
- package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +0 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +5 -5
- package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +26 -12
- package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +11 -3
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +3 -3
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +4 -4
- package/src/duckdb/src/parser/transform/statement/transform_set.cpp +2 -2
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +21 -3
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +8 -6
- package/src/duckdb/src/parser/transformer.cpp +2 -2
- package/src/duckdb/src/planner/bind_context.cpp +308 -136
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +30 -31
- package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +4 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +102 -94
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +7 -5
- package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +7 -7
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -10
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -6
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +23 -15
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +97 -19
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +74 -16
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +6 -6
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +49 -15
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +32 -23
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +20 -3
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +3 -0
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +6 -5
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +38 -19
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +2 -12
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +117 -412
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +423 -144
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +5 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +0 -4
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +31 -13
- package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +96 -27
- package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -3
- package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +7 -6
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +36 -9
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +34 -34
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +72 -35
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +99 -18
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +23 -11
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +22 -19
- package/src/duckdb/src/planner/binder.cpp +23 -45
- package/src/duckdb/src/planner/binding_alias.cpp +69 -0
- package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +6 -2
- package/src/duckdb/src/planner/collation_binding.cpp +38 -4
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +17 -5
- package/src/duckdb/src/planner/expression/bound_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -1
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +2 -2
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -4
- package/src/duckdb/src/planner/expression.cpp +7 -1
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +16 -0
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +53 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -8
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder.cpp +7 -7
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -3
- package/src/duckdb/src/planner/filter/constant_filter.cpp +17 -2
- package/src/duckdb/src/planner/filter/dynamic_filter.cpp +68 -0
- package/src/duckdb/src/planner/filter/in_filter.cpp +84 -0
- package/src/duckdb/src/planner/filter/null_filter.cpp +1 -2
- package/src/duckdb/src/planner/filter/optional_filter.cpp +29 -0
- package/src/duckdb/src/planner/filter/struct_filter.cpp +11 -6
- package/src/duckdb/src/planner/joinside.cpp +6 -5
- package/src/duckdb/src/planner/logical_operator.cpp +4 -1
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +68 -2
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +23 -0
- package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -12
- package/src/duckdb/src/planner/operator/logical_filter.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +48 -25
- package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_join.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_order.cpp +4 -11
- package/src/duckdb/src/planner/operator/logical_top_n.cpp +7 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +33 -5
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +2 -2
- package/src/duckdb/src/planner/table_binding.cpp +74 -36
- package/src/duckdb/src/planner/table_filter.cpp +5 -8
- package/src/duckdb/src/storage/arena_allocator.cpp +5 -4
- package/src/duckdb/src/storage/buffer/block_handle.cpp +88 -17
- package/src/duckdb/src/storage/buffer/block_manager.cpp +34 -26
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +70 -49
- package/src/duckdb/src/storage/buffer_manager.cpp +4 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -5
- package/src/duckdb/src/storage/compression/bitpacking.cpp +14 -16
- package/src/duckdb/src/storage/compression/dictionary/analyze.cpp +54 -0
- package/src/duckdb/src/storage/compression/dictionary/common.cpp +90 -0
- package/src/duckdb/src/storage/compression/dictionary/compression.cpp +174 -0
- package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +115 -0
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +53 -545
- package/src/duckdb/src/storage/compression/empty_validity.cpp +15 -0
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +25 -16
- package/src/duckdb/src/storage/compression/fsst.cpp +101 -47
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +92 -2
- package/src/duckdb/src/storage/compression/rle.cpp +216 -46
- package/src/duckdb/src/storage/compression/roaring/analyze.cpp +179 -0
- package/src/duckdb/src/storage/compression/roaring/common.cpp +282 -0
- package/src/duckdb/src/storage/compression/roaring/compress.cpp +481 -0
- package/src/duckdb/src/storage/compression/roaring/metadata.cpp +262 -0
- package/src/duckdb/src/storage/compression/roaring/scan.cpp +364 -0
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +47 -65
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +102 -39
- package/src/duckdb/src/storage/compression/zstd.cpp +1049 -0
- package/src/duckdb/src/storage/data_table.cpp +312 -172
- package/src/duckdb/src/storage/local_storage.cpp +104 -46
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -3
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +138 -58
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +14 -0
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +19 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +43 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +32 -5
- package/src/duckdb/src/storage/single_file_block_manager.cpp +6 -8
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +82 -71
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +18 -17
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +34 -22
- package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -3
- package/src/duckdb/src/storage/storage_info.cpp +72 -10
- package/src/duckdb/src/storage/storage_manager.cpp +41 -47
- package/src/duckdb/src/storage/table/array_column_data.cpp +7 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -9
- package/src/duckdb/src/storage/table/column_data.cpp +105 -43
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +307 -132
- package/src/duckdb/src/storage/table/column_segment.cpp +36 -13
- package/src/duckdb/src/storage/table/list_column_data.cpp +4 -4
- package/src/duckdb/src/storage/table/row_group.cpp +159 -66
- package/src/duckdb/src/storage/table/row_group_collection.cpp +157 -68
- package/src/duckdb/src/storage/table/row_version_manager.cpp +33 -10
- package/src/duckdb/src/storage/table/scan_state.cpp +21 -7
- package/src/duckdb/src/storage/table/standard_column_data.cpp +68 -5
- package/src/duckdb/src/storage/table/struct_column_data.cpp +42 -4
- package/src/duckdb/src/storage/table/table_statistics.cpp +91 -5
- package/src/duckdb/src/storage/table/update_segment.cpp +287 -210
- package/src/duckdb/src/storage/table_index_list.cpp +55 -58
- package/src/duckdb/src/storage/temporary_file_manager.cpp +412 -149
- package/src/duckdb/src/storage/wal_replay.cpp +132 -48
- package/src/duckdb/src/storage/write_ahead_log.cpp +75 -48
- package/src/duckdb/src/transaction/cleanup_state.cpp +0 -1
- package/src/duckdb/src/transaction/commit_state.cpp +23 -14
- package/src/duckdb/src/transaction/duck_transaction.cpp +29 -25
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +18 -6
- package/src/duckdb/src/transaction/meta_transaction.cpp +3 -2
- package/src/duckdb/src/transaction/rollback_state.cpp +5 -2
- package/src/duckdb/src/transaction/transaction_context.cpp +9 -1
- package/src/duckdb/src/transaction/undo_buffer.cpp +35 -27
- package/src/duckdb/src/transaction/undo_buffer_allocator.cpp +72 -0
- package/src/duckdb/src/transaction/wal_write_state.cpp +12 -10
- package/src/duckdb/src/verification/copied_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +7 -5
- package/src/duckdb/src/verification/external_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/fetch_row_verifier.cpp +7 -4
- package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +8 -4
- package/src/duckdb/src/verification/parsed_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -12
- package/src/duckdb/src/verification/statement_verifier.cpp +20 -15
- package/src/duckdb/src/verification/unoptimized_statement_verifier.cpp +7 -4
- package/src/duckdb/third_party/fsst/libfsst.hpp +1 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +15 -22
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +4 -2
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +2 -4
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +14278 -13832
- package/src/duckdb/third_party/parquet/parquet_types.cpp +3410 -1686
- package/src/duckdb/third_party/parquet/parquet_types.h +1585 -1204
- package/src/duckdb/third_party/skiplist/SkipList.h +0 -1
- package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +13 -15
- package/src/duckdb/third_party/zstd/common/debug.cpp +36 -0
- package/src/duckdb/third_party/zstd/common/entropy_common.cpp +173 -49
- package/src/duckdb/third_party/zstd/common/error_private.cpp +11 -3
- package/src/duckdb/third_party/zstd/common/fse_decompress.cpp +126 -97
- package/src/duckdb/third_party/zstd/common/pool.cpp +376 -0
- package/src/duckdb/third_party/zstd/common/threading.cpp +193 -0
- package/src/duckdb/third_party/zstd/common/xxhash.cpp +18 -14
- package/src/duckdb/third_party/zstd/common/zstd_common.cpp +3 -38
- package/src/duckdb/third_party/zstd/compress/fse_compress.cpp +93 -165
- package/src/duckdb/third_party/zstd/compress/hist.cpp +28 -31
- package/src/duckdb/third_party/zstd/compress/huf_compress.cpp +957 -291
- package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3988 -1124
- package/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +120 -43
- package/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +47 -23
- package/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +274 -424
- package/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +403 -153
- package/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +741 -268
- package/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +1339 -278
- package/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +334 -222
- package/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +674 -298
- package/src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp +1885 -0
- package/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +1247 -586
- package/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +18 -17
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +724 -270
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1193 -393
- package/src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp +30 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp +171 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp +80 -0
- package/src/duckdb/third_party/zstd/dict/cover.cpp +1271 -0
- package/src/duckdb/third_party/zstd/dict/divsufsort.cpp +1916 -0
- package/src/duckdb/third_party/zstd/dict/fastcover.cpp +775 -0
- package/src/duckdb/third_party/zstd/dict/zdict.cpp +1139 -0
- package/src/duckdb/third_party/zstd/include/zdict.h +473 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/allocations.h +58 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bits.h +204 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +88 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +243 -47
- package/src/duckdb/third_party/zstd/include/zstd/common/cpu.h +253 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/debug.h +31 -31
- package/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +94 -6
- package/src/duckdb/third_party/zstd/include/zstd/common/fse.h +424 -64
- package/src/duckdb/third_party/zstd/include/zstd/common/huf.h +255 -70
- package/src/duckdb/third_party/zstd/include/zstd/common/mem.h +125 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/pool.h +84 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/portability_macros.h +158 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/threading.h +152 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash.h → xxhash.hpp} +0 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash_static.h → xxhash_static.hpp} +1 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_deps.h +122 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +143 -174
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_trace.h +159 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/clevels.h +136 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +4 -4
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +631 -220
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +17 -7
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +2 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +3 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +256 -153
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +16 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +145 -11
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +14 -6
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm_geartab.h +110 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +33 -9
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstdmt_compress.h +107 -0
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +20 -6
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +88 -16
- package/src/duckdb/third_party/zstd/include/zstd/deprecated/zbuff.h +214 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/cover.h +156 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/divsufsort.h +62 -0
- package/src/duckdb/third_party/zstd/include/zstd.h +2171 -93
- package/src/duckdb/third_party/zstd/include/{zstd/common/zstd_errors.h → zstd_errors.h} +32 -10
- package/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +8 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +12 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_nested.cpp +6 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_array.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_bit.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_date.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_enum.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_generic.cpp +18 -0
- package/src/duckdb/ub_extension_core_functions_scalar_list.cpp +22 -0
- package/src/duckdb/ub_extension_core_functions_scalar_map.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_math.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_operators.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_random.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_string.cpp +48 -0
- package/src/duckdb/ub_extension_core_functions_scalar_struct.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_union.cpp +6 -0
- package/src/duckdb/ub_src_common.cpp +4 -0
- package/src/duckdb/ub_src_common_arrow.cpp +3 -1
- package/src/duckdb/ub_src_execution.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_aggregate.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +2 -0
- package/src/duckdb/ub_src_execution_sample.cpp +4 -0
- package/src/duckdb/ub_src_function.cpp +6 -0
- package/src/duckdb/ub_src_function_aggregate.cpp +0 -2
- package/src/duckdb/ub_src_function_aggregate_distributive.cpp +3 -1
- package/src/duckdb/ub_src_function_scalar.cpp +2 -8
- package/src/duckdb/ub_src_function_scalar_date.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_generic.cpp +2 -2
- package/src/duckdb/ub_src_function_scalar_map.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_operator.cpp +8 -0
- package/src/duckdb/ub_src_function_scalar_string.cpp +10 -0
- package/src/duckdb/ub_src_function_scalar_struct.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_system.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +6 -0
- package/src/duckdb/ub_src_function_window.cpp +36 -0
- package/src/duckdb/ub_src_logging.cpp +8 -0
- package/src/duckdb/ub_src_main_settings.cpp +3 -1
- package/src/duckdb/ub_src_optimizer.cpp +8 -0
- package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
- package/src/duckdb/ub_src_parser.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
- package/src/duckdb/ub_src_planner.cpp +2 -0
- package/src/duckdb/ub_src_planner_filter.cpp +6 -0
- package/src/duckdb/ub_src_storage_compression.cpp +4 -0
- package/src/duckdb/ub_src_storage_compression_dictionary.cpp +8 -0
- package/src/duckdb/ub_src_storage_compression_roaring.cpp +10 -0
- package/src/duckdb/ub_src_transaction.cpp +2 -0
- package/vendor.py +1 -1
- package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +0 -6003
- package/src/duckdb/extension/json/yyjson/yyjson.cpp +0 -8218
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +0 -78
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +0 -91
- package/src/duckdb/src/common/cycle_counter.cpp +0 -76
- package/src/duckdb/src/common/field_writer.cpp +0 -97
- package/src/duckdb/src/common/http_state.cpp +0 -95
- package/src/duckdb/src/common/preserved_error.cpp +0 -87
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
- package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +0 -27
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +0 -36
- package/src/duckdb/src/common/serializer/format_serializer.cpp +0 -15
- package/src/duckdb/src/common/serializer.cpp +0 -24
- package/src/duckdb/src/common/types/chunk_collection.cpp +0 -190
- package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +0 -183
- package/src/duckdb/src/core_functions/scalar/date/current.cpp +0 -54
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +0 -78
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +0 -70
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +0 -412
- package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +0 -28
- package/src/duckdb/src/core_functions/scalar/string/jaro_winkler.cpp +0 -71
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/execution/index/art/node16.cpp +0 -196
- package/src/duckdb/src/execution/index/art/node4.cpp +0 -189
- package/src/duckdb/src/execution/index/unknown_index.cpp +0 -65
- package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +0 -595
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +0 -434
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +0 -89
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +0 -90
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +0 -95
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +0 -494
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +0 -35
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +0 -99
- package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -689
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -242
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +0 -695
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -280
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +0 -666
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +0 -499
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -207
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/execution/physical_plan/plan_limit_percent.cpp +0 -18
- package/src/duckdb/src/execution/physical_plan/plan_show_select.cpp +0 -47
- package/src/duckdb/src/execution/reservoir_sample.cpp +0 -324
- package/src/duckdb/src/execution/window_executor.cpp +0 -1830
- package/src/duckdb/src/execution/window_segment_tree.cpp +0 -2073
- package/src/duckdb/src/extension_forward_decl/icu.cpp +0 -59
- package/src/duckdb/src/function/aggregate/distributive_functions.cpp +0 -15
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +0 -29
- package/src/duckdb/src/function/scalar/generic_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +0 -143
- package/src/duckdb/src/function/scalar/operators.cpp +0 -14
- package/src/duckdb/src/function/scalar/sequence_functions.cpp +0 -10
- package/src/duckdb/src/function/scalar/string_functions.cpp +0 -22
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +0 -173
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +0 -101
- package/src/duckdb/src/include/duckdb/catalog/mapping_value.hpp +0 -92
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +0 -42
- package/src/duckdb/src/include/duckdb/common/cycle_counter.hpp +0 -68
- package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +0 -34
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +0 -113
- package/src/duckdb/src/include/duckdb/common/platform.h +0 -58
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +0 -59
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +0 -192
- package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +0 -63
- package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +0 -253
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +0 -155
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +0 -163
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +0 -35
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +0 -228
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +0 -93
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +0 -197
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +0 -30
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +0 -99
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +0 -87
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +0 -80
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +0 -104
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +0 -79
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -119
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +0 -72
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +0 -110
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_file_handle.hpp +0 -59
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +0 -46
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +0 -210
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +0 -131
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +0 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +0 -167
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +0 -343
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -165
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_optimizer.hpp +0 -45
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +0 -57
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +0 -45
- package/src/duckdb/src/include/duckdb/parser/statement/show_statement.hpp +0 -32
- package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +0 -49
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +0 -42
- package/src/duckdb/src/main/settings/settings.cpp +0 -2056
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +0 -36
- package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +0 -19
- package/src/duckdb/src/parser/statement/show_statement.cpp +0 -15
- package/src/duckdb/src/planner/binder/statement/bind_show.cpp +0 -30
- package/src/duckdb/src/planner/operator/logical_limit_percent.cpp +0 -14
- package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +0 -70
- package/src/duckdb/third_party/fsst/fsst_avx512.cpp +0 -140
- package/src/duckdb/third_party/fsst/fsst_avx512.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll1.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll2.inc +0 -114
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll3.inc +0 -171
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll4.inc +0 -228
- package/src/duckdb/third_party/parquet/parquet_constants.cpp +0 -17
- package/src/duckdb/third_party/parquet/parquet_constants.h +0 -24
- package/src/duckdb/third_party/re2/util/pod_array.h +0 -55
- package/src/duckdb/third_party/re2/util/sparse_array.h +0 -392
- package/src/duckdb/third_party/re2/util/sparse_set.h +0 -264
- package/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +0 -421
- package/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +0 -238
- package/src/duckdb/third_party/zstd/include/zstd_static.h +0 -1070
- package/src/duckdb/ub_src_core_functions.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_algebraic.cpp +0 -8
- package/src/duckdb/ub_src_core_functions_aggregate_distributive.cpp +0 -24
- package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +0 -12
- package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_regression.cpp +0 -14
- package/src/duckdb/ub_src_core_functions_scalar_array.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_bit.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_scalar_date.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_enum.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +0 -18
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_map.cpp +0 -16
- package/src/duckdb/ub_src_core_functions_scalar_math.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_operators.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_random.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +0 -58
- package/src/duckdb/ub_src_core_functions_scalar_struct.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_union.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +0 -18
- package/src/duckdb/ub_src_function_scalar_operators.cpp +0 -8
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/covar.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/stddev.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/sum_helpers.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/array_kernels.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/function_list.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/secret_functions.hpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/multiply.cpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/subtract.cpp +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
/* ******************************************************************
|
2
2
|
* Huffman encoder, part of New Generation Entropy library
|
3
|
-
* Copyright (c)
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
4
4
|
*
|
5
5
|
* You can contact the author at :
|
6
6
|
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
@@ -23,45 +23,131 @@
|
|
23
23
|
/* **************************************************************
|
24
24
|
* Includes
|
25
25
|
****************************************************************/
|
26
|
-
#include
|
27
|
-
#include <stdio.h> /* printf (debug) */
|
26
|
+
#include "zstd/common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
|
28
27
|
#include "zstd/common/compiler.h"
|
29
28
|
#include "zstd/common/bitstream.h"
|
30
29
|
#include "zstd/compress/hist.h"
|
30
|
+
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
|
31
31
|
#include "zstd/common/fse.h" /* header compression */
|
32
|
-
#include "zstd/common/fse_static.h"
|
33
32
|
#include "zstd/common/huf.h"
|
34
|
-
#include "zstd/common/huf_static.h"
|
35
33
|
#include "zstd/common/error_private.h"
|
34
|
+
#include "zstd/common/bits.h" /* ZSTD_highbit32 */
|
36
35
|
|
36
|
+
namespace duckdb_zstd {
|
37
37
|
|
38
38
|
/* **************************************************************
|
39
39
|
* Error Management
|
40
40
|
****************************************************************/
|
41
|
-
|
41
|
+
#define HUF_isError ERR_isError
|
42
42
|
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
|
43
43
|
|
44
44
|
|
45
|
-
namespace duckdb_zstd {
|
46
45
|
/* **************************************************************
|
47
|
-
*
|
46
|
+
* Required declarations
|
47
|
+
****************************************************************/
|
48
|
+
typedef struct nodeElt_s {
|
49
|
+
U32 count;
|
50
|
+
U16 parent;
|
51
|
+
BYTE byte;
|
52
|
+
BYTE nbBits;
|
53
|
+
} nodeElt;
|
54
|
+
|
55
|
+
|
56
|
+
/* **************************************************************
|
57
|
+
* Debug Traces
|
48
58
|
****************************************************************/
|
49
|
-
|
59
|
+
|
60
|
+
#if DEBUGLEVEL >= 2
|
61
|
+
|
62
|
+
static size_t showU32(const U32* arr, size_t size)
|
50
63
|
{
|
51
|
-
|
64
|
+
size_t u;
|
65
|
+
for (u=0; u<size; u++) {
|
66
|
+
RAWLOG(6, " %u", arr[u]); (void)arr;
|
67
|
+
}
|
68
|
+
RAWLOG(6, " \n");
|
69
|
+
return size;
|
52
70
|
}
|
53
71
|
|
72
|
+
static size_t HUF_getNbBits(HUF_CElt elt);
|
73
|
+
|
74
|
+
static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
|
75
|
+
{
|
76
|
+
size_t u;
|
77
|
+
for (u=0; u<size; u++) {
|
78
|
+
RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
|
79
|
+
}
|
80
|
+
RAWLOG(6, " \n");
|
81
|
+
return size;
|
82
|
+
|
83
|
+
}
|
84
|
+
|
85
|
+
static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
|
86
|
+
{
|
87
|
+
size_t u;
|
88
|
+
for (u=0; u<size; u++) {
|
89
|
+
RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
|
90
|
+
}
|
91
|
+
RAWLOG(6, " \n");
|
92
|
+
return size;
|
93
|
+
}
|
94
|
+
|
95
|
+
static size_t showHNodeBits(const nodeElt* hnode, size_t size)
|
96
|
+
{
|
97
|
+
size_t u;
|
98
|
+
for (u=0; u<size; u++) {
|
99
|
+
RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
|
100
|
+
}
|
101
|
+
RAWLOG(6, " \n");
|
102
|
+
return size;
|
103
|
+
}
|
104
|
+
|
105
|
+
#endif
|
106
|
+
|
54
107
|
|
55
108
|
/* *******************************************************
|
56
109
|
* HUF : Huffman block compression
|
57
110
|
*********************************************************/
|
111
|
+
#define HUF_WORKSPACE_MAX_ALIGNMENT 8
|
112
|
+
|
113
|
+
static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
|
114
|
+
{
|
115
|
+
size_t const mask = align - 1;
|
116
|
+
size_t const rem = (size_t)workspace & mask;
|
117
|
+
size_t const add = (align - rem) & mask;
|
118
|
+
BYTE* const aligned = (BYTE*)workspace + add;
|
119
|
+
assert((align & (align - 1)) == 0); /* pow 2 */
|
120
|
+
assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
|
121
|
+
if (*workspaceSizePtr >= add) {
|
122
|
+
assert(add < align);
|
123
|
+
assert(((size_t)aligned & mask) == 0);
|
124
|
+
*workspaceSizePtr -= add;
|
125
|
+
return aligned;
|
126
|
+
} else {
|
127
|
+
*workspaceSizePtr = 0;
|
128
|
+
return NULL;
|
129
|
+
}
|
130
|
+
}
|
131
|
+
|
132
|
+
|
58
133
|
/* HUF_compressWeights() :
|
59
134
|
* Same as FSE_compress(), but dedicated to huff0's weights compression.
|
60
135
|
* The use case needs much less stack memory.
|
61
136
|
* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
|
62
137
|
*/
|
63
138
|
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
|
64
|
-
|
139
|
+
|
140
|
+
typedef struct {
|
141
|
+
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
|
142
|
+
U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
|
143
|
+
unsigned count[HUF_TABLELOG_MAX+1];
|
144
|
+
S16 norm[HUF_TABLELOG_MAX+1];
|
145
|
+
} HUF_CompressWeightsWksp;
|
146
|
+
|
147
|
+
static size_t
|
148
|
+
HUF_compressWeights(void* dst, size_t dstSize,
|
149
|
+
const void* weightTable, size_t wtSize,
|
150
|
+
void* workspace, size_t workspaceSize)
|
65
151
|
{
|
66
152
|
BYTE* const ostart = (BYTE*) dst;
|
67
153
|
BYTE* op = ostart;
|
@@ -69,33 +155,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
|
69
155
|
|
70
156
|
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
|
71
157
|
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
|
158
|
+
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
72
159
|
|
73
|
-
|
74
|
-
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
|
75
|
-
|
76
|
-
unsigned count[HUF_TABLELOG_MAX+1];
|
77
|
-
S16 norm[HUF_TABLELOG_MAX+1];
|
160
|
+
if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
|
78
161
|
|
79
162
|
/* init conditions */
|
80
163
|
if (wtSize <= 1) return 0; /* Not compressible */
|
81
164
|
|
82
165
|
/* Scan input and build symbol stats */
|
83
|
-
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
|
166
|
+
{ unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
|
84
167
|
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
|
85
168
|
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
86
169
|
}
|
87
170
|
|
88
171
|
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
|
89
|
-
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
|
172
|
+
CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
|
90
173
|
|
91
174
|
/* Write table description header */
|
92
|
-
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
|
175
|
+
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
|
93
176
|
op += hSize;
|
94
177
|
}
|
95
178
|
|
96
179
|
/* Compress */
|
97
|
-
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
|
98
|
-
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
|
180
|
+
CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
|
181
|
+
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
|
99
182
|
if (cSize == 0) return 0; /* not enough space for compressed data */
|
100
183
|
op += cSize;
|
101
184
|
}
|
@@ -103,35 +186,94 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
|
103
186
|
return (size_t)(op-ostart);
|
104
187
|
}
|
105
188
|
|
189
|
+
static size_t HUF_getNbBits(HUF_CElt elt)
|
190
|
+
{
|
191
|
+
return elt & 0xFF;
|
192
|
+
}
|
193
|
+
|
194
|
+
static size_t HUF_getNbBitsFast(HUF_CElt elt)
|
195
|
+
{
|
196
|
+
return elt;
|
197
|
+
}
|
198
|
+
|
199
|
+
static size_t HUF_getValue(HUF_CElt elt)
|
200
|
+
{
|
201
|
+
return elt & ~(size_t)0xFF;
|
202
|
+
}
|
203
|
+
|
204
|
+
static size_t HUF_getValueFast(HUF_CElt elt)
|
205
|
+
{
|
206
|
+
return elt;
|
207
|
+
}
|
208
|
+
|
209
|
+
static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
|
210
|
+
{
|
211
|
+
assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
|
212
|
+
*elt = nbBits;
|
213
|
+
}
|
214
|
+
|
215
|
+
static void HUF_setValue(HUF_CElt* elt, size_t value)
|
216
|
+
{
|
217
|
+
size_t const nbBits = HUF_getNbBits(*elt);
|
218
|
+
if (nbBits > 0) {
|
219
|
+
assert((value >> nbBits) == 0);
|
220
|
+
*elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
|
221
|
+
}
|
222
|
+
}
|
106
223
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
224
|
+
HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
|
225
|
+
{
|
226
|
+
HUF_CTableHeader header;
|
227
|
+
ZSTD_memcpy(&header, ctable, sizeof(header));
|
228
|
+
return header;
|
229
|
+
}
|
111
230
|
|
112
|
-
|
113
|
-
`CTable` : Huffman tree to save, using huf representation.
|
114
|
-
@return : size of saved CTable */
|
115
|
-
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
116
|
-
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
|
231
|
+
static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
|
117
232
|
{
|
233
|
+
HUF_CTableHeader header;
|
234
|
+
HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
|
235
|
+
ZSTD_memset(&header, 0, sizeof(header));
|
236
|
+
assert(tableLog < 256);
|
237
|
+
header.tableLog = (BYTE)tableLog;
|
238
|
+
assert(maxSymbolValue < 256);
|
239
|
+
header.maxSymbolValue = (BYTE)maxSymbolValue;
|
240
|
+
ZSTD_memcpy(ctable, &header, sizeof(header));
|
241
|
+
}
|
242
|
+
|
243
|
+
typedef struct {
|
244
|
+
HUF_CompressWeightsWksp wksp;
|
118
245
|
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
|
119
246
|
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
|
247
|
+
} HUF_WriteCTableWksp;
|
248
|
+
|
249
|
+
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
|
250
|
+
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
|
251
|
+
void* workspace, size_t workspaceSize)
|
252
|
+
{
|
253
|
+
HUF_CElt const* const ct = CTable + 1;
|
120
254
|
BYTE* op = (BYTE*)dst;
|
121
255
|
U32 n;
|
256
|
+
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
122
257
|
|
123
|
-
|
258
|
+
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
|
259
|
+
|
260
|
+
assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
|
261
|
+
assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
|
262
|
+
|
263
|
+
/* check conditions */
|
264
|
+
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
|
124
265
|
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
|
125
266
|
|
126
267
|
/* convert to weight */
|
127
|
-
bitsToWeight[0] = 0;
|
268
|
+
wksp->bitsToWeight[0] = 0;
|
128
269
|
for (n=1; n<huffLog+1; n++)
|
129
|
-
bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
|
270
|
+
wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
|
130
271
|
for (n=0; n<maxSymbolValue; n++)
|
131
|
-
huffWeight[n] = bitsToWeight[
|
272
|
+
wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
|
132
273
|
|
133
274
|
/* attempt weights compression by FSE */
|
134
|
-
|
275
|
+
if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
|
276
|
+
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
|
135
277
|
if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
|
136
278
|
op[0] = (BYTE)hSize;
|
137
279
|
return hSize+1;
|
@@ -141,9 +283,9 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
|
141
283
|
if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
|
142
284
|
if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
|
143
285
|
op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
|
144
|
-
huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
|
286
|
+
wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
|
145
287
|
for (n=0; n<maxSymbolValue; n+=2)
|
146
|
-
op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
|
288
|
+
op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
|
147
289
|
return ((maxSymbolValue+1)/2) + 1;
|
148
290
|
}
|
149
291
|
|
@@ -154,34 +296,38 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
|
154
296
|
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
|
155
297
|
U32 tableLog = 0;
|
156
298
|
U32 nbSymbols = 0;
|
299
|
+
HUF_CElt* const ct = CTable + 1;
|
157
300
|
|
158
301
|
/* get symbol weights */
|
159
302
|
CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
|
303
|
+
*hasZeroWeights = (rankVal[0] > 0);
|
160
304
|
|
161
305
|
/* check result */
|
162
306
|
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
163
307
|
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
|
164
308
|
|
309
|
+
*maxSymbolValuePtr = nbSymbols - 1;
|
310
|
+
|
311
|
+
HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
|
312
|
+
|
165
313
|
/* Prepare base value per rank */
|
166
314
|
{ U32 n, nextRankStart = 0;
|
167
315
|
for (n=1; n<=tableLog; n++) {
|
168
|
-
U32
|
316
|
+
U32 curr = nextRankStart;
|
169
317
|
nextRankStart += (rankVal[n] << (n-1));
|
170
|
-
rankVal[n] =
|
318
|
+
rankVal[n] = curr;
|
171
319
|
} }
|
172
320
|
|
173
321
|
/* fill nbBits */
|
174
|
-
*hasZeroWeights = 0;
|
175
322
|
{ U32 n; for (n=0; n<nbSymbols; n++) {
|
176
323
|
const U32 w = huffWeight[n];
|
177
|
-
|
178
|
-
CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
|
324
|
+
HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
|
179
325
|
} }
|
180
326
|
|
181
327
|
/* fill val */
|
182
328
|
{ U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
|
183
329
|
U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
|
184
|
-
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[
|
330
|
+
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
|
185
331
|
/* determine stating value per rank */
|
186
332
|
valPerRank[tableLog+1] = 0; /* for w==0 */
|
187
333
|
{ U16 min = 0;
|
@@ -191,92 +337,151 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
|
191
337
|
min >>= 1;
|
192
338
|
} }
|
193
339
|
/* assign value within rank, symbol order */
|
194
|
-
{ U32 n; for (n=0; n<nbSymbols; n++)
|
340
|
+
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
|
195
341
|
}
|
196
342
|
|
197
|
-
*maxSymbolValuePtr = nbSymbols - 1;
|
198
343
|
return readSize;
|
199
344
|
}
|
200
345
|
|
201
|
-
U32
|
346
|
+
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
|
202
347
|
{
|
203
|
-
const HUF_CElt*
|
348
|
+
const HUF_CElt* const ct = CTable + 1;
|
204
349
|
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
|
205
|
-
|
350
|
+
if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
|
351
|
+
return 0;
|
352
|
+
return (U32)HUF_getNbBits(ct[symbolValue]);
|
206
353
|
}
|
207
354
|
|
208
355
|
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
356
|
+
/**
|
357
|
+
* HUF_setMaxHeight():
|
358
|
+
* Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
|
359
|
+
*
|
360
|
+
* It attempts to convert all nodes with nbBits > @targetNbBits
|
361
|
+
* to employ @targetNbBits instead. Then it adjusts the tree
|
362
|
+
* so that it remains a valid canonical Huffman tree.
|
363
|
+
*
|
364
|
+
* @pre The sum of the ranks of each symbol == 2^largestBits,
|
365
|
+
* where largestBits == huffNode[lastNonNull].nbBits.
|
366
|
+
* @post The sum of the ranks of each symbol == 2^largestBits,
|
367
|
+
* where largestBits is the return value (expected <= targetNbBits).
|
368
|
+
*
|
369
|
+
* @param huffNode The Huffman tree modified in place to enforce targetNbBits.
|
370
|
+
* It's presumed sorted, from most frequent to rarest symbol.
|
371
|
+
* @param lastNonNull The symbol with the lowest count in the Huffman tree.
|
372
|
+
* @param targetNbBits The allowed number of bits, which the Huffman tree
|
373
|
+
* may not respect. After this function the Huffman tree will
|
374
|
+
* respect targetNbBits.
|
375
|
+
* @return The maximum number of bits of the Huffman tree after adjustment.
|
376
|
+
*/
|
377
|
+
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
|
217
378
|
{
|
218
379
|
const U32 largestBits = huffNode[lastNonNull].nbBits;
|
219
|
-
|
380
|
+
/* early exit : no elt > targetNbBits, so the tree is already valid. */
|
381
|
+
if (largestBits <= targetNbBits) return largestBits;
|
382
|
+
|
383
|
+
DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
|
220
384
|
|
221
385
|
/* there are several too large elements (at least >= 2) */
|
222
386
|
{ int totalCost = 0;
|
223
|
-
const U32 baseCost = 1 << (largestBits -
|
387
|
+
const U32 baseCost = 1 << (largestBits - targetNbBits);
|
224
388
|
int n = (int)lastNonNull;
|
225
389
|
|
226
|
-
|
390
|
+
/* Adjust any ranks > targetNbBits to targetNbBits.
|
391
|
+
* Compute totalCost, which is how far the sum of the ranks is
|
392
|
+
* we are over 2^largestBits after adjust the offending ranks.
|
393
|
+
*/
|
394
|
+
while (huffNode[n].nbBits > targetNbBits) {
|
227
395
|
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
|
228
|
-
huffNode[n].nbBits = (BYTE)
|
229
|
-
n
|
230
|
-
}
|
231
|
-
|
396
|
+
huffNode[n].nbBits = (BYTE)targetNbBits;
|
397
|
+
n--;
|
398
|
+
}
|
399
|
+
/* n stops at huffNode[n].nbBits <= targetNbBits */
|
400
|
+
assert(huffNode[n].nbBits <= targetNbBits);
|
401
|
+
/* n end at index of smallest symbol using < targetNbBits */
|
402
|
+
while (huffNode[n].nbBits == targetNbBits) --n;
|
232
403
|
|
233
|
-
/* renorm totalCost
|
234
|
-
|
404
|
+
/* renorm totalCost from 2^largestBits to 2^targetNbBits
|
405
|
+
* note : totalCost is necessarily a multiple of baseCost */
|
406
|
+
assert(((U32)totalCost & (baseCost - 1)) == 0);
|
407
|
+
totalCost >>= (largestBits - targetNbBits);
|
408
|
+
assert(totalCost > 0);
|
235
409
|
|
236
410
|
/* repay normalized cost */
|
237
411
|
{ U32 const noSymbol = 0xF0F0F0F0;
|
238
412
|
U32 rankLast[HUF_TABLELOG_MAX+2];
|
239
413
|
|
240
|
-
/* Get pos of last (smallest) symbol per rank */
|
241
|
-
|
242
|
-
{ U32 currentNbBits =
|
414
|
+
/* Get pos of last (smallest = lowest cum. count) symbol per rank */
|
415
|
+
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
|
416
|
+
{ U32 currentNbBits = targetNbBits;
|
243
417
|
int pos;
|
244
418
|
for (pos=n ; pos >= 0; pos--) {
|
245
419
|
if (huffNode[pos].nbBits >= currentNbBits) continue;
|
246
|
-
currentNbBits = huffNode[pos].nbBits; /* <
|
247
|
-
rankLast[
|
420
|
+
currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
|
421
|
+
rankLast[targetNbBits-currentNbBits] = (U32)pos;
|
248
422
|
} }
|
249
423
|
|
250
424
|
while (totalCost > 0) {
|
251
|
-
|
425
|
+
/* Try to reduce the next power of 2 above totalCost because we
|
426
|
+
* gain back half the rank.
|
427
|
+
*/
|
428
|
+
U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
|
252
429
|
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
|
253
430
|
U32 const highPos = rankLast[nBitsToDecrease];
|
254
431
|
U32 const lowPos = rankLast[nBitsToDecrease-1];
|
255
432
|
if (highPos == noSymbol) continue;
|
433
|
+
/* Decrease highPos if no symbols of lowPos or if it is
|
434
|
+
* not cheaper to remove 2 lowPos than highPos.
|
435
|
+
*/
|
256
436
|
if (lowPos == noSymbol) break;
|
257
437
|
{ U32 const highTotal = huffNode[highPos].count;
|
258
438
|
U32 const lowTotal = 2 * huffNode[lowPos].count;
|
259
439
|
if (highTotal <= lowTotal) break;
|
260
440
|
} }
|
261
441
|
/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
|
442
|
+
assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
|
262
443
|
/* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
|
263
444
|
while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
|
264
|
-
nBitsToDecrease
|
445
|
+
nBitsToDecrease++;
|
446
|
+
assert(rankLast[nBitsToDecrease] != noSymbol);
|
447
|
+
/* Increase the number of bits to gain back half the rank cost. */
|
265
448
|
totalCost -= 1 << (nBitsToDecrease-1);
|
449
|
+
huffNode[rankLast[nBitsToDecrease]].nbBits++;
|
450
|
+
|
451
|
+
/* Fix up the new rank.
|
452
|
+
* If the new rank was empty, this symbol is now its smallest.
|
453
|
+
* Otherwise, this symbol will be the largest in the new rank so no adjustment.
|
454
|
+
*/
|
266
455
|
if (rankLast[nBitsToDecrease-1] == noSymbol)
|
267
|
-
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
|
268
|
-
|
456
|
+
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
|
457
|
+
/* Fix up the old rank.
|
458
|
+
* If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
|
459
|
+
* it must be the only symbol in its rank, so the old rank now has no symbols.
|
460
|
+
* Otherwise, since the Huffman nodes are sorted by count, the previous position is now
|
461
|
+
* the smallest node in the rank. If the previous position belongs to a different rank,
|
462
|
+
* then the rank is now empty.
|
463
|
+
*/
|
269
464
|
if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
|
270
465
|
rankLast[nBitsToDecrease] = noSymbol;
|
271
466
|
else {
|
272
467
|
rankLast[nBitsToDecrease]--;
|
273
|
-
if (huffNode[rankLast[nBitsToDecrease]].nbBits !=
|
468
|
+
if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
|
274
469
|
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
|
275
|
-
|
276
|
-
|
470
|
+
}
|
471
|
+
} /* while (totalCost > 0) */
|
472
|
+
|
473
|
+
/* If we've removed too much weight, then we have to add it back.
|
474
|
+
* To avoid overshooting again, we only adjust the smallest rank.
|
475
|
+
* We take the largest nodes from the lowest rank 0 and move them
|
476
|
+
* to rank 1. There's guaranteed to be enough rank 0 symbols because
|
477
|
+
* TODO.
|
478
|
+
*/
|
277
479
|
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
|
278
|
-
|
279
|
-
|
480
|
+
/* special case : no rank 1 symbol (using targetNbBits-1);
|
481
|
+
* let's create one from largest rank 0 (using targetNbBits).
|
482
|
+
*/
|
483
|
+
if (rankLast[1] == noSymbol) {
|
484
|
+
while (huffNode[n].nbBits == targetNbBits) n--;
|
280
485
|
huffNode[n+1].nbBits--;
|
281
486
|
assert(n >= 0);
|
282
487
|
rankLast[1] = (U32)(n+1);
|
@@ -286,47 +491,178 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
|
286
491
|
huffNode[ rankLast[1] + 1 ].nbBits--;
|
287
492
|
rankLast[1]++;
|
288
493
|
totalCost ++;
|
289
|
-
|
494
|
+
}
|
495
|
+
} /* repay normalized cost */
|
496
|
+
} /* there are several too large elements (at least >= 2) */
|
290
497
|
|
291
|
-
return
|
498
|
+
return targetNbBits;
|
292
499
|
}
|
293
500
|
|
294
501
|
typedef struct {
|
295
|
-
|
296
|
-
|
502
|
+
U16 base;
|
503
|
+
U16 curr;
|
297
504
|
} rankPos;
|
298
505
|
|
299
|
-
typedef nodeElt huffNodeTable[
|
506
|
+
typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
|
300
507
|
|
301
|
-
|
508
|
+
/* Number of buckets available for HUF_sort() */
|
509
|
+
#define RANK_POSITION_TABLE_SIZE 192
|
302
510
|
|
303
511
|
typedef struct {
|
304
512
|
huffNodeTable huffNodeTbl;
|
305
513
|
rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
|
306
514
|
} HUF_buildCTable_wksp_tables;
|
307
515
|
|
308
|
-
|
309
|
-
|
516
|
+
/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
|
517
|
+
* Strategy is to use as many buckets as possible for representing distinct
|
518
|
+
* counts while using the remainder to represent all "large" counts.
|
519
|
+
*
|
520
|
+
* To satisfy this requirement for 192 buckets, we can do the following:
|
521
|
+
* Let buckets 0-166 represent distinct counts of [0, 166]
|
522
|
+
* Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
|
523
|
+
*/
|
524
|
+
#define RANK_POSITION_MAX_COUNT_LOG 32
|
525
|
+
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
|
526
|
+
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
|
527
|
+
|
528
|
+
/* Return the appropriate bucket index for a given count. See definition of
|
529
|
+
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
|
530
|
+
*/
|
531
|
+
static U32 HUF_getIndex(U32 const count) {
|
532
|
+
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
|
533
|
+
? count
|
534
|
+
: ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
|
535
|
+
}
|
536
|
+
|
537
|
+
/* Helper swap function for HUF_quickSortPartition() */
|
538
|
+
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
|
539
|
+
nodeElt tmp = *a;
|
540
|
+
*a = *b;
|
541
|
+
*b = tmp;
|
542
|
+
}
|
543
|
+
|
544
|
+
/* Returns 0 if the huffNode array is not sorted by descending count */
|
545
|
+
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
|
546
|
+
U32 i;
|
547
|
+
for (i = 1; i < maxSymbolValue1; ++i) {
|
548
|
+
if (huffNode[i].count > huffNode[i-1].count) {
|
549
|
+
return 0;
|
550
|
+
}
|
551
|
+
}
|
552
|
+
return 1;
|
553
|
+
}
|
554
|
+
|
555
|
+
/* Insertion sort by descending order */
|
556
|
+
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
|
557
|
+
int i;
|
558
|
+
int const size = high-low+1;
|
559
|
+
huffNode += low;
|
560
|
+
for (i = 1; i < size; ++i) {
|
561
|
+
nodeElt const key = huffNode[i];
|
562
|
+
int j = i - 1;
|
563
|
+
while (j >= 0 && huffNode[j].count < key.count) {
|
564
|
+
huffNode[j + 1] = huffNode[j];
|
565
|
+
j--;
|
566
|
+
}
|
567
|
+
huffNode[j + 1] = key;
|
568
|
+
}
|
569
|
+
}
|
570
|
+
|
571
|
+
/* Pivot helper function for quicksort. */
|
572
|
+
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
|
573
|
+
/* Simply select rightmost element as pivot. "Better" selectors like
|
574
|
+
* median-of-three don't experimentally appear to have any benefit.
|
575
|
+
*/
|
576
|
+
U32 const pivot = arr[high].count;
|
577
|
+
int i = low - 1;
|
578
|
+
int j = low;
|
579
|
+
for ( ; j < high; j++) {
|
580
|
+
if (arr[j].count > pivot) {
|
581
|
+
i++;
|
582
|
+
HUF_swapNodes(&arr[i], &arr[j]);
|
583
|
+
}
|
584
|
+
}
|
585
|
+
HUF_swapNodes(&arr[i + 1], &arr[high]);
|
586
|
+
return i + 1;
|
587
|
+
}
|
588
|
+
|
589
|
+
/* Classic quicksort by descending with partially iterative calls
|
590
|
+
* to reduce worst case callstack size.
|
591
|
+
*/
|
592
|
+
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
|
593
|
+
int const kInsertionSortThreshold = 8;
|
594
|
+
if (high - low < kInsertionSortThreshold) {
|
595
|
+
HUF_insertionSort(arr, low, high);
|
596
|
+
return;
|
597
|
+
}
|
598
|
+
while (low < high) {
|
599
|
+
int const idx = HUF_quickSortPartition(arr, low, high);
|
600
|
+
if (idx - low < high - idx) {
|
601
|
+
HUF_simpleQuickSort(arr, low, idx - 1);
|
602
|
+
low = idx + 1;
|
603
|
+
} else {
|
604
|
+
HUF_simpleQuickSort(arr, idx + 1, high);
|
605
|
+
high = idx - 1;
|
606
|
+
}
|
607
|
+
}
|
608
|
+
}
|
609
|
+
|
610
|
+
/**
|
611
|
+
* HUF_sort():
|
612
|
+
* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
|
613
|
+
* This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
|
614
|
+
*
|
615
|
+
* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
|
616
|
+
* Must have (maxSymbolValue + 1) entries.
|
617
|
+
* @param[in] count Histogram of the symbols.
|
618
|
+
* @param[in] maxSymbolValue Maximum symbol value.
|
619
|
+
* @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
|
620
|
+
*/
|
621
|
+
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
|
310
622
|
U32 n;
|
623
|
+
U32 const maxSymbolValue1 = maxSymbolValue+1;
|
624
|
+
|
625
|
+
/* Compute base and set curr to base.
|
626
|
+
* For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
|
627
|
+
* See HUF_getIndex to see bucketing strategy.
|
628
|
+
* We attribute each symbol to lowerRank's base value, because we want to know where
|
629
|
+
* each rank begins in the output, so for rank R we want to count ranks R+1 and above.
|
630
|
+
*/
|
631
|
+
ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
|
632
|
+
for (n = 0; n < maxSymbolValue1; ++n) {
|
633
|
+
U32 lowerRank = HUF_getIndex(count[n]);
|
634
|
+
assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
|
635
|
+
rankPosition[lowerRank].base++;
|
636
|
+
}
|
311
637
|
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
rankPosition[
|
638
|
+
assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
|
639
|
+
/* Set up the rankPosition table */
|
640
|
+
for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
|
641
|
+
rankPosition[n-1].base += rankPosition[n].base;
|
642
|
+
rankPosition[n-1].curr = rankPosition[n-1].base;
|
316
643
|
}
|
317
|
-
|
318
|
-
|
319
|
-
for (n=0; n
|
644
|
+
|
645
|
+
/* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
|
646
|
+
for (n = 0; n < maxSymbolValue1; ++n) {
|
320
647
|
U32 const c = count[n];
|
321
|
-
U32 const r =
|
322
|
-
U32 pos = rankPosition[r].
|
323
|
-
|
324
|
-
huffNode[pos] = huffNode[pos-1];
|
325
|
-
pos--;
|
326
|
-
}
|
648
|
+
U32 const r = HUF_getIndex(c) + 1;
|
649
|
+
U32 const pos = rankPosition[r].curr++;
|
650
|
+
assert(pos < maxSymbolValue1);
|
327
651
|
huffNode[pos].count = c;
|
328
652
|
huffNode[pos].byte = (BYTE)n;
|
329
653
|
}
|
654
|
+
|
655
|
+
/* Sort each bucket. */
|
656
|
+
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
|
657
|
+
int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
|
658
|
+
U32 const bucketStartIdx = rankPosition[n].base;
|
659
|
+
if (bucketSize > 1) {
|
660
|
+
assert(bucketStartIdx < maxSymbolValue1);
|
661
|
+
HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
|
662
|
+
}
|
663
|
+
}
|
664
|
+
|
665
|
+
assert(HUF_isSorted(huffNode, maxSymbolValue1));
|
330
666
|
}
|
331
667
|
|
332
668
|
|
@@ -336,28 +672,21 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
|
|
336
672
|
*/
|
337
673
|
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
|
338
674
|
|
339
|
-
|
675
|
+
/* HUF_buildTree():
|
676
|
+
* Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
|
677
|
+
*
|
678
|
+
* @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
|
679
|
+
* @param maxSymbolValue The maximum symbol value.
|
680
|
+
* @return The smallest node in the Huffman tree (by count).
|
681
|
+
*/
|
682
|
+
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
|
340
683
|
{
|
341
|
-
|
342
|
-
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
|
343
|
-
nodeElt* const huffNode = huffNode0+1;
|
684
|
+
nodeElt* const huffNode0 = huffNode - 1;
|
344
685
|
int nonNullRank;
|
345
686
|
int lowS, lowN;
|
346
687
|
int nodeNb = STARTNODE;
|
347
688
|
int n, nodeRoot;
|
348
|
-
|
349
|
-
/* safety checks */
|
350
|
-
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
351
|
-
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
|
352
|
-
return ERROR(workSpace_tooSmall);
|
353
|
-
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
354
|
-
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
|
355
|
-
return ERROR(maxSymbolValue_tooLarge);
|
356
|
-
memset(huffNode0, 0, sizeof(huffNodeTable));
|
357
|
-
|
358
|
-
/* sort, decreasing order */
|
359
|
-
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
|
360
|
-
|
689
|
+
DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
|
361
690
|
/* init for parents */
|
362
691
|
nonNullRank = (int)maxSymbolValue;
|
363
692
|
while(huffNode[nonNullRank].count == 0) nonNullRank--;
|
@@ -384,127 +713,414 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
|
|
384
713
|
for (n=0; n<=nonNullRank; n++)
|
385
714
|
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
|
386
715
|
|
387
|
-
|
388
|
-
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
|
716
|
+
DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
|
389
717
|
|
390
|
-
|
391
|
-
{ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
392
|
-
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
|
393
|
-
int const alphabetSize = (int)(maxSymbolValue + 1);
|
394
|
-
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
395
|
-
for (n=0; n<=nonNullRank; n++)
|
396
|
-
nbPerRank[huffNode[n].nbBits]++;
|
397
|
-
/* determine stating value per rank */
|
398
|
-
{ U16 min = 0;
|
399
|
-
for (n=(int)maxNbBits; n>0; n--) {
|
400
|
-
valPerRank[n] = min; /* get starting value within each rank */
|
401
|
-
min += nbPerRank[n];
|
402
|
-
min >>= 1;
|
403
|
-
} }
|
404
|
-
for (n=0; n<alphabetSize; n++)
|
405
|
-
tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
|
406
|
-
for (n=0; n<alphabetSize; n++)
|
407
|
-
tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
|
408
|
-
}
|
409
|
-
|
410
|
-
return maxNbBits;
|
718
|
+
return nonNullRank;
|
411
719
|
}
|
412
720
|
|
413
|
-
/**
|
414
|
-
*
|
415
|
-
*
|
721
|
+
/**
|
722
|
+
* HUF_buildCTableFromTree():
|
723
|
+
* Build the CTable given the Huffman tree in huffNode.
|
724
|
+
*
|
725
|
+
* @param[out] CTable The output Huffman CTable.
|
726
|
+
* @param huffNode The Huffman tree.
|
727
|
+
* @param nonNullRank The last and smallest node in the Huffman tree.
|
728
|
+
* @param maxSymbolValue The maximum symbol value.
|
729
|
+
* @param maxNbBits The exact maximum number of bits used in the Huffman tree.
|
416
730
|
*/
|
417
|
-
|
731
|
+
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
|
418
732
|
{
|
419
|
-
|
420
|
-
|
733
|
+
HUF_CElt* const ct = CTable + 1;
|
734
|
+
/* fill result into ctable (val, nbBits) */
|
735
|
+
int n;
|
736
|
+
U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
737
|
+
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
|
738
|
+
int const alphabetSize = (int)(maxSymbolValue + 1);
|
739
|
+
for (n=0; n<=nonNullRank; n++)
|
740
|
+
nbPerRank[huffNode[n].nbBits]++;
|
741
|
+
/* determine starting value per rank */
|
742
|
+
{ U16 min = 0;
|
743
|
+
for (n=(int)maxNbBits; n>0; n--) {
|
744
|
+
valPerRank[n] = min; /* get starting value within each rank */
|
745
|
+
min += nbPerRank[n];
|
746
|
+
min >>= 1;
|
747
|
+
} }
|
748
|
+
for (n=0; n<alphabetSize; n++)
|
749
|
+
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
|
750
|
+
for (n=0; n<alphabetSize; n++)
|
751
|
+
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
|
752
|
+
|
753
|
+
HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
|
754
|
+
}
|
755
|
+
|
756
|
+
size_t
|
757
|
+
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
|
758
|
+
void* workSpace, size_t wkspSize)
|
759
|
+
{
|
760
|
+
HUF_buildCTable_wksp_tables* const wksp_tables =
|
761
|
+
(HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
|
762
|
+
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
|
763
|
+
nodeElt* const huffNode = huffNode0+1;
|
764
|
+
int nonNullRank;
|
765
|
+
|
766
|
+
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
|
767
|
+
|
768
|
+
DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
|
769
|
+
|
770
|
+
/* safety checks */
|
771
|
+
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
|
772
|
+
return ERROR(workSpace_tooSmall);
|
773
|
+
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
774
|
+
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
|
775
|
+
return ERROR(maxSymbolValue_tooLarge);
|
776
|
+
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
|
777
|
+
|
778
|
+
/* sort, decreasing order */
|
779
|
+
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
|
780
|
+
DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
|
781
|
+
|
782
|
+
/* build tree */
|
783
|
+
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
|
784
|
+
|
785
|
+
/* determine and enforce maxTableLog */
|
786
|
+
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
|
787
|
+
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
788
|
+
|
789
|
+
HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
|
790
|
+
|
791
|
+
return maxNbBits;
|
421
792
|
}
|
422
793
|
|
423
794
|
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
|
424
795
|
{
|
796
|
+
HUF_CElt const* ct = CTable + 1;
|
425
797
|
size_t nbBits = 0;
|
426
798
|
int s;
|
427
799
|
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
428
|
-
nbBits +=
|
800
|
+
nbBits += HUF_getNbBits(ct[s]) * count[s];
|
429
801
|
}
|
430
802
|
return nbBits >> 3;
|
431
803
|
}
|
432
804
|
|
433
805
|
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
806
|
+
HUF_CTableHeader header = HUF_readCTableHeader(CTable);
|
807
|
+
HUF_CElt const* ct = CTable + 1;
|
808
|
+
int bad = 0;
|
809
|
+
int s;
|
810
|
+
|
811
|
+
assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
|
812
|
+
|
813
|
+
if (header.maxSymbolValue < maxSymbolValue)
|
814
|
+
return 0;
|
815
|
+
|
816
|
+
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
817
|
+
bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
|
818
|
+
}
|
819
|
+
return !bad;
|
440
820
|
}
|
441
821
|
|
442
822
|
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
|
443
823
|
|
824
|
+
/** HUF_CStream_t:
|
825
|
+
* Huffman uses its own BIT_CStream_t implementation.
|
826
|
+
* There are three major differences from BIT_CStream_t:
|
827
|
+
* 1. HUF_addBits() takes a HUF_CElt (size_t) which is
|
828
|
+
* the pair (nbBits, value) in the format:
|
829
|
+
* format:
|
830
|
+
* - Bits [0, 4) = nbBits
|
831
|
+
* - Bits [4, 64 - nbBits) = 0
|
832
|
+
* - Bits [64 - nbBits, 64) = value
|
833
|
+
* 2. The bitContainer is built from the upper bits and
|
834
|
+
* right shifted. E.g. to add a new value of N bits
|
835
|
+
* you right shift the bitContainer by N, then or in
|
836
|
+
* the new value into the N upper bits.
|
837
|
+
* 3. The bitstream has two bit containers. You can add
|
838
|
+
* bits to the second container and merge them into
|
839
|
+
* the first container.
|
840
|
+
*/
|
841
|
+
|
842
|
+
#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
|
843
|
+
|
844
|
+
typedef struct {
|
845
|
+
size_t bitContainer[2];
|
846
|
+
size_t bitPos[2];
|
847
|
+
|
848
|
+
BYTE* startPtr;
|
849
|
+
BYTE* ptr;
|
850
|
+
BYTE* endPtr;
|
851
|
+
} HUF_CStream_t;
|
852
|
+
|
853
|
+
/**! HUF_initCStream():
|
854
|
+
* Initializes the bitstream.
|
855
|
+
* @returns 0 or an error code.
|
856
|
+
*/
|
857
|
+
static size_t HUF_initCStream(HUF_CStream_t* bitC,
|
858
|
+
void* startPtr, size_t dstCapacity)
|
859
|
+
{
|
860
|
+
ZSTD_memset(bitC, 0, sizeof(*bitC));
|
861
|
+
bitC->startPtr = (BYTE*)startPtr;
|
862
|
+
bitC->ptr = bitC->startPtr;
|
863
|
+
bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
|
864
|
+
if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
|
865
|
+
return 0;
|
866
|
+
}
|
867
|
+
|
868
|
+
/*! HUF_addBits():
|
869
|
+
* Adds the symbol stored in HUF_CElt elt to the bitstream.
|
870
|
+
*
|
871
|
+
* @param elt The element we're adding. This is a (nbBits, value) pair.
|
872
|
+
* See the HUF_CStream_t docs for the format.
|
873
|
+
* @param idx Insert into the bitstream at this idx.
|
874
|
+
* @param kFast This is a template parameter. If the bitstream is guaranteed
|
875
|
+
* to have at least 4 unused bits after this call it may be 1,
|
876
|
+
* otherwise it must be 0. HUF_addBits() is faster when fast is set.
|
877
|
+
*/
|
878
|
+
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
|
879
|
+
{
|
880
|
+
assert(idx <= 1);
|
881
|
+
assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
|
882
|
+
/* This is efficient on x86-64 with BMI2 because shrx
|
883
|
+
* only reads the low 6 bits of the register. The compiler
|
884
|
+
* knows this and elides the mask. When fast is set,
|
885
|
+
* every operation can use the same value loaded from elt.
|
886
|
+
*/
|
887
|
+
bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
|
888
|
+
bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
|
889
|
+
/* We only read the low 8 bits of bitC->bitPos[idx] so it
|
890
|
+
* doesn't matter that the high bits have noise from the value.
|
891
|
+
*/
|
892
|
+
bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
|
893
|
+
assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
894
|
+
/* The last 4-bits of elt are dirty if fast is set,
|
895
|
+
* so we must not be overwriting bits that have already been
|
896
|
+
* inserted into the bit container.
|
897
|
+
*/
|
898
|
+
#if DEBUGLEVEL >= 1
|
899
|
+
{
|
900
|
+
size_t const nbBits = HUF_getNbBits(elt);
|
901
|
+
size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
|
902
|
+
(void)dirtyBits;
|
903
|
+
/* Middle bits are 0. */
|
904
|
+
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
|
905
|
+
/* We didn't overwrite any bits in the bit container. */
|
906
|
+
assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
907
|
+
(void)dirtyBits;
|
908
|
+
}
|
909
|
+
#endif
|
910
|
+
}
|
911
|
+
|
912
|
+
FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
|
913
|
+
{
|
914
|
+
bitC->bitContainer[1] = 0;
|
915
|
+
bitC->bitPos[1] = 0;
|
916
|
+
}
|
917
|
+
|
918
|
+
/*! HUF_mergeIndex1() :
|
919
|
+
* Merges the bit container @ index 1 into the bit container @ index 0
|
920
|
+
* and zeros the bit container @ index 1.
|
921
|
+
*/
|
922
|
+
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
|
923
|
+
{
|
924
|
+
assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
|
925
|
+
bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
|
926
|
+
bitC->bitContainer[0] |= bitC->bitContainer[1];
|
927
|
+
bitC->bitPos[0] += bitC->bitPos[1];
|
928
|
+
assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
929
|
+
}
|
930
|
+
|
931
|
+
/*! HUF_flushBits() :
|
932
|
+
* Flushes the bits in the bit container @ index 0.
|
933
|
+
*
|
934
|
+
* @post bitPos will be < 8.
|
935
|
+
* @param kFast If kFast is set then we must know a-priori that
|
936
|
+
* the bit container will not overflow.
|
937
|
+
*/
|
938
|
+
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
|
939
|
+
{
|
940
|
+
/* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
|
941
|
+
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
942
|
+
size_t const nbBytes = nbBits >> 3;
|
943
|
+
/* The top nbBits bits of bitContainer are the ones we need. */
|
944
|
+
size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
|
945
|
+
/* Mask bitPos to account for the bytes we consumed. */
|
946
|
+
bitC->bitPos[0] &= 7;
|
947
|
+
assert(nbBits > 0);
|
948
|
+
assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
|
949
|
+
assert(bitC->ptr <= bitC->endPtr);
|
950
|
+
MEM_writeLEST(bitC->ptr, bitContainer);
|
951
|
+
bitC->ptr += nbBytes;
|
952
|
+
assert(!kFast || bitC->ptr <= bitC->endPtr);
|
953
|
+
if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
|
954
|
+
/* bitContainer doesn't need to be modified because the leftover
|
955
|
+
* bits are already the top bitPos bits. And we don't care about
|
956
|
+
* noise in the lower values.
|
957
|
+
*/
|
958
|
+
}
|
959
|
+
|
960
|
+
/*! HUF_endMark()
|
961
|
+
* @returns The Huffman stream end mark: A 1-bit value = 1.
|
962
|
+
*/
|
963
|
+
static HUF_CElt HUF_endMark(void)
|
964
|
+
{
|
965
|
+
HUF_CElt endMark;
|
966
|
+
HUF_setNbBits(&endMark, 1);
|
967
|
+
HUF_setValue(&endMark, 1);
|
968
|
+
return endMark;
|
969
|
+
}
|
970
|
+
|
971
|
+
/*! HUF_closeCStream() :
|
972
|
+
* @return Size of CStream, in bytes,
|
973
|
+
* or 0 if it could not fit into dstBuffer */
|
974
|
+
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
|
975
|
+
{
|
976
|
+
HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
|
977
|
+
HUF_flushBits(bitC, /* kFast */ 0);
|
978
|
+
{
|
979
|
+
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
980
|
+
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
|
981
|
+
return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
|
982
|
+
}
|
983
|
+
}
|
984
|
+
|
444
985
|
FORCE_INLINE_TEMPLATE void
|
445
|
-
HUF_encodeSymbol(
|
986
|
+
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
|
446
987
|
{
|
447
|
-
|
988
|
+
HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
|
448
989
|
}
|
449
990
|
|
450
|
-
|
991
|
+
FORCE_INLINE_TEMPLATE void
|
992
|
+
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
|
993
|
+
const BYTE* ip, size_t srcSize,
|
994
|
+
const HUF_CElt* ct,
|
995
|
+
int kUnroll, int kFastFlush, int kLastFast)
|
996
|
+
{
|
997
|
+
/* Join to kUnroll */
|
998
|
+
int n = (int)srcSize;
|
999
|
+
int rem = n % kUnroll;
|
1000
|
+
if (rem > 0) {
|
1001
|
+
for (; rem > 0; --rem) {
|
1002
|
+
HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
|
1003
|
+
}
|
1004
|
+
HUF_flushBits(bitC, kFastFlush);
|
1005
|
+
}
|
1006
|
+
assert(n % kUnroll == 0);
|
451
1007
|
|
452
|
-
|
453
|
-
if (
|
1008
|
+
/* Join to 2 * kUnroll */
|
1009
|
+
if (n % (2 * kUnroll)) {
|
1010
|
+
int u;
|
1011
|
+
for (u = 1; u < kUnroll; ++u) {
|
1012
|
+
HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
|
1013
|
+
}
|
1014
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
|
1015
|
+
HUF_flushBits(bitC, kFastFlush);
|
1016
|
+
n -= kUnroll;
|
1017
|
+
}
|
1018
|
+
assert(n % (2 * kUnroll) == 0);
|
1019
|
+
|
1020
|
+
for (; n>0; n-= 2 * kUnroll) {
|
1021
|
+
/* Encode kUnroll symbols into the bitstream @ index 0. */
|
1022
|
+
int u;
|
1023
|
+
for (u = 1; u < kUnroll; ++u) {
|
1024
|
+
HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
|
1025
|
+
}
|
1026
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
|
1027
|
+
HUF_flushBits(bitC, kFastFlush);
|
1028
|
+
/* Encode kUnroll symbols into the bitstream @ index 1.
|
1029
|
+
* This allows us to start filling the bit container
|
1030
|
+
* without any data dependencies.
|
1031
|
+
*/
|
1032
|
+
HUF_zeroIndex1(bitC);
|
1033
|
+
for (u = 1; u < kUnroll; ++u) {
|
1034
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
|
1035
|
+
}
|
1036
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
|
1037
|
+
/* Merge bitstream @ index 1 into the bitstream @ index 0 */
|
1038
|
+
HUF_mergeIndex1(bitC);
|
1039
|
+
HUF_flushBits(bitC, kFastFlush);
|
1040
|
+
}
|
1041
|
+
assert(n == 0);
|
1042
|
+
|
1043
|
+
}
|
1044
|
+
|
1045
|
+
/**
|
1046
|
+
* Returns a tight upper bound on the output space needed by Huffman
|
1047
|
+
* with 8 bytes buffer to handle over-writes. If the output is at least
|
1048
|
+
* this large we don't need to do bounds checks during Huffman encoding.
|
1049
|
+
*/
|
1050
|
+
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
|
1051
|
+
{
|
1052
|
+
return ((srcSize * tableLog) >> 3) + 8;
|
1053
|
+
}
|
454
1054
|
|
455
|
-
#define HUF_FLUSHBITS_2(stream) \
|
456
|
-
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
|
457
1055
|
|
458
1056
|
FORCE_INLINE_TEMPLATE size_t
|
459
1057
|
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
|
460
1058
|
const void* src, size_t srcSize,
|
461
1059
|
const HUF_CElt* CTable)
|
462
1060
|
{
|
1061
|
+
U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
|
1062
|
+
HUF_CElt const* ct = CTable + 1;
|
463
1063
|
const BYTE* ip = (const BYTE*) src;
|
464
1064
|
BYTE* const ostart = (BYTE*)dst;
|
465
1065
|
BYTE* const oend = ostart + dstSize;
|
466
|
-
|
467
|
-
size_t n;
|
468
|
-
BIT_CStream_t bitC;
|
1066
|
+
HUF_CStream_t bitC;
|
469
1067
|
|
470
1068
|
/* init */
|
471
1069
|
if (dstSize < 8) return 0; /* not enough space to compress */
|
472
|
-
{
|
1070
|
+
{ BYTE* op = ostart;
|
1071
|
+
size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
|
473
1072
|
if (HUF_isError(initErr)) return 0; }
|
474
1073
|
|
475
|
-
|
476
|
-
|
477
|
-
{
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
1074
|
+
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
|
1075
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
|
1076
|
+
else {
|
1077
|
+
if (MEM_32bits()) {
|
1078
|
+
switch (tableLog) {
|
1079
|
+
case 11:
|
1080
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
|
1081
|
+
break;
|
1082
|
+
case 10: ZSTD_FALLTHROUGH;
|
1083
|
+
case 9: ZSTD_FALLTHROUGH;
|
1084
|
+
case 8:
|
1085
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
|
1086
|
+
break;
|
1087
|
+
case 7: ZSTD_FALLTHROUGH;
|
1088
|
+
default:
|
1089
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
|
1090
|
+
break;
|
1091
|
+
}
|
1092
|
+
} else {
|
1093
|
+
switch (tableLog) {
|
1094
|
+
case 11:
|
1095
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
|
1096
|
+
break;
|
1097
|
+
case 10:
|
1098
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
|
1099
|
+
break;
|
1100
|
+
case 9:
|
1101
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
|
1102
|
+
break;
|
1103
|
+
case 8:
|
1104
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
|
1105
|
+
break;
|
1106
|
+
case 7:
|
1107
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
|
1108
|
+
break;
|
1109
|
+
case 6: ZSTD_FALLTHROUGH;
|
1110
|
+
default:
|
1111
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
|
1112
|
+
break;
|
1113
|
+
}
|
1114
|
+
}
|
500
1115
|
}
|
1116
|
+
assert(bitC.ptr <= bitC.endPtr);
|
501
1117
|
|
502
|
-
return
|
1118
|
+
return HUF_closeCStream(&bitC);
|
503
1119
|
}
|
504
1120
|
|
505
1121
|
#if DYNAMIC_BMI2
|
506
1122
|
|
507
|
-
static
|
1123
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
508
1124
|
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
|
509
1125
|
const void* src, size_t srcSize,
|
510
1126
|
const HUF_CElt* CTable)
|
@@ -523,9 +1139,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
|
|
523
1139
|
static size_t
|
524
1140
|
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
525
1141
|
const void* src, size_t srcSize,
|
526
|
-
const HUF_CElt* CTable, const int
|
1142
|
+
const HUF_CElt* CTable, const int flags)
|
527
1143
|
{
|
528
|
-
if (
|
1144
|
+
if (flags & HUF_flags_bmi2) {
|
529
1145
|
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
|
530
1146
|
}
|
531
1147
|
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
|
@@ -536,24 +1152,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
|
536
1152
|
static size_t
|
537
1153
|
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
538
1154
|
const void* src, size_t srcSize,
|
539
|
-
const HUF_CElt* CTable, const int
|
1155
|
+
const HUF_CElt* CTable, const int flags)
|
540
1156
|
{
|
541
|
-
(void)
|
1157
|
+
(void)flags;
|
542
1158
|
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
|
543
1159
|
}
|
544
1160
|
|
545
1161
|
#endif
|
546
1162
|
|
547
|
-
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
1163
|
+
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
|
548
1164
|
{
|
549
|
-
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable,
|
1165
|
+
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
|
550
1166
|
}
|
551
1167
|
|
552
|
-
|
553
1168
|
static size_t
|
554
1169
|
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
555
1170
|
const void* src, size_t srcSize,
|
556
|
-
const HUF_CElt* CTable, int
|
1171
|
+
const HUF_CElt* CTable, int flags)
|
557
1172
|
{
|
558
1173
|
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
|
559
1174
|
const BYTE* ip = (const BYTE*) src;
|
@@ -567,27 +1182,24 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
567
1182
|
op += 6; /* jumpTable */
|
568
1183
|
|
569
1184
|
assert(op <= oend);
|
570
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
|
571
|
-
if (cSize==0) return 0;
|
572
|
-
assert(cSize <= 65535);
|
1185
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
|
1186
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
573
1187
|
MEM_writeLE16(ostart, (U16)cSize);
|
574
1188
|
op += cSize;
|
575
1189
|
}
|
576
1190
|
|
577
1191
|
ip += segmentSize;
|
578
1192
|
assert(op <= oend);
|
579
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
|
580
|
-
if (cSize==0) return 0;
|
581
|
-
assert(cSize <= 65535);
|
1193
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
|
1194
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
582
1195
|
MEM_writeLE16(ostart+2, (U16)cSize);
|
583
1196
|
op += cSize;
|
584
1197
|
}
|
585
1198
|
|
586
1199
|
ip += segmentSize;
|
587
1200
|
assert(op <= oend);
|
588
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
|
589
|
-
if (cSize==0) return 0;
|
590
|
-
assert(cSize <= 65535);
|
1201
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
|
1202
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
591
1203
|
MEM_writeLE16(ostart+4, (U16)cSize);
|
592
1204
|
op += cSize;
|
593
1205
|
}
|
@@ -595,17 +1207,17 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
595
1207
|
ip += segmentSize;
|
596
1208
|
assert(op <= oend);
|
597
1209
|
assert(ip <= iend);
|
598
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable,
|
599
|
-
if (cSize==0) return 0;
|
1210
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
|
1211
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
600
1212
|
op += cSize;
|
601
1213
|
}
|
602
1214
|
|
603
1215
|
return (size_t)(op-ostart);
|
604
1216
|
}
|
605
1217
|
|
606
|
-
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
1218
|
+
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
|
607
1219
|
{
|
608
|
-
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable,
|
1220
|
+
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
|
609
1221
|
}
|
610
1222
|
|
611
1223
|
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
|
@@ -613,11 +1225,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
|
|
613
1225
|
static size_t HUF_compressCTable_internal(
|
614
1226
|
BYTE* const ostart, BYTE* op, BYTE* const oend,
|
615
1227
|
const void* src, size_t srcSize,
|
616
|
-
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int
|
1228
|
+
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
|
617
1229
|
{
|
618
1230
|
size_t const cSize = (nbStreams==HUF_singleStream) ?
|
619
|
-
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable,
|
620
|
-
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable,
|
1231
|
+
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
|
1232
|
+
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
|
621
1233
|
if (HUF_isError(cSize)) { return cSize; }
|
622
1234
|
if (cSize==0) { return 0; } /* uncompressible */
|
623
1235
|
op += cSize;
|
@@ -629,31 +1241,113 @@ static size_t HUF_compressCTable_internal(
|
|
629
1241
|
|
630
1242
|
typedef struct {
|
631
1243
|
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
|
632
|
-
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX
|
633
|
-
|
1244
|
+
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
|
1245
|
+
union {
|
1246
|
+
HUF_buildCTable_wksp_tables buildCTable_wksp;
|
1247
|
+
HUF_WriteCTableWksp writeCTable_wksp;
|
1248
|
+
U32 hist_wksp[HIST_WKSP_SIZE_U32];
|
1249
|
+
} wksps;
|
634
1250
|
} HUF_compress_tables_t;
|
635
1251
|
|
1252
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
|
1253
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
|
1254
|
+
|
1255
|
+
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
|
1256
|
+
{
|
1257
|
+
unsigned cardinality = 0;
|
1258
|
+
unsigned i;
|
1259
|
+
|
1260
|
+
for (i = 0; i < maxSymbolValue + 1; i++) {
|
1261
|
+
if (count[i] != 0) cardinality += 1;
|
1262
|
+
}
|
1263
|
+
|
1264
|
+
return cardinality;
|
1265
|
+
}
|
1266
|
+
|
1267
|
+
unsigned HUF_minTableLog(unsigned symbolCardinality)
|
1268
|
+
{
|
1269
|
+
U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
|
1270
|
+
return minBitsSymbols;
|
1271
|
+
}
|
1272
|
+
|
1273
|
+
unsigned HUF_optimalTableLog(
|
1274
|
+
unsigned maxTableLog,
|
1275
|
+
size_t srcSize,
|
1276
|
+
unsigned maxSymbolValue,
|
1277
|
+
void* workSpace, size_t wkspSize,
|
1278
|
+
HUF_CElt* table,
|
1279
|
+
const unsigned* count,
|
1280
|
+
int flags)
|
1281
|
+
{
|
1282
|
+
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
1283
|
+
assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
|
1284
|
+
|
1285
|
+
if (!(flags & HUF_flags_optimalDepth)) {
|
1286
|
+
/* cheap evaluation, based on FSE */
|
1287
|
+
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
|
1288
|
+
}
|
1289
|
+
|
1290
|
+
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
|
1291
|
+
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
|
1292
|
+
size_t hSize, newSize;
|
1293
|
+
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
|
1294
|
+
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
|
1295
|
+
size_t optSize = ((size_t) ~0) - 1;
|
1296
|
+
unsigned optLog = maxTableLog, optLogGuess;
|
1297
|
+
|
1298
|
+
DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
|
1299
|
+
|
1300
|
+
/* Search until size increases */
|
1301
|
+
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
|
1302
|
+
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
|
1303
|
+
|
1304
|
+
{ size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
|
1305
|
+
if (ERR_isError(maxBits)) continue;
|
1306
|
+
|
1307
|
+
if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
|
1308
|
+
|
1309
|
+
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
|
1310
|
+
}
|
1311
|
+
|
1312
|
+
if (ERR_isError(hSize)) continue;
|
1313
|
+
|
1314
|
+
newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
|
1315
|
+
|
1316
|
+
if (newSize > optSize + 1) {
|
1317
|
+
break;
|
1318
|
+
}
|
1319
|
+
|
1320
|
+
if (newSize < optSize) {
|
1321
|
+
optSize = newSize;
|
1322
|
+
optLog = optLogGuess;
|
1323
|
+
}
|
1324
|
+
}
|
1325
|
+
assert(optLog <= HUF_TABLELOG_MAX);
|
1326
|
+
return optLog;
|
1327
|
+
}
|
1328
|
+
}
|
1329
|
+
|
636
1330
|
/* HUF_compress_internal() :
|
637
|
-
* `
|
1331
|
+
* `workSpace_align4` must be aligned on 4-bytes boundaries,
|
1332
|
+
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
|
638
1333
|
static size_t
|
639
1334
|
HUF_compress_internal (void* dst, size_t dstSize,
|
640
1335
|
const void* src, size_t srcSize,
|
641
1336
|
unsigned maxSymbolValue, unsigned huffLog,
|
642
1337
|
HUF_nbStreams_e nbStreams,
|
643
1338
|
void* workSpace, size_t wkspSize,
|
644
|
-
HUF_CElt* oldHufTable, HUF_repeat* repeat, int
|
645
|
-
const int bmi2)
|
1339
|
+
HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
|
646
1340
|
{
|
647
|
-
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
|
1341
|
+
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
|
648
1342
|
BYTE* const ostart = (BYTE*)dst;
|
649
1343
|
BYTE* const oend = ostart + dstSize;
|
650
1344
|
BYTE* op = ostart;
|
651
1345
|
|
652
|
-
|
1346
|
+
DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
|
1347
|
+
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
|
653
1348
|
|
654
1349
|
/* checks & inits */
|
655
|
-
if (
|
656
|
-
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
|
1350
|
+
if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
|
657
1351
|
if (!srcSize) return 0; /* Uncompressed */
|
658
1352
|
if (!dstSize) return 0; /* cannot fit anything within dst budget */
|
659
1353
|
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
@@ -663,17 +1357,34 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
663
1357
|
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
|
664
1358
|
|
665
1359
|
/* Heuristic : If old table is valid, use it for small inputs */
|
666
|
-
if (
|
1360
|
+
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
|
667
1361
|
return HUF_compressCTable_internal(ostart, op, oend,
|
668
1362
|
src, srcSize,
|
669
|
-
nbStreams, oldHufTable,
|
1363
|
+
nbStreams, oldHufTable, flags);
|
1364
|
+
}
|
1365
|
+
|
1366
|
+
/* If uncompressible data is suspected, do a smaller sampling first */
|
1367
|
+
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
|
1368
|
+
if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
|
1369
|
+
size_t largestTotal = 0;
|
1370
|
+
DEBUGLOG(5, "input suspected incompressible : sampling to check");
|
1371
|
+
{ unsigned maxSymbolValueBegin = maxSymbolValue;
|
1372
|
+
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
1373
|
+
largestTotal += largestBegin;
|
1374
|
+
}
|
1375
|
+
{ unsigned maxSymbolValueEnd = maxSymbolValue;
|
1376
|
+
CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
1377
|
+
largestTotal += largestEnd;
|
1378
|
+
}
|
1379
|
+
if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
670
1380
|
}
|
671
1381
|
|
672
1382
|
/* Scan input and build symbol stats */
|
673
|
-
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize,
|
1383
|
+
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
|
674
1384
|
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
675
1385
|
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
676
1386
|
}
|
1387
|
+
DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
|
677
1388
|
|
678
1389
|
/* Check validity of previous table */
|
679
1390
|
if ( repeat
|
@@ -682,26 +1393,25 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
682
1393
|
*repeat = HUF_repeat_none;
|
683
1394
|
}
|
684
1395
|
/* Heuristic : use existing table for small inputs */
|
685
|
-
if (
|
1396
|
+
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
|
686
1397
|
return HUF_compressCTable_internal(ostart, op, oend,
|
687
1398
|
src, srcSize,
|
688
|
-
nbStreams, oldHufTable,
|
1399
|
+
nbStreams, oldHufTable, flags);
|
689
1400
|
}
|
690
1401
|
|
691
1402
|
/* Build Huffman Tree */
|
692
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
1403
|
+
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
|
693
1404
|
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
|
694
1405
|
maxSymbolValue, huffLog,
|
695
|
-
&table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
|
1406
|
+
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
|
696
1407
|
CHECK_F(maxBits);
|
697
1408
|
huffLog = (U32)maxBits;
|
698
|
-
|
699
|
-
memset(table->CTable + (maxSymbolValue + 1), 0,
|
700
|
-
sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
|
1409
|
+
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
|
701
1410
|
}
|
702
1411
|
|
703
1412
|
/* Write table description header */
|
704
|
-
{ CHECK_V_F(hSize,
|
1413
|
+
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
|
1414
|
+
&table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
|
705
1415
|
/* Check if using previous huffman table is beneficial */
|
706
1416
|
if (repeat && *repeat != HUF_repeat_none) {
|
707
1417
|
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
|
@@ -709,7 +1419,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
709
1419
|
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
|
710
1420
|
return HUF_compressCTable_internal(ostart, op, oend,
|
711
1421
|
src, srcSize,
|
712
|
-
nbStreams, oldHufTable,
|
1422
|
+
nbStreams, oldHufTable, flags);
|
713
1423
|
} }
|
714
1424
|
|
715
1425
|
/* Use the new huffman table */
|
@@ -717,85 +1427,41 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
717
1427
|
op += hSize;
|
718
1428
|
if (repeat) { *repeat = HUF_repeat_none; }
|
719
1429
|
if (oldHufTable)
|
720
|
-
|
1430
|
+
ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
|
721
1431
|
}
|
722
1432
|
return HUF_compressCTable_internal(ostart, op, oend,
|
723
1433
|
src, srcSize,
|
724
|
-
nbStreams, table->CTable,
|
725
|
-
}
|
726
|
-
|
727
|
-
|
728
|
-
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
729
|
-
const void* src, size_t srcSize,
|
730
|
-
unsigned maxSymbolValue, unsigned huffLog,
|
731
|
-
void* workSpace, size_t wkspSize)
|
732
|
-
{
|
733
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
734
|
-
maxSymbolValue, huffLog, HUF_singleStream,
|
735
|
-
workSpace, wkspSize,
|
736
|
-
NULL, NULL, 0, 0 /*bmi2*/);
|
1434
|
+
nbStreams, table->CTable, flags);
|
737
1435
|
}
|
738
1436
|
|
739
1437
|
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
740
1438
|
const void* src, size_t srcSize,
|
741
1439
|
unsigned maxSymbolValue, unsigned huffLog,
|
742
1440
|
void* workSpace, size_t wkspSize,
|
743
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int
|
1441
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
|
744
1442
|
{
|
1443
|
+
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
|
745
1444
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
746
1445
|
maxSymbolValue, huffLog, HUF_singleStream,
|
747
1446
|
workSpace, wkspSize, hufTable,
|
748
|
-
repeat,
|
749
|
-
}
|
750
|
-
|
751
|
-
size_t HUF_compress1X (void* dst, size_t dstSize,
|
752
|
-
const void* src, size_t srcSize,
|
753
|
-
unsigned maxSymbolValue, unsigned huffLog)
|
754
|
-
{
|
755
|
-
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
756
|
-
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
1447
|
+
repeat, flags);
|
757
1448
|
}
|
758
1449
|
|
759
1450
|
/* HUF_compress4X_repeat():
|
760
1451
|
* compress input using 4 streams.
|
761
|
-
*
|
762
|
-
|
763
|
-
const void* src, size_t srcSize,
|
764
|
-
unsigned maxSymbolValue, unsigned huffLog,
|
765
|
-
void* workSpace, size_t wkspSize)
|
766
|
-
{
|
767
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
768
|
-
maxSymbolValue, huffLog, HUF_fourStreams,
|
769
|
-
workSpace, wkspSize,
|
770
|
-
NULL, NULL, 0, 0 /*bmi2*/);
|
771
|
-
}
|
772
|
-
|
773
|
-
/* HUF_compress4X_repeat():
|
774
|
-
* compress input using 4 streams.
|
775
|
-
* re-use an existing huffman compression table */
|
1452
|
+
* consider skipping quickly
|
1453
|
+
* reuse an existing huffman compression table */
|
776
1454
|
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
777
1455
|
const void* src, size_t srcSize,
|
778
1456
|
unsigned maxSymbolValue, unsigned huffLog,
|
779
1457
|
void* workSpace, size_t wkspSize,
|
780
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int
|
1458
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
|
781
1459
|
{
|
1460
|
+
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
|
782
1461
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
783
1462
|
maxSymbolValue, huffLog, HUF_fourStreams,
|
784
1463
|
workSpace, wkspSize,
|
785
|
-
hufTable, repeat,
|
786
|
-
}
|
787
|
-
|
788
|
-
size_t HUF_compress2 (void* dst, size_t dstSize,
|
789
|
-
const void* src, size_t srcSize,
|
790
|
-
unsigned maxSymbolValue, unsigned huffLog)
|
791
|
-
{
|
792
|
-
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
793
|
-
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
1464
|
+
hufTable, repeat, flags);
|
794
1465
|
}
|
795
1466
|
|
796
|
-
|
797
|
-
{
|
798
|
-
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
|
799
|
-
}
|
800
|
-
|
801
|
-
}
|
1467
|
+
} // namespace duckdb_zstd
|