duckdb 1.1.2-dev6.0 → 1.1.4-dev11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +5 -54
- package/binding.gyp +73 -52
- package/package.json +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/avg.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/corr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/covar.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/stddev.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/approx_count.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/arg_min_max.cpp +66 -18
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitagg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitstring_agg.cpp +5 -7
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bool.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/kurtosis.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/product.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/skew.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/string_agg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/sum.cpp +13 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approx_top_k.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approximate_quantile.cpp +51 -15
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mad.cpp +25 -10
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mode.cpp +215 -71
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/quantile.cpp +58 -31
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/binned_histogram.cpp +9 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/histogram.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/list.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_avg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_count.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_intercept.cpp +6 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_r2.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_slope.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxx_syy.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxy.cpp +3 -3
- package/src/duckdb/extension/core_functions/core_functions_extension.cpp +85 -0
- package/src/duckdb/{src → extension}/core_functions/function_list.cpp +30 -51
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/corr.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/distributive_functions.hpp +16 -21
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/histogram_helpers.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/holistic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/nested_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_helpers.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_sort_tree.hpp +140 -58
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_state.hpp +50 -43
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_count.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_slope.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/array_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/bit_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/blob_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/date_functions.hpp +22 -55
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/debug_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/enum_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/generic_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/list_functions.hpp +4 -4
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/map_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/math_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/operators_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/random_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/string_functions.hpp +10 -103
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/struct_functions.hpp +1 -19
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/union_functions.hpp +1 -1
- package/src/duckdb/extension/core_functions/include/core_functions_extension.hpp +22 -0
- package/src/duckdb/{src → extension}/core_functions/lambda_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_functions.cpp +11 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_value.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/bit/bitstring.cpp +12 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/base64.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/encode.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/date/age.cpp +9 -3
- package/src/duckdb/extension/core_functions/scalar/date/current.cpp +29 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_diff.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_part.cpp +42 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_sub.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_trunc.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/epoch.cpp +19 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/date/make_date.cpp +40 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/date/time_bucket.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/to_interval.cpp +54 -28
- package/src/duckdb/{src → extension}/core_functions/scalar/debug/vector_type.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/enum/enum_functions.cpp +2 -7
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/alias.cpp +2 -2
- package/src/duckdb/{src/function → extension/core_functions}/scalar/generic/binning.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/can_implicitly_cast.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/current_setting.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/hash.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/least.cpp +30 -10
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/stats.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/system_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/typeof.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/array_slice.cpp +93 -88
- package/src/duckdb/{src → extension}/core_functions/scalar/list/flatten.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_aggregates.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_distance.cpp +8 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_filter.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_has_any_or_all.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_reduce.cpp +5 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_sort.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_transform.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_value.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/range.cpp +7 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/cardinality.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map.cpp +5 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_concat.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_entries.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_extract.cpp +13 -25
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_from_entries.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_keys_values.cpp +11 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/math/numeric.cpp +83 -37
- package/src/duckdb/{src → extension}/core_functions/scalar/operators/bitwise.cpp +19 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/random/random.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/random/setseed.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/ascii.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/bar.cpp +6 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/chr.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/damerau_levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/format_bytes.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hamming.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hex.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/instr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/jaccard.cpp +1 -1
- package/src/duckdb/extension/core_functions/scalar/string/jaro_winkler.cpp +112 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/string/left_right.cpp +6 -6
- package/src/duckdb/{src → extension}/core_functions/scalar/string/levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/pad.cpp +9 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/string/parse_path.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/printf.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/repeat.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/replace.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/reverse.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/starts_with.cpp +5 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/to_base.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/translate.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/trim.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/unicode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/url_encode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/struct/struct_insert.cpp +25 -31
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_extract.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_tag.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_value.cpp +3 -3
- package/src/duckdb/extension/icu/icu-dateadd.cpp +16 -11
- package/src/duckdb/extension/icu/icu-datefunc.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datepart.cpp +8 -5
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +8 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-makedate.cpp +18 -7
- package/src/duckdb/extension/icu/icu-strptime.cpp +9 -3
- package/src/duckdb/extension/icu/icu-table-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-timebucket.cpp +4 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +67 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +60 -5
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -1
- package/src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/normlzr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/rbbinode.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/schriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/stringtriebuilder.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uchriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/brkiter.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/bytestriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/chariter.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/dtintrv.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/locid.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/messagepattern.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/normlzr.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/parsepos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/rbbi.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/schriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/strenum.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringpiece.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringtriebuilder.h +9 -9
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/ucharstriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uchriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uniset.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/unistr.h +12 -12
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uobject.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unifiedcache.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/uniset.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustr_titlecase_brkiter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustrenum.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/alphaindex.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/coll.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/datefmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dcfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen_impl.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtrule.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/fpositer.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measunit.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measure.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +9 -9
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbtz.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/region.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rulebasedcollator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/selfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/simpletz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/sortkey.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tmutamt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames.cpp +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzrule.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/tztrans.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/alphaindex.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/calendar.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/choicfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coll.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/currpinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/datefmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dcfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/decimfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvfmt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtptngen.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fieldpos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fmtable.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fpositer.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measunit.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measure.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/msgfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/numfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbnf.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbtz.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/region.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/search.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/selfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/simpletz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/smpdtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/sortkey.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/stsearch.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tblcoll.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/timezone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutamt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tznames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzrule.h +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tztrans.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/vtzone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp +2 -2
- package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -1
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
- package/src/duckdb/extension/json/include/json_common.hpp +14 -10
- package/src/duckdb/extension/json/include/json_scan.hpp +48 -7
- package/src/duckdb/extension/json/include/json_structure.hpp +2 -1
- package/src/duckdb/extension/json/include/json_transform.hpp +5 -2
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +57 -20
- package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +7 -6
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +6 -5
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +20 -17
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +48 -17
- package/src/duckdb/extension/json/json_functions/read_json.cpp +83 -34
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -3
- package/src/duckdb/extension/json/json_functions.cpp +14 -16
- package/src/duckdb/extension/json/json_scan.cpp +36 -16
- package/src/duckdb/extension/json/json_serializer.cpp +1 -1
- package/src/duckdb/extension/json/serialize_json.cpp +2 -2
- package/src/duckdb/extension/parquet/column_reader.cpp +136 -116
- package/src/duckdb/extension/parquet/column_writer.cpp +870 -604
- package/src/duckdb/extension/parquet/geo_parquet.cpp +4 -5
- package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +0 -4
- package/src/duckdb/extension/parquet/include/column_reader.hpp +24 -19
- package/src/duckdb/extension/parquet/include/column_writer.hpp +7 -5
- package/src/duckdb/extension/parquet/include/decode_utils.hpp +138 -18
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +4 -3
- package/src/duckdb/extension/parquet/include/null_column_reader.hpp +1 -14
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +45 -0
- package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +101 -90
- package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +179 -0
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +2 -3
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +48 -0
- package/src/duckdb/extension/parquet/include/parquet_extension.hpp +8 -0
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +5 -0
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +22 -18
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -5
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +87 -3
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +30 -16
- package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +0 -8
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +1 -42
- package/src/duckdb/extension/parquet/include/thrift_tools.hpp +13 -1
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +4 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +240 -197
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +138 -6
- package/src/duckdb/extension/parquet/parquet_reader.cpp +155 -79
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +258 -38
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +17 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +65 -34
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +4 -0
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +13 -0
- package/src/duckdb/src/catalog/catalog.cpp +272 -97
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +9 -4
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +145 -95
- package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +9 -3
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +15 -0
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +40 -24
- package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry.cpp +3 -0
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +60 -5
- package/src/duckdb/src/catalog/catalog_search_path.cpp +27 -14
- package/src/duckdb/src/catalog/catalog_set.cpp +75 -31
- package/src/duckdb/src/catalog/default/default_functions.cpp +13 -8
- package/src/duckdb/src/catalog/default/default_views.cpp +1 -0
- package/src/duckdb/src/catalog/dependency_manager.cpp +133 -5
- package/src/duckdb/src/catalog/duck_catalog.cpp +17 -9
- package/src/duckdb/src/common/adbc/adbc.cpp +18 -0
- package/src/duckdb/src/common/allocator.cpp +3 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +30 -9
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +63 -82
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +4 -3
- package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +361 -0
- package/src/duckdb/src/common/arrow/arrow_util.cpp +10 -6
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +6 -2
- package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +2 -1
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +27 -14
- package/src/duckdb/src/common/assert.cpp +1 -2
- package/src/duckdb/src/common/bind_helpers.cpp +1 -1
- package/src/duckdb/src/common/box_renderer.cpp +316 -26
- package/src/duckdb/src/common/cgroups.cpp +7 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
- package/src/duckdb/src/common/enum_util.cpp +2865 -6882
- package/src/duckdb/src/common/enums/compression_type.cpp +12 -0
- package/src/duckdb/src/common/enums/metric_type.cpp +24 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +4 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/error_data.cpp +23 -6
- package/src/duckdb/src/common/exception/binder_exception.cpp +1 -1
- package/src/duckdb/src/common/exception.cpp +20 -28
- package/src/duckdb/src/common/extra_type_info.cpp +85 -20
- package/src/duckdb/src/common/file_buffer.cpp +5 -2
- package/src/duckdb/src/common/file_system.cpp +8 -3
- package/src/duckdb/src/common/fsst.cpp +3 -3
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/local_file_system.cpp +169 -60
- package/src/duckdb/src/common/multi_file_list.cpp +4 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +240 -63
- package/src/duckdb/src/common/opener_file_system.cpp +37 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +77 -11
- package/src/duckdb/src/common/operator/string_cast.cpp +6 -2
- package/src/duckdb/src/common/pipe_file_system.cpp +4 -4
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +25 -14
- package/src/duckdb/src/common/radix_partitioning.cpp +17 -16
- package/src/duckdb/src/common/random_engine.cpp +39 -3
- package/src/duckdb/src/common/render_tree.cpp +3 -19
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -58
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +2 -0
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +20 -19
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
- package/src/duckdb/src/common/serializer/memory_stream.cpp +36 -0
- package/src/duckdb/src/common/sort/comparators.cpp +7 -7
- package/src/duckdb/src/common/sort/partition_state.cpp +2 -2
- package/src/duckdb/src/common/stacktrace.cpp +127 -0
- package/src/duckdb/src/common/string_util.cpp +157 -32
- package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +15 -3
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +4 -0
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +71 -8
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +27 -6
- package/src/duckdb/src/common/types/conflict_manager.cpp +21 -7
- package/src/duckdb/src/common/types/date.cpp +39 -25
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -11
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -7
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +17 -17
- package/src/duckdb/src/common/types/timestamp.cpp +70 -33
- package/src/duckdb/src/common/types/uuid.cpp +11 -0
- package/src/duckdb/src/common/types/validity_mask.cpp +16 -5
- package/src/duckdb/src/common/types/value.cpp +357 -199
- package/src/duckdb/src/common/types/varint.cpp +64 -18
- package/src/duckdb/src/common/types/vector.cpp +78 -38
- package/src/duckdb/src/common/types.cpp +199 -92
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +2 -1
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +32 -5
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +3 -1
- package/src/duckdb/src/execution/adaptive_filter.cpp +6 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +410 -111
- package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +6 -0
- package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +4 -3
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_function.cpp +1 -0
- package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +5 -4
- package/src/duckdb/src/execution/expression_executor.cpp +5 -3
- package/src/duckdb/src/execution/index/art/art.cpp +208 -72
- package/src/duckdb/src/execution/index/art/base_leaf.cpp +1 -1
- package/src/duckdb/src/execution/index/art/leaf.cpp +12 -7
- package/src/duckdb/src/execution/index/art/node.cpp +2 -1
- package/src/duckdb/src/execution/index/art/node256_leaf.cpp +6 -6
- package/src/duckdb/src/execution/index/art/plan_art.cpp +50 -55
- package/src/duckdb/src/execution/index/art/prefix.cpp +7 -13
- package/src/duckdb/src/execution/index/bound_index.cpp +30 -5
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +3 -5
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -9
- package/src/duckdb/src/execution/join_hashtable.cpp +254 -158
- package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
- package/src/duckdb/src/execution/operator/aggregate/physical_partitioned_aggregate.cpp +226 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +77 -70
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +114 -50
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +19 -10
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +22 -15
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +95 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +6 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +75 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +40 -12
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +395 -163
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +115 -49
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +66 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +220 -46
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +43 -32
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +54 -119
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +184 -20
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +83 -21
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_validator.cpp +63 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +7 -4
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +3 -2
- package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +9 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +132 -15
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -55
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +284 -154
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +40 -55
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -3
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +298 -227
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +5 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +3 -4
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +32 -19
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +6 -0
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -19
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +11 -27
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +308 -119
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +105 -55
- package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +6 -2
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -1
- package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +15 -6
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +92 -50
- package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +0 -1
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +8 -4
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +54 -22
- package/src/duckdb/src/execution/operator/set/physical_union.cpp +5 -1
- package/src/duckdb/src/execution/physical_operator.cpp +15 -9
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +101 -12
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +11 -140
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +6 -5
- package/src/duckdb/src/execution/physical_plan/plan_export.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +16 -13
- package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_order.cpp +7 -7
- package/src/duckdb/src/execution/physical_plan/plan_prepare.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_projection.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +8 -3
- package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +3 -2
- package/src/duckdb/src/execution/physical_plan_generator.cpp +0 -22
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +136 -116
- package/src/duckdb/src/execution/sample/base_reservoir_sample.cpp +136 -0
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +930 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +6 -12
- package/src/duckdb/src/function/aggregate/distributive/{first.cpp → first_last_any.cpp} +37 -18
- package/src/duckdb/src/{core_functions → function}/aggregate/distributive/minmax.cpp +19 -12
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +72 -13
- package/src/duckdb/src/function/built_in_functions.cpp +85 -2
- package/src/duckdb/src/function/cast/decimal_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/string_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/struct_cast.cpp +81 -49
- package/src/duckdb/src/function/cast/union/from_struct.cpp +7 -5
- package/src/duckdb/src/function/compression_config.cpp +6 -0
- package/src/duckdb/src/function/encoding_function.cpp +134 -0
- package/src/duckdb/src/function/function.cpp +8 -13
- package/src/duckdb/src/function/function_binder.cpp +100 -21
- package/src/duckdb/src/function/function_list.cpp +178 -0
- package/src/duckdb/src/function/macro_function.cpp +4 -4
- package/src/duckdb/src/function/pragma/pragma_functions.cpp +0 -2
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +0 -4
- package/src/duckdb/src/{core_functions/core_functions.cpp → function/register_function_list.cpp} +12 -8
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +62 -23
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +33 -16
- package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +21 -0
- package/src/duckdb/src/{core_functions/scalar/blob → function/scalar}/create_sort_key.cpp +86 -23
- package/src/duckdb/src/{core_functions → function}/scalar/date/strftime.cpp +6 -4
- package/src/duckdb/src/function/scalar/generic/constant_or_null.cpp +5 -7
- package/src/duckdb/src/{core_functions → function}/scalar/generic/error.cpp +3 -1
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +2 -2
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +1 -7
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +27 -21
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +8 -12
- package/src/duckdb/src/function/scalar/list/list_select.cpp +1 -4
- package/src/duckdb/src/function/scalar/list/list_zip.cpp +6 -6
- package/src/duckdb/src/{core_functions → function}/scalar/map/map_contains.cpp +2 -2
- package/src/duckdb/src/function/scalar/nested_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/{operators → operator}/add.cpp +2 -1
- package/src/duckdb/src/function/scalar/{operators → operator}/arithmetic.cpp +195 -127
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +30 -21
- package/src/duckdb/src/function/scalar/strftime_format.cpp +10 -0
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +11 -41
- package/src/duckdb/src/function/scalar/string/concat.cpp +22 -20
- package/src/duckdb/src/function/scalar/string/concat_ws.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/contains.cpp +16 -19
- package/src/duckdb/src/function/scalar/string/length.cpp +38 -24
- package/src/duckdb/src/function/scalar/string/like.cpp +80 -47
- package/src/duckdb/src/{core_functions → function}/scalar/string/md5.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/prefix.cpp +0 -4
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -1
- package/src/duckdb/src/function/scalar/string/regexp.cpp +17 -7
- package/src/duckdb/src/{core_functions → function}/scalar/string/regexp_escape.cpp +2 -2
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha1.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha256.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/string_split.cpp +4 -5
- package/src/duckdb/src/function/scalar/string/strip_accents.cpp +3 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +14 -13
- package/src/duckdb/src/function/scalar/string/suffix.cpp +0 -4
- package/src/duckdb/src/function/scalar/struct/struct_concat.cpp +115 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +35 -31
- package/src/duckdb/src/{core_functions → function}/scalar/struct/struct_pack.cpp +7 -7
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -8
- package/src/duckdb/src/function/scalar/system/write_log.cpp +170 -0
- package/src/duckdb/src/function/scalar_function.cpp +5 -5
- package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +3 -2
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +287 -1
- package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +6 -6
- package/src/duckdb/src/function/table/arrow.cpp +32 -352
- package/src/duckdb/src/function/table/arrow_conversion.cpp +43 -7
- package/src/duckdb/src/function/table/copy_csv.cpp +38 -23
- package/src/duckdb/src/function/table/glob.cpp +1 -1
- package/src/duckdb/src/function/table/query_function.cpp +12 -7
- package/src/duckdb/src/function/table/read_csv.cpp +114 -46
- package/src/duckdb/src/function/table/read_file.cpp +26 -6
- package/src/duckdb/src/function/table/sniff_csv.cpp +25 -5
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +6 -7
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +141 -16
- package/src/duckdb/src/function/table/system/duckdb_log.cpp +64 -0
- package/src/duckdb/src/function/table/system/duckdb_log_contexts.cpp +65 -0
- package/src/duckdb/src/function/table/system/duckdb_memory.cpp +0 -1
- package/src/duckdb/src/function/table/system/duckdb_settings.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_tables.cpp +1 -13
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +1 -1
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +17 -0
- package/src/duckdb/src/function/table/system/pragma_table_info.cpp +6 -0
- package/src/duckdb/src/function/table/system/pragma_table_sample.cpp +95 -0
- package/src/duckdb/src/function/table/system/test_all_types.cpp +56 -46
- package/src/duckdb/src/function/table/system_functions.cpp +3 -0
- package/src/duckdb/src/function/table/table_scan.cpp +487 -289
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/table_function.cpp +10 -6
- package/src/duckdb/src/function/window/window_aggregate_function.cpp +248 -0
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +48 -0
- package/src/duckdb/src/function/window/window_aggregator.cpp +88 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +854 -0
- package/src/duckdb/src/function/window/window_collection.cpp +146 -0
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +357 -0
- package/src/duckdb/src/function/window/window_custom_aggregator.cpp +146 -0
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +758 -0
- package/src/duckdb/src/function/window/window_executor.cpp +99 -0
- package/src/duckdb/src/function/window/window_index_tree.cpp +63 -0
- package/src/duckdb/src/function/window/window_merge_sort_tree.cpp +275 -0
- package/src/duckdb/src/function/window/window_naive_aggregator.cpp +361 -0
- package/src/duckdb/src/function/window/window_rank_function.cpp +288 -0
- package/src/duckdb/src/function/window/window_rownumber_function.cpp +191 -0
- package/src/duckdb/src/function/window/window_segment_tree.cpp +594 -0
- package/src/duckdb/src/function/window/window_shared_expressions.cpp +50 -0
- package/src/duckdb/src/function/window/window_token_tree.cpp +142 -0
- package/src/duckdb/src/function/window/window_value_function.cpp +566 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +74 -17
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +9 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +4 -10
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +18 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +5 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +21 -18
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +3 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +10 -2
- package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +11 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +26 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +144 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +11 -4
- package/src/duckdb/src/include/duckdb/common/assert.hpp +12 -1
- package/src/duckdb/src/include/duckdb/common/atomic_ptr.hpp +102 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +65 -6
- package/src/duckdb/src/include/duckdb/common/chrono.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/column_index.hpp +72 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +120 -0
- package/src/duckdb/src/include/duckdb/{core_functions/core_functions.hpp → common/enums/collation_type.hpp} +2 -7
- package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/enums/function_errors.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_preservation_type.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions/aggregate → common/enums}/quantile_enum.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/error_data.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/extension_type_info.hpp +37 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +9 -3
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +19 -10
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/fsst.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +12 -2
- package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +147 -27
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +2 -7
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +16 -5
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/platform.hpp +34 -3
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +10 -13
- package/src/duckdb/src/include/duckdb/common/random_engine.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +0 -2
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/stacktrace.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +30 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +13 -2
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +21 -4
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +58 -10
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +43 -16
- package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +63 -21
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +62 -16
- package/src/duckdb/src/include/duckdb/common/types/varint.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +34 -7
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +12 -7
- package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +12 -13
- package/src/duckdb/src/include/duckdb/common/vector_operations/binary_executor.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +111 -4
- package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +48 -10
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +25 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +28 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +23 -16
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +25 -16
- package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +15 -10
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent/physical_fixed_batch_copy.hpp → aggregate/physical_partitioned_aggregate.hpp} +25 -27
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +21 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +38 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +8 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +29 -23
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +15 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +13 -5
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +36 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +21 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +52 -22
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +6 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_validator.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +6 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +16 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +9 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +8 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +55 -10
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +28 -7
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +6 -9
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +17 -16
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +7 -3
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +55 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +10 -9
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +16 -13
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +0 -4
- package/src/duckdb/src/include/duckdb/execution/partition_info.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +20 -9
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +1 -11
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/progress_data.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +160 -31
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_function_utils.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +61 -10
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/minmax_n_helpers.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/sort_key_helpers.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +47 -27
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +3 -10
- package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +13 -6
- package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +15 -0
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +29 -6
- package/src/duckdb/src/include/duckdb/{core_functions → function}/create_sort_key.hpp +4 -1
- package/src/duckdb/src/include/duckdb/function/encoding_function.hpp +78 -0
- package/src/duckdb/src/include/duckdb/function/function.hpp +22 -1
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/function_list.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/function_set.hpp +13 -7
- package/src/duckdb/src/include/duckdb/{core_functions → function}/lambda_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/partition_stats.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/register_function_list_helper.hpp +69 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +154 -23
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_utils.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/date_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_common.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +32 -23
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/list_functions.hpp +156 -0
- package/src/duckdb/src/include/duckdb/function/scalar/map_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +4 -45
- package/src/duckdb/src/include/duckdb/function/scalar/operator_functions.hpp +102 -0
- package/src/duckdb/src/include/duckdb/function/scalar/operators.hpp +2 -16
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +16 -25
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_utils.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_common.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +401 -76
- package/src/duckdb/src/include/duckdb/function/scalar/struct_functions.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/scalar/system_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +17 -8
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +59 -6
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +12 -9
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +18 -13
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -4
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +14 -0
- package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +15 -10
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +94 -18
- package/src/duckdb/src/include/duckdb/{core_functions → function}/to_interval.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +44 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_states.hpp +56 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +194 -0
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +153 -0
- package/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +146 -0
- package/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +32 -0
- package/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +122 -0
- package/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp +42 -0
- package/src/duckdb/src/include/duckdb/function/window/window_merge_sort_tree.hpp +108 -0
- package/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rank_function.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rownumber_function.hpp +43 -0
- package/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/window/window_shared_expressions.hpp +76 -0
- package/src/duckdb/src/include/duckdb/function/window/window_token_tree.hpp +46 -0
- package/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +79 -0
- package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +2 -0
- package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +81 -0
- package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +127 -0
- package/src/duckdb/src/include/duckdb/logging/logger.hpp +287 -0
- package/src/duckdb/src/include/duckdb/logging/logging.hpp +83 -0
- package/src/duckdb/src/include/duckdb/main/appender.hpp +41 -18
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +6 -3
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +317 -231
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +17 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +28 -6
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +8 -3
- package/src/duckdb/src/include/duckdb/main/config.hpp +52 -8
- package/src/duckdb/src/include/duckdb/main/connection.hpp +18 -3
- package/src/duckdb/src/include/duckdb/main/database.hpp +8 -7
- package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +5 -1
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension.hpp +8 -2
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +548 -9
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +12 -7
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +8 -4
- package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/subquery_relation.hpp +1 -4
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +7 -0
- package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation.hpp +45 -9
- package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +20 -22
- package/src/duckdb/src/include/duckdb/main/settings.hpp +613 -378
- package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -4
- package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +1 -3
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +14 -7
- package/src/duckdb/src/include/duckdb/optimizer/common_aggregate_optimizer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/empty_result_pullup.hpp +27 -0
- package/src/duckdb/src/include/duckdb/optimizer/expression_heuristics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +6 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/in_clause_rewriter.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +45 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +23 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +18 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +9 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +33 -11
- package/src/duckdb/src/include/duckdb/optimizer/rule/distinct_aggregate_optimizer.hpp +34 -0
- package/src/duckdb/src/include/duckdb/optimizer/sampling_pushdown.hpp +25 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +3 -1
- package/src/duckdb/src/include/duckdb/optimizer/sum_rewriter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +26 -8
- package/src/duckdb/src/include/duckdb/parallel/thread_context.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +51 -3
- package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +28 -44
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +11 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +12 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +22 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -4
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +16 -12
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +5 -5
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +12 -3
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +17 -57
- package/src/duckdb/src/include/duckdb/parser/qualified_name_set.hpp +19 -3
- package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +12 -9
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +45 -28
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +23 -11
- package/src/duckdb/src/include/duckdb/planner/binding_alias.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +11 -10
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +4 -4
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +9 -4
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/filter/dynamic_filter.hpp +48 -0
- package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +35 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +9 -9
- package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +16 -7
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +5 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +5 -3
- package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +14 -6
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +12 -8
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +82 -26
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +10 -3
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +4 -13
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +14 -15
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +13 -15
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/analyze.hpp +46 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/common.hpp +60 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +61 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/decompression.hpp +50 -0
- package/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp +100 -0
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/appender.hpp +150 -0
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp +618 -0
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +53 -31
- package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -1
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -4
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +16 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/storage_index.hpp +70 -0
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +5 -7
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +23 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +34 -6
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +39 -10
- package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +56 -14
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +35 -29
- package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +19 -6
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +29 -6
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +10 -10
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +26 -19
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +8 -1
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +16 -14
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +228 -61
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +14 -10
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +3 -1
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +3 -2
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +19 -17
- package/src/duckdb/src/include/duckdb/transaction/rollback_state.hpp +5 -2
- package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -2
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +13 -8
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer_allocator.hpp +79 -0
- package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +43 -13
- package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +4 -1
- package/src/duckdb/src/include/duckdb/verification/copied_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/external_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/parsed_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +7 -3
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +11 -5
- package/src/duckdb/src/include/duckdb/verification/unoptimized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb.h +424 -41
- package/src/duckdb/src/include/duckdb_extension.h +301 -195
- package/src/duckdb/src/logging/log_manager.cpp +157 -0
- package/src/duckdb/src/logging/log_storage.cpp +209 -0
- package/src/duckdb/src/logging/logger.cpp +211 -0
- package/src/duckdb/src/logging/logging.cpp +42 -0
- package/src/duckdb/src/main/appender.cpp +187 -45
- package/src/duckdb/src/main/attached_database.cpp +16 -8
- package/src/duckdb/src/main/capi/appender-c.cpp +47 -4
- package/src/duckdb/src/main/capi/arrow-c.cpp +9 -4
- package/src/duckdb/src/main/capi/config-c.cpp +17 -4
- package/src/duckdb/src/main/capi/datetime-c.cpp +15 -0
- package/src/duckdb/src/main/capi/duckdb-c.cpp +54 -13
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +212 -4
- package/src/duckdb/src/main/capi/helper-c.cpp +3 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +26 -7
- package/src/duckdb/src/main/capi/replacement_scan-c.cpp +1 -1
- package/src/duckdb/src/main/capi/result-c.cpp +3 -0
- package/src/duckdb/src/main/capi/table_description-c.cpp +43 -10
- package/src/duckdb/src/main/capi/threading-c.cpp +4 -4
- package/src/duckdb/src/main/client_context.cpp +125 -51
- package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
- package/src/duckdb/src/main/client_context_wrapper.cpp +4 -0
- package/src/duckdb/src/main/client_data.cpp +1 -1
- package/src/duckdb/src/main/client_verify.cpp +39 -20
- package/src/duckdb/src/main/config.cpp +266 -74
- package/src/duckdb/src/main/connection.cpp +53 -13
- package/src/duckdb/src/main/database.cpp +39 -18
- package/src/duckdb/src/main/database_manager.cpp +12 -11
- package/src/duckdb/src/main/db_instance_cache.cpp +14 -7
- package/src/duckdb/src/main/extension/extension_helper.cpp +24 -23
- package/src/duckdb/src/main/extension/extension_install.cpp +19 -7
- package/src/duckdb/src/main/extension/extension_load.cpp +91 -41
- package/src/duckdb/src/main/extension/extension_util.cpp +40 -19
- package/src/duckdb/src/main/extension.cpp +20 -11
- package/src/duckdb/src/main/profiling_info.cpp +19 -5
- package/src/duckdb/src/main/query_profiler.cpp +135 -36
- package/src/duckdb/src/main/query_result.cpp +2 -1
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/create_table_relation.cpp +5 -4
- package/src/duckdb/src/main/relation/create_view_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delete_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delim_get_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/distinct_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/explain_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/filter_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/insert_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation/order_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/projection_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/query_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +58 -20
- package/src/duckdb/src/main/relation/setop_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/subquery_relation.cpp +3 -8
- package/src/duckdb/src/main/relation/table_function_relation.cpp +10 -1
- package/src/duckdb/src/main/relation/table_relation.cpp +19 -3
- package/src/duckdb/src/main/relation/update_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/value_relation.cpp +42 -2
- package/src/duckdb/src/main/relation/view_relation.cpp +8 -2
- package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +49 -28
- package/src/duckdb/src/main/secret/secret_manager.cpp +1 -1
- package/src/duckdb/src/main/secret/secret_storage.cpp +6 -4
- package/src/duckdb/src/main/settings/autogenerated_settings.cpp +1102 -0
- package/src/duckdb/src/main/settings/custom_settings.cpp +1343 -0
- package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +60 -37
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +1 -1
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +126 -72
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +22 -6
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -3
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +2 -2
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +3 -3
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +7 -7
- package/src/duckdb/src/optimizer/deliminator.cpp +6 -5
- package/src/duckdb/src/optimizer/empty_result_pullup.cpp +96 -0
- package/src/duckdb/src/optimizer/expression_heuristics.cpp +11 -3
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +9 -2
- package/src/duckdb/src/optimizer/filter_combiner.cpp +190 -88
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +6 -5
- package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +25 -9
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +170 -72
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -4
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +3 -1
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +7 -7
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -6
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +37 -22
- package/src/duckdb/src/optimizer/late_materialization.cpp +414 -0
- package/src/duckdb/src/optimizer/limit_pushdown.cpp +1 -0
- package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +30 -2
- package/src/duckdb/src/optimizer/optimizer.cpp +67 -7
- package/src/duckdb/src/optimizer/pullup/pullup_filter.cpp +3 -3
- package/src/duckdb/src/optimizer/pullup/pullup_projection.cpp +2 -2
- package/src/duckdb/src/optimizer/pullup/pullup_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +3 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +5 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_unnest.cpp +52 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +2 -2
- package/src/duckdb/src/optimizer/regex_range_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +168 -38
- package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +8 -5
- package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/constant_folding.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/distinct_aggregate_optimizer.cpp +65 -0
- package/src/duckdb/src/optimizer/rule/distributivity.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -1
- package/src/duckdb/src/optimizer/rule/move_constants.cpp +9 -9
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/sampling_pushdown.cpp +24 -0
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +74 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +10 -7
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +3 -0
- package/src/duckdb/src/optimizer/sum_rewriter.cpp +174 -0
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +71 -0
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +5 -5
- package/src/duckdb/src/parallel/event.cpp +4 -0
- package/src/duckdb/src/parallel/executor.cpp +11 -29
- package/src/duckdb/src/parallel/executor_task.cpp +8 -3
- package/src/duckdb/src/parallel/pipeline.cpp +15 -8
- package/src/duckdb/src/parallel/pipeline_executor.cpp +67 -43
- package/src/duckdb/src/parallel/thread_context.cpp +12 -1
- package/src/duckdb/src/parser/column_definition.cpp +3 -3
- package/src/duckdb/src/parser/constraints/unique_constraint.cpp +72 -9
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -3
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +3 -3
- package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/star_expression.cpp +46 -2
- package/src/duckdb/src/parser/expression/window_expression.cpp +24 -1
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +26 -2
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +5 -3
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +29 -1
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +6 -6
- package/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_function_info.cpp +17 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -15
- package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
- package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +4 -4
- package/src/duckdb/src/parser/parsed_data/load_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +31 -1
- package/src/duckdb/src/parser/parsed_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +4 -1
- package/src/duckdb/src/parser/parser.cpp +129 -0
- package/src/duckdb/src/parser/qualified_name.cpp +99 -0
- package/src/duckdb/src/parser/query_error_context.cpp +35 -6
- package/src/duckdb/src/parser/query_node/select_node.cpp +4 -4
- package/src/duckdb/src/parser/statement/delete_statement.cpp +6 -1
- package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -3
- package/src/duckdb/src/parser/statement/update_statement.cpp +6 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +2 -2
- package/src/duckdb/src/parser/tableref.cpp +2 -2
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +16 -24
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +5 -5
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +61 -13
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +10 -4
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -2
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +30 -3
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +25 -6
- package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +10 -3
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +4 -3
- package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +18 -3
- package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +0 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +5 -5
- package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +26 -12
- package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +11 -3
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +3 -3
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +4 -4
- package/src/duckdb/src/parser/transform/statement/transform_set.cpp +2 -2
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +21 -3
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +8 -6
- package/src/duckdb/src/parser/transformer.cpp +2 -2
- package/src/duckdb/src/planner/bind_context.cpp +308 -136
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +30 -31
- package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +4 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +102 -94
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +7 -5
- package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +7 -7
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -10
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -6
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +23 -15
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +97 -19
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +74 -16
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +6 -6
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +49 -15
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +32 -23
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +20 -3
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +3 -0
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +6 -5
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +38 -19
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +2 -12
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +117 -412
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +423 -144
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +5 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +0 -4
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +31 -13
- package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +96 -27
- package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -3
- package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +7 -6
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +36 -9
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +34 -34
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +72 -35
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +99 -18
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +23 -11
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +22 -19
- package/src/duckdb/src/planner/binder.cpp +23 -45
- package/src/duckdb/src/planner/binding_alias.cpp +69 -0
- package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +6 -2
- package/src/duckdb/src/planner/collation_binding.cpp +38 -4
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +17 -5
- package/src/duckdb/src/planner/expression/bound_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -1
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +2 -2
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -4
- package/src/duckdb/src/planner/expression.cpp +7 -1
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +16 -0
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +53 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -8
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder.cpp +7 -7
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -3
- package/src/duckdb/src/planner/filter/constant_filter.cpp +17 -2
- package/src/duckdb/src/planner/filter/dynamic_filter.cpp +68 -0
- package/src/duckdb/src/planner/filter/in_filter.cpp +84 -0
- package/src/duckdb/src/planner/filter/null_filter.cpp +1 -2
- package/src/duckdb/src/planner/filter/optional_filter.cpp +29 -0
- package/src/duckdb/src/planner/filter/struct_filter.cpp +11 -6
- package/src/duckdb/src/planner/joinside.cpp +6 -5
- package/src/duckdb/src/planner/logical_operator.cpp +4 -1
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +68 -2
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +23 -0
- package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -12
- package/src/duckdb/src/planner/operator/logical_filter.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +48 -25
- package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_join.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_order.cpp +4 -11
- package/src/duckdb/src/planner/operator/logical_top_n.cpp +7 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +33 -5
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +2 -2
- package/src/duckdb/src/planner/table_binding.cpp +74 -36
- package/src/duckdb/src/planner/table_filter.cpp +5 -8
- package/src/duckdb/src/storage/arena_allocator.cpp +5 -4
- package/src/duckdb/src/storage/buffer/block_handle.cpp +88 -17
- package/src/duckdb/src/storage/buffer/block_manager.cpp +34 -26
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +70 -49
- package/src/duckdb/src/storage/buffer_manager.cpp +4 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -5
- package/src/duckdb/src/storage/compression/bitpacking.cpp +14 -16
- package/src/duckdb/src/storage/compression/dictionary/analyze.cpp +54 -0
- package/src/duckdb/src/storage/compression/dictionary/common.cpp +90 -0
- package/src/duckdb/src/storage/compression/dictionary/compression.cpp +174 -0
- package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +115 -0
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +53 -545
- package/src/duckdb/src/storage/compression/empty_validity.cpp +15 -0
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +25 -16
- package/src/duckdb/src/storage/compression/fsst.cpp +101 -47
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +92 -2
- package/src/duckdb/src/storage/compression/rle.cpp +216 -46
- package/src/duckdb/src/storage/compression/roaring/analyze.cpp +179 -0
- package/src/duckdb/src/storage/compression/roaring/common.cpp +282 -0
- package/src/duckdb/src/storage/compression/roaring/compress.cpp +481 -0
- package/src/duckdb/src/storage/compression/roaring/metadata.cpp +262 -0
- package/src/duckdb/src/storage/compression/roaring/scan.cpp +364 -0
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +47 -65
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +102 -39
- package/src/duckdb/src/storage/compression/zstd.cpp +1049 -0
- package/src/duckdb/src/storage/data_table.cpp +312 -172
- package/src/duckdb/src/storage/local_storage.cpp +104 -46
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -3
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +138 -58
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +14 -0
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +19 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +43 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +32 -5
- package/src/duckdb/src/storage/single_file_block_manager.cpp +6 -8
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +82 -71
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +18 -17
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +34 -22
- package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -3
- package/src/duckdb/src/storage/storage_info.cpp +72 -10
- package/src/duckdb/src/storage/storage_manager.cpp +41 -47
- package/src/duckdb/src/storage/table/array_column_data.cpp +7 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -9
- package/src/duckdb/src/storage/table/column_data.cpp +105 -43
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +307 -132
- package/src/duckdb/src/storage/table/column_segment.cpp +36 -13
- package/src/duckdb/src/storage/table/list_column_data.cpp +4 -4
- package/src/duckdb/src/storage/table/row_group.cpp +159 -66
- package/src/duckdb/src/storage/table/row_group_collection.cpp +157 -68
- package/src/duckdb/src/storage/table/row_version_manager.cpp +33 -10
- package/src/duckdb/src/storage/table/scan_state.cpp +21 -7
- package/src/duckdb/src/storage/table/standard_column_data.cpp +68 -5
- package/src/duckdb/src/storage/table/struct_column_data.cpp +42 -4
- package/src/duckdb/src/storage/table/table_statistics.cpp +91 -5
- package/src/duckdb/src/storage/table/update_segment.cpp +287 -210
- package/src/duckdb/src/storage/table_index_list.cpp +55 -58
- package/src/duckdb/src/storage/temporary_file_manager.cpp +412 -149
- package/src/duckdb/src/storage/wal_replay.cpp +132 -48
- package/src/duckdb/src/storage/write_ahead_log.cpp +75 -48
- package/src/duckdb/src/transaction/cleanup_state.cpp +0 -1
- package/src/duckdb/src/transaction/commit_state.cpp +23 -14
- package/src/duckdb/src/transaction/duck_transaction.cpp +29 -25
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +18 -6
- package/src/duckdb/src/transaction/meta_transaction.cpp +3 -2
- package/src/duckdb/src/transaction/rollback_state.cpp +5 -2
- package/src/duckdb/src/transaction/transaction_context.cpp +9 -1
- package/src/duckdb/src/transaction/undo_buffer.cpp +35 -27
- package/src/duckdb/src/transaction/undo_buffer_allocator.cpp +72 -0
- package/src/duckdb/src/transaction/wal_write_state.cpp +12 -10
- package/src/duckdb/src/verification/copied_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +7 -5
- package/src/duckdb/src/verification/external_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/fetch_row_verifier.cpp +7 -4
- package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +8 -4
- package/src/duckdb/src/verification/parsed_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -12
- package/src/duckdb/src/verification/statement_verifier.cpp +20 -15
- package/src/duckdb/src/verification/unoptimized_statement_verifier.cpp +7 -4
- package/src/duckdb/third_party/fsst/libfsst.hpp +1 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +15 -22
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +4 -2
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +2 -4
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +14278 -13832
- package/src/duckdb/third_party/parquet/parquet_types.cpp +3410 -1686
- package/src/duckdb/third_party/parquet/parquet_types.h +1585 -1204
- package/src/duckdb/third_party/skiplist/SkipList.h +0 -1
- package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +13 -15
- package/src/duckdb/third_party/zstd/common/debug.cpp +36 -0
- package/src/duckdb/third_party/zstd/common/entropy_common.cpp +173 -49
- package/src/duckdb/third_party/zstd/common/error_private.cpp +11 -3
- package/src/duckdb/third_party/zstd/common/fse_decompress.cpp +126 -97
- package/src/duckdb/third_party/zstd/common/pool.cpp +376 -0
- package/src/duckdb/third_party/zstd/common/threading.cpp +193 -0
- package/src/duckdb/third_party/zstd/common/xxhash.cpp +18 -14
- package/src/duckdb/third_party/zstd/common/zstd_common.cpp +3 -38
- package/src/duckdb/third_party/zstd/compress/fse_compress.cpp +93 -165
- package/src/duckdb/third_party/zstd/compress/hist.cpp +28 -31
- package/src/duckdb/third_party/zstd/compress/huf_compress.cpp +957 -291
- package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3988 -1124
- package/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +120 -43
- package/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +47 -23
- package/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +274 -424
- package/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +403 -153
- package/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +741 -268
- package/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +1339 -278
- package/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +334 -222
- package/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +674 -298
- package/src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp +1885 -0
- package/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +1247 -586
- package/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +18 -17
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +724 -270
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1193 -393
- package/src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp +30 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp +171 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp +80 -0
- package/src/duckdb/third_party/zstd/dict/cover.cpp +1271 -0
- package/src/duckdb/third_party/zstd/dict/divsufsort.cpp +1916 -0
- package/src/duckdb/third_party/zstd/dict/fastcover.cpp +775 -0
- package/src/duckdb/third_party/zstd/dict/zdict.cpp +1139 -0
- package/src/duckdb/third_party/zstd/include/zdict.h +473 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/allocations.h +58 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bits.h +204 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +88 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +243 -47
- package/src/duckdb/third_party/zstd/include/zstd/common/cpu.h +253 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/debug.h +31 -31
- package/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +94 -6
- package/src/duckdb/third_party/zstd/include/zstd/common/fse.h +424 -64
- package/src/duckdb/third_party/zstd/include/zstd/common/huf.h +255 -70
- package/src/duckdb/third_party/zstd/include/zstd/common/mem.h +125 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/pool.h +84 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/portability_macros.h +158 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/threading.h +152 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash.h → xxhash.hpp} +0 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash_static.h → xxhash_static.hpp} +1 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_deps.h +122 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +143 -174
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_trace.h +159 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/clevels.h +136 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +4 -4
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +631 -220
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +17 -7
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +2 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +3 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +256 -153
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +16 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +145 -11
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +14 -6
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm_geartab.h +110 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +33 -9
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstdmt_compress.h +107 -0
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +20 -6
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +88 -16
- package/src/duckdb/third_party/zstd/include/zstd/deprecated/zbuff.h +214 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/cover.h +156 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/divsufsort.h +62 -0
- package/src/duckdb/third_party/zstd/include/zstd.h +2171 -93
- package/src/duckdb/third_party/zstd/include/{zstd/common/zstd_errors.h → zstd_errors.h} +32 -10
- package/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +8 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +12 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_nested.cpp +6 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_array.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_bit.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_date.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_enum.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_generic.cpp +18 -0
- package/src/duckdb/ub_extension_core_functions_scalar_list.cpp +22 -0
- package/src/duckdb/ub_extension_core_functions_scalar_map.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_math.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_operators.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_random.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_string.cpp +48 -0
- package/src/duckdb/ub_extension_core_functions_scalar_struct.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_union.cpp +6 -0
- package/src/duckdb/ub_src_common.cpp +4 -0
- package/src/duckdb/ub_src_common_arrow.cpp +3 -1
- package/src/duckdb/ub_src_execution.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_aggregate.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +2 -0
- package/src/duckdb/ub_src_execution_sample.cpp +4 -0
- package/src/duckdb/ub_src_function.cpp +6 -0
- package/src/duckdb/ub_src_function_aggregate.cpp +0 -2
- package/src/duckdb/ub_src_function_aggregate_distributive.cpp +3 -1
- package/src/duckdb/ub_src_function_scalar.cpp +2 -8
- package/src/duckdb/ub_src_function_scalar_date.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_generic.cpp +2 -2
- package/src/duckdb/ub_src_function_scalar_map.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_operator.cpp +8 -0
- package/src/duckdb/ub_src_function_scalar_string.cpp +10 -0
- package/src/duckdb/ub_src_function_scalar_struct.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_system.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +6 -0
- package/src/duckdb/ub_src_function_window.cpp +36 -0
- package/src/duckdb/ub_src_logging.cpp +8 -0
- package/src/duckdb/ub_src_main_settings.cpp +3 -1
- package/src/duckdb/ub_src_optimizer.cpp +8 -0
- package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
- package/src/duckdb/ub_src_parser.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
- package/src/duckdb/ub_src_planner.cpp +2 -0
- package/src/duckdb/ub_src_planner_filter.cpp +6 -0
- package/src/duckdb/ub_src_storage_compression.cpp +4 -0
- package/src/duckdb/ub_src_storage_compression_dictionary.cpp +8 -0
- package/src/duckdb/ub_src_storage_compression_roaring.cpp +10 -0
- package/src/duckdb/ub_src_transaction.cpp +2 -0
- package/vendor.py +1 -1
- package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +0 -6003
- package/src/duckdb/extension/json/yyjson/yyjson.cpp +0 -8218
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +0 -78
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +0 -91
- package/src/duckdb/src/common/cycle_counter.cpp +0 -76
- package/src/duckdb/src/common/field_writer.cpp +0 -97
- package/src/duckdb/src/common/http_state.cpp +0 -95
- package/src/duckdb/src/common/preserved_error.cpp +0 -87
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
- package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +0 -27
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +0 -36
- package/src/duckdb/src/common/serializer/format_serializer.cpp +0 -15
- package/src/duckdb/src/common/serializer.cpp +0 -24
- package/src/duckdb/src/common/types/chunk_collection.cpp +0 -190
- package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +0 -183
- package/src/duckdb/src/core_functions/scalar/date/current.cpp +0 -54
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +0 -78
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +0 -70
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +0 -412
- package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +0 -28
- package/src/duckdb/src/core_functions/scalar/string/jaro_winkler.cpp +0 -71
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/execution/index/art/node16.cpp +0 -196
- package/src/duckdb/src/execution/index/art/node4.cpp +0 -189
- package/src/duckdb/src/execution/index/unknown_index.cpp +0 -65
- package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +0 -595
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +0 -434
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +0 -89
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +0 -90
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +0 -95
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +0 -494
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +0 -35
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +0 -99
- package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -689
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -242
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +0 -695
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -280
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +0 -666
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +0 -499
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -207
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/execution/physical_plan/plan_limit_percent.cpp +0 -18
- package/src/duckdb/src/execution/physical_plan/plan_show_select.cpp +0 -47
- package/src/duckdb/src/execution/reservoir_sample.cpp +0 -324
- package/src/duckdb/src/execution/window_executor.cpp +0 -1830
- package/src/duckdb/src/execution/window_segment_tree.cpp +0 -2073
- package/src/duckdb/src/extension_forward_decl/icu.cpp +0 -59
- package/src/duckdb/src/function/aggregate/distributive_functions.cpp +0 -15
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +0 -29
- package/src/duckdb/src/function/scalar/generic_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +0 -143
- package/src/duckdb/src/function/scalar/operators.cpp +0 -14
- package/src/duckdb/src/function/scalar/sequence_functions.cpp +0 -10
- package/src/duckdb/src/function/scalar/string_functions.cpp +0 -22
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +0 -173
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +0 -101
- package/src/duckdb/src/include/duckdb/catalog/mapping_value.hpp +0 -92
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +0 -42
- package/src/duckdb/src/include/duckdb/common/cycle_counter.hpp +0 -68
- package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +0 -34
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +0 -113
- package/src/duckdb/src/include/duckdb/common/platform.h +0 -58
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +0 -59
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +0 -192
- package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +0 -63
- package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +0 -253
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +0 -155
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +0 -163
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +0 -35
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +0 -228
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +0 -93
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +0 -197
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +0 -30
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +0 -99
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +0 -87
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +0 -80
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +0 -104
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +0 -79
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -119
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +0 -72
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +0 -110
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_file_handle.hpp +0 -59
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +0 -46
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +0 -210
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +0 -131
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +0 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +0 -167
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +0 -343
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -165
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_optimizer.hpp +0 -45
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +0 -57
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +0 -45
- package/src/duckdb/src/include/duckdb/parser/statement/show_statement.hpp +0 -32
- package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +0 -49
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +0 -42
- package/src/duckdb/src/main/settings/settings.cpp +0 -2056
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +0 -36
- package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +0 -19
- package/src/duckdb/src/parser/statement/show_statement.cpp +0 -15
- package/src/duckdb/src/planner/binder/statement/bind_show.cpp +0 -30
- package/src/duckdb/src/planner/operator/logical_limit_percent.cpp +0 -14
- package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +0 -70
- package/src/duckdb/third_party/fsst/fsst_avx512.cpp +0 -140
- package/src/duckdb/third_party/fsst/fsst_avx512.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll1.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll2.inc +0 -114
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll3.inc +0 -171
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll4.inc +0 -228
- package/src/duckdb/third_party/parquet/parquet_constants.cpp +0 -17
- package/src/duckdb/third_party/parquet/parquet_constants.h +0 -24
- package/src/duckdb/third_party/re2/util/pod_array.h +0 -55
- package/src/duckdb/third_party/re2/util/sparse_array.h +0 -392
- package/src/duckdb/third_party/re2/util/sparse_set.h +0 -264
- package/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +0 -421
- package/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +0 -238
- package/src/duckdb/third_party/zstd/include/zstd_static.h +0 -1070
- package/src/duckdb/ub_src_core_functions.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_algebraic.cpp +0 -8
- package/src/duckdb/ub_src_core_functions_aggregate_distributive.cpp +0 -24
- package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +0 -12
- package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_regression.cpp +0 -14
- package/src/duckdb/ub_src_core_functions_scalar_array.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_bit.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_scalar_date.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_enum.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +0 -18
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_map.cpp +0 -16
- package/src/duckdb/ub_src_core_functions_scalar_math.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_operators.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_random.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +0 -58
- package/src/duckdb/ub_src_core_functions_scalar_struct.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_union.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +0 -18
- package/src/duckdb/ub_src_function_scalar_operators.cpp +0 -8
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/covar.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/stddev.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/sum_helpers.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/array_kernels.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/function_list.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/secret_functions.hpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/multiply.cpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/subtract.cpp +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -12,44 +12,54 @@
|
|
12
12
|
#include "zstd/compress/hist.h"
|
13
13
|
#include "zstd/compress/zstd_opt.h"
|
14
14
|
|
15
|
+
namespace duckdb_zstd {
|
16
|
+
|
17
|
+
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|
18
|
+
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|
19
|
+
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
|
15
20
|
|
16
21
|
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
|
17
|
-
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
|
18
22
|
#define ZSTD_MAX_PRICE (1<<30)
|
19
23
|
|
20
|
-
#define ZSTD_PREDEF_THRESHOLD
|
24
|
+
#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
|
21
25
|
|
22
26
|
|
23
27
|
/*-*************************************
|
24
28
|
* Price functions for optimal parser
|
25
29
|
***************************************/
|
26
30
|
|
27
|
-
#if 0 /* approximation at bit level */
|
31
|
+
#if 0 /* approximation at bit level (for tests) */
|
28
32
|
# define BITCOST_ACCURACY 0
|
29
33
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
30
|
-
# define WEIGHT(stat)
|
31
|
-
#elif 0 /* fractional bit accuracy */
|
34
|
+
# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
|
35
|
+
#elif 0 /* fractional bit accuracy (for tests) */
|
32
36
|
# define BITCOST_ACCURACY 8
|
33
37
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
34
|
-
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
|
38
|
+
# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
|
35
39
|
#else /* opt==approx, ultra==accurate */
|
36
40
|
# define BITCOST_ACCURACY 8
|
37
41
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
38
|
-
# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
|
42
|
+
# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
|
39
43
|
#endif
|
40
44
|
|
41
|
-
|
42
|
-
|
45
|
+
/* ZSTD_bitWeight() :
|
46
|
+
* provide estimated "cost" of a stat in full bits only */
|
43
47
|
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
|
44
48
|
{
|
45
49
|
return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
|
46
50
|
}
|
47
51
|
|
52
|
+
/* ZSTD_fracWeight() :
|
53
|
+
* provide fractional-bit "cost" of a stat,
|
54
|
+
* using linear interpolation approximation */
|
48
55
|
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
|
49
56
|
{
|
50
57
|
U32 const stat = rawStat + 1;
|
51
58
|
U32 const hb = ZSTD_highbit32(stat);
|
52
59
|
U32 const BWeight = hb * BITCOST_MULTIPLIER;
|
60
|
+
/* Fweight was meant for "Fractional weight"
|
61
|
+
* but it's effectively a value between 1 and 2
|
62
|
+
* using fixed point arithmetic */
|
53
63
|
U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
|
54
64
|
U32 const weight = BWeight + FWeight;
|
55
65
|
assert(hb + BITCOST_ACCURACY < 31);
|
@@ -60,7 +70,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
|
|
60
70
|
/* debugging function,
|
61
71
|
* @return price in bytes as fractional value
|
62
72
|
* for debug messages only */
|
63
|
-
MEM_STATIC double ZSTD_fCost(
|
73
|
+
MEM_STATIC double ZSTD_fCost(int price)
|
64
74
|
{
|
65
75
|
return (double)price / (BITCOST_MULTIPLIER*8);
|
66
76
|
}
|
@@ -68,7 +78,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
|
|
68
78
|
|
69
79
|
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
|
70
80
|
{
|
71
|
-
return optPtr->literalCompressionMode !=
|
81
|
+
return optPtr->literalCompressionMode != ZSTD_ps_disable;
|
72
82
|
}
|
73
83
|
|
74
84
|
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
@@ -81,25 +91,52 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
|
81
91
|
}
|
82
92
|
|
83
93
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
94
|
+
static U32 sum_u32(const unsigned table[], size_t nbElts)
|
95
|
+
{
|
96
|
+
size_t n;
|
97
|
+
U32 total = 0;
|
98
|
+
for (n=0; n<nbElts; n++) {
|
99
|
+
total += table[n];
|
100
|
+
}
|
101
|
+
return total;
|
102
|
+
}
|
103
|
+
|
104
|
+
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
|
105
|
+
|
106
|
+
static U32
|
107
|
+
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
|
88
108
|
{
|
89
109
|
U32 s, sum=0;
|
90
|
-
DEBUGLOG(5, "
|
91
|
-
|
110
|
+
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
|
111
|
+
(unsigned)lastEltIndex+1, (unsigned)shift );
|
112
|
+
assert(shift < 30);
|
92
113
|
for (s=0; s<lastEltIndex+1; s++) {
|
93
|
-
|
94
|
-
|
114
|
+
unsigned const base = base1 ? 1 : (table[s]>0);
|
115
|
+
unsigned const newStat = base + (table[s] >> shift);
|
116
|
+
sum += newStat;
|
117
|
+
table[s] = newStat;
|
95
118
|
}
|
96
119
|
return sum;
|
97
120
|
}
|
98
121
|
|
122
|
+
/* ZSTD_scaleStats() :
|
123
|
+
* reduce all elt frequencies in table if sum too large
|
124
|
+
* return the resulting sum of elements */
|
125
|
+
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
|
126
|
+
{
|
127
|
+
U32 const prevsum = sum_u32(table, lastEltIndex+1);
|
128
|
+
U32 const factor = prevsum >> logTarget;
|
129
|
+
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
|
130
|
+
assert(logTarget < 30);
|
131
|
+
if (factor <= 1) return prevsum;
|
132
|
+
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
|
133
|
+
}
|
134
|
+
|
99
135
|
/* ZSTD_rescaleFreqs() :
|
100
136
|
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
|
101
137
|
* take hints from dictionary if there is one
|
102
|
-
*
|
138
|
+
* and init from zero if there is none,
|
139
|
+
* using src for literals stats, and baseline stats for sequence symbols
|
103
140
|
* otherwise downscale existing stats, to be used as seed for next block.
|
104
141
|
*/
|
105
142
|
static void
|
@@ -111,24 +148,28 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
111
148
|
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
|
112
149
|
optPtr->priceType = zop_dynamic;
|
113
150
|
|
114
|
-
if (optPtr->litLengthSum == 0) { /* first block
|
115
|
-
|
116
|
-
|
151
|
+
if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
|
152
|
+
|
153
|
+
/* heuristic: use pre-defined stats for too small inputs */
|
154
|
+
if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
|
155
|
+
DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
|
117
156
|
optPtr->priceType = zop_predef;
|
118
157
|
}
|
119
158
|
|
120
159
|
assert(optPtr->symbolCosts != NULL);
|
121
160
|
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
|
122
|
-
|
161
|
+
|
162
|
+
/* huffman stats covering the full value set : table presumed generated by dictionary */
|
123
163
|
optPtr->priceType = zop_dynamic;
|
124
164
|
|
125
165
|
if (compressedLiterals) {
|
166
|
+
/* generate literals statistics from huffman table */
|
126
167
|
unsigned lit;
|
127
168
|
assert(optPtr->litFreq != NULL);
|
128
169
|
optPtr->litSum = 0;
|
129
170
|
for (lit=0; lit<=MaxLit; lit++) {
|
130
171
|
U32 const scaleLog = 11; /* scale to 2K */
|
131
|
-
U32 const bitCost =
|
172
|
+
U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
|
132
173
|
assert(bitCost <= scaleLog);
|
133
174
|
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
134
175
|
optPtr->litSum += optPtr->litFreq[lit];
|
@@ -170,20 +211,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
170
211
|
optPtr->offCodeSum += optPtr->offCodeFreq[of];
|
171
212
|
} }
|
172
213
|
|
173
|
-
} else { /*
|
214
|
+
} else { /* first block, no dictionary */
|
174
215
|
|
175
216
|
assert(optPtr->litFreq != NULL);
|
176
217
|
if (compressedLiterals) {
|
218
|
+
/* base initial cost of literals on direct frequency within src */
|
177
219
|
unsigned lit = MaxLit;
|
178
220
|
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
179
|
-
optPtr->litSum =
|
221
|
+
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
|
180
222
|
}
|
181
223
|
|
182
|
-
{ unsigned
|
183
|
-
|
184
|
-
|
224
|
+
{ unsigned const baseLLfreqs[MaxLL+1] = {
|
225
|
+
4, 2, 1, 1, 1, 1, 1, 1,
|
226
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
227
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
228
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
229
|
+
1, 1, 1, 1
|
230
|
+
};
|
231
|
+
ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
|
232
|
+
optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
|
185
233
|
}
|
186
|
-
optPtr->litLengthSum = MaxLL+1;
|
187
234
|
|
188
235
|
{ unsigned ml;
|
189
236
|
for (ml=0; ml<=MaxML; ml++)
|
@@ -191,21 +238,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
191
238
|
}
|
192
239
|
optPtr->matchLengthSum = MaxML+1;
|
193
240
|
|
194
|
-
{ unsigned
|
195
|
-
|
196
|
-
|
241
|
+
{ unsigned const baseOFCfreqs[MaxOff+1] = {
|
242
|
+
6, 2, 1, 1, 2, 3, 4, 4,
|
243
|
+
4, 3, 2, 1, 1, 1, 1, 1,
|
244
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
245
|
+
1, 1, 1, 1, 1, 1, 1, 1
|
246
|
+
};
|
247
|
+
ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
|
248
|
+
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
|
197
249
|
}
|
198
|
-
optPtr->offCodeSum = MaxOff+1;
|
199
250
|
|
200
251
|
}
|
201
252
|
|
202
|
-
} else { /* new block :
|
253
|
+
} else { /* new block : scale down accumulated statistics */
|
203
254
|
|
204
255
|
if (compressedLiterals)
|
205
|
-
optPtr->litSum =
|
206
|
-
optPtr->litLengthSum =
|
207
|
-
optPtr->matchLengthSum =
|
208
|
-
optPtr->offCodeSum =
|
256
|
+
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
|
257
|
+
optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
|
258
|
+
optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
|
259
|
+
optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
|
209
260
|
}
|
210
261
|
|
211
262
|
ZSTD_setBasePrices(optPtr, optLevel);
|
@@ -218,6 +269,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
218
269
|
const optState_t* const optPtr,
|
219
270
|
int optLevel)
|
220
271
|
{
|
272
|
+
DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
|
221
273
|
if (litLength == 0) return 0;
|
222
274
|
|
223
275
|
if (!ZSTD_compressedLiterals(optPtr))
|
@@ -227,11 +279,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
227
279
|
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
|
228
280
|
|
229
281
|
/* dynamic statistics */
|
230
|
-
{ U32 price =
|
282
|
+
{ U32 price = optPtr->litSumBasePrice * litLength;
|
283
|
+
U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
|
231
284
|
U32 u;
|
285
|
+
assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
|
232
286
|
for (u=0; u < litLength; u++) {
|
233
|
-
|
234
|
-
|
287
|
+
U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
|
288
|
+
if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
|
289
|
+
price -= litPrice;
|
235
290
|
}
|
236
291
|
return price;
|
237
292
|
}
|
@@ -241,33 +296,46 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
241
296
|
* cost of literalLength symbol */
|
242
297
|
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
|
243
298
|
{
|
244
|
-
|
299
|
+
assert(litLength <= ZSTD_BLOCKSIZE_MAX);
|
300
|
+
if (optPtr->priceType == zop_predef)
|
301
|
+
return WEIGHT(litLength, optLevel);
|
302
|
+
|
303
|
+
/* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
|
304
|
+
* because it isn't representable in the zstd format.
|
305
|
+
* So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
|
306
|
+
* In such a case, the block would be all literals.
|
307
|
+
*/
|
308
|
+
if (litLength == ZSTD_BLOCKSIZE_MAX)
|
309
|
+
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
|
245
310
|
|
246
311
|
/* dynamic statistics */
|
247
312
|
{ U32 const llCode = ZSTD_LLcode(litLength);
|
248
|
-
return (
|
313
|
+
return (LL_bits[llCode] * BITCOST_MULTIPLIER)
|
249
314
|
+ optPtr->litLengthSumBasePrice
|
250
315
|
- WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
|
251
316
|
}
|
252
317
|
}
|
253
318
|
|
254
319
|
/* ZSTD_getMatchPrice() :
|
255
|
-
* Provides the cost of the match part (offset + matchLength) of a sequence
|
320
|
+
* Provides the cost of the match part (offset + matchLength) of a sequence.
|
256
321
|
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
|
257
|
-
*
|
322
|
+
* @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
|
323
|
+
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
|
324
|
+
*/
|
258
325
|
FORCE_INLINE_TEMPLATE U32
|
259
|
-
ZSTD_getMatchPrice(U32 const
|
326
|
+
ZSTD_getMatchPrice(U32 const offBase,
|
260
327
|
U32 const matchLength,
|
261
328
|
const optState_t* const optPtr,
|
262
329
|
int const optLevel)
|
263
330
|
{
|
264
331
|
U32 price;
|
265
|
-
U32 const offCode = ZSTD_highbit32(
|
332
|
+
U32 const offCode = ZSTD_highbit32(offBase);
|
266
333
|
U32 const mlBase = matchLength - MINMATCH;
|
267
334
|
assert(matchLength >= MINMATCH);
|
268
335
|
|
269
|
-
if (optPtr->priceType == zop_predef) /* fixed scheme,
|
270
|
-
return WEIGHT(mlBase, optLevel)
|
336
|
+
if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
|
337
|
+
return WEIGHT(mlBase, optLevel)
|
338
|
+
+ ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
|
271
339
|
|
272
340
|
/* dynamic statistics */
|
273
341
|
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
|
@@ -276,7 +344,7 @@ ZSTD_getMatchPrice(U32 const offset,
|
|
276
344
|
|
277
345
|
/* match Length */
|
278
346
|
{ U32 const mlCode = ZSTD_MLcode(mlBase);
|
279
|
-
price += (
|
347
|
+
price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
|
280
348
|
}
|
281
349
|
|
282
350
|
price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
|
@@ -286,10 +354,10 @@ ZSTD_getMatchPrice(U32 const offset,
|
|
286
354
|
}
|
287
355
|
|
288
356
|
/* ZSTD_updateStats() :
|
289
|
-
* assumption : literals +
|
357
|
+
* assumption : literals + litLength <= iend */
|
290
358
|
static void ZSTD_updateStats(optState_t* const optPtr,
|
291
359
|
U32 litLength, const BYTE* literals,
|
292
|
-
U32
|
360
|
+
U32 offBase, U32 matchLength)
|
293
361
|
{
|
294
362
|
/* literals */
|
295
363
|
if (ZSTD_compressedLiterals(optPtr)) {
|
@@ -305,8 +373,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
|
|
305
373
|
optPtr->litLengthSum++;
|
306
374
|
}
|
307
375
|
|
308
|
-
/*
|
309
|
-
{ U32 const offCode = ZSTD_highbit32(
|
376
|
+
/* offset code : follows storeSeq() numeric representation */
|
377
|
+
{ U32 const offCode = ZSTD_highbit32(offBase);
|
310
378
|
assert(offCode <= MaxOff);
|
311
379
|
optPtr->offCodeFreq[offCode]++;
|
312
380
|
optPtr->offCodeSum++;
|
@@ -340,9 +408,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
|
340
408
|
|
341
409
|
/* Update hashTable3 up to ip (excluded)
|
342
410
|
Assumption : always within prefix (i.e. not within extDict) */
|
343
|
-
static
|
344
|
-
|
345
|
-
|
411
|
+
static
|
412
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
413
|
+
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
|
414
|
+
U32* nextToUpdate3,
|
415
|
+
const BYTE* const ip)
|
346
416
|
{
|
347
417
|
U32* const hashTable3 = ms->hashTable3;
|
348
418
|
U32 const hashLog3 = ms->hashLog3;
|
@@ -366,11 +436,15 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
|
366
436
|
* Binary Tree search
|
367
437
|
***************************************/
|
368
438
|
/** ZSTD_insertBt1() : add one or multiple positions to tree.
|
369
|
-
*
|
439
|
+
* @param ip assumed <= iend-8 .
|
440
|
+
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
|
370
441
|
* @return : nb of positions added */
|
371
|
-
static
|
372
|
-
|
442
|
+
static
|
443
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
444
|
+
U32 ZSTD_insertBt1(
|
445
|
+
const ZSTD_matchState_t* ms,
|
373
446
|
const BYTE* const ip, const BYTE* const iend,
|
447
|
+
U32 const target,
|
374
448
|
U32 const mls, const int extDict)
|
375
449
|
{
|
376
450
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
@@ -388,32 +462,36 @@ static U32 ZSTD_insertBt1(
|
|
388
462
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
389
463
|
const BYTE* const prefixStart = base + dictLimit;
|
390
464
|
const BYTE* match;
|
391
|
-
const U32
|
392
|
-
const U32 btLow = btMask >=
|
393
|
-
U32* smallerPtr = bt + 2*(
|
465
|
+
const U32 curr = (U32)(ip-base);
|
466
|
+
const U32 btLow = btMask >= curr ? 0 : curr - btMask;
|
467
|
+
U32* smallerPtr = bt + 2*(curr&btMask);
|
394
468
|
U32* largerPtr = smallerPtr + 1;
|
395
469
|
U32 dummy32; /* to be nullified at the end */
|
396
|
-
|
397
|
-
|
470
|
+
/* windowLow is based on target because
|
471
|
+
* we only need positions that will be in the window at the end of the tree update.
|
472
|
+
*/
|
473
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
|
474
|
+
U32 matchEndIdx = curr+8+1;
|
398
475
|
size_t bestLength = 8;
|
399
476
|
U32 nbCompares = 1U << cParams->searchLog;
|
400
477
|
#ifdef ZSTD_C_PREDICT
|
401
|
-
U32 predictedSmall = *(bt + 2*((
|
402
|
-
U32 predictedLarge = *(bt + 2*((
|
478
|
+
U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
|
479
|
+
U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
|
403
480
|
predictedSmall += (predictedSmall>0);
|
404
481
|
predictedLarge += (predictedLarge>0);
|
405
482
|
#endif /* ZSTD_C_PREDICT */
|
406
483
|
|
407
|
-
DEBUGLOG(8, "ZSTD_insertBt1 (%u)",
|
484
|
+
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
|
408
485
|
|
486
|
+
assert(curr <= target);
|
409
487
|
assert(ip <= iend-8); /* required for h calculation */
|
410
|
-
hashTable[h] =
|
488
|
+
hashTable[h] = curr; /* Update Hash Table */
|
411
489
|
|
412
490
|
assert(windowLow > 0);
|
413
|
-
|
491
|
+
for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
|
414
492
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
415
493
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
416
|
-
assert(matchIndex <
|
494
|
+
assert(matchIndex < curr);
|
417
495
|
|
418
496
|
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
|
419
497
|
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
|
@@ -476,12 +554,13 @@ static U32 ZSTD_insertBt1(
|
|
476
554
|
*smallerPtr = *largerPtr = 0;
|
477
555
|
{ U32 positions = 0;
|
478
556
|
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
|
479
|
-
assert(matchEndIdx >
|
480
|
-
return MAX(positions, matchEndIdx - (
|
557
|
+
assert(matchEndIdx > curr + 8);
|
558
|
+
return MAX(positions, matchEndIdx - (curr + 8));
|
481
559
|
}
|
482
560
|
}
|
483
561
|
|
484
562
|
FORCE_INLINE_TEMPLATE
|
563
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
485
564
|
void ZSTD_updateTree_internal(
|
486
565
|
ZSTD_matchState_t* ms,
|
487
566
|
const BYTE* const ip, const BYTE* const iend,
|
@@ -490,11 +569,11 @@ void ZSTD_updateTree_internal(
|
|
490
569
|
const BYTE* const base = ms->window.base;
|
491
570
|
U32 const target = (U32)(ip - base);
|
492
571
|
U32 idx = ms->nextToUpdate;
|
493
|
-
DEBUGLOG(
|
572
|
+
DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
|
494
573
|
idx, target, dictMode);
|
495
574
|
|
496
575
|
while(idx < target) {
|
497
|
-
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
|
576
|
+
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
|
498
577
|
assert(idx < (U32)(idx + forward));
|
499
578
|
idx += forward;
|
500
579
|
}
|
@@ -508,20 +587,23 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
|
|
508
587
|
}
|
509
588
|
|
510
589
|
FORCE_INLINE_TEMPLATE
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
590
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
591
|
+
U32
|
592
|
+
ZSTD_insertBtAndGetAllMatches (
|
593
|
+
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
|
594
|
+
ZSTD_matchState_t* ms,
|
595
|
+
U32* nextToUpdate3,
|
596
|
+
const BYTE* const ip, const BYTE* const iLimit,
|
597
|
+
const ZSTD_dictMode_e dictMode,
|
598
|
+
const U32 rep[ZSTD_REP_NUM],
|
599
|
+
const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
|
600
|
+
const U32 lengthToBeat,
|
601
|
+
const U32 mls /* template */)
|
520
602
|
{
|
521
603
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
522
604
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
523
605
|
const BYTE* const base = ms->window.base;
|
524
|
-
U32 const
|
606
|
+
U32 const curr = (U32)(ip-base);
|
525
607
|
U32 const hashLog = cParams->hashLog;
|
526
608
|
U32 const minMatch = (mls==3) ? 3 : 4;
|
527
609
|
U32* const hashTable = ms->hashTable;
|
@@ -535,12 +617,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
535
617
|
U32 const dictLimit = ms->window.dictLimit;
|
536
618
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
537
619
|
const BYTE* const prefixStart = base + dictLimit;
|
538
|
-
U32 const btLow = (btMask >=
|
539
|
-
U32 const windowLow = ZSTD_getLowestMatchIndex(ms,
|
620
|
+
U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
|
621
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
|
540
622
|
U32 const matchLow = windowLow ? windowLow : 1;
|
541
|
-
U32* smallerPtr = bt + 2*(
|
542
|
-
U32* largerPtr = bt + 2*(
|
543
|
-
U32 matchEndIdx =
|
623
|
+
U32* smallerPtr = bt + 2*(curr&btMask);
|
624
|
+
U32* largerPtr = bt + 2*(curr&btMask) + 1;
|
625
|
+
U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
|
544
626
|
U32 dummy32; /* to be nullified at the end */
|
545
627
|
U32 mnum = 0;
|
546
628
|
U32 nbCompares = 1U << cParams->searchLog;
|
@@ -559,7 +641,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
559
641
|
U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
|
560
642
|
|
561
643
|
size_t bestLength = lengthToBeat-1;
|
562
|
-
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u",
|
644
|
+
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
|
563
645
|
|
564
646
|
/* check repCode */
|
565
647
|
assert(ll0 <= 1); /* necessarily 1 or 0 */
|
@@ -567,29 +649,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
567
649
|
U32 repCode;
|
568
650
|
for (repCode = ll0; repCode < lastR; repCode++) {
|
569
651
|
U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
570
|
-
U32 const repIndex =
|
652
|
+
U32 const repIndex = curr - repOffset;
|
571
653
|
U32 repLen = 0;
|
572
|
-
assert(
|
573
|
-
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ <
|
654
|
+
assert(curr >= dictLimit);
|
655
|
+
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
|
574
656
|
/* We must validate the repcode offset because when we're using a dictionary the
|
575
657
|
* valid offset range shrinks when the dictionary goes out of bounds.
|
576
658
|
*/
|
577
659
|
if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
|
578
660
|
repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
|
579
661
|
}
|
580
|
-
} else { /* repIndex < dictLimit || repIndex >=
|
662
|
+
} else { /* repIndex < dictLimit || repIndex >= curr */
|
581
663
|
const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
|
582
664
|
dmsBase + repIndex - dmsIndexDelta :
|
583
665
|
dictBase + repIndex;
|
584
|
-
assert(
|
666
|
+
assert(curr >= windowLow);
|
585
667
|
if ( dictMode == ZSTD_extDict
|
586
|
-
&& ( ((repOffset-1) /*intentional overflow*/ <
|
668
|
+
&& ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
|
587
669
|
& (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
|
588
670
|
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
589
671
|
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
|
590
672
|
}
|
591
673
|
if (dictMode == ZSTD_dictMatchState
|
592
|
-
&& ( ((repOffset-1) /*intentional overflow*/ <
|
674
|
+
&& ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
|
593
675
|
& ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
|
594
676
|
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
595
677
|
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
|
@@ -599,7 +681,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
599
681
|
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
|
600
682
|
repCode, ll0, repOffset, repLen);
|
601
683
|
bestLength = repLen;
|
602
|
-
matches[mnum].off = repCode - ll0;
|
684
|
+
matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
|
603
685
|
matches[mnum].len = (U32)repLen;
|
604
686
|
mnum++;
|
605
687
|
if ( (repLen > sufficient_len)
|
@@ -611,7 +693,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
611
693
|
if ((mls == 3) /*static*/ && (bestLength < mls)) {
|
612
694
|
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
|
613
695
|
if ((matchIndex3 >= matchLow)
|
614
|
-
& (
|
696
|
+
& (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
|
615
697
|
size_t mlen;
|
616
698
|
if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
|
617
699
|
const BYTE* const match = base + matchIndex3;
|
@@ -626,26 +708,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
626
708
|
DEBUGLOG(8, "found small match with hlog3, of length %u",
|
627
709
|
(U32)mlen);
|
628
710
|
bestLength = mlen;
|
629
|
-
assert(
|
711
|
+
assert(curr > matchIndex3);
|
630
712
|
assert(mnum==0); /* no prior solution */
|
631
|
-
matches[0].off = (
|
713
|
+
matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
|
632
714
|
matches[0].len = (U32)mlen;
|
633
715
|
mnum = 1;
|
634
716
|
if ( (mlen > sufficient_len) |
|
635
717
|
(ip+mlen == iLimit) ) { /* best possible length */
|
636
|
-
ms->nextToUpdate =
|
718
|
+
ms->nextToUpdate = curr+1; /* skip insertion */
|
637
719
|
return 1;
|
638
720
|
} } }
|
639
721
|
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
|
640
|
-
}
|
722
|
+
} /* if (mls == 3) */
|
641
723
|
|
642
|
-
hashTable[h] =
|
724
|
+
hashTable[h] = curr; /* Update Hash Table */
|
643
725
|
|
644
|
-
|
726
|
+
for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
|
645
727
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
646
728
|
const BYTE* match;
|
647
729
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
648
|
-
assert(
|
730
|
+
assert(curr > matchIndex);
|
649
731
|
|
650
732
|
if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
|
651
733
|
assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
|
@@ -661,21 +743,20 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
661
743
|
}
|
662
744
|
|
663
745
|
if (matchLength > bestLength) {
|
664
|
-
DEBUGLOG(8, "found match of length %u at distance %u (
|
665
|
-
(U32)matchLength,
|
746
|
+
DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
|
747
|
+
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
|
666
748
|
assert(matchEndIdx > matchIndex);
|
667
749
|
if (matchLength > matchEndIdx - matchIndex)
|
668
750
|
matchEndIdx = matchIndex + (U32)matchLength;
|
669
751
|
bestLength = matchLength;
|
670
|
-
matches[mnum].off = (
|
752
|
+
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
|
671
753
|
matches[mnum].len = (U32)matchLength;
|
672
754
|
mnum++;
|
673
755
|
if ( (matchLength > ZSTD_OPT_NUM)
|
674
756
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
675
757
|
if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
|
676
758
|
break; /* drop, to preserve bt consistency (miss a little bit of compression) */
|
677
|
-
|
678
|
-
}
|
759
|
+
} }
|
679
760
|
|
680
761
|
if (match[matchLength] < ip[matchLength]) {
|
681
762
|
/* match smaller than current */
|
@@ -694,12 +775,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
694
775
|
|
695
776
|
*smallerPtr = *largerPtr = 0;
|
696
777
|
|
778
|
+
assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
|
697
779
|
if (dictMode == ZSTD_dictMatchState && nbCompares) {
|
698
780
|
size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
|
699
781
|
U32 dictMatchIndex = dms->hashTable[dmsH];
|
700
782
|
const U32* const dmsBt = dms->chainTable;
|
701
783
|
commonLengthSmaller = commonLengthLarger = 0;
|
702
|
-
|
784
|
+
for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
|
703
785
|
const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
|
704
786
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
705
787
|
const BYTE* match = dmsBase + dictMatchIndex;
|
@@ -709,19 +791,18 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
709
791
|
|
710
792
|
if (matchLength > bestLength) {
|
711
793
|
matchIndex = dictMatchIndex + dmsIndexDelta;
|
712
|
-
DEBUGLOG(8, "found dms match of length %u at distance %u (
|
713
|
-
(U32)matchLength,
|
794
|
+
DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
|
795
|
+
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
|
714
796
|
if (matchLength > matchEndIdx - matchIndex)
|
715
797
|
matchEndIdx = matchIndex + (U32)matchLength;
|
716
798
|
bestLength = matchLength;
|
717
|
-
matches[mnum].off = (
|
799
|
+
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
|
718
800
|
matches[mnum].len = (U32)matchLength;
|
719
801
|
mnum++;
|
720
802
|
if ( (matchLength > ZSTD_OPT_NUM)
|
721
803
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
722
804
|
break; /* drop, to guarantee consistency (miss a little bit of compression) */
|
723
|
-
|
724
|
-
}
|
805
|
+
} }
|
725
806
|
|
726
807
|
if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
|
727
808
|
if (match[matchLength] < ip[matchLength]) {
|
@@ -731,52 +812,244 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
731
812
|
/* match is larger than current */
|
732
813
|
commonLengthLarger = matchLength;
|
733
814
|
dictMatchIndex = nextPtr[0];
|
734
|
-
|
735
|
-
}
|
736
|
-
}
|
815
|
+
} } } /* if (dictMode == ZSTD_dictMatchState) */
|
737
816
|
|
738
|
-
assert(matchEndIdx >
|
817
|
+
assert(matchEndIdx > curr+8);
|
739
818
|
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
740
819
|
return mnum;
|
741
820
|
}
|
742
821
|
|
822
|
+
typedef U32 (*ZSTD_getAllMatchesFn)(
|
823
|
+
ZSTD_match_t*,
|
824
|
+
ZSTD_matchState_t*,
|
825
|
+
U32*,
|
826
|
+
const BYTE*,
|
827
|
+
const BYTE*,
|
828
|
+
const U32 rep[ZSTD_REP_NUM],
|
829
|
+
U32 const ll0,
|
830
|
+
U32 const lengthToBeat);
|
743
831
|
|
744
|
-
FORCE_INLINE_TEMPLATE
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
832
|
+
FORCE_INLINE_TEMPLATE
|
833
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
834
|
+
U32 ZSTD_btGetAllMatches_internal(
|
835
|
+
ZSTD_match_t* matches,
|
836
|
+
ZSTD_matchState_t* ms,
|
837
|
+
U32* nextToUpdate3,
|
838
|
+
const BYTE* ip,
|
839
|
+
const BYTE* const iHighLimit,
|
840
|
+
const U32 rep[ZSTD_REP_NUM],
|
841
|
+
U32 const ll0,
|
842
|
+
U32 const lengthToBeat,
|
843
|
+
const ZSTD_dictMode_e dictMode,
|
844
|
+
const U32 mls)
|
752
845
|
{
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
ZSTD_updateTree_internal(ms, ip, iHighLimit,
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
846
|
+
assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
|
847
|
+
DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
|
848
|
+
if (ip < ms->window.base + ms->nextToUpdate)
|
849
|
+
return 0; /* skipped area */
|
850
|
+
ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
|
851
|
+
return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
|
852
|
+
}
|
853
|
+
|
854
|
+
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
|
855
|
+
|
856
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
|
857
|
+
static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
|
858
|
+
ZSTD_match_t* matches, \
|
859
|
+
ZSTD_matchState_t* ms, \
|
860
|
+
U32* nextToUpdate3, \
|
861
|
+
const BYTE* ip, \
|
862
|
+
const BYTE* const iHighLimit, \
|
863
|
+
const U32 rep[ZSTD_REP_NUM], \
|
864
|
+
U32 const ll0, \
|
865
|
+
U32 const lengthToBeat) \
|
866
|
+
{ \
|
867
|
+
return ZSTD_btGetAllMatches_internal( \
|
868
|
+
matches, ms, nextToUpdate3, ip, iHighLimit, \
|
869
|
+
rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
|
870
|
+
}
|
871
|
+
|
872
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
|
873
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
|
874
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
|
875
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
|
876
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
|
877
|
+
|
878
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
|
879
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
|
880
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
|
881
|
+
|
882
|
+
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
|
883
|
+
{ \
|
884
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
|
885
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
|
886
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
|
887
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
|
888
|
+
}
|
889
|
+
|
890
|
+
static ZSTD_getAllMatchesFn
|
891
|
+
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
|
892
|
+
{
|
893
|
+
ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
|
894
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
|
895
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
|
896
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
|
897
|
+
};
|
898
|
+
U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
|
899
|
+
assert((U32)dictMode < 3);
|
900
|
+
assert(mls - 3 < 4);
|
901
|
+
return getAllMatchesFns[(int)dictMode][mls - 3];
|
902
|
+
}
|
903
|
+
|
904
|
+
/*************************
|
905
|
+
* LDM helper functions *
|
906
|
+
*************************/
|
907
|
+
|
908
|
+
/* Struct containing info needed to make decision about ldm inclusion */
|
909
|
+
typedef struct {
|
910
|
+
rawSeqStore_t seqStore; /* External match candidates store for this block */
|
911
|
+
U32 startPosInBlock; /* Start position of the current match candidate */
|
912
|
+
U32 endPosInBlock; /* End position of the current match candidate */
|
913
|
+
U32 offset; /* Offset of the match candidate */
|
914
|
+
} ZSTD_optLdm_t;
|
915
|
+
|
916
|
+
/* ZSTD_optLdm_skipRawSeqStoreBytes():
|
917
|
+
* Moves forward in @rawSeqStore by @nbBytes,
|
918
|
+
* which will update the fields 'pos' and 'posInSequence'.
|
919
|
+
*/
|
920
|
+
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
|
921
|
+
{
|
922
|
+
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
|
923
|
+
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
|
924
|
+
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
|
925
|
+
if (currPos >= currSeq.litLength + currSeq.matchLength) {
|
926
|
+
currPos -= currSeq.litLength + currSeq.matchLength;
|
927
|
+
rawSeqStore->pos++;
|
928
|
+
} else {
|
929
|
+
rawSeqStore->posInSequence = currPos;
|
930
|
+
break;
|
931
|
+
}
|
932
|
+
}
|
933
|
+
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
|
934
|
+
rawSeqStore->posInSequence = 0;
|
766
935
|
}
|
767
936
|
}
|
768
937
|
|
938
|
+
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
|
939
|
+
* Calculates the beginning and end of the next match in the current block.
|
940
|
+
* Updates 'pos' and 'posInSequence' of the ldmSeqStore.
|
941
|
+
*/
|
942
|
+
static void
|
943
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
|
944
|
+
U32 blockBytesRemaining)
|
945
|
+
{
|
946
|
+
rawSeq currSeq;
|
947
|
+
U32 currBlockEndPos;
|
948
|
+
U32 literalsBytesRemaining;
|
949
|
+
U32 matchBytesRemaining;
|
950
|
+
|
951
|
+
/* Setting match end position to MAX to ensure we never use an LDM during this block */
|
952
|
+
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
953
|
+
optLdm->startPosInBlock = UINT_MAX;
|
954
|
+
optLdm->endPosInBlock = UINT_MAX;
|
955
|
+
return;
|
956
|
+
}
|
957
|
+
/* Calculate appropriate bytes left in matchLength and litLength
|
958
|
+
* after adjusting based on ldmSeqStore->posInSequence */
|
959
|
+
currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
|
960
|
+
assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
|
961
|
+
currBlockEndPos = currPosInBlock + blockBytesRemaining;
|
962
|
+
literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
|
963
|
+
currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
|
964
|
+
0;
|
965
|
+
matchBytesRemaining = (literalsBytesRemaining == 0) ?
|
966
|
+
currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
|
967
|
+
currSeq.matchLength;
|
968
|
+
|
969
|
+
/* If there are more literal bytes than bytes remaining in block, no ldm is possible */
|
970
|
+
if (literalsBytesRemaining >= blockBytesRemaining) {
|
971
|
+
optLdm->startPosInBlock = UINT_MAX;
|
972
|
+
optLdm->endPosInBlock = UINT_MAX;
|
973
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
|
974
|
+
return;
|
975
|
+
}
|
769
976
|
|
770
|
-
|
771
|
-
|
772
|
-
|
977
|
+
/* Matches may be < MINMATCH by this process. In that case, we will reject them
|
978
|
+
when we are deciding whether or not to add the ldm */
|
979
|
+
optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
|
980
|
+
optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
|
981
|
+
optLdm->offset = currSeq.offset;
|
982
|
+
|
983
|
+
if (optLdm->endPosInBlock > currBlockEndPos) {
|
984
|
+
/* Match ends after the block ends, we can't use the whole match */
|
985
|
+
optLdm->endPosInBlock = currBlockEndPos;
|
986
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
|
987
|
+
} else {
|
988
|
+
/* Consume nb of bytes equal to size of sequence left */
|
989
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
|
990
|
+
}
|
991
|
+
}
|
992
|
+
|
993
|
+
/* ZSTD_optLdm_maybeAddMatch():
|
994
|
+
* Adds a match if it's long enough,
|
995
|
+
* based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
|
996
|
+
* into 'matches'. Maintains the correct ordering of 'matches'.
|
997
|
+
*/
|
998
|
+
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
999
|
+
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
|
1000
|
+
{
|
1001
|
+
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
|
1002
|
+
/* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
|
1003
|
+
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
|
1004
|
+
|
1005
|
+
/* Ensure that current block position is not outside of the match */
|
1006
|
+
if (currPosInBlock < optLdm->startPosInBlock
|
1007
|
+
|| currPosInBlock >= optLdm->endPosInBlock
|
1008
|
+
|| candidateMatchLength < MINMATCH) {
|
1009
|
+
return;
|
1010
|
+
}
|
773
1011
|
|
1012
|
+
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
|
1013
|
+
U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
|
1014
|
+
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
|
1015
|
+
candidateOffBase, candidateMatchLength, currPosInBlock);
|
1016
|
+
matches[*nbMatches].len = candidateMatchLength;
|
1017
|
+
matches[*nbMatches].off = candidateOffBase;
|
1018
|
+
(*nbMatches)++;
|
1019
|
+
}
|
1020
|
+
}
|
774
1021
|
|
775
|
-
|
1022
|
+
/* ZSTD_optLdm_processMatchCandidate():
|
1023
|
+
* Wrapper function to update ldm seq store and call ldm functions as necessary.
|
1024
|
+
*/
|
1025
|
+
static void
|
1026
|
+
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
|
1027
|
+
ZSTD_match_t* matches, U32* nbMatches,
|
1028
|
+
U32 currPosInBlock, U32 remainingBytes)
|
776
1029
|
{
|
777
|
-
|
1030
|
+
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
1031
|
+
return;
|
1032
|
+
}
|
1033
|
+
|
1034
|
+
if (currPosInBlock >= optLdm->endPosInBlock) {
|
1035
|
+
if (currPosInBlock > optLdm->endPosInBlock) {
|
1036
|
+
/* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
|
1037
|
+
* at the end of a match from the ldm seq store, and will often be some bytes
|
1038
|
+
* over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
|
1039
|
+
*/
|
1040
|
+
U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
1041
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
|
1042
|
+
}
|
1043
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
|
1044
|
+
}
|
1045
|
+
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
|
778
1046
|
}
|
779
1047
|
|
1048
|
+
|
1049
|
+
/*-*******************************
|
1050
|
+
* Optimal parser
|
1051
|
+
*********************************/
|
1052
|
+
|
780
1053
|
#if 0 /* debug */
|
781
1054
|
|
782
1055
|
static void
|
@@ -794,7 +1067,13 @@ listStats(const U32* table, int lastEltID)
|
|
794
1067
|
|
795
1068
|
#endif
|
796
1069
|
|
797
|
-
|
1070
|
+
#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
|
1071
|
+
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
|
1072
|
+
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
|
1073
|
+
|
1074
|
+
FORCE_INLINE_TEMPLATE
|
1075
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1076
|
+
size_t
|
798
1077
|
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
799
1078
|
seqStore_t* seqStore,
|
800
1079
|
U32 rep[ZSTD_REP_NUM],
|
@@ -812,13 +1091,22 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
812
1091
|
const BYTE* const prefixStart = base + ms->window.dictLimit;
|
813
1092
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
814
1093
|
|
1094
|
+
ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
|
1095
|
+
|
815
1096
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
816
1097
|
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
|
817
1098
|
U32 nextToUpdate3 = ms->nextToUpdate;
|
818
1099
|
|
819
1100
|
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
|
820
1101
|
ZSTD_match_t* const matches = optStatePtr->matchTable;
|
821
|
-
ZSTD_optimal_t
|
1102
|
+
ZSTD_optimal_t lastStretch;
|
1103
|
+
ZSTD_optLdm_t optLdm;
|
1104
|
+
|
1105
|
+
ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
|
1106
|
+
|
1107
|
+
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
|
1108
|
+
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
|
1109
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
|
822
1110
|
|
823
1111
|
/* init */
|
824
1112
|
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
|
@@ -834,102 +1122,141 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
834
1122
|
/* find first match */
|
835
1123
|
{ U32 const litlen = (U32)(ip - anchor);
|
836
1124
|
U32 const ll0 = !litlen;
|
837
|
-
U32
|
838
|
-
|
1125
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
|
1126
|
+
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
1127
|
+
(U32)(ip-istart), (U32)(iend-ip));
|
1128
|
+
if (!nbMatches) {
|
1129
|
+
DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
|
1130
|
+
ip++;
|
1131
|
+
continue;
|
1132
|
+
}
|
1133
|
+
|
1134
|
+
/* Match found: let's store this solution, and eventually find more candidates.
|
1135
|
+
* During this forward pass, @opt is used to store stretches,
|
1136
|
+
* defined as "a match followed by N literals".
|
1137
|
+
* Note how this is different from a Sequence, which is "N literals followed by a match".
|
1138
|
+
* Storing stretches allows us to store different match predecessors
|
1139
|
+
* for each literal position part of a literals run. */
|
839
1140
|
|
840
1141
|
/* initialize opt[0] */
|
841
|
-
|
842
|
-
opt[0].mlen = 0; /* means is_a_literal */
|
1142
|
+
opt[0].mlen = 0; /* there are only literals so far */
|
843
1143
|
opt[0].litlen = litlen;
|
844
|
-
/*
|
845
|
-
* it is static for the duration of the forward pass, and is included
|
846
|
-
* in every price.
|
847
|
-
*
|
1144
|
+
/* No need to include the actual price of the literals before the first match
|
1145
|
+
* because it is static for the duration of the forward pass, and is included
|
1146
|
+
* in every subsequent price. But, we include the literal length because
|
1147
|
+
* the cost variation of litlen depends on the value of litlen.
|
848
1148
|
*/
|
849
|
-
opt[0].price =
|
1149
|
+
opt[0].price = LL_PRICE(litlen);
|
1150
|
+
ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
|
1151
|
+
ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
|
850
1152
|
|
851
1153
|
/* large match -> immediate encoding */
|
852
1154
|
{ U32 const maxML = matches[nbMatches-1].len;
|
853
|
-
U32 const
|
854
|
-
DEBUGLOG(6, "found %u matches of maxLength=%u and
|
855
|
-
nbMatches, maxML,
|
1155
|
+
U32 const maxOffBase = matches[nbMatches-1].off;
|
1156
|
+
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
|
1157
|
+
nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
|
856
1158
|
|
857
1159
|
if (maxML > sufficient_len) {
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
DEBUGLOG(6, "large match (%u>%u)
|
1160
|
+
lastStretch.litlen = 0;
|
1161
|
+
lastStretch.mlen = maxML;
|
1162
|
+
lastStretch.off = maxOffBase;
|
1163
|
+
DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
|
862
1164
|
maxML, sufficient_len);
|
863
1165
|
cur = 0;
|
864
|
-
last_pos =
|
1166
|
+
last_pos = maxML;
|
865
1167
|
goto _shortestPath;
|
866
1168
|
} }
|
867
1169
|
|
868
1170
|
/* set prices for first matches starting position == 0 */
|
869
|
-
|
870
|
-
|
1171
|
+
assert(opt[0].price >= 0);
|
1172
|
+
{ U32 pos;
|
871
1173
|
U32 matchNb;
|
872
1174
|
for (pos = 1; pos < minMatch; pos++) {
|
873
|
-
opt[pos].price = ZSTD_MAX_PRICE;
|
1175
|
+
opt[pos].price = ZSTD_MAX_PRICE;
|
1176
|
+
opt[pos].mlen = 0;
|
1177
|
+
opt[pos].litlen = litlen + pos;
|
874
1178
|
}
|
875
1179
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
876
|
-
U32 const
|
1180
|
+
U32 const offBase = matches[matchNb].off;
|
877
1181
|
U32 const end = matches[matchNb].len;
|
878
1182
|
for ( ; pos <= end ; pos++ ) {
|
879
|
-
|
880
|
-
|
1183
|
+
int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
|
1184
|
+
int const sequencePrice = opt[0].price + matchPrice;
|
881
1185
|
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
|
882
1186
|
pos, ZSTD_fCost(sequencePrice));
|
883
1187
|
opt[pos].mlen = pos;
|
884
|
-
opt[pos].off =
|
885
|
-
opt[pos].litlen =
|
886
|
-
opt[pos].price = sequencePrice;
|
887
|
-
|
1188
|
+
opt[pos].off = offBase;
|
1189
|
+
opt[pos].litlen = 0; /* end of match */
|
1190
|
+
opt[pos].price = sequencePrice + LL_PRICE(0);
|
1191
|
+
}
|
1192
|
+
}
|
888
1193
|
last_pos = pos-1;
|
1194
|
+
opt[pos].price = ZSTD_MAX_PRICE;
|
889
1195
|
}
|
890
1196
|
}
|
891
1197
|
|
892
1198
|
/* check further positions */
|
893
1199
|
for (cur = 1; cur <= last_pos; cur++) {
|
894
1200
|
const BYTE* const inr = ip + cur;
|
895
|
-
assert(cur
|
896
|
-
DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
|
1201
|
+
assert(cur <= ZSTD_OPT_NUM);
|
1202
|
+
DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
|
897
1203
|
|
898
1204
|
/* Fix current position with one literal if cheaper */
|
899
|
-
{ U32 const litlen =
|
1205
|
+
{ U32 const litlen = opt[cur-1].litlen + 1;
|
900
1206
|
int const price = opt[cur-1].price
|
901
|
-
+
|
902
|
-
+
|
903
|
-
- ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
1207
|
+
+ LIT_PRICE(ip+cur-1)
|
1208
|
+
+ LL_INCPRICE(litlen);
|
904
1209
|
assert(price < 1000000000); /* overflow check */
|
905
1210
|
if (price <= opt[cur].price) {
|
1211
|
+
ZSTD_optimal_t const prevMatch = opt[cur];
|
906
1212
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
|
907
1213
|
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
|
908
1214
|
opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
|
909
|
-
opt[cur]
|
910
|
-
opt[cur].off = 0;
|
1215
|
+
opt[cur] = opt[cur-1];
|
911
1216
|
opt[cur].litlen = litlen;
|
912
1217
|
opt[cur].price = price;
|
1218
|
+
if ( (optLevel >= 1) /* additional check only for higher modes */
|
1219
|
+
&& (prevMatch.litlen == 0) /* replace a match */
|
1220
|
+
&& (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
|
1221
|
+
&& LIKELY(ip + cur < iend)
|
1222
|
+
) {
|
1223
|
+
/* check next position, in case it would be cheaper */
|
1224
|
+
int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
|
1225
|
+
int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
|
1226
|
+
DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
|
1227
|
+
cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
|
1228
|
+
if ( (with1literal < withMoreLiterals)
|
1229
|
+
&& (with1literal < opt[cur+1].price) ) {
|
1230
|
+
/* update offset history - before it disappears */
|
1231
|
+
U32 const prev = cur - prevMatch.mlen;
|
1232
|
+
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
|
1233
|
+
assert(cur >= prevMatch.mlen);
|
1234
|
+
DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
|
1235
|
+
ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
|
1236
|
+
newReps.rep[0], newReps.rep[1], newReps.rep[2] );
|
1237
|
+
opt[cur+1] = prevMatch; /* mlen & offbase */
|
1238
|
+
ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
|
1239
|
+
opt[cur+1].litlen = 1;
|
1240
|
+
opt[cur+1].price = with1literal;
|
1241
|
+
if (last_pos < cur+1) last_pos = cur+1;
|
1242
|
+
}
|
1243
|
+
}
|
913
1244
|
} else {
|
914
|
-
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)
|
915
|
-
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)
|
916
|
-
opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
|
1245
|
+
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
|
1246
|
+
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
|
917
1247
|
}
|
918
1248
|
}
|
919
1249
|
|
920
|
-
/*
|
921
|
-
*
|
922
|
-
* correct to set the next chunks repcodes during the backward
|
923
|
-
* traversal.
|
1250
|
+
/* Offset history is not updated during match comparison.
|
1251
|
+
* Do it here, now that the match is selected and confirmed.
|
924
1252
|
*/
|
925
1253
|
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
|
926
1254
|
assert(cur >= opt[cur].mlen);
|
927
|
-
if (opt[cur].
|
1255
|
+
if (opt[cur].litlen == 0) {
|
1256
|
+
/* just finished a match => alter offset history */
|
928
1257
|
U32 const prev = cur - opt[cur].mlen;
|
929
|
-
repcodes_t newReps =
|
930
|
-
|
931
|
-
} else {
|
932
|
-
memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
1258
|
+
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
|
1259
|
+
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
933
1260
|
}
|
934
1261
|
|
935
1262
|
/* last match must start at a minimum distance of 8 from oend */
|
@@ -939,33 +1266,36 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
939
1266
|
|
940
1267
|
if ( (optLevel==0) /*static_test*/
|
941
1268
|
&& (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
|
942
|
-
DEBUGLOG(7, "
|
1269
|
+
DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
|
943
1270
|
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
|
944
1271
|
}
|
945
1272
|
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
U32
|
1273
|
+
assert(opt[cur].price >= 0);
|
1274
|
+
{ U32 const ll0 = (opt[cur].litlen == 0);
|
1275
|
+
int const previousPrice = opt[cur].price;
|
1276
|
+
int const basePrice = previousPrice + LL_PRICE(0);
|
1277
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
|
951
1278
|
U32 matchNb;
|
1279
|
+
|
1280
|
+
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
1281
|
+
(U32)(inr-istart), (U32)(iend-inr));
|
1282
|
+
|
952
1283
|
if (!nbMatches) {
|
953
1284
|
DEBUGLOG(7, "rPos:%u : no match found", cur);
|
954
1285
|
continue;
|
955
1286
|
}
|
956
1287
|
|
957
|
-
{ U32 const
|
958
|
-
DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of
|
959
|
-
inr-istart, cur, nbMatches,
|
960
|
-
|
961
|
-
if ( (
|
962
|
-
|| (cur +
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
last_pos = cur +
|
968
|
-
if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
|
1288
|
+
{ U32 const longestML = matches[nbMatches-1].len;
|
1289
|
+
DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
|
1290
|
+
inr-istart, cur, nbMatches, longestML);
|
1291
|
+
|
1292
|
+
if ( (longestML > sufficient_len)
|
1293
|
+
|| (cur + longestML >= ZSTD_OPT_NUM)
|
1294
|
+
|| (ip + cur + longestML >= iend) ) {
|
1295
|
+
lastStretch.mlen = longestML;
|
1296
|
+
lastStretch.off = matches[nbMatches-1].off;
|
1297
|
+
lastStretch.litlen = 0;
|
1298
|
+
last_pos = cur + longestML;
|
969
1299
|
goto _shortestPath;
|
970
1300
|
} }
|
971
1301
|
|
@@ -976,20 +1306,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
976
1306
|
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
|
977
1307
|
U32 mlen;
|
978
1308
|
|
979
|
-
DEBUGLOG(7, "testing match %u =>
|
980
|
-
matchNb, matches[matchNb].off, lastML, litlen);
|
1309
|
+
DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
|
1310
|
+
matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
|
981
1311
|
|
982
1312
|
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
|
983
1313
|
U32 const pos = cur + mlen;
|
984
|
-
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
1314
|
+
int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
985
1315
|
|
986
1316
|
if ((pos > last_pos) || (price < opt[pos].price)) {
|
987
1317
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
|
988
1318
|
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
|
989
|
-
while (last_pos < pos) {
|
1319
|
+
while (last_pos < pos) {
|
1320
|
+
/* fill empty positions, for future comparisons */
|
1321
|
+
last_pos++;
|
1322
|
+
opt[last_pos].price = ZSTD_MAX_PRICE;
|
1323
|
+
opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
|
1324
|
+
}
|
990
1325
|
opt[pos].mlen = mlen;
|
991
1326
|
opt[pos].off = offset;
|
992
|
-
opt[pos].litlen =
|
1327
|
+
opt[pos].litlen = 0;
|
993
1328
|
opt[pos].price = price;
|
994
1329
|
} else {
|
995
1330
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
|
@@ -997,52 +1332,86 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
997
1332
|
if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
|
998
1333
|
}
|
999
1334
|
} } }
|
1335
|
+
opt[last_pos+1].price = ZSTD_MAX_PRICE;
|
1000
1336
|
} /* for (cur = 1; cur <= last_pos; cur++) */
|
1001
1337
|
|
1002
|
-
|
1003
|
-
cur
|
1004
|
-
|
1338
|
+
lastStretch = opt[last_pos];
|
1339
|
+
assert(cur >= lastStretch.mlen);
|
1340
|
+
cur = last_pos - lastStretch.mlen;
|
1005
1341
|
|
1006
1342
|
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
1007
1343
|
assert(opt[0].mlen == 0);
|
1344
|
+
assert(last_pos >= lastStretch.mlen);
|
1345
|
+
assert(cur == last_pos - lastStretch.mlen);
|
1008
1346
|
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1015
|
-
|
1347
|
+
if (lastStretch.mlen==0) {
|
1348
|
+
/* no solution : all matches have been converted into literals */
|
1349
|
+
assert(lastStretch.litlen == (ip - anchor) + last_pos);
|
1350
|
+
ip += last_pos;
|
1351
|
+
continue;
|
1352
|
+
}
|
1353
|
+
assert(lastStretch.off > 0);
|
1354
|
+
|
1355
|
+
/* Update offset history */
|
1356
|
+
if (lastStretch.litlen == 0) {
|
1357
|
+
/* finishing on a match : update offset history */
|
1358
|
+
repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
|
1359
|
+
ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
|
1016
1360
|
} else {
|
1017
|
-
|
1361
|
+
ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
|
1362
|
+
assert(cur >= lastStretch.litlen);
|
1363
|
+
cur -= lastStretch.litlen;
|
1018
1364
|
}
|
1019
1365
|
|
1020
|
-
|
1366
|
+
/* Let's write the shortest path solution.
|
1367
|
+
* It is stored in @opt in reverse order,
|
1368
|
+
* starting from @storeEnd (==cur+2),
|
1369
|
+
* effectively partially @opt overwriting.
|
1370
|
+
* Content is changed too:
|
1371
|
+
* - So far, @opt stored stretches, aka a match followed by literals
|
1372
|
+
* - Now, it will store sequences, aka literals followed by a match
|
1373
|
+
*/
|
1374
|
+
{ U32 const storeEnd = cur + 2;
|
1021
1375
|
U32 storeStart = storeEnd;
|
1022
|
-
U32
|
1376
|
+
U32 stretchPos = cur;
|
1023
1377
|
|
1024
1378
|
DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
|
1025
1379
|
last_pos, cur); (void)last_pos;
|
1026
|
-
assert(storeEnd <
|
1027
|
-
DEBUGLOG(6, "last
|
1028
|
-
storeEnd,
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1380
|
+
assert(storeEnd < ZSTD_OPT_SIZE);
|
1381
|
+
DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
|
1382
|
+
storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
|
1383
|
+
if (lastStretch.litlen > 0) {
|
1384
|
+
/* last "sequence" is unfinished: just a bunch of literals */
|
1385
|
+
opt[storeEnd].litlen = lastStretch.litlen;
|
1386
|
+
opt[storeEnd].mlen = 0;
|
1387
|
+
storeStart = storeEnd-1;
|
1388
|
+
opt[storeStart] = lastStretch;
|
1389
|
+
} {
|
1390
|
+
opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
|
1391
|
+
storeStart = storeEnd;
|
1392
|
+
}
|
1393
|
+
while (1) {
|
1394
|
+
ZSTD_optimal_t nextStretch = opt[stretchPos];
|
1395
|
+
opt[storeStart].litlen = nextStretch.litlen;
|
1396
|
+
DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
|
1397
|
+
opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
|
1398
|
+
if (nextStretch.mlen == 0) {
|
1399
|
+
/* reaching beginning of segment */
|
1400
|
+
break;
|
1401
|
+
}
|
1032
1402
|
storeStart--;
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
|
1403
|
+
opt[storeStart] = nextStretch; /* note: litlen will be fixed */
|
1404
|
+
assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
|
1405
|
+
stretchPos -= nextStretch.litlen + nextStretch.mlen;
|
1037
1406
|
}
|
1038
1407
|
|
1039
1408
|
/* save sequences */
|
1040
|
-
DEBUGLOG(6, "sending selected sequences into seqStore")
|
1409
|
+
DEBUGLOG(6, "sending selected sequences into seqStore");
|
1041
1410
|
{ U32 storePos;
|
1042
1411
|
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
|
1043
1412
|
U32 const llen = opt[storePos].litlen;
|
1044
1413
|
U32 const mlen = opt[storePos].mlen;
|
1045
|
-
U32 const
|
1414
|
+
U32 const offBase = opt[storePos].off;
|
1046
1415
|
U32 const advance = llen + mlen;
|
1047
1416
|
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
|
1048
1417
|
anchor - istart, (unsigned)llen, (unsigned)mlen);
|
@@ -1054,11 +1423,14 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1054
1423
|
}
|
1055
1424
|
|
1056
1425
|
assert(anchor + llen <= iend);
|
1057
|
-
ZSTD_updateStats(optStatePtr, llen, anchor,
|
1058
|
-
ZSTD_storeSeq(seqStore, llen, anchor, iend,
|
1426
|
+
ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
|
1427
|
+
ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
|
1059
1428
|
anchor += advance;
|
1060
1429
|
ip = anchor;
|
1061
1430
|
} }
|
1431
|
+
DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
|
1432
|
+
|
1433
|
+
/* update all costs */
|
1062
1434
|
ZSTD_setBasePrices(optStatePtr, optLevel);
|
1063
1435
|
}
|
1064
1436
|
} /* while (ip < ilimit) */
|
@@ -1066,53 +1438,54 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1066
1438
|
/* Return the last literals size */
|
1067
1439
|
return (size_t)(iend - anchor);
|
1068
1440
|
}
|
1441
|
+
#endif /* build exclusions */
|
1442
|
+
|
1443
|
+
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
1444
|
+
static size_t ZSTD_compressBlock_opt0(
|
1445
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1446
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
1447
|
+
{
|
1448
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
|
1449
|
+
}
|
1450
|
+
#endif
|
1069
1451
|
|
1452
|
+
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
1453
|
+
static size_t ZSTD_compressBlock_opt2(
|
1454
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1455
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
1456
|
+
{
|
1457
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
|
1458
|
+
}
|
1459
|
+
#endif
|
1070
1460
|
|
1461
|
+
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
1071
1462
|
size_t ZSTD_compressBlock_btopt(
|
1072
1463
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1073
1464
|
const void* src, size_t srcSize)
|
1074
1465
|
{
|
1075
1466
|
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
|
1076
|
-
return
|
1467
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1077
1468
|
}
|
1469
|
+
#endif
|
1078
1470
|
|
1079
1471
|
|
1080
|
-
/* used in 2-pass strategy */
|
1081
|
-
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
|
1082
|
-
{
|
1083
|
-
U32 s, sum=0;
|
1084
|
-
assert(ZSTD_FREQ_DIV+bonus >= 0);
|
1085
|
-
for (s=0; s<lastEltIndex+1; s++) {
|
1086
|
-
table[s] <<= ZSTD_FREQ_DIV+bonus;
|
1087
|
-
table[s]--;
|
1088
|
-
sum += table[s];
|
1089
|
-
}
|
1090
|
-
return sum;
|
1091
|
-
}
|
1092
1472
|
|
1093
|
-
/* used in 2-pass strategy */
|
1094
|
-
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
1095
|
-
{
|
1096
|
-
if (ZSTD_compressedLiterals(optPtr))
|
1097
|
-
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
1098
|
-
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
1099
|
-
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
1100
|
-
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
1101
|
-
}
|
1102
1473
|
|
1474
|
+
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
1103
1475
|
/* ZSTD_initStats_ultra():
|
1104
1476
|
* make a first compression pass, just to seed stats with more accurate starting values.
|
1105
1477
|
* only works on first block, with no dictionary and no ldm.
|
1106
|
-
* this function cannot error,
|
1478
|
+
* this function cannot error out, its narrow contract must be respected.
|
1107
1479
|
*/
|
1108
|
-
static
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1480
|
+
static
|
1481
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1482
|
+
void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
1483
|
+
seqStore_t* seqStore,
|
1484
|
+
U32 rep[ZSTD_REP_NUM],
|
1485
|
+
const void* src, size_t srcSize)
|
1113
1486
|
{
|
1114
1487
|
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
|
1115
|
-
|
1488
|
+
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
|
1116
1489
|
|
1117
1490
|
DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
|
1118
1491
|
assert(ms->opt.litLengthSum == 0); /* first block */
|
@@ -1120,17 +1493,15 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
1120
1493
|
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
|
1121
1494
|
assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
|
1122
1495
|
|
1123
|
-
|
1496
|
+
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
|
1124
1497
|
|
1125
|
-
/* invalidate first scan from history */
|
1498
|
+
/* invalidate first scan from history, only keep entropy stats */
|
1126
1499
|
ZSTD_resetSeqStore(seqStore);
|
1127
1500
|
ms->window.base -= srcSize;
|
1128
1501
|
ms->window.dictLimit += (U32)srcSize;
|
1129
1502
|
ms->window.lowLimit = ms->window.dictLimit;
|
1130
1503
|
ms->nextToUpdate = ms->window.dictLimit;
|
1131
1504
|
|
1132
|
-
/* re-inforce weight of collected statistics */
|
1133
|
-
ZSTD_upscaleStats(&ms->opt);
|
1134
1505
|
}
|
1135
1506
|
|
1136
1507
|
size_t ZSTD_compressBlock_btultra(
|
@@ -1138,20 +1509,20 @@ size_t ZSTD_compressBlock_btultra(
|
|
1138
1509
|
const void* src, size_t srcSize)
|
1139
1510
|
{
|
1140
1511
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
|
1141
|
-
return
|
1512
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1142
1513
|
}
|
1143
1514
|
|
1144
1515
|
size_t ZSTD_compressBlock_btultra2(
|
1145
1516
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1146
1517
|
const void* src, size_t srcSize)
|
1147
1518
|
{
|
1148
|
-
U32 const
|
1519
|
+
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
|
1149
1520
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
|
1150
1521
|
|
1151
|
-
/* 2-
|
1522
|
+
/* 2-passes strategy:
|
1152
1523
|
* this strategy makes a first pass over first block to collect statistics
|
1153
|
-
*
|
1154
|
-
* After 1st pass, function forgets
|
1524
|
+
* in order to seed next round's statistics with it.
|
1525
|
+
* After 1st pass, function forgets history, and starts a new block.
|
1155
1526
|
* Consequently, this can only work if no data has been previously loaded in tables,
|
1156
1527
|
* aka, no dictionary, no prefix, no ldm preprocessing.
|
1157
1528
|
* The compression ratio gain is generally small (~0.5% on first block),
|
@@ -1160,45 +1531,50 @@ size_t ZSTD_compressBlock_btultra2(
|
|
1160
1531
|
if ( (ms->opt.litLengthSum==0) /* first block */
|
1161
1532
|
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
1162
1533
|
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
|
1163
|
-
&& (
|
1164
|
-
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
|
1534
|
+
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
1535
|
+
&& (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
|
1165
1536
|
) {
|
1166
1537
|
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
|
1167
1538
|
}
|
1168
1539
|
|
1169
|
-
return
|
1540
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1170
1541
|
}
|
1542
|
+
#endif
|
1171
1543
|
|
1544
|
+
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
1172
1545
|
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
1173
1546
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1174
1547
|
const void* src, size_t srcSize)
|
1175
1548
|
{
|
1176
|
-
return
|
1549
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
1177
1550
|
}
|
1178
1551
|
|
1179
|
-
size_t
|
1552
|
+
size_t ZSTD_compressBlock_btopt_extDict(
|
1180
1553
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1181
1554
|
const void* src, size_t srcSize)
|
1182
1555
|
{
|
1183
|
-
return
|
1556
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
1184
1557
|
}
|
1558
|
+
#endif
|
1185
1559
|
|
1186
|
-
|
1560
|
+
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
1561
|
+
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
1187
1562
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1188
1563
|
const void* src, size_t srcSize)
|
1189
1564
|
{
|
1190
|
-
return
|
1565
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
1191
1566
|
}
|
1192
1567
|
|
1193
1568
|
size_t ZSTD_compressBlock_btultra_extDict(
|
1194
1569
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1195
1570
|
const void* src, size_t srcSize)
|
1196
1571
|
{
|
1197
|
-
return
|
1572
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
1198
1573
|
}
|
1574
|
+
#endif
|
1199
1575
|
|
1200
1576
|
/* note : no btultra2 variant for extDict nor dictMatchState,
|
1201
1577
|
* because btultra2 is not meant to work with dictionaries
|
1202
1578
|
* and is only specific for the first block (no prefix) */
|
1203
1579
|
|
1204
|
-
}
|
1580
|
+
} // namespace duckdb_zstd
|