duckdb 1.1.2-dev6.0 → 1.1.4-dev11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +5 -54
- package/binding.gyp +73 -52
- package/package.json +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/avg.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/corr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/covar.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/stddev.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/approx_count.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/arg_min_max.cpp +66 -18
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitagg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitstring_agg.cpp +5 -7
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bool.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/kurtosis.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/product.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/skew.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/string_agg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/sum.cpp +13 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approx_top_k.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approximate_quantile.cpp +51 -15
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mad.cpp +25 -10
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mode.cpp +215 -71
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/quantile.cpp +58 -31
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/binned_histogram.cpp +9 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/histogram.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/list.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_avg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_count.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_intercept.cpp +6 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_r2.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_slope.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxx_syy.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxy.cpp +3 -3
- package/src/duckdb/extension/core_functions/core_functions_extension.cpp +85 -0
- package/src/duckdb/{src → extension}/core_functions/function_list.cpp +30 -51
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/corr.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/distributive_functions.hpp +16 -21
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/histogram_helpers.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/holistic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/nested_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_helpers.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_sort_tree.hpp +140 -58
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_state.hpp +50 -43
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_count.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_slope.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/array_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/bit_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/blob_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/date_functions.hpp +22 -55
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/debug_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/enum_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/generic_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/list_functions.hpp +4 -4
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/map_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/math_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/operators_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/random_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/string_functions.hpp +10 -103
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/struct_functions.hpp +1 -19
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/union_functions.hpp +1 -1
- package/src/duckdb/extension/core_functions/include/core_functions_extension.hpp +22 -0
- package/src/duckdb/{src → extension}/core_functions/lambda_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_functions.cpp +11 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_value.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/bit/bitstring.cpp +12 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/base64.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/encode.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/date/age.cpp +9 -3
- package/src/duckdb/extension/core_functions/scalar/date/current.cpp +29 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_diff.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_part.cpp +42 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_sub.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_trunc.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/epoch.cpp +19 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/date/make_date.cpp +40 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/date/time_bucket.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/to_interval.cpp +54 -28
- package/src/duckdb/{src → extension}/core_functions/scalar/debug/vector_type.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/enum/enum_functions.cpp +2 -7
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/alias.cpp +2 -2
- package/src/duckdb/{src/function → extension/core_functions}/scalar/generic/binning.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/can_implicitly_cast.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/current_setting.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/hash.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/least.cpp +30 -10
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/stats.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/system_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/typeof.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/array_slice.cpp +93 -88
- package/src/duckdb/{src → extension}/core_functions/scalar/list/flatten.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_aggregates.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_distance.cpp +8 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_filter.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_has_any_or_all.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_reduce.cpp +5 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_sort.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_transform.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_value.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/range.cpp +7 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/cardinality.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map.cpp +5 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_concat.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_entries.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_extract.cpp +13 -25
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_from_entries.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_keys_values.cpp +11 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/math/numeric.cpp +83 -37
- package/src/duckdb/{src → extension}/core_functions/scalar/operators/bitwise.cpp +19 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/random/random.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/random/setseed.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/ascii.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/bar.cpp +6 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/chr.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/damerau_levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/format_bytes.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hamming.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hex.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/instr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/jaccard.cpp +1 -1
- package/src/duckdb/extension/core_functions/scalar/string/jaro_winkler.cpp +112 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/string/left_right.cpp +6 -6
- package/src/duckdb/{src → extension}/core_functions/scalar/string/levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/pad.cpp +9 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/string/parse_path.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/printf.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/repeat.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/replace.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/reverse.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/starts_with.cpp +5 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/to_base.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/translate.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/trim.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/unicode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/url_encode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/struct/struct_insert.cpp +25 -31
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_extract.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_tag.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_value.cpp +3 -3
- package/src/duckdb/extension/icu/icu-dateadd.cpp +16 -11
- package/src/duckdb/extension/icu/icu-datefunc.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datepart.cpp +8 -5
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +8 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-makedate.cpp +18 -7
- package/src/duckdb/extension/icu/icu-strptime.cpp +9 -3
- package/src/duckdb/extension/icu/icu-table-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-timebucket.cpp +4 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +67 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +60 -5
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -1
- package/src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/normlzr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/rbbinode.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/schriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/stringtriebuilder.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uchriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/brkiter.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/bytestriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/chariter.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/dtintrv.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/locid.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/messagepattern.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/normlzr.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/parsepos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/rbbi.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/schriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/strenum.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringpiece.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringtriebuilder.h +9 -9
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/ucharstriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uchriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uniset.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/unistr.h +12 -12
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uobject.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unifiedcache.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/uniset.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustr_titlecase_brkiter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustrenum.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/alphaindex.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/coll.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/datefmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dcfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen_impl.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtrule.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/fpositer.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measunit.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measure.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +9 -9
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbtz.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/region.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rulebasedcollator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/selfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/simpletz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/sortkey.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tmutamt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames.cpp +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzrule.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/tztrans.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/alphaindex.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/calendar.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/choicfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coll.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/currpinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/datefmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dcfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/decimfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvfmt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtptngen.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fieldpos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fmtable.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fpositer.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measunit.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measure.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/msgfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/numfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbnf.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbtz.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/region.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/search.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/selfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/simpletz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/smpdtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/sortkey.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/stsearch.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tblcoll.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/timezone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutamt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tznames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzrule.h +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tztrans.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/vtzone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp +2 -2
- package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -1
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
- package/src/duckdb/extension/json/include/json_common.hpp +14 -10
- package/src/duckdb/extension/json/include/json_scan.hpp +48 -7
- package/src/duckdb/extension/json/include/json_structure.hpp +2 -1
- package/src/duckdb/extension/json/include/json_transform.hpp +5 -2
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +57 -20
- package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +7 -6
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +6 -5
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +20 -17
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +48 -17
- package/src/duckdb/extension/json/json_functions/read_json.cpp +83 -34
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -3
- package/src/duckdb/extension/json/json_functions.cpp +14 -16
- package/src/duckdb/extension/json/json_scan.cpp +36 -16
- package/src/duckdb/extension/json/json_serializer.cpp +1 -1
- package/src/duckdb/extension/json/serialize_json.cpp +2 -2
- package/src/duckdb/extension/parquet/column_reader.cpp +136 -116
- package/src/duckdb/extension/parquet/column_writer.cpp +870 -604
- package/src/duckdb/extension/parquet/geo_parquet.cpp +4 -5
- package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +0 -4
- package/src/duckdb/extension/parquet/include/column_reader.hpp +24 -19
- package/src/duckdb/extension/parquet/include/column_writer.hpp +7 -5
- package/src/duckdb/extension/parquet/include/decode_utils.hpp +138 -18
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +4 -3
- package/src/duckdb/extension/parquet/include/null_column_reader.hpp +1 -14
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +45 -0
- package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +101 -90
- package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +179 -0
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +2 -3
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +48 -0
- package/src/duckdb/extension/parquet/include/parquet_extension.hpp +8 -0
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +5 -0
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +22 -18
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -5
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +87 -3
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +30 -16
- package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +0 -8
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +1 -42
- package/src/duckdb/extension/parquet/include/thrift_tools.hpp +13 -1
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +4 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +240 -197
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +138 -6
- package/src/duckdb/extension/parquet/parquet_reader.cpp +155 -79
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +258 -38
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +17 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +65 -34
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +4 -0
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +13 -0
- package/src/duckdb/src/catalog/catalog.cpp +272 -97
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +9 -4
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +145 -95
- package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +9 -3
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +15 -0
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +40 -24
- package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry.cpp +3 -0
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +60 -5
- package/src/duckdb/src/catalog/catalog_search_path.cpp +27 -14
- package/src/duckdb/src/catalog/catalog_set.cpp +75 -31
- package/src/duckdb/src/catalog/default/default_functions.cpp +13 -8
- package/src/duckdb/src/catalog/default/default_views.cpp +1 -0
- package/src/duckdb/src/catalog/dependency_manager.cpp +133 -5
- package/src/duckdb/src/catalog/duck_catalog.cpp +17 -9
- package/src/duckdb/src/common/adbc/adbc.cpp +18 -0
- package/src/duckdb/src/common/allocator.cpp +3 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +30 -9
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +63 -82
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +4 -3
- package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +361 -0
- package/src/duckdb/src/common/arrow/arrow_util.cpp +10 -6
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +6 -2
- package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +2 -1
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +27 -14
- package/src/duckdb/src/common/assert.cpp +1 -2
- package/src/duckdb/src/common/bind_helpers.cpp +1 -1
- package/src/duckdb/src/common/box_renderer.cpp +316 -26
- package/src/duckdb/src/common/cgroups.cpp +7 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
- package/src/duckdb/src/common/enum_util.cpp +2865 -6882
- package/src/duckdb/src/common/enums/compression_type.cpp +12 -0
- package/src/duckdb/src/common/enums/metric_type.cpp +24 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +4 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/error_data.cpp +23 -6
- package/src/duckdb/src/common/exception/binder_exception.cpp +1 -1
- package/src/duckdb/src/common/exception.cpp +20 -28
- package/src/duckdb/src/common/extra_type_info.cpp +85 -20
- package/src/duckdb/src/common/file_buffer.cpp +5 -2
- package/src/duckdb/src/common/file_system.cpp +8 -3
- package/src/duckdb/src/common/fsst.cpp +3 -3
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/local_file_system.cpp +169 -60
- package/src/duckdb/src/common/multi_file_list.cpp +4 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +240 -63
- package/src/duckdb/src/common/opener_file_system.cpp +37 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +77 -11
- package/src/duckdb/src/common/operator/string_cast.cpp +6 -2
- package/src/duckdb/src/common/pipe_file_system.cpp +4 -4
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +25 -14
- package/src/duckdb/src/common/radix_partitioning.cpp +17 -16
- package/src/duckdb/src/common/random_engine.cpp +39 -3
- package/src/duckdb/src/common/render_tree.cpp +3 -19
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -58
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +2 -0
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +20 -19
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
- package/src/duckdb/src/common/serializer/memory_stream.cpp +36 -0
- package/src/duckdb/src/common/sort/comparators.cpp +7 -7
- package/src/duckdb/src/common/sort/partition_state.cpp +2 -2
- package/src/duckdb/src/common/stacktrace.cpp +127 -0
- package/src/duckdb/src/common/string_util.cpp +157 -32
- package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +15 -3
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +4 -0
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +71 -8
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +27 -6
- package/src/duckdb/src/common/types/conflict_manager.cpp +21 -7
- package/src/duckdb/src/common/types/date.cpp +39 -25
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -11
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -7
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +17 -17
- package/src/duckdb/src/common/types/timestamp.cpp +70 -33
- package/src/duckdb/src/common/types/uuid.cpp +11 -0
- package/src/duckdb/src/common/types/validity_mask.cpp +16 -5
- package/src/duckdb/src/common/types/value.cpp +357 -199
- package/src/duckdb/src/common/types/varint.cpp +64 -18
- package/src/duckdb/src/common/types/vector.cpp +78 -38
- package/src/duckdb/src/common/types.cpp +199 -92
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +2 -1
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +32 -5
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +3 -1
- package/src/duckdb/src/execution/adaptive_filter.cpp +6 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +410 -111
- package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +6 -0
- package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +4 -3
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_function.cpp +1 -0
- package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +5 -4
- package/src/duckdb/src/execution/expression_executor.cpp +5 -3
- package/src/duckdb/src/execution/index/art/art.cpp +208 -72
- package/src/duckdb/src/execution/index/art/base_leaf.cpp +1 -1
- package/src/duckdb/src/execution/index/art/leaf.cpp +12 -7
- package/src/duckdb/src/execution/index/art/node.cpp +2 -1
- package/src/duckdb/src/execution/index/art/node256_leaf.cpp +6 -6
- package/src/duckdb/src/execution/index/art/plan_art.cpp +50 -55
- package/src/duckdb/src/execution/index/art/prefix.cpp +7 -13
- package/src/duckdb/src/execution/index/bound_index.cpp +30 -5
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +3 -5
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -9
- package/src/duckdb/src/execution/join_hashtable.cpp +254 -158
- package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
- package/src/duckdb/src/execution/operator/aggregate/physical_partitioned_aggregate.cpp +226 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +77 -70
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +114 -50
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +19 -10
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +22 -15
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +95 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +6 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +75 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +40 -12
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +395 -163
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +115 -49
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +66 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +220 -46
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +43 -32
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +54 -119
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +184 -20
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +83 -21
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_validator.cpp +63 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +7 -4
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +3 -2
- package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +9 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +132 -15
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -55
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +284 -154
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +40 -55
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -3
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +298 -227
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +5 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +3 -4
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +32 -19
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +6 -0
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -19
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +11 -27
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +308 -119
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +105 -55
- package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +6 -2
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -1
- package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +15 -6
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +92 -50
- package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +0 -1
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +8 -4
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +54 -22
- package/src/duckdb/src/execution/operator/set/physical_union.cpp +5 -1
- package/src/duckdb/src/execution/physical_operator.cpp +15 -9
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +101 -12
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +11 -140
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +6 -5
- package/src/duckdb/src/execution/physical_plan/plan_export.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +16 -13
- package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_order.cpp +7 -7
- package/src/duckdb/src/execution/physical_plan/plan_prepare.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_projection.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +8 -3
- package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +3 -2
- package/src/duckdb/src/execution/physical_plan_generator.cpp +0 -22
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +136 -116
- package/src/duckdb/src/execution/sample/base_reservoir_sample.cpp +136 -0
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +930 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +6 -12
- package/src/duckdb/src/function/aggregate/distributive/{first.cpp → first_last_any.cpp} +37 -18
- package/src/duckdb/src/{core_functions → function}/aggregate/distributive/minmax.cpp +19 -12
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +72 -13
- package/src/duckdb/src/function/built_in_functions.cpp +85 -2
- package/src/duckdb/src/function/cast/decimal_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/string_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/struct_cast.cpp +81 -49
- package/src/duckdb/src/function/cast/union/from_struct.cpp +7 -5
- package/src/duckdb/src/function/compression_config.cpp +6 -0
- package/src/duckdb/src/function/encoding_function.cpp +134 -0
- package/src/duckdb/src/function/function.cpp +8 -13
- package/src/duckdb/src/function/function_binder.cpp +100 -21
- package/src/duckdb/src/function/function_list.cpp +178 -0
- package/src/duckdb/src/function/macro_function.cpp +4 -4
- package/src/duckdb/src/function/pragma/pragma_functions.cpp +0 -2
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +0 -4
- package/src/duckdb/src/{core_functions/core_functions.cpp → function/register_function_list.cpp} +12 -8
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +62 -23
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +33 -16
- package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +21 -0
- package/src/duckdb/src/{core_functions/scalar/blob → function/scalar}/create_sort_key.cpp +86 -23
- package/src/duckdb/src/{core_functions → function}/scalar/date/strftime.cpp +6 -4
- package/src/duckdb/src/function/scalar/generic/constant_or_null.cpp +5 -7
- package/src/duckdb/src/{core_functions → function}/scalar/generic/error.cpp +3 -1
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +2 -2
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +1 -7
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +27 -21
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +8 -12
- package/src/duckdb/src/function/scalar/list/list_select.cpp +1 -4
- package/src/duckdb/src/function/scalar/list/list_zip.cpp +6 -6
- package/src/duckdb/src/{core_functions → function}/scalar/map/map_contains.cpp +2 -2
- package/src/duckdb/src/function/scalar/nested_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/{operators → operator}/add.cpp +2 -1
- package/src/duckdb/src/function/scalar/{operators → operator}/arithmetic.cpp +195 -127
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +30 -21
- package/src/duckdb/src/function/scalar/strftime_format.cpp +10 -0
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +11 -41
- package/src/duckdb/src/function/scalar/string/concat.cpp +22 -20
- package/src/duckdb/src/function/scalar/string/concat_ws.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/contains.cpp +16 -19
- package/src/duckdb/src/function/scalar/string/length.cpp +38 -24
- package/src/duckdb/src/function/scalar/string/like.cpp +80 -47
- package/src/duckdb/src/{core_functions → function}/scalar/string/md5.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/prefix.cpp +0 -4
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -1
- package/src/duckdb/src/function/scalar/string/regexp.cpp +17 -7
- package/src/duckdb/src/{core_functions → function}/scalar/string/regexp_escape.cpp +2 -2
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha1.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha256.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/string_split.cpp +4 -5
- package/src/duckdb/src/function/scalar/string/strip_accents.cpp +3 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +14 -13
- package/src/duckdb/src/function/scalar/string/suffix.cpp +0 -4
- package/src/duckdb/src/function/scalar/struct/struct_concat.cpp +115 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +35 -31
- package/src/duckdb/src/{core_functions → function}/scalar/struct/struct_pack.cpp +7 -7
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -8
- package/src/duckdb/src/function/scalar/system/write_log.cpp +170 -0
- package/src/duckdb/src/function/scalar_function.cpp +5 -5
- package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +3 -2
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +287 -1
- package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +6 -6
- package/src/duckdb/src/function/table/arrow.cpp +32 -352
- package/src/duckdb/src/function/table/arrow_conversion.cpp +43 -7
- package/src/duckdb/src/function/table/copy_csv.cpp +38 -23
- package/src/duckdb/src/function/table/glob.cpp +1 -1
- package/src/duckdb/src/function/table/query_function.cpp +12 -7
- package/src/duckdb/src/function/table/read_csv.cpp +114 -46
- package/src/duckdb/src/function/table/read_file.cpp +26 -6
- package/src/duckdb/src/function/table/sniff_csv.cpp +25 -5
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +6 -7
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +141 -16
- package/src/duckdb/src/function/table/system/duckdb_log.cpp +64 -0
- package/src/duckdb/src/function/table/system/duckdb_log_contexts.cpp +65 -0
- package/src/duckdb/src/function/table/system/duckdb_memory.cpp +0 -1
- package/src/duckdb/src/function/table/system/duckdb_settings.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_tables.cpp +1 -13
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +1 -1
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +17 -0
- package/src/duckdb/src/function/table/system/pragma_table_info.cpp +6 -0
- package/src/duckdb/src/function/table/system/pragma_table_sample.cpp +95 -0
- package/src/duckdb/src/function/table/system/test_all_types.cpp +56 -46
- package/src/duckdb/src/function/table/system_functions.cpp +3 -0
- package/src/duckdb/src/function/table/table_scan.cpp +487 -289
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/table_function.cpp +10 -6
- package/src/duckdb/src/function/window/window_aggregate_function.cpp +248 -0
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +48 -0
- package/src/duckdb/src/function/window/window_aggregator.cpp +88 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +854 -0
- package/src/duckdb/src/function/window/window_collection.cpp +146 -0
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +357 -0
- package/src/duckdb/src/function/window/window_custom_aggregator.cpp +146 -0
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +758 -0
- package/src/duckdb/src/function/window/window_executor.cpp +99 -0
- package/src/duckdb/src/function/window/window_index_tree.cpp +63 -0
- package/src/duckdb/src/function/window/window_merge_sort_tree.cpp +275 -0
- package/src/duckdb/src/function/window/window_naive_aggregator.cpp +361 -0
- package/src/duckdb/src/function/window/window_rank_function.cpp +288 -0
- package/src/duckdb/src/function/window/window_rownumber_function.cpp +191 -0
- package/src/duckdb/src/function/window/window_segment_tree.cpp +594 -0
- package/src/duckdb/src/function/window/window_shared_expressions.cpp +50 -0
- package/src/duckdb/src/function/window/window_token_tree.cpp +142 -0
- package/src/duckdb/src/function/window/window_value_function.cpp +566 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +74 -17
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +9 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +4 -10
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +18 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +5 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +21 -18
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +3 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +10 -2
- package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +11 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +26 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +144 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +11 -4
- package/src/duckdb/src/include/duckdb/common/assert.hpp +12 -1
- package/src/duckdb/src/include/duckdb/common/atomic_ptr.hpp +102 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +65 -6
- package/src/duckdb/src/include/duckdb/common/chrono.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/column_index.hpp +72 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +120 -0
- package/src/duckdb/src/include/duckdb/{core_functions/core_functions.hpp → common/enums/collation_type.hpp} +2 -7
- package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/enums/function_errors.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_preservation_type.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions/aggregate → common/enums}/quantile_enum.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/error_data.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/extension_type_info.hpp +37 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +9 -3
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +19 -10
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/fsst.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +12 -2
- package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +147 -27
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +2 -7
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +16 -5
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/platform.hpp +34 -3
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +10 -13
- package/src/duckdb/src/include/duckdb/common/random_engine.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +0 -2
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/stacktrace.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +30 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +13 -2
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +21 -4
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +58 -10
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +43 -16
- package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +63 -21
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +62 -16
- package/src/duckdb/src/include/duckdb/common/types/varint.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +34 -7
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +12 -7
- package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +12 -13
- package/src/duckdb/src/include/duckdb/common/vector_operations/binary_executor.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +111 -4
- package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +48 -10
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +25 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +28 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +23 -16
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +25 -16
- package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +15 -10
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent/physical_fixed_batch_copy.hpp → aggregate/physical_partitioned_aggregate.hpp} +25 -27
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +21 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +38 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +8 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +29 -23
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +15 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +13 -5
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +36 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +21 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +52 -22
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +6 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_validator.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +6 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +16 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +9 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +8 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +55 -10
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +28 -7
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +6 -9
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +17 -16
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +7 -3
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +55 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +10 -9
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +16 -13
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +0 -4
- package/src/duckdb/src/include/duckdb/execution/partition_info.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +20 -9
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +1 -11
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/progress_data.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +160 -31
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_function_utils.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +61 -10
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/minmax_n_helpers.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/sort_key_helpers.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +47 -27
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +3 -10
- package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +13 -6
- package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +15 -0
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +29 -6
- package/src/duckdb/src/include/duckdb/{core_functions → function}/create_sort_key.hpp +4 -1
- package/src/duckdb/src/include/duckdb/function/encoding_function.hpp +78 -0
- package/src/duckdb/src/include/duckdb/function/function.hpp +22 -1
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/function_list.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/function_set.hpp +13 -7
- package/src/duckdb/src/include/duckdb/{core_functions → function}/lambda_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/partition_stats.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/register_function_list_helper.hpp +69 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +154 -23
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_utils.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/date_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_common.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +32 -23
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/list_functions.hpp +156 -0
- package/src/duckdb/src/include/duckdb/function/scalar/map_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +4 -45
- package/src/duckdb/src/include/duckdb/function/scalar/operator_functions.hpp +102 -0
- package/src/duckdb/src/include/duckdb/function/scalar/operators.hpp +2 -16
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +16 -25
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_utils.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_common.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +401 -76
- package/src/duckdb/src/include/duckdb/function/scalar/struct_functions.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/scalar/system_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +17 -8
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +59 -6
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +12 -9
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +18 -13
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -4
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +14 -0
- package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +15 -10
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +94 -18
- package/src/duckdb/src/include/duckdb/{core_functions → function}/to_interval.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +44 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_states.hpp +56 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +194 -0
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +153 -0
- package/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +146 -0
- package/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +32 -0
- package/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +122 -0
- package/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp +42 -0
- package/src/duckdb/src/include/duckdb/function/window/window_merge_sort_tree.hpp +108 -0
- package/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rank_function.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rownumber_function.hpp +43 -0
- package/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/window/window_shared_expressions.hpp +76 -0
- package/src/duckdb/src/include/duckdb/function/window/window_token_tree.hpp +46 -0
- package/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +79 -0
- package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +2 -0
- package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +81 -0
- package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +127 -0
- package/src/duckdb/src/include/duckdb/logging/logger.hpp +287 -0
- package/src/duckdb/src/include/duckdb/logging/logging.hpp +83 -0
- package/src/duckdb/src/include/duckdb/main/appender.hpp +41 -18
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +6 -3
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +317 -231
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +17 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +28 -6
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +8 -3
- package/src/duckdb/src/include/duckdb/main/config.hpp +52 -8
- package/src/duckdb/src/include/duckdb/main/connection.hpp +18 -3
- package/src/duckdb/src/include/duckdb/main/database.hpp +8 -7
- package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +5 -1
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension.hpp +8 -2
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +548 -9
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +12 -7
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +8 -4
- package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/subquery_relation.hpp +1 -4
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +7 -0
- package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation.hpp +45 -9
- package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +20 -22
- package/src/duckdb/src/include/duckdb/main/settings.hpp +613 -378
- package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -4
- package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +1 -3
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +14 -7
- package/src/duckdb/src/include/duckdb/optimizer/common_aggregate_optimizer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/empty_result_pullup.hpp +27 -0
- package/src/duckdb/src/include/duckdb/optimizer/expression_heuristics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +6 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/in_clause_rewriter.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +45 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +23 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +18 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +9 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +33 -11
- package/src/duckdb/src/include/duckdb/optimizer/rule/distinct_aggregate_optimizer.hpp +34 -0
- package/src/duckdb/src/include/duckdb/optimizer/sampling_pushdown.hpp +25 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +3 -1
- package/src/duckdb/src/include/duckdb/optimizer/sum_rewriter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +26 -8
- package/src/duckdb/src/include/duckdb/parallel/thread_context.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +51 -3
- package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +28 -44
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +11 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +12 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +22 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -4
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +16 -12
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +5 -5
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +12 -3
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +17 -57
- package/src/duckdb/src/include/duckdb/parser/qualified_name_set.hpp +19 -3
- package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +12 -9
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +45 -28
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +23 -11
- package/src/duckdb/src/include/duckdb/planner/binding_alias.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +11 -10
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +4 -4
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +9 -4
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/filter/dynamic_filter.hpp +48 -0
- package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +35 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +9 -9
- package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +16 -7
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +5 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +5 -3
- package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +14 -6
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +12 -8
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +82 -26
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +10 -3
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +4 -13
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +14 -15
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +13 -15
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/analyze.hpp +46 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/common.hpp +60 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +61 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/decompression.hpp +50 -0
- package/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp +100 -0
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/appender.hpp +150 -0
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp +618 -0
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +53 -31
- package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -1
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -4
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +16 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/storage_index.hpp +70 -0
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +5 -7
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +23 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +34 -6
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +39 -10
- package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +56 -14
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +35 -29
- package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +19 -6
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +29 -6
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +10 -10
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +26 -19
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +8 -1
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +16 -14
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +228 -61
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +14 -10
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +3 -1
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +3 -2
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +19 -17
- package/src/duckdb/src/include/duckdb/transaction/rollback_state.hpp +5 -2
- package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -2
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +13 -8
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer_allocator.hpp +79 -0
- package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +43 -13
- package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +4 -1
- package/src/duckdb/src/include/duckdb/verification/copied_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/external_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/parsed_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +7 -3
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +11 -5
- package/src/duckdb/src/include/duckdb/verification/unoptimized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb.h +424 -41
- package/src/duckdb/src/include/duckdb_extension.h +301 -195
- package/src/duckdb/src/logging/log_manager.cpp +157 -0
- package/src/duckdb/src/logging/log_storage.cpp +209 -0
- package/src/duckdb/src/logging/logger.cpp +211 -0
- package/src/duckdb/src/logging/logging.cpp +42 -0
- package/src/duckdb/src/main/appender.cpp +187 -45
- package/src/duckdb/src/main/attached_database.cpp +16 -8
- package/src/duckdb/src/main/capi/appender-c.cpp +47 -4
- package/src/duckdb/src/main/capi/arrow-c.cpp +9 -4
- package/src/duckdb/src/main/capi/config-c.cpp +17 -4
- package/src/duckdb/src/main/capi/datetime-c.cpp +15 -0
- package/src/duckdb/src/main/capi/duckdb-c.cpp +54 -13
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +212 -4
- package/src/duckdb/src/main/capi/helper-c.cpp +3 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +26 -7
- package/src/duckdb/src/main/capi/replacement_scan-c.cpp +1 -1
- package/src/duckdb/src/main/capi/result-c.cpp +3 -0
- package/src/duckdb/src/main/capi/table_description-c.cpp +43 -10
- package/src/duckdb/src/main/capi/threading-c.cpp +4 -4
- package/src/duckdb/src/main/client_context.cpp +125 -51
- package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
- package/src/duckdb/src/main/client_context_wrapper.cpp +4 -0
- package/src/duckdb/src/main/client_data.cpp +1 -1
- package/src/duckdb/src/main/client_verify.cpp +39 -20
- package/src/duckdb/src/main/config.cpp +266 -74
- package/src/duckdb/src/main/connection.cpp +53 -13
- package/src/duckdb/src/main/database.cpp +39 -18
- package/src/duckdb/src/main/database_manager.cpp +12 -11
- package/src/duckdb/src/main/db_instance_cache.cpp +14 -7
- package/src/duckdb/src/main/extension/extension_helper.cpp +24 -23
- package/src/duckdb/src/main/extension/extension_install.cpp +19 -7
- package/src/duckdb/src/main/extension/extension_load.cpp +91 -41
- package/src/duckdb/src/main/extension/extension_util.cpp +40 -19
- package/src/duckdb/src/main/extension.cpp +20 -11
- package/src/duckdb/src/main/profiling_info.cpp +19 -5
- package/src/duckdb/src/main/query_profiler.cpp +135 -36
- package/src/duckdb/src/main/query_result.cpp +2 -1
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/create_table_relation.cpp +5 -4
- package/src/duckdb/src/main/relation/create_view_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delete_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delim_get_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/distinct_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/explain_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/filter_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/insert_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation/order_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/projection_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/query_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +58 -20
- package/src/duckdb/src/main/relation/setop_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/subquery_relation.cpp +3 -8
- package/src/duckdb/src/main/relation/table_function_relation.cpp +10 -1
- package/src/duckdb/src/main/relation/table_relation.cpp +19 -3
- package/src/duckdb/src/main/relation/update_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/value_relation.cpp +42 -2
- package/src/duckdb/src/main/relation/view_relation.cpp +8 -2
- package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +49 -28
- package/src/duckdb/src/main/secret/secret_manager.cpp +1 -1
- package/src/duckdb/src/main/secret/secret_storage.cpp +6 -4
- package/src/duckdb/src/main/settings/autogenerated_settings.cpp +1102 -0
- package/src/duckdb/src/main/settings/custom_settings.cpp +1343 -0
- package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +60 -37
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +1 -1
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +126 -72
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +22 -6
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -3
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +2 -2
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +3 -3
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +7 -7
- package/src/duckdb/src/optimizer/deliminator.cpp +6 -5
- package/src/duckdb/src/optimizer/empty_result_pullup.cpp +96 -0
- package/src/duckdb/src/optimizer/expression_heuristics.cpp +11 -3
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +9 -2
- package/src/duckdb/src/optimizer/filter_combiner.cpp +190 -88
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +6 -5
- package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +25 -9
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +170 -72
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -4
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +3 -1
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +7 -7
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -6
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +37 -22
- package/src/duckdb/src/optimizer/late_materialization.cpp +414 -0
- package/src/duckdb/src/optimizer/limit_pushdown.cpp +1 -0
- package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +30 -2
- package/src/duckdb/src/optimizer/optimizer.cpp +67 -7
- package/src/duckdb/src/optimizer/pullup/pullup_filter.cpp +3 -3
- package/src/duckdb/src/optimizer/pullup/pullup_projection.cpp +2 -2
- package/src/duckdb/src/optimizer/pullup/pullup_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +3 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +5 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_unnest.cpp +52 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +2 -2
- package/src/duckdb/src/optimizer/regex_range_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +168 -38
- package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +8 -5
- package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/constant_folding.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/distinct_aggregate_optimizer.cpp +65 -0
- package/src/duckdb/src/optimizer/rule/distributivity.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -1
- package/src/duckdb/src/optimizer/rule/move_constants.cpp +9 -9
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/sampling_pushdown.cpp +24 -0
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +74 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +10 -7
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +3 -0
- package/src/duckdb/src/optimizer/sum_rewriter.cpp +174 -0
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +71 -0
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +5 -5
- package/src/duckdb/src/parallel/event.cpp +4 -0
- package/src/duckdb/src/parallel/executor.cpp +11 -29
- package/src/duckdb/src/parallel/executor_task.cpp +8 -3
- package/src/duckdb/src/parallel/pipeline.cpp +15 -8
- package/src/duckdb/src/parallel/pipeline_executor.cpp +67 -43
- package/src/duckdb/src/parallel/thread_context.cpp +12 -1
- package/src/duckdb/src/parser/column_definition.cpp +3 -3
- package/src/duckdb/src/parser/constraints/unique_constraint.cpp +72 -9
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -3
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +3 -3
- package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/star_expression.cpp +46 -2
- package/src/duckdb/src/parser/expression/window_expression.cpp +24 -1
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +26 -2
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +5 -3
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +29 -1
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +6 -6
- package/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_function_info.cpp +17 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -15
- package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
- package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +4 -4
- package/src/duckdb/src/parser/parsed_data/load_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +31 -1
- package/src/duckdb/src/parser/parsed_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +4 -1
- package/src/duckdb/src/parser/parser.cpp +129 -0
- package/src/duckdb/src/parser/qualified_name.cpp +99 -0
- package/src/duckdb/src/parser/query_error_context.cpp +35 -6
- package/src/duckdb/src/parser/query_node/select_node.cpp +4 -4
- package/src/duckdb/src/parser/statement/delete_statement.cpp +6 -1
- package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -3
- package/src/duckdb/src/parser/statement/update_statement.cpp +6 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +2 -2
- package/src/duckdb/src/parser/tableref.cpp +2 -2
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +16 -24
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +5 -5
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +61 -13
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +10 -4
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -2
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +30 -3
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +25 -6
- package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +10 -3
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +4 -3
- package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +18 -3
- package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +0 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +5 -5
- package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +26 -12
- package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +11 -3
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +3 -3
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +4 -4
- package/src/duckdb/src/parser/transform/statement/transform_set.cpp +2 -2
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +21 -3
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +8 -6
- package/src/duckdb/src/parser/transformer.cpp +2 -2
- package/src/duckdb/src/planner/bind_context.cpp +308 -136
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +30 -31
- package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +4 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +102 -94
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +7 -5
- package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +7 -7
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -10
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -6
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +23 -15
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +97 -19
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +74 -16
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +6 -6
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +49 -15
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +32 -23
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +20 -3
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +3 -0
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +6 -5
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +38 -19
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +2 -12
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +117 -412
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +423 -144
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +5 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +0 -4
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +31 -13
- package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +96 -27
- package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -3
- package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +7 -6
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +36 -9
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +34 -34
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +72 -35
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +99 -18
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +23 -11
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +22 -19
- package/src/duckdb/src/planner/binder.cpp +23 -45
- package/src/duckdb/src/planner/binding_alias.cpp +69 -0
- package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +6 -2
- package/src/duckdb/src/planner/collation_binding.cpp +38 -4
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +17 -5
- package/src/duckdb/src/planner/expression/bound_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -1
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +2 -2
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -4
- package/src/duckdb/src/planner/expression.cpp +7 -1
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +16 -0
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +53 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -8
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder.cpp +7 -7
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -3
- package/src/duckdb/src/planner/filter/constant_filter.cpp +17 -2
- package/src/duckdb/src/planner/filter/dynamic_filter.cpp +68 -0
- package/src/duckdb/src/planner/filter/in_filter.cpp +84 -0
- package/src/duckdb/src/planner/filter/null_filter.cpp +1 -2
- package/src/duckdb/src/planner/filter/optional_filter.cpp +29 -0
- package/src/duckdb/src/planner/filter/struct_filter.cpp +11 -6
- package/src/duckdb/src/planner/joinside.cpp +6 -5
- package/src/duckdb/src/planner/logical_operator.cpp +4 -1
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +68 -2
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +23 -0
- package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -12
- package/src/duckdb/src/planner/operator/logical_filter.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +48 -25
- package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_join.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_order.cpp +4 -11
- package/src/duckdb/src/planner/operator/logical_top_n.cpp +7 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +33 -5
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +2 -2
- package/src/duckdb/src/planner/table_binding.cpp +74 -36
- package/src/duckdb/src/planner/table_filter.cpp +5 -8
- package/src/duckdb/src/storage/arena_allocator.cpp +5 -4
- package/src/duckdb/src/storage/buffer/block_handle.cpp +88 -17
- package/src/duckdb/src/storage/buffer/block_manager.cpp +34 -26
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +70 -49
- package/src/duckdb/src/storage/buffer_manager.cpp +4 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -5
- package/src/duckdb/src/storage/compression/bitpacking.cpp +14 -16
- package/src/duckdb/src/storage/compression/dictionary/analyze.cpp +54 -0
- package/src/duckdb/src/storage/compression/dictionary/common.cpp +90 -0
- package/src/duckdb/src/storage/compression/dictionary/compression.cpp +174 -0
- package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +115 -0
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +53 -545
- package/src/duckdb/src/storage/compression/empty_validity.cpp +15 -0
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +25 -16
- package/src/duckdb/src/storage/compression/fsst.cpp +101 -47
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +92 -2
- package/src/duckdb/src/storage/compression/rle.cpp +216 -46
- package/src/duckdb/src/storage/compression/roaring/analyze.cpp +179 -0
- package/src/duckdb/src/storage/compression/roaring/common.cpp +282 -0
- package/src/duckdb/src/storage/compression/roaring/compress.cpp +481 -0
- package/src/duckdb/src/storage/compression/roaring/metadata.cpp +262 -0
- package/src/duckdb/src/storage/compression/roaring/scan.cpp +364 -0
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +47 -65
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +102 -39
- package/src/duckdb/src/storage/compression/zstd.cpp +1049 -0
- package/src/duckdb/src/storage/data_table.cpp +312 -172
- package/src/duckdb/src/storage/local_storage.cpp +104 -46
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -3
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +138 -58
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +14 -0
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +19 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +43 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +32 -5
- package/src/duckdb/src/storage/single_file_block_manager.cpp +6 -8
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +82 -71
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +18 -17
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +34 -22
- package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -3
- package/src/duckdb/src/storage/storage_info.cpp +72 -10
- package/src/duckdb/src/storage/storage_manager.cpp +41 -47
- package/src/duckdb/src/storage/table/array_column_data.cpp +7 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -9
- package/src/duckdb/src/storage/table/column_data.cpp +105 -43
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +307 -132
- package/src/duckdb/src/storage/table/column_segment.cpp +36 -13
- package/src/duckdb/src/storage/table/list_column_data.cpp +4 -4
- package/src/duckdb/src/storage/table/row_group.cpp +159 -66
- package/src/duckdb/src/storage/table/row_group_collection.cpp +157 -68
- package/src/duckdb/src/storage/table/row_version_manager.cpp +33 -10
- package/src/duckdb/src/storage/table/scan_state.cpp +21 -7
- package/src/duckdb/src/storage/table/standard_column_data.cpp +68 -5
- package/src/duckdb/src/storage/table/struct_column_data.cpp +42 -4
- package/src/duckdb/src/storage/table/table_statistics.cpp +91 -5
- package/src/duckdb/src/storage/table/update_segment.cpp +287 -210
- package/src/duckdb/src/storage/table_index_list.cpp +55 -58
- package/src/duckdb/src/storage/temporary_file_manager.cpp +412 -149
- package/src/duckdb/src/storage/wal_replay.cpp +132 -48
- package/src/duckdb/src/storage/write_ahead_log.cpp +75 -48
- package/src/duckdb/src/transaction/cleanup_state.cpp +0 -1
- package/src/duckdb/src/transaction/commit_state.cpp +23 -14
- package/src/duckdb/src/transaction/duck_transaction.cpp +29 -25
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +18 -6
- package/src/duckdb/src/transaction/meta_transaction.cpp +3 -2
- package/src/duckdb/src/transaction/rollback_state.cpp +5 -2
- package/src/duckdb/src/transaction/transaction_context.cpp +9 -1
- package/src/duckdb/src/transaction/undo_buffer.cpp +35 -27
- package/src/duckdb/src/transaction/undo_buffer_allocator.cpp +72 -0
- package/src/duckdb/src/transaction/wal_write_state.cpp +12 -10
- package/src/duckdb/src/verification/copied_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +7 -5
- package/src/duckdb/src/verification/external_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/fetch_row_verifier.cpp +7 -4
- package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +8 -4
- package/src/duckdb/src/verification/parsed_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -12
- package/src/duckdb/src/verification/statement_verifier.cpp +20 -15
- package/src/duckdb/src/verification/unoptimized_statement_verifier.cpp +7 -4
- package/src/duckdb/third_party/fsst/libfsst.hpp +1 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +15 -22
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +4 -2
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +2 -4
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +14278 -13832
- package/src/duckdb/third_party/parquet/parquet_types.cpp +3410 -1686
- package/src/duckdb/third_party/parquet/parquet_types.h +1585 -1204
- package/src/duckdb/third_party/skiplist/SkipList.h +0 -1
- package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +13 -15
- package/src/duckdb/third_party/zstd/common/debug.cpp +36 -0
- package/src/duckdb/third_party/zstd/common/entropy_common.cpp +173 -49
- package/src/duckdb/third_party/zstd/common/error_private.cpp +11 -3
- package/src/duckdb/third_party/zstd/common/fse_decompress.cpp +126 -97
- package/src/duckdb/third_party/zstd/common/pool.cpp +376 -0
- package/src/duckdb/third_party/zstd/common/threading.cpp +193 -0
- package/src/duckdb/third_party/zstd/common/xxhash.cpp +18 -14
- package/src/duckdb/third_party/zstd/common/zstd_common.cpp +3 -38
- package/src/duckdb/third_party/zstd/compress/fse_compress.cpp +93 -165
- package/src/duckdb/third_party/zstd/compress/hist.cpp +28 -31
- package/src/duckdb/third_party/zstd/compress/huf_compress.cpp +957 -291
- package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3988 -1124
- package/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +120 -43
- package/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +47 -23
- package/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +274 -424
- package/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +403 -153
- package/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +741 -268
- package/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +1339 -278
- package/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +334 -222
- package/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +674 -298
- package/src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp +1885 -0
- package/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +1247 -586
- package/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +18 -17
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +724 -270
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1193 -393
- package/src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp +30 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp +171 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp +80 -0
- package/src/duckdb/third_party/zstd/dict/cover.cpp +1271 -0
- package/src/duckdb/third_party/zstd/dict/divsufsort.cpp +1916 -0
- package/src/duckdb/third_party/zstd/dict/fastcover.cpp +775 -0
- package/src/duckdb/third_party/zstd/dict/zdict.cpp +1139 -0
- package/src/duckdb/third_party/zstd/include/zdict.h +473 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/allocations.h +58 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bits.h +204 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +88 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +243 -47
- package/src/duckdb/third_party/zstd/include/zstd/common/cpu.h +253 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/debug.h +31 -31
- package/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +94 -6
- package/src/duckdb/third_party/zstd/include/zstd/common/fse.h +424 -64
- package/src/duckdb/third_party/zstd/include/zstd/common/huf.h +255 -70
- package/src/duckdb/third_party/zstd/include/zstd/common/mem.h +125 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/pool.h +84 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/portability_macros.h +158 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/threading.h +152 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash.h → xxhash.hpp} +0 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash_static.h → xxhash_static.hpp} +1 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_deps.h +122 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +143 -174
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_trace.h +159 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/clevels.h +136 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +4 -4
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +631 -220
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +17 -7
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +2 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +3 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +256 -153
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +16 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +145 -11
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +14 -6
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm_geartab.h +110 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +33 -9
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstdmt_compress.h +107 -0
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +20 -6
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +88 -16
- package/src/duckdb/third_party/zstd/include/zstd/deprecated/zbuff.h +214 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/cover.h +156 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/divsufsort.h +62 -0
- package/src/duckdb/third_party/zstd/include/zstd.h +2171 -93
- package/src/duckdb/third_party/zstd/include/{zstd/common/zstd_errors.h → zstd_errors.h} +32 -10
- package/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +8 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +12 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_nested.cpp +6 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_array.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_bit.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_date.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_enum.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_generic.cpp +18 -0
- package/src/duckdb/ub_extension_core_functions_scalar_list.cpp +22 -0
- package/src/duckdb/ub_extension_core_functions_scalar_map.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_math.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_operators.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_random.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_string.cpp +48 -0
- package/src/duckdb/ub_extension_core_functions_scalar_struct.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_union.cpp +6 -0
- package/src/duckdb/ub_src_common.cpp +4 -0
- package/src/duckdb/ub_src_common_arrow.cpp +3 -1
- package/src/duckdb/ub_src_execution.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_aggregate.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +2 -0
- package/src/duckdb/ub_src_execution_sample.cpp +4 -0
- package/src/duckdb/ub_src_function.cpp +6 -0
- package/src/duckdb/ub_src_function_aggregate.cpp +0 -2
- package/src/duckdb/ub_src_function_aggregate_distributive.cpp +3 -1
- package/src/duckdb/ub_src_function_scalar.cpp +2 -8
- package/src/duckdb/ub_src_function_scalar_date.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_generic.cpp +2 -2
- package/src/duckdb/ub_src_function_scalar_map.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_operator.cpp +8 -0
- package/src/duckdb/ub_src_function_scalar_string.cpp +10 -0
- package/src/duckdb/ub_src_function_scalar_struct.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_system.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +6 -0
- package/src/duckdb/ub_src_function_window.cpp +36 -0
- package/src/duckdb/ub_src_logging.cpp +8 -0
- package/src/duckdb/ub_src_main_settings.cpp +3 -1
- package/src/duckdb/ub_src_optimizer.cpp +8 -0
- package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
- package/src/duckdb/ub_src_parser.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
- package/src/duckdb/ub_src_planner.cpp +2 -0
- package/src/duckdb/ub_src_planner_filter.cpp +6 -0
- package/src/duckdb/ub_src_storage_compression.cpp +4 -0
- package/src/duckdb/ub_src_storage_compression_dictionary.cpp +8 -0
- package/src/duckdb/ub_src_storage_compression_roaring.cpp +10 -0
- package/src/duckdb/ub_src_transaction.cpp +2 -0
- package/vendor.py +1 -1
- package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +0 -6003
- package/src/duckdb/extension/json/yyjson/yyjson.cpp +0 -8218
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +0 -78
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +0 -91
- package/src/duckdb/src/common/cycle_counter.cpp +0 -76
- package/src/duckdb/src/common/field_writer.cpp +0 -97
- package/src/duckdb/src/common/http_state.cpp +0 -95
- package/src/duckdb/src/common/preserved_error.cpp +0 -87
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
- package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +0 -27
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +0 -36
- package/src/duckdb/src/common/serializer/format_serializer.cpp +0 -15
- package/src/duckdb/src/common/serializer.cpp +0 -24
- package/src/duckdb/src/common/types/chunk_collection.cpp +0 -190
- package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +0 -183
- package/src/duckdb/src/core_functions/scalar/date/current.cpp +0 -54
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +0 -78
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +0 -70
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +0 -412
- package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +0 -28
- package/src/duckdb/src/core_functions/scalar/string/jaro_winkler.cpp +0 -71
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/execution/index/art/node16.cpp +0 -196
- package/src/duckdb/src/execution/index/art/node4.cpp +0 -189
- package/src/duckdb/src/execution/index/unknown_index.cpp +0 -65
- package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +0 -595
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +0 -434
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +0 -89
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +0 -90
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +0 -95
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +0 -494
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +0 -35
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +0 -99
- package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -689
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -242
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +0 -695
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -280
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +0 -666
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +0 -499
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -207
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/execution/physical_plan/plan_limit_percent.cpp +0 -18
- package/src/duckdb/src/execution/physical_plan/plan_show_select.cpp +0 -47
- package/src/duckdb/src/execution/reservoir_sample.cpp +0 -324
- package/src/duckdb/src/execution/window_executor.cpp +0 -1830
- package/src/duckdb/src/execution/window_segment_tree.cpp +0 -2073
- package/src/duckdb/src/extension_forward_decl/icu.cpp +0 -59
- package/src/duckdb/src/function/aggregate/distributive_functions.cpp +0 -15
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +0 -29
- package/src/duckdb/src/function/scalar/generic_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +0 -143
- package/src/duckdb/src/function/scalar/operators.cpp +0 -14
- package/src/duckdb/src/function/scalar/sequence_functions.cpp +0 -10
- package/src/duckdb/src/function/scalar/string_functions.cpp +0 -22
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +0 -173
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +0 -101
- package/src/duckdb/src/include/duckdb/catalog/mapping_value.hpp +0 -92
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +0 -42
- package/src/duckdb/src/include/duckdb/common/cycle_counter.hpp +0 -68
- package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +0 -34
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +0 -113
- package/src/duckdb/src/include/duckdb/common/platform.h +0 -58
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +0 -59
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +0 -192
- package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +0 -63
- package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +0 -253
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +0 -155
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +0 -163
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +0 -35
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +0 -228
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +0 -93
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +0 -197
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +0 -30
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +0 -99
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +0 -87
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +0 -80
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +0 -104
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +0 -79
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -119
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +0 -72
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +0 -110
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_file_handle.hpp +0 -59
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +0 -46
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +0 -210
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +0 -131
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +0 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +0 -167
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +0 -343
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -165
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_optimizer.hpp +0 -45
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +0 -57
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +0 -45
- package/src/duckdb/src/include/duckdb/parser/statement/show_statement.hpp +0 -32
- package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +0 -49
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +0 -42
- package/src/duckdb/src/main/settings/settings.cpp +0 -2056
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +0 -36
- package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +0 -19
- package/src/duckdb/src/parser/statement/show_statement.cpp +0 -15
- package/src/duckdb/src/planner/binder/statement/bind_show.cpp +0 -30
- package/src/duckdb/src/planner/operator/logical_limit_percent.cpp +0 -14
- package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +0 -70
- package/src/duckdb/third_party/fsst/fsst_avx512.cpp +0 -140
- package/src/duckdb/third_party/fsst/fsst_avx512.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll1.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll2.inc +0 -114
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll3.inc +0 -171
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll4.inc +0 -228
- package/src/duckdb/third_party/parquet/parquet_constants.cpp +0 -17
- package/src/duckdb/third_party/parquet/parquet_constants.h +0 -24
- package/src/duckdb/third_party/re2/util/pod_array.h +0 -55
- package/src/duckdb/third_party/re2/util/sparse_array.h +0 -392
- package/src/duckdb/third_party/re2/util/sparse_set.h +0 -264
- package/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +0 -421
- package/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +0 -238
- package/src/duckdb/third_party/zstd/include/zstd_static.h +0 -1070
- package/src/duckdb/ub_src_core_functions.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_algebraic.cpp +0 -8
- package/src/duckdb/ub_src_core_functions_aggregate_distributive.cpp +0 -24
- package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +0 -12
- package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_regression.cpp +0 -14
- package/src/duckdb/ub_src_core_functions_scalar_array.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_bit.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_scalar_date.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_enum.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +0 -18
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_map.cpp +0 -16
- package/src/duckdb/ub_src_core_functions_scalar_math.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_operators.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_random.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +0 -58
- package/src/duckdb/ub_src_core_functions_scalar_struct.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_union.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +0 -18
- package/src/duckdb/ub_src_function_scalar_operators.cpp +0 -8
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/covar.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/stddev.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/sum_helpers.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/array_kernels.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/function_list.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/secret_functions.hpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/multiply.cpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/subtract.cpp +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -14,18 +14,19 @@
|
|
14
14
|
/*-*******************************************************
|
15
15
|
* Dependencies
|
16
16
|
*********************************************************/
|
17
|
-
#include
|
17
|
+
#include "zstd/common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
18
18
|
#include "zstd/common/compiler.h" /* prefetch */
|
19
|
+
#include "zstd/common/cpu.h" /* bmi2 */
|
19
20
|
#include "zstd/common/mem.h" /* low level memory routines */
|
21
|
+
#define FSE_STATIC_LINKING_ONLY
|
20
22
|
#include "zstd/common/fse.h"
|
21
|
-
#include "zstd/common/fse_static.h"
|
22
23
|
#include "zstd/common/huf.h"
|
23
|
-
#include "zstd/common/huf_static.h"
|
24
24
|
#include "zstd/common/zstd_internal.h"
|
25
25
|
#include "zstd/decompress/zstd_decompress_internal.h" /* ZSTD_DCtx */
|
26
26
|
#include "zstd/decompress/zstd_ddict.h" /* ZSTD_DDictDictContent */
|
27
27
|
#include "zstd/decompress/zstd_decompress_block.h"
|
28
|
-
|
28
|
+
#include "zstd/common/bits.h" /* ZSTD_highbit32 */
|
29
|
+
|
29
30
|
/*_*******************************************************
|
30
31
|
* Macros
|
31
32
|
**********************************************************/
|
@@ -39,23 +40,31 @@ namespace duckdb_zstd {
|
|
39
40
|
#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
|
40
41
|
#endif
|
41
42
|
|
43
|
+
namespace duckdb_zstd {
|
42
44
|
|
43
45
|
/*_*******************************************************
|
44
46
|
* Memory operations
|
45
47
|
**********************************************************/
|
46
|
-
static void ZSTD_copy4(void* dst, const void* src) {
|
48
|
+
static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
|
47
49
|
|
48
50
|
|
49
51
|
/*-*************************************************************
|
50
52
|
* Block decoding
|
51
53
|
***************************************************************/
|
52
54
|
|
55
|
+
static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
|
56
|
+
{
|
57
|
+
size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
|
58
|
+
assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
|
59
|
+
return blockSizeMax;
|
60
|
+
}
|
61
|
+
|
53
62
|
/*! ZSTD_getcBlockSize() :
|
54
63
|
* Provides the size of compressed block from block header `src` */
|
55
64
|
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
56
65
|
blockProperties_t* bpPtr)
|
57
66
|
{
|
58
|
-
RETURN_ERROR_IF(srcSize <
|
67
|
+
RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
|
59
68
|
|
60
69
|
{ U32 const cBlockHeader = MEM_readLE24(src);
|
61
70
|
U32 const cSize = cBlockHeader >> 3;
|
@@ -68,36 +77,90 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
68
77
|
}
|
69
78
|
}
|
70
79
|
|
80
|
+
/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
|
81
|
+
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
|
82
|
+
const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
|
83
|
+
{
|
84
|
+
size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
|
85
|
+
assert(litSize <= blockSizeMax);
|
86
|
+
assert(dctx->isFrameDecompression || streaming == not_streaming);
|
87
|
+
assert(expectedWriteSize <= blockSizeMax);
|
88
|
+
if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
|
89
|
+
/* If we aren't streaming, we can just put the literals after the output
|
90
|
+
* of the current block. We don't need to worry about overwriting the
|
91
|
+
* extDict of our window, because it doesn't exist.
|
92
|
+
* So if we have space after the end of the block, just put it there.
|
93
|
+
*/
|
94
|
+
dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
|
95
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
96
|
+
dctx->litBufferLocation = ZSTD_in_dst;
|
97
|
+
} else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
|
98
|
+
/* Literals fit entirely within the extra buffer, put them there to avoid
|
99
|
+
* having to split the literals.
|
100
|
+
*/
|
101
|
+
dctx->litBuffer = dctx->litExtraBuffer;
|
102
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize;
|
103
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
104
|
+
} else {
|
105
|
+
assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
|
106
|
+
/* Literals must be split between the output block and the extra lit
|
107
|
+
* buffer. We fill the extra lit buffer with the tail of the literals,
|
108
|
+
* and put the rest of the literals at the end of the block, with
|
109
|
+
* WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
|
110
|
+
* This MUST not write more than our maxBlockSize beyond dst, because in
|
111
|
+
* streaming mode, that could overwrite part of our extDict window.
|
112
|
+
*/
|
113
|
+
if (splitImmediately) {
|
114
|
+
/* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
|
115
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
116
|
+
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
|
117
|
+
} else {
|
118
|
+
/* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
|
119
|
+
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
|
120
|
+
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
|
121
|
+
}
|
122
|
+
dctx->litBufferLocation = ZSTD_split;
|
123
|
+
assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
|
124
|
+
}
|
125
|
+
}
|
71
126
|
|
72
|
-
/* Hidden declaration for fullbench */
|
73
|
-
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
74
|
-
const void* src, size_t srcSize);
|
75
127
|
/*! ZSTD_decodeLiteralsBlock() :
|
128
|
+
* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
|
129
|
+
* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
|
130
|
+
* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
|
131
|
+
* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
|
132
|
+
*
|
76
133
|
* @return : nb of bytes read from src (< srcSize )
|
77
134
|
* note : symbol not declared but exposed for fullbench */
|
78
|
-
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
79
|
-
const void* src, size_t srcSize
|
135
|
+
static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
136
|
+
const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
|
137
|
+
void* dst, size_t dstCapacity, const streaming_operation streaming)
|
80
138
|
{
|
81
139
|
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
|
82
140
|
RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
|
83
141
|
|
84
142
|
{ const BYTE* const istart = (const BYTE*) src;
|
85
143
|
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
|
144
|
+
size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
|
86
145
|
|
87
146
|
switch(litEncType)
|
88
147
|
{
|
89
148
|
case set_repeat:
|
90
149
|
DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
|
91
150
|
RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
|
92
|
-
|
151
|
+
ZSTD_FALLTHROUGH;
|
93
152
|
|
94
153
|
case set_compressed:
|
95
|
-
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE ==
|
154
|
+
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
|
96
155
|
{ size_t lhSize, litSize, litCSize;
|
97
156
|
U32 singleStream=0;
|
98
157
|
U32 const lhlCode = (istart[0] >> 2) & 3;
|
99
158
|
U32 const lhc = MEM_readLE32(istart);
|
100
159
|
size_t hufSuccess;
|
160
|
+
size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
|
161
|
+
int const flags = 0
|
162
|
+
| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
|
163
|
+
| (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
|
101
164
|
switch(lhlCode)
|
102
165
|
{
|
103
166
|
case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
|
@@ -120,8 +183,15 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
120
183
|
litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
|
121
184
|
break;
|
122
185
|
}
|
123
|
-
RETURN_ERROR_IF(litSize >
|
186
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
187
|
+
RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
|
188
|
+
if (!singleStream)
|
189
|
+
RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
|
190
|
+
"Not enough literals (%zu) for the 4-streams mode (min %u)",
|
191
|
+
litSize, MIN_LITERALS_FOR_4_STREAMS);
|
124
192
|
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
|
193
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
|
194
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
|
125
195
|
|
126
196
|
/* prefetch huffman table if cold */
|
127
197
|
if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
|
@@ -130,13 +200,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
130
200
|
|
131
201
|
if (litEncType==set_repeat) {
|
132
202
|
if (singleStream) {
|
133
|
-
hufSuccess =
|
203
|
+
hufSuccess = HUF_decompress1X_usingDTable(
|
134
204
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
135
|
-
dctx->HUFptr,
|
205
|
+
dctx->HUFptr, flags);
|
136
206
|
} else {
|
137
|
-
|
207
|
+
assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
|
208
|
+
hufSuccess = HUF_decompress4X_usingDTable(
|
138
209
|
dctx->litBuffer, litSize, istart+lhSize, litCSize,
|
139
|
-
dctx->HUFptr,
|
210
|
+
dctx->HUFptr, flags);
|
140
211
|
}
|
141
212
|
} else {
|
142
213
|
if (singleStream) {
|
@@ -144,20 +215,29 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
144
215
|
hufSuccess = HUF_decompress1X_DCtx_wksp(
|
145
216
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
146
217
|
istart+lhSize, litCSize, dctx->workspace,
|
147
|
-
sizeof(dctx->workspace));
|
218
|
+
sizeof(dctx->workspace), flags);
|
148
219
|
#else
|
149
|
-
hufSuccess =
|
220
|
+
hufSuccess = HUF_decompress1X1_DCtx_wksp(
|
150
221
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
151
222
|
istart+lhSize, litCSize, dctx->workspace,
|
152
|
-
sizeof(dctx->workspace),
|
223
|
+
sizeof(dctx->workspace), flags);
|
153
224
|
#endif
|
154
225
|
} else {
|
155
|
-
hufSuccess =
|
226
|
+
hufSuccess = HUF_decompress4X_hufOnly_wksp(
|
156
227
|
dctx->entropy.hufTable, dctx->litBuffer, litSize,
|
157
228
|
istart+lhSize, litCSize, dctx->workspace,
|
158
|
-
sizeof(dctx->workspace),
|
229
|
+
sizeof(dctx->workspace), flags);
|
159
230
|
}
|
160
231
|
}
|
232
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
233
|
+
{
|
234
|
+
assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
|
235
|
+
ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
236
|
+
ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
237
|
+
dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
|
238
|
+
dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
|
239
|
+
assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
|
240
|
+
}
|
161
241
|
|
162
242
|
RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
|
163
243
|
|
@@ -165,13 +245,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
165
245
|
dctx->litSize = litSize;
|
166
246
|
dctx->litEntropy = 1;
|
167
247
|
if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
|
168
|
-
memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
169
248
|
return litCSize + lhSize;
|
170
249
|
}
|
171
250
|
|
172
251
|
case set_basic:
|
173
252
|
{ size_t litSize, lhSize;
|
174
253
|
U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
254
|
+
size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
|
175
255
|
switch(lhlCode)
|
176
256
|
{
|
177
257
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
@@ -184,27 +264,42 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
184
264
|
break;
|
185
265
|
case 3:
|
186
266
|
lhSize = 3;
|
267
|
+
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
|
187
268
|
litSize = MEM_readLE24(istart) >> 4;
|
188
269
|
break;
|
189
270
|
}
|
190
271
|
|
272
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
273
|
+
RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
|
274
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
275
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
191
276
|
if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
|
192
277
|
RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
|
193
|
-
|
278
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
279
|
+
{
|
280
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
|
281
|
+
ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
|
282
|
+
}
|
283
|
+
else
|
284
|
+
{
|
285
|
+
ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
|
286
|
+
}
|
194
287
|
dctx->litPtr = dctx->litBuffer;
|
195
288
|
dctx->litSize = litSize;
|
196
|
-
memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
|
197
289
|
return lhSize+litSize;
|
198
290
|
}
|
199
291
|
/* direct reference into compressed stream */
|
200
292
|
dctx->litPtr = istart+lhSize;
|
201
293
|
dctx->litSize = litSize;
|
294
|
+
dctx->litBufferEnd = dctx->litPtr + litSize;
|
295
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
202
296
|
return lhSize+litSize;
|
203
297
|
}
|
204
298
|
|
205
299
|
case set_rle:
|
206
300
|
{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
|
207
301
|
size_t litSize, lhSize;
|
302
|
+
size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
|
208
303
|
switch(lhlCode)
|
209
304
|
{
|
210
305
|
case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
|
@@ -213,16 +308,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
213
308
|
break;
|
214
309
|
case 1:
|
215
310
|
lhSize = 2;
|
311
|
+
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
|
216
312
|
litSize = MEM_readLE16(istart) >> 4;
|
217
313
|
break;
|
218
314
|
case 3:
|
219
315
|
lhSize = 3;
|
316
|
+
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
|
220
317
|
litSize = MEM_readLE24(istart) >> 4;
|
221
|
-
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
|
222
318
|
break;
|
223
319
|
}
|
224
|
-
RETURN_ERROR_IF(litSize >
|
225
|
-
|
320
|
+
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
|
321
|
+
RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
|
322
|
+
RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
|
323
|
+
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
|
324
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
325
|
+
{
|
326
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
|
327
|
+
ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
|
328
|
+
}
|
329
|
+
else
|
330
|
+
{
|
331
|
+
ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
|
332
|
+
}
|
226
333
|
dctx->litPtr = dctx->litBuffer;
|
227
334
|
dctx->litSize = litSize;
|
228
335
|
return lhSize+1;
|
@@ -233,9 +340,21 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|
233
340
|
}
|
234
341
|
}
|
235
342
|
|
343
|
+
/* Hidden declaration for fullbench */
|
344
|
+
size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
|
345
|
+
const void* src, size_t srcSize,
|
346
|
+
void* dst, size_t dstCapacity);
|
347
|
+
size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
|
348
|
+
const void* src, size_t srcSize,
|
349
|
+
void* dst, size_t dstCapacity)
|
350
|
+
{
|
351
|
+
dctx->isFrameDecompression = 0;
|
352
|
+
return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
|
353
|
+
}
|
354
|
+
|
236
355
|
/* Default FSE distribution tables.
|
237
356
|
* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
|
238
|
-
* https://github.com/facebook/zstd/blob/
|
357
|
+
* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
|
239
358
|
* They were generated programmatically with following method :
|
240
359
|
* - start from default distributions, present in /lib/common/zstd_internal.h
|
241
360
|
* - generate tables normally, using ZSTD_buildFSETable()
|
@@ -342,7 +461,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
|
|
342
461
|
}; /* ML_defaultDTable */
|
343
462
|
|
344
463
|
|
345
|
-
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue,
|
464
|
+
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
|
346
465
|
{
|
347
466
|
void* ptr = dt;
|
348
467
|
ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
|
@@ -354,7 +473,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
354
473
|
cell->nbBits = 0;
|
355
474
|
cell->nextState = 0;
|
356
475
|
assert(nbAddBits < 255);
|
357
|
-
cell->nbAdditionalBits =
|
476
|
+
cell->nbAdditionalBits = nbAddBits;
|
358
477
|
cell->baseValue = baseValue;
|
359
478
|
}
|
360
479
|
|
@@ -363,23 +482,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
|
|
363
482
|
* generate FSE decoding table for one symbol (ll, ml or off)
|
364
483
|
* cannot fail if input is valid =>
|
365
484
|
* all inputs are presumed validated at this stage */
|
366
|
-
|
367
|
-
|
485
|
+
FORCE_INLINE_TEMPLATE
|
486
|
+
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
|
368
487
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
369
|
-
const U32* baseValue, const
|
370
|
-
unsigned tableLog)
|
488
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
489
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
371
490
|
{
|
372
491
|
ZSTD_seqSymbol* const tableDecode = dt+1;
|
373
|
-
U16 symbolNext[MaxSeq+1];
|
374
|
-
|
375
492
|
U32 const maxSV1 = maxSymbolValue + 1;
|
376
493
|
U32 const tableSize = 1 << tableLog;
|
377
|
-
|
494
|
+
|
495
|
+
U16* symbolNext = (U16*)wksp;
|
496
|
+
BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
|
497
|
+
U32 highThreshold = tableSize - 1;
|
498
|
+
|
378
499
|
|
379
500
|
/* Sanity Checks */
|
380
501
|
assert(maxSymbolValue <= MaxSeq);
|
381
502
|
assert(tableLog <= MaxFSELog);
|
382
|
-
|
503
|
+
assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
|
504
|
+
(void)wkspSize;
|
383
505
|
/* Init, lay down lowprob symbols */
|
384
506
|
{ ZSTD_seqSymbol_header DTableH;
|
385
507
|
DTableH.tableLog = tableLog;
|
@@ -395,34 +517,128 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
395
517
|
assert(normalizedCounter[s]>=0);
|
396
518
|
symbolNext[s] = (U16)normalizedCounter[s];
|
397
519
|
} } }
|
398
|
-
|
520
|
+
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
|
399
521
|
}
|
400
522
|
|
401
523
|
/* Spread symbols */
|
402
|
-
|
524
|
+
assert(tableSize <= 512);
|
525
|
+
/* Specialized symbol spreading for the case when there are
|
526
|
+
* no low probability (-1 count) symbols. When compressing
|
527
|
+
* small blocks we avoid low probability symbols to hit this
|
528
|
+
* case, since header decoding speed matters more.
|
529
|
+
*/
|
530
|
+
if (highThreshold == tableSize - 1) {
|
531
|
+
size_t const tableMask = tableSize-1;
|
532
|
+
size_t const step = FSE_TABLESTEP(tableSize);
|
533
|
+
/* First lay down the symbols in order.
|
534
|
+
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
|
535
|
+
* misses since small blocks generally have small table logs, so nearly
|
536
|
+
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
|
537
|
+
* our buffer to handle the over-write.
|
538
|
+
*/
|
539
|
+
{
|
540
|
+
U64 const add = 0x0101010101010101ull;
|
541
|
+
size_t pos = 0;
|
542
|
+
U64 sv = 0;
|
543
|
+
U32 s;
|
544
|
+
for (s=0; s<maxSV1; ++s, sv += add) {
|
545
|
+
int i;
|
546
|
+
int const n = normalizedCounter[s];
|
547
|
+
MEM_write64(spread + pos, sv);
|
548
|
+
for (i = 8; i < n; i += 8) {
|
549
|
+
MEM_write64(spread + pos + i, sv);
|
550
|
+
}
|
551
|
+
assert(n>=0);
|
552
|
+
pos += (size_t)n;
|
553
|
+
}
|
554
|
+
}
|
555
|
+
/* Now we spread those positions across the table.
|
556
|
+
* The benefit of doing it in two stages is that we avoid the
|
557
|
+
* variable size inner loop, which caused lots of branch misses.
|
558
|
+
* Now we can run through all the positions without any branch misses.
|
559
|
+
* We unroll the loop twice, since that is what empirically worked best.
|
560
|
+
*/
|
561
|
+
{
|
562
|
+
size_t position = 0;
|
563
|
+
size_t s;
|
564
|
+
size_t const unroll = 2;
|
565
|
+
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
566
|
+
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
567
|
+
size_t u;
|
568
|
+
for (u = 0; u < unroll; ++u) {
|
569
|
+
size_t const uPosition = (position + (u * step)) & tableMask;
|
570
|
+
tableDecode[uPosition].baseValue = spread[s + u];
|
571
|
+
}
|
572
|
+
position = (position + (unroll * step)) & tableMask;
|
573
|
+
}
|
574
|
+
assert(position == 0);
|
575
|
+
}
|
576
|
+
} else {
|
577
|
+
U32 const tableMask = tableSize-1;
|
403
578
|
U32 const step = FSE_TABLESTEP(tableSize);
|
404
579
|
U32 s, position = 0;
|
405
580
|
for (s=0; s<maxSV1; s++) {
|
406
581
|
int i;
|
407
|
-
|
582
|
+
int const n = normalizedCounter[s];
|
583
|
+
for (i=0; i<n; i++) {
|
408
584
|
tableDecode[position].baseValue = s;
|
409
585
|
position = (position + step) & tableMask;
|
410
|
-
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
|
586
|
+
while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
|
411
587
|
} }
|
412
588
|
assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
|
413
589
|
}
|
414
590
|
|
415
591
|
/* Build Decoding table */
|
416
|
-
{
|
592
|
+
{
|
593
|
+
U32 u;
|
417
594
|
for (u=0; u<tableSize; u++) {
|
418
595
|
U32 const symbol = tableDecode[u].baseValue;
|
419
596
|
U32 const nextState = symbolNext[symbol]++;
|
420
|
-
tableDecode[u].nbBits = (BYTE) (tableLog -
|
597
|
+
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
|
421
598
|
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
422
599
|
assert(nbAdditionalBits[symbol] < 255);
|
423
|
-
tableDecode[u].nbAdditionalBits =
|
600
|
+
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
|
424
601
|
tableDecode[u].baseValue = baseValue[symbol];
|
425
|
-
|
602
|
+
}
|
603
|
+
}
|
604
|
+
}
|
605
|
+
|
606
|
+
/* Avoids the FORCE_INLINE of the _body() function. */
|
607
|
+
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
608
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
609
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
610
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
611
|
+
{
|
612
|
+
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
613
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
614
|
+
}
|
615
|
+
|
616
|
+
#if DYNAMIC_BMI2
|
617
|
+
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
618
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
619
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
620
|
+
unsigned tableLog, void* wksp, size_t wkspSize)
|
621
|
+
{
|
622
|
+
ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
|
623
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
624
|
+
}
|
625
|
+
#endif
|
626
|
+
|
627
|
+
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
628
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
629
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
630
|
+
unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
|
631
|
+
{
|
632
|
+
#if DYNAMIC_BMI2
|
633
|
+
if (bmi2) {
|
634
|
+
ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
|
635
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
636
|
+
return;
|
637
|
+
}
|
638
|
+
#endif
|
639
|
+
(void)bmi2;
|
640
|
+
ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
|
641
|
+
baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
|
426
642
|
}
|
427
643
|
|
428
644
|
|
@@ -432,9 +648,10 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
|
432
648
|
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
|
433
649
|
symbolEncodingType_e type, unsigned max, U32 maxLog,
|
434
650
|
const void* src, size_t srcSize,
|
435
|
-
const U32* baseValue, const
|
651
|
+
const U32* baseValue, const U8* nbAdditionalBits,
|
436
652
|
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
437
|
-
int ddictIsCold, int nbSeq
|
653
|
+
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
|
654
|
+
int bmi2)
|
438
655
|
{
|
439
656
|
switch(type)
|
440
657
|
{
|
@@ -443,7 +660,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
443
660
|
RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
|
444
661
|
{ U32 const symbol = *(const BYTE*)src;
|
445
662
|
U32 const baseline = baseValue[symbol];
|
446
|
-
|
663
|
+
U8 const nbBits = nbAdditionalBits[symbol];
|
447
664
|
ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
|
448
665
|
}
|
449
666
|
*DTablePtr = DTableSpace;
|
@@ -466,7 +683,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
466
683
|
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
467
684
|
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
|
468
685
|
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
|
469
|
-
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
|
686
|
+
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
|
470
687
|
*DTablePtr = DTableSpace;
|
471
688
|
return headerSize;
|
472
689
|
}
|
@@ -479,7 +696,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|
479
696
|
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
480
697
|
const void* src, size_t srcSize)
|
481
698
|
{
|
482
|
-
const BYTE* const istart = (const BYTE*
|
699
|
+
const BYTE* const istart = (const BYTE*)src;
|
483
700
|
const BYTE* const iend = istart + srcSize;
|
484
701
|
const BYTE* ip = istart;
|
485
702
|
int nbSeq;
|
@@ -490,15 +707,11 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
490
707
|
|
491
708
|
/* SeqHead */
|
492
709
|
nbSeq = *ip++;
|
493
|
-
if (!nbSeq) {
|
494
|
-
*nbSeqPtr=0;
|
495
|
-
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
|
496
|
-
return 1;
|
497
|
-
}
|
498
710
|
if (nbSeq > 0x7F) {
|
499
711
|
if (nbSeq == 0xFF) {
|
500
712
|
RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
|
501
|
-
nbSeq = MEM_readLE16(ip) + LONGNBSEQ
|
713
|
+
nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
|
714
|
+
ip+=2;
|
502
715
|
} else {
|
503
716
|
RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
|
504
717
|
nbSeq = ((nbSeq-0x80)<<8) + *ip++;
|
@@ -506,8 +719,16 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
506
719
|
}
|
507
720
|
*nbSeqPtr = nbSeq;
|
508
721
|
|
722
|
+
if (nbSeq == 0) {
|
723
|
+
/* No sequence : section ends immediately */
|
724
|
+
RETURN_ERROR_IF(ip != iend, corruption_detected,
|
725
|
+
"extraneous data present in the Sequences section");
|
726
|
+
return (size_t)(ip - istart);
|
727
|
+
}
|
728
|
+
|
509
729
|
/* FSE table descriptors */
|
510
730
|
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
|
731
|
+
RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
|
511
732
|
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
512
733
|
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
513
734
|
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
@@ -517,9 +738,11 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
517
738
|
{ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
|
518
739
|
LLtype, MaxLL, LLFSELog,
|
519
740
|
ip, iend-ip,
|
520
|
-
|
741
|
+
LL_base, LL_bits,
|
521
742
|
LL_defaultDTable, dctx->fseEntropy,
|
522
|
-
dctx->ddictIsCold, nbSeq
|
743
|
+
dctx->ddictIsCold, nbSeq,
|
744
|
+
dctx->workspace, sizeof(dctx->workspace),
|
745
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
523
746
|
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
524
747
|
ip += llhSize;
|
525
748
|
}
|
@@ -527,9 +750,11 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
527
750
|
{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
|
528
751
|
OFtype, MaxOff, OffFSELog,
|
529
752
|
ip, iend-ip,
|
530
|
-
|
753
|
+
OF_base, OF_bits,
|
531
754
|
OF_defaultDTable, dctx->fseEntropy,
|
532
|
-
dctx->ddictIsCold, nbSeq
|
755
|
+
dctx->ddictIsCold, nbSeq,
|
756
|
+
dctx->workspace, sizeof(dctx->workspace),
|
757
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
533
758
|
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
534
759
|
ip += ofhSize;
|
535
760
|
}
|
@@ -537,9 +762,11 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
537
762
|
{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
|
538
763
|
MLtype, MaxML, MLFSELog,
|
539
764
|
ip, iend-ip,
|
540
|
-
|
765
|
+
ML_base, ML_bits,
|
541
766
|
ML_defaultDTable, dctx->fseEntropy,
|
542
|
-
dctx->ddictIsCold, nbSeq
|
767
|
+
dctx->ddictIsCold, nbSeq,
|
768
|
+
dctx->workspace, sizeof(dctx->workspace),
|
769
|
+
ZSTD_DCtx_get_bmi2(dctx));
|
543
770
|
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
|
544
771
|
ip += mlhSize;
|
545
772
|
}
|
@@ -553,7 +780,6 @@ typedef struct {
|
|
553
780
|
size_t litLength;
|
554
781
|
size_t matchLength;
|
555
782
|
size_t offset;
|
556
|
-
const BYTE* match;
|
557
783
|
} seq_t;
|
558
784
|
|
559
785
|
typedef struct {
|
@@ -567,9 +793,6 @@ typedef struct {
|
|
567
793
|
ZSTD_fseState stateOffb;
|
568
794
|
ZSTD_fseState stateML;
|
569
795
|
size_t prevOffset[ZSTD_REP_NUM];
|
570
|
-
const BYTE* prefixStart;
|
571
|
-
const BYTE* dictEnd;
|
572
|
-
size_t pos;
|
573
796
|
} seqState_t;
|
574
797
|
|
575
798
|
/*! ZSTD_overlapCopy8() :
|
@@ -612,7 +835,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
|
|
612
835
|
* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
|
613
836
|
* The src buffer must be before the dst buffer.
|
614
837
|
*/
|
615
|
-
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
838
|
+
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
|
616
839
|
ptrdiff_t const diff = op - ip;
|
617
840
|
BYTE* const oend = op + length;
|
618
841
|
|
@@ -628,6 +851,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
628
851
|
/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
|
629
852
|
assert(length >= 8);
|
630
853
|
ZSTD_overlapCopy8(&op, &ip, diff);
|
854
|
+
length -= 8;
|
631
855
|
assert(op - ip >= 8);
|
632
856
|
assert(op <= oend);
|
633
857
|
}
|
@@ -642,12 +866,35 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
642
866
|
assert(oend > oend_w);
|
643
867
|
ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
|
644
868
|
ip += oend_w - op;
|
645
|
-
op
|
869
|
+
op += oend_w - op;
|
646
870
|
}
|
647
871
|
/* Handle the leftovers. */
|
648
872
|
while (op < oend) *op++ = *ip++;
|
649
873
|
}
|
650
874
|
|
875
|
+
/* ZSTD_safecopyDstBeforeSrc():
|
876
|
+
* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
|
877
|
+
* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
|
878
|
+
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
|
879
|
+
ptrdiff_t const diff = op - ip;
|
880
|
+
BYTE* const oend = op + length;
|
881
|
+
|
882
|
+
if (length < 8 || diff > -8) {
|
883
|
+
/* Handle short lengths, close overlaps, and dst not before src. */
|
884
|
+
while (op < oend) *op++ = *ip++;
|
885
|
+
return;
|
886
|
+
}
|
887
|
+
|
888
|
+
if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
|
889
|
+
ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
|
890
|
+
ip += oend - WILDCOPY_OVERLENGTH - op;
|
891
|
+
op += oend - WILDCOPY_OVERLENGTH - op;
|
892
|
+
}
|
893
|
+
|
894
|
+
/* Handle the leftovers. */
|
895
|
+
while (op < oend) *op++ = *ip++;
|
896
|
+
}
|
897
|
+
|
651
898
|
/* ZSTD_execSequenceEnd():
|
652
899
|
* This version handles cases that are near the end of the output buffer. It requires
|
653
900
|
* more careful checks to make sure there is no overflow. By separating out these hard
|
@@ -657,10 +904,11 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
|
|
657
904
|
* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
|
658
905
|
*/
|
659
906
|
FORCE_NOINLINE
|
907
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
660
908
|
size_t ZSTD_execSequenceEnd(BYTE* op,
|
661
|
-
|
662
|
-
|
663
|
-
|
909
|
+
BYTE* const oend, seq_t sequence,
|
910
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
911
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
664
912
|
{
|
665
913
|
BYTE* const oLitEnd = op + sequence.litLength;
|
666
914
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
@@ -683,27 +931,78 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
|
|
683
931
|
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
684
932
|
/* offset beyond prefix */
|
685
933
|
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
686
|
-
match = dictEnd - (prefixStart-match);
|
934
|
+
match = dictEnd - (prefixStart - match);
|
687
935
|
if (match + sequence.matchLength <= dictEnd) {
|
688
|
-
|
936
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
689
937
|
return sequenceLength;
|
690
938
|
}
|
691
939
|
/* span extDict & currentPrefixSegment */
|
692
940
|
{ size_t const length1 = dictEnd - match;
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
941
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
942
|
+
op = oLitEnd + length1;
|
943
|
+
sequence.matchLength -= length1;
|
944
|
+
match = prefixStart;
|
945
|
+
}
|
946
|
+
}
|
947
|
+
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
948
|
+
return sequenceLength;
|
949
|
+
}
|
950
|
+
|
951
|
+
/* ZSTD_execSequenceEndSplitLitBuffer():
|
952
|
+
* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
|
953
|
+
*/
|
954
|
+
FORCE_NOINLINE
|
955
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
956
|
+
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
|
957
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
958
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
959
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
960
|
+
{
|
961
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
962
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
963
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
964
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
965
|
+
|
966
|
+
|
967
|
+
/* bounds checks : careful of address space overflow in 32-bit mode */
|
968
|
+
RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
|
969
|
+
RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
|
970
|
+
assert(op < op + sequenceLength);
|
971
|
+
assert(oLitEnd < op + sequenceLength);
|
972
|
+
|
973
|
+
/* copy literals */
|
974
|
+
RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
|
975
|
+
ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
|
976
|
+
op = oLitEnd;
|
977
|
+
*litPtr = iLitEnd;
|
978
|
+
|
979
|
+
/* copy Match */
|
980
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
981
|
+
/* offset beyond prefix */
|
982
|
+
RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
|
983
|
+
match = dictEnd - (prefixStart - match);
|
984
|
+
if (match + sequence.matchLength <= dictEnd) {
|
985
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
986
|
+
return sequenceLength;
|
987
|
+
}
|
988
|
+
/* span extDict & currentPrefixSegment */
|
989
|
+
{ size_t const length1 = dictEnd - match;
|
990
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
991
|
+
op = oLitEnd + length1;
|
992
|
+
sequence.matchLength -= length1;
|
993
|
+
match = prefixStart;
|
994
|
+
}
|
995
|
+
}
|
698
996
|
ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
|
699
997
|
return sequenceLength;
|
700
998
|
}
|
701
999
|
|
702
1000
|
HINT_INLINE
|
1001
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
703
1002
|
size_t ZSTD_execSequence(BYTE* op,
|
704
|
-
|
705
|
-
|
706
|
-
|
1003
|
+
BYTE* const oend, seq_t sequence,
|
1004
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
1005
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
707
1006
|
{
|
708
1007
|
BYTE* const oLitEnd = op + sequence.litLength;
|
709
1008
|
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
@@ -714,6 +1013,104 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
714
1013
|
|
715
1014
|
assert(op != NULL /* Precondition */);
|
716
1015
|
assert(oend_w < oend /* No underflow */);
|
1016
|
+
|
1017
|
+
#if defined(__aarch64__)
|
1018
|
+
/* prefetch sequence starting from match that will be used for copy later */
|
1019
|
+
PREFETCH_L1(match);
|
1020
|
+
#endif
|
1021
|
+
/* Handle edge cases in a slow path:
|
1022
|
+
* - Read beyond end of literals
|
1023
|
+
* - Match end is within WILDCOPY_OVERLIMIT of oend
|
1024
|
+
* - 32-bit mode and the match length overflows
|
1025
|
+
*/
|
1026
|
+
if (UNLIKELY(
|
1027
|
+
iLitEnd > litLimit ||
|
1028
|
+
oMatchEnd > oend_w ||
|
1029
|
+
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
1030
|
+
return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
1031
|
+
|
1032
|
+
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
1033
|
+
assert(op <= oLitEnd /* No overflow */);
|
1034
|
+
assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
|
1035
|
+
assert(oMatchEnd <= oend /* No underflow */);
|
1036
|
+
assert(iLitEnd <= litLimit /* Literal length is in bounds */);
|
1037
|
+
assert(oLitEnd <= oend_w /* Can wildcopy literals */);
|
1038
|
+
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
|
1039
|
+
|
1040
|
+
/* Copy Literals:
|
1041
|
+
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
|
1042
|
+
* We likely don't need the full 32-byte wildcopy.
|
1043
|
+
*/
|
1044
|
+
assert(WILDCOPY_OVERLENGTH >= 16);
|
1045
|
+
ZSTD_copy16(op, (*litPtr));
|
1046
|
+
if (UNLIKELY(sequence.litLength > 16)) {
|
1047
|
+
ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
|
1048
|
+
}
|
1049
|
+
op = oLitEnd;
|
1050
|
+
*litPtr = iLitEnd; /* update for next sequence */
|
1051
|
+
|
1052
|
+
/* Copy Match */
|
1053
|
+
if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
|
1054
|
+
/* offset beyond prefix -> go into extDict */
|
1055
|
+
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
1056
|
+
match = dictEnd + (match - prefixStart);
|
1057
|
+
if (match + sequence.matchLength <= dictEnd) {
|
1058
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
1059
|
+
return sequenceLength;
|
1060
|
+
}
|
1061
|
+
/* span extDict & currentPrefixSegment */
|
1062
|
+
{ size_t const length1 = dictEnd - match;
|
1063
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
1064
|
+
op = oLitEnd + length1;
|
1065
|
+
sequence.matchLength -= length1;
|
1066
|
+
match = prefixStart;
|
1067
|
+
}
|
1068
|
+
}
|
1069
|
+
/* Match within prefix of 1 or more bytes */
|
1070
|
+
assert(op <= oMatchEnd);
|
1071
|
+
assert(oMatchEnd <= oend_w);
|
1072
|
+
assert(match >= prefixStart);
|
1073
|
+
assert(sequence.matchLength >= 1);
|
1074
|
+
|
1075
|
+
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
|
1076
|
+
* without overlap checking.
|
1077
|
+
*/
|
1078
|
+
if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
|
1079
|
+
/* We bet on a full wildcopy for matches, since we expect matches to be
|
1080
|
+
* longer than literals (in general). In silesia, ~10% of matches are longer
|
1081
|
+
* than 16 bytes.
|
1082
|
+
*/
|
1083
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
|
1084
|
+
return sequenceLength;
|
1085
|
+
}
|
1086
|
+
assert(sequence.offset < WILDCOPY_VECLEN);
|
1087
|
+
|
1088
|
+
/* Copy 8 bytes and spread the offset to be >= 8. */
|
1089
|
+
ZSTD_overlapCopy8(&op, &match, sequence.offset);
|
1090
|
+
|
1091
|
+
/* If the match length is > 8 bytes, then continue with the wildcopy. */
|
1092
|
+
if (sequence.matchLength > 8) {
|
1093
|
+
assert(op < oMatchEnd);
|
1094
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
|
1095
|
+
}
|
1096
|
+
return sequenceLength;
|
1097
|
+
}
|
1098
|
+
|
1099
|
+
HINT_INLINE
|
1100
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1101
|
+
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
|
1102
|
+
BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
|
1103
|
+
const BYTE** litPtr, const BYTE* const litLimit,
|
1104
|
+
const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
|
1105
|
+
{
|
1106
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
1107
|
+
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
|
1108
|
+
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
1109
|
+
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
1110
|
+
const BYTE* match = oLitEnd - sequence.offset;
|
1111
|
+
|
1112
|
+
assert(op != NULL /* Precondition */);
|
1113
|
+
assert(oend_w < oend /* No underflow */);
|
717
1114
|
/* Handle edge cases in a slow path:
|
718
1115
|
* - Read beyond end of literals
|
719
1116
|
* - Match end is within WILDCOPY_OVERLIMIT of oend
|
@@ -723,7 +1120,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
723
1120
|
iLitEnd > litLimit ||
|
724
1121
|
oMatchEnd > oend_w ||
|
725
1122
|
(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
|
726
|
-
return
|
1123
|
+
return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
727
1124
|
|
728
1125
|
/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
|
729
1126
|
assert(op <= oLitEnd /* No overflow */);
|
@@ -751,12 +1148,12 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
751
1148
|
RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
|
752
1149
|
match = dictEnd + (match - prefixStart);
|
753
1150
|
if (match + sequence.matchLength <= dictEnd) {
|
754
|
-
|
1151
|
+
ZSTD_memmove(oLitEnd, match, sequence.matchLength);
|
755
1152
|
return sequenceLength;
|
756
1153
|
}
|
757
1154
|
/* span extDict & currentPrefixSegment */
|
758
1155
|
{ size_t const length1 = dictEnd - match;
|
759
|
-
|
1156
|
+
ZSTD_memmove(oLitEnd, match, length1);
|
760
1157
|
op = oLitEnd + length1;
|
761
1158
|
sequence.matchLength -= length1;
|
762
1159
|
match = prefixStart;
|
@@ -791,6 +1188,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
791
1188
|
return sequenceLength;
|
792
1189
|
}
|
793
1190
|
|
1191
|
+
|
794
1192
|
static void
|
795
1193
|
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
|
796
1194
|
{
|
@@ -804,24 +1202,14 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS
|
|
804
1202
|
}
|
805
1203
|
|
806
1204
|
FORCE_INLINE_TEMPLATE void
|
807
|
-
|
1205
|
+
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
|
808
1206
|
{
|
809
|
-
ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
|
810
|
-
U32 const nbBits = DInfo.nbBits;
|
811
1207
|
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
812
|
-
DStatePtr->state =
|
813
|
-
}
|
814
|
-
|
815
|
-
FORCE_INLINE_TEMPLATE void
|
816
|
-
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
|
817
|
-
{
|
818
|
-
U32 const nbBits = DInfo.nbBits;
|
819
|
-
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
820
|
-
DStatePtr->state = DInfo.nextState + lowBits;
|
1208
|
+
DStatePtr->state = nextState + lowBits;
|
821
1209
|
}
|
822
1210
|
|
823
1211
|
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
824
|
-
* offset bits. But we can only read at most
|
1212
|
+
* offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
|
825
1213
|
* bits before reloading. This value is the maximum number of bytes we read
|
826
1214
|
* after reloading when we are decoding long offsets.
|
827
1215
|
*/
|
@@ -831,122 +1219,135 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD
|
|
831
1219
|
: 0)
|
832
1220
|
|
833
1221
|
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
834
|
-
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
|
835
1222
|
|
1223
|
+
/**
|
1224
|
+
* ZSTD_decodeSequence():
|
1225
|
+
* @p longOffsets : tells the decoder to reload more bit while decoding large offsets
|
1226
|
+
* only used in 32-bit mode
|
1227
|
+
* @return : Sequence (litL + matchL + offset)
|
1228
|
+
*/
|
836
1229
|
FORCE_INLINE_TEMPLATE seq_t
|
837
|
-
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const
|
1230
|
+
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
|
838
1231
|
{
|
839
1232
|
seq_t seq;
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
1233
|
+
/*
|
1234
|
+
* ZSTD_seqSymbol is a 64 bits wide structure.
|
1235
|
+
* It can be loaded in one operation
|
1236
|
+
* and its fields extracted by simply shifting or bit-extracting on aarch64.
|
1237
|
+
* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
|
1238
|
+
* operations that cause performance drop. This can be avoided by using this
|
1239
|
+
* ZSTD_memcpy hack.
|
1240
|
+
*/
|
1241
|
+
#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
|
1242
|
+
ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
|
1243
|
+
ZSTD_seqSymbol* const llDInfo = &llDInfoS;
|
1244
|
+
ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
|
1245
|
+
ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
|
1246
|
+
ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
|
1247
|
+
ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
|
1248
|
+
ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
|
1249
|
+
#else
|
1250
|
+
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
|
1251
|
+
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
|
1252
|
+
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
|
1253
|
+
#endif
|
1254
|
+
seq.matchLength = mlDInfo->baseValue;
|
1255
|
+
seq.litLength = llDInfo->baseValue;
|
1256
|
+
{ U32 const ofBase = ofDInfo->baseValue;
|
1257
|
+
BYTE const llBits = llDInfo->nbAdditionalBits;
|
1258
|
+
BYTE const mlBits = mlDInfo->nbAdditionalBits;
|
1259
|
+
BYTE const ofBits = ofDInfo->nbAdditionalBits;
|
1260
|
+
BYTE const totalBits = llBits+mlBits+ofBits;
|
1261
|
+
|
1262
|
+
U16 const llNext = llDInfo->nextState;
|
1263
|
+
U16 const mlNext = mlDInfo->nextState;
|
1264
|
+
U16 const ofNext = ofDInfo->nextState;
|
1265
|
+
U32 const llnbBits = llDInfo->nbBits;
|
1266
|
+
U32 const mlnbBits = mlDInfo->nbBits;
|
1267
|
+
U32 const ofnbBits = ofDInfo->nbBits;
|
1268
|
+
|
1269
|
+
assert(llBits <= MaxLLBits);
|
1270
|
+
assert(mlBits <= MaxMLBits);
|
1271
|
+
assert(ofBits <= MaxOff);
|
1272
|
+
/*
|
1273
|
+
* As gcc has better branch and block analyzers, sometimes it is only
|
1274
|
+
* valuable to mark likeliness for clang, it gives around 3-4% of
|
1275
|
+
* performance.
|
1276
|
+
*/
|
1277
|
+
|
1278
|
+
/* sequence */
|
1279
|
+
{ size_t offset;
|
1280
|
+
if (ofBits > 1) {
|
1281
|
+
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
1282
|
+
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
|
1283
|
+
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
|
1284
|
+
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
|
1285
|
+
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
|
1286
|
+
/* Always read extra bits, this keeps the logic simple,
|
1287
|
+
* avoids branches, and avoids accidentally reading 0 bits.
|
1288
|
+
*/
|
1289
|
+
U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
|
1290
|
+
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
1291
|
+
BIT_reloadDStream(&seqState->DStream);
|
1292
|
+
offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
1293
|
+
} else {
|
1294
|
+
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
|
1295
|
+
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
|
879
1296
|
}
|
1297
|
+
seqState->prevOffset[2] = seqState->prevOffset[1];
|
1298
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
1299
|
+
seqState->prevOffset[0] = offset;
|
880
1300
|
} else {
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
seqState->prevOffset[
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
1301
|
+
U32 const ll0 = (llDInfo->baseValue == 0);
|
1302
|
+
if (LIKELY((ofBits == 0))) {
|
1303
|
+
offset = seqState->prevOffset[ll0];
|
1304
|
+
seqState->prevOffset[1] = seqState->prevOffset[!ll0];
|
1305
|
+
seqState->prevOffset[0] = offset;
|
1306
|
+
} else {
|
1307
|
+
offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
|
1308
|
+
{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
|
1309
|
+
temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
|
1310
|
+
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
|
1311
|
+
seqState->prevOffset[1] = seqState->prevOffset[0];
|
1312
|
+
seqState->prevOffset[0] = offset = temp;
|
1313
|
+
} } }
|
1314
|
+
seq.offset = offset;
|
1315
|
+
}
|
890
1316
|
|
891
|
-
|
892
|
-
|
893
|
-
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
1317
|
+
if (mlBits > 0)
|
1318
|
+
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
|
894
1319
|
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
1320
|
+
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
1321
|
+
BIT_reloadDStream(&seqState->DStream);
|
1322
|
+
if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
1323
|
+
BIT_reloadDStream(&seqState->DStream);
|
1324
|
+
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
|
1325
|
+
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
901
1326
|
|
902
|
-
|
903
|
-
|
904
|
-
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
1327
|
+
if (llBits > 0)
|
1328
|
+
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
|
905
1329
|
|
906
|
-
|
907
|
-
|
1330
|
+
if (MEM_32bits())
|
1331
|
+
BIT_reloadDStream(&seqState->DStream);
|
908
1332
|
|
909
|
-
|
910
|
-
|
1333
|
+
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
1334
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
911
1335
|
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
* No consequence though : no memory access will occur, offset is only used for prefetching */
|
917
|
-
seqState->pos = pos + seq.matchLength;
|
918
|
-
}
|
919
|
-
|
920
|
-
/* ANS state update
|
921
|
-
* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
|
922
|
-
* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
|
923
|
-
* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
|
924
|
-
* better option, so it is the default for other compilers. But, if you
|
925
|
-
* measure that it is worse, please put up a pull request.
|
926
|
-
*/
|
927
|
-
{
|
928
|
-
#if defined(__GNUC__) && !defined(__clang__)
|
929
|
-
const int kUseUpdateFseState = 1;
|
930
|
-
#else
|
931
|
-
const int kUseUpdateFseState = 0;
|
932
|
-
#endif
|
933
|
-
if (kUseUpdateFseState) {
|
934
|
-
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
935
|
-
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
1336
|
+
if (!isLastSeq) {
|
1337
|
+
/* don't update FSE state for last Sequence */
|
1338
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
|
1339
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
|
936
1340
|
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
937
|
-
|
938
|
-
|
939
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
|
940
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
|
941
|
-
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
942
|
-
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
|
1341
|
+
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
|
1342
|
+
BIT_reloadDStream(&seqState->DStream);
|
943
1343
|
}
|
944
1344
|
}
|
945
1345
|
|
946
1346
|
return seq;
|
947
1347
|
}
|
948
1348
|
|
949
|
-
#
|
1349
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1350
|
+
#if DEBUGLEVEL >= 1
|
950
1351
|
static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
|
951
1352
|
{
|
952
1353
|
size_t const windowSize = dctx->fParams.windowSize;
|
@@ -961,59 +1362,65 @@ static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStar
|
|
961
1362
|
/* Dictionary is active. */
|
962
1363
|
return 1;
|
963
1364
|
}
|
1365
|
+
#endif
|
964
1366
|
|
965
|
-
|
1367
|
+
static void ZSTD_assertValidSequence(
|
966
1368
|
ZSTD_DCtx const* dctx,
|
967
1369
|
BYTE const* op, BYTE const* oend,
|
968
1370
|
seq_t const seq,
|
969
1371
|
BYTE const* prefixStart, BYTE const* virtualStart)
|
970
1372
|
{
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
|
984
|
-
|
985
|
-
|
986
|
-
|
1373
|
+
#if DEBUGLEVEL >= 1
|
1374
|
+
if (dctx->isFrameDecompression) {
|
1375
|
+
size_t const windowSize = dctx->fParams.windowSize;
|
1376
|
+
size_t const sequenceSize = seq.litLength + seq.matchLength;
|
1377
|
+
BYTE const* const oLitEnd = op + seq.litLength;
|
1378
|
+
DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
|
1379
|
+
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
1380
|
+
assert(op <= oend);
|
1381
|
+
assert((size_t)(oend - op) >= sequenceSize);
|
1382
|
+
assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
|
1383
|
+
if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
|
1384
|
+
size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
|
1385
|
+
/* Offset must be within the dictionary. */
|
1386
|
+
assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
|
1387
|
+
assert(seq.offset <= windowSize + dictSize);
|
1388
|
+
} else {
|
1389
|
+
/* Offset must be within our window. */
|
1390
|
+
assert(seq.offset <= windowSize);
|
1391
|
+
}
|
987
1392
|
}
|
1393
|
+
#else
|
1394
|
+
(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
|
1395
|
+
#endif
|
988
1396
|
}
|
989
1397
|
#endif
|
990
1398
|
|
991
1399
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1400
|
+
|
1401
|
+
|
992
1402
|
FORCE_INLINE_TEMPLATE size_t
|
993
1403
|
DONT_VECTORIZE
|
994
|
-
|
1404
|
+
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
|
995
1405
|
void* dst, size_t maxDstSize,
|
996
1406
|
const void* seqStart, size_t seqSize, int nbSeq,
|
997
|
-
const ZSTD_longOffset_e isLongOffset
|
998
|
-
const int frame)
|
1407
|
+
const ZSTD_longOffset_e isLongOffset)
|
999
1408
|
{
|
1000
1409
|
const BYTE* ip = (const BYTE*)seqStart;
|
1001
1410
|
const BYTE* const iend = ip + seqSize;
|
1002
|
-
BYTE* const ostart = (BYTE*
|
1003
|
-
BYTE* const oend = ostart
|
1411
|
+
BYTE* const ostart = (BYTE*)dst;
|
1412
|
+
BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
|
1004
1413
|
BYTE* op = ostart;
|
1005
1414
|
const BYTE* litPtr = dctx->litPtr;
|
1006
|
-
const BYTE*
|
1415
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
1007
1416
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
1008
1417
|
const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
|
1009
1418
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
1010
|
-
DEBUGLOG(5, "
|
1011
|
-
(void)frame;
|
1419
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
|
1012
1420
|
|
1013
|
-
/*
|
1421
|
+
/* Literals are split between internal buffer & output buffer */
|
1014
1422
|
if (nbSeq) {
|
1015
1423
|
seqState_t seqState;
|
1016
|
-
size_t error = 0;
|
1017
1424
|
dctx->fseEntropy = 1;
|
1018
1425
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
1019
1426
|
RETURN_ERROR_IF(
|
@@ -1029,134 +1436,331 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
1029
1436
|
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
1030
1437
|
BIT_DStream_completed < BIT_DStream_overflow);
|
1031
1438
|
|
1439
|
+
/* decompress without overrunning litPtr begins */
|
1440
|
+
{ seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */
|
1441
|
+
/* Align the decompression loop to 32 + 16 bytes.
|
1442
|
+
*
|
1443
|
+
* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
|
1444
|
+
* speed swings based on the alignment of the decompression loop. This
|
1445
|
+
* performance swing is caused by parts of the decompression loop falling
|
1446
|
+
* out of the DSB. The entire decompression loop should fit in the DSB,
|
1447
|
+
* when it can't we get much worse performance. You can measure if you've
|
1448
|
+
* hit the good case or the bad case with this perf command for some
|
1449
|
+
* compressed file test.zst:
|
1450
|
+
*
|
1451
|
+
* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
|
1452
|
+
* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
|
1453
|
+
*
|
1454
|
+
* If you see most cycles served out of the MITE you've hit the bad case.
|
1455
|
+
* If you see most cycles served out of the DSB you've hit the good case.
|
1456
|
+
* If it is pretty even then you may be in an okay case.
|
1457
|
+
*
|
1458
|
+
* This issue has been reproduced on the following CPUs:
|
1459
|
+
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
1460
|
+
* Use Instruments->Counters to get DSB/MITE cycles.
|
1461
|
+
* I never got performance swings, but I was able to
|
1462
|
+
* go from the good case of mostly DSB to half of the
|
1463
|
+
* cycles served from MITE.
|
1464
|
+
* - Coffeelake: Intel i9-9900k
|
1465
|
+
* - Coffeelake: Intel i7-9700k
|
1466
|
+
*
|
1467
|
+
* I haven't been able to reproduce the instability or DSB misses on any
|
1468
|
+
* of the following CPUS:
|
1469
|
+
* - Haswell
|
1470
|
+
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
|
1471
|
+
* - Skylake
|
1472
|
+
*
|
1473
|
+
* Alignment is done for each of the three major decompression loops:
|
1474
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
|
1475
|
+
* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
|
1476
|
+
* - ZSTD_decompressSequences_body
|
1477
|
+
* Alignment choices are made to minimize large swings on bad cases and influence on performance
|
1478
|
+
* from changes external to this code, rather than to overoptimize on the current commit.
|
1479
|
+
*
|
1480
|
+
* If you are seeing performance stability this script can help test.
|
1481
|
+
* It tests on 4 commits in zstd where I saw performance change.
|
1482
|
+
*
|
1483
|
+
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
1484
|
+
*/
|
1032
1485
|
#if defined(__GNUC__) && defined(__x86_64__)
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
* If you see most cycles served out of the MITE you've hit the bad case.
|
1047
|
-
* If you see most cycles served out of the DSB you've hit the good case.
|
1048
|
-
* If it is pretty even then you may be in an okay case.
|
1049
|
-
*
|
1050
|
-
* I've been able to reproduce this issue on the following CPUs:
|
1051
|
-
* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
|
1052
|
-
* Use Instruments->Counters to get DSB/MITE cycles.
|
1053
|
-
* I never got performance swings, but I was able to
|
1054
|
-
* go from the good case of mostly DSB to half of the
|
1055
|
-
* cycles served from MITE.
|
1056
|
-
* - Coffeelake: Intel i9-9900k
|
1057
|
-
*
|
1058
|
-
* I haven't been able to reproduce the instability or DSB misses on any
|
1059
|
-
* of the following CPUS:
|
1060
|
-
* - Haswell
|
1061
|
-
* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
|
1062
|
-
* - Skylake
|
1063
|
-
*
|
1064
|
-
* If you are seeing performance stability this script can help test.
|
1065
|
-
* It tests on 4 commits in zstd where I saw performance change.
|
1066
|
-
*
|
1067
|
-
* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
|
1068
|
-
*/
|
1069
|
-
__asm__(".p2align 5");
|
1070
|
-
__asm__("nop");
|
1071
|
-
__asm__(".p2align 4");
|
1486
|
+
__asm__(".p2align 6");
|
1487
|
+
# if __GNUC__ >= 7
|
1488
|
+
/* good for gcc-7, gcc-9, and gcc-11 */
|
1489
|
+
__asm__("nop");
|
1490
|
+
__asm__(".p2align 5");
|
1491
|
+
__asm__("nop");
|
1492
|
+
__asm__(".p2align 4");
|
1493
|
+
# if __GNUC__ == 8 || __GNUC__ == 10
|
1494
|
+
/* good for gcc-8 and gcc-10 */
|
1495
|
+
__asm__("nop");
|
1496
|
+
__asm__(".p2align 3");
|
1497
|
+
# endif
|
1498
|
+
# endif
|
1072
1499
|
#endif
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1500
|
+
|
1501
|
+
/* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
|
1502
|
+
for ( ; nbSeq; nbSeq--) {
|
1503
|
+
sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
|
1504
|
+
if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
|
1505
|
+
{ size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
1076
1506
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1077
|
-
|
1078
|
-
|
1507
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1508
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
1079
1509
|
#endif
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
if (
|
1089
|
-
|
1090
|
-
|
1510
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
1511
|
+
return oneSeqSize;
|
1512
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
1513
|
+
op += oneSeqSize;
|
1514
|
+
} }
|
1515
|
+
DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
|
1516
|
+
|
1517
|
+
/* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
|
1518
|
+
if (nbSeq > 0) {
|
1519
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
1520
|
+
DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
|
1521
|
+
if (leftoverLit) {
|
1522
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
1523
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
1524
|
+
sequence.litLength -= leftoverLit;
|
1525
|
+
op += leftoverLit;
|
1526
|
+
}
|
1527
|
+
litPtr = dctx->litExtraBuffer;
|
1528
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1529
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
1530
|
+
{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
1531
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1532
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1533
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
1534
|
+
#endif
|
1535
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
1536
|
+
return oneSeqSize;
|
1537
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
1538
|
+
op += oneSeqSize;
|
1539
|
+
}
|
1540
|
+
nbSeq--;
|
1541
|
+
}
|
1542
|
+
}
|
1543
|
+
|
1544
|
+
if (nbSeq > 0) {
|
1545
|
+
/* there is remaining lit from extra buffer */
|
1546
|
+
|
1547
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
1548
|
+
__asm__(".p2align 6");
|
1549
|
+
__asm__("nop");
|
1550
|
+
# if __GNUC__ != 7
|
1551
|
+
/* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
|
1552
|
+
__asm__(".p2align 4");
|
1553
|
+
__asm__("nop");
|
1554
|
+
__asm__(".p2align 3");
|
1555
|
+
# elif __GNUC__ >= 11
|
1556
|
+
__asm__(".p2align 3");
|
1557
|
+
# else
|
1558
|
+
__asm__(".p2align 5");
|
1559
|
+
__asm__("nop");
|
1560
|
+
__asm__(".p2align 3");
|
1561
|
+
# endif
|
1562
|
+
#endif
|
1563
|
+
|
1564
|
+
for ( ; nbSeq ; nbSeq--) {
|
1565
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
|
1566
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
|
1567
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1568
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1569
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
1570
|
+
#endif
|
1571
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
1572
|
+
return oneSeqSize;
|
1573
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
1574
|
+
op += oneSeqSize;
|
1575
|
+
}
|
1091
1576
|
}
|
1092
1577
|
|
1093
1578
|
/* check if reached exact end */
|
1094
|
-
DEBUGLOG(5, "
|
1095
|
-
if (ZSTD_isError(error)) return error;
|
1579
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
|
1096
1580
|
RETURN_ERROR_IF(nbSeq, corruption_detected, "");
|
1097
|
-
|
1581
|
+
DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
|
1582
|
+
RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
|
1098
1583
|
/* save reps for next block */
|
1099
1584
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
1100
1585
|
}
|
1101
1586
|
|
1102
1587
|
/* last literal segment */
|
1103
|
-
|
1588
|
+
if (dctx->litBufferLocation == ZSTD_split) {
|
1589
|
+
/* split hasn't been reached yet, first get dst then copy litExtraBuffer */
|
1590
|
+
size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
|
1591
|
+
DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
|
1592
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
1593
|
+
if (op != NULL) {
|
1594
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
1595
|
+
op += lastLLSize;
|
1596
|
+
}
|
1597
|
+
litPtr = dctx->litExtraBuffer;
|
1598
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1599
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
1600
|
+
}
|
1601
|
+
/* copy last literals from internal buffer */
|
1602
|
+
{ size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
|
1603
|
+
DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
|
1104
1604
|
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
1105
1605
|
if (op != NULL) {
|
1106
|
-
|
1606
|
+
ZSTD_memcpy(op, litPtr, lastLLSize);
|
1107
1607
|
op += lastLLSize;
|
1608
|
+
} }
|
1609
|
+
|
1610
|
+
DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
|
1611
|
+
return (size_t)(op - ostart);
|
1612
|
+
}
|
1613
|
+
|
1614
|
+
FORCE_INLINE_TEMPLATE size_t
|
1615
|
+
DONT_VECTORIZE
|
1616
|
+
ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
|
1617
|
+
void* dst, size_t maxDstSize,
|
1618
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
1619
|
+
const ZSTD_longOffset_e isLongOffset)
|
1620
|
+
{
|
1621
|
+
const BYTE* ip = (const BYTE*)seqStart;
|
1622
|
+
const BYTE* const iend = ip + seqSize;
|
1623
|
+
BYTE* const ostart = (BYTE*)dst;
|
1624
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
|
1625
|
+
BYTE* op = ostart;
|
1626
|
+
const BYTE* litPtr = dctx->litPtr;
|
1627
|
+
const BYTE* const litEnd = litPtr + dctx->litSize;
|
1628
|
+
const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
|
1629
|
+
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
|
1630
|
+
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
|
1631
|
+
DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
|
1632
|
+
|
1633
|
+
/* Regen sequences */
|
1634
|
+
if (nbSeq) {
|
1635
|
+
seqState_t seqState;
|
1636
|
+
dctx->fseEntropy = 1;
|
1637
|
+
{ U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
1638
|
+
RETURN_ERROR_IF(
|
1639
|
+
ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
|
1640
|
+
corruption_detected, "");
|
1641
|
+
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
1642
|
+
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
1643
|
+
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
1644
|
+
assert(dst != NULL);
|
1645
|
+
|
1646
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
1647
|
+
__asm__(".p2align 6");
|
1648
|
+
__asm__("nop");
|
1649
|
+
# if __GNUC__ >= 7
|
1650
|
+
__asm__(".p2align 5");
|
1651
|
+
__asm__("nop");
|
1652
|
+
__asm__(".p2align 3");
|
1653
|
+
# else
|
1654
|
+
__asm__(".p2align 4");
|
1655
|
+
__asm__("nop");
|
1656
|
+
__asm__(".p2align 3");
|
1657
|
+
# endif
|
1658
|
+
#endif
|
1659
|
+
|
1660
|
+
for ( ; nbSeq ; nbSeq--) {
|
1661
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
|
1662
|
+
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
|
1663
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1664
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1665
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
|
1666
|
+
#endif
|
1667
|
+
if (UNLIKELY(ZSTD_isError(oneSeqSize)))
|
1668
|
+
return oneSeqSize;
|
1669
|
+
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
|
1670
|
+
op += oneSeqSize;
|
1108
1671
|
}
|
1672
|
+
|
1673
|
+
/* check if reached exact end */
|
1674
|
+
assert(nbSeq == 0);
|
1675
|
+
RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
|
1676
|
+
/* save reps for next block */
|
1677
|
+
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
1109
1678
|
}
|
1110
1679
|
|
1111
|
-
|
1680
|
+
/* last literal segment */
|
1681
|
+
{ size_t const lastLLSize = (size_t)(litEnd - litPtr);
|
1682
|
+
DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
|
1683
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
1684
|
+
if (op != NULL) {
|
1685
|
+
ZSTD_memcpy(op, litPtr, lastLLSize);
|
1686
|
+
op += lastLLSize;
|
1687
|
+
} }
|
1688
|
+
|
1689
|
+
DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
|
1690
|
+
return (size_t)(op - ostart);
|
1112
1691
|
}
|
1113
1692
|
|
1114
1693
|
static size_t
|
1115
1694
|
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
|
1116
1695
|
void* dst, size_t maxDstSize,
|
1117
1696
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1118
|
-
const ZSTD_longOffset_e isLongOffset
|
1119
|
-
const int frame)
|
1697
|
+
const ZSTD_longOffset_e isLongOffset)
|
1120
1698
|
{
|
1121
|
-
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
1699
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1700
|
+
}
|
1701
|
+
|
1702
|
+
static size_t
|
1703
|
+
ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
|
1704
|
+
void* dst, size_t maxDstSize,
|
1705
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
1706
|
+
const ZSTD_longOffset_e isLongOffset)
|
1707
|
+
{
|
1708
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1122
1709
|
}
|
1123
1710
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
1124
1711
|
|
1125
1712
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
1713
|
+
|
1714
|
+
FORCE_INLINE_TEMPLATE
|
1715
|
+
|
1716
|
+
size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
|
1717
|
+
const BYTE* const prefixStart, const BYTE* const dictEnd)
|
1718
|
+
{
|
1719
|
+
prefetchPos += sequence.litLength;
|
1720
|
+
{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
|
1721
|
+
/* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
|
1722
|
+
* No consequence though : memory address is only used for prefetching, not for dereferencing */
|
1723
|
+
const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
|
1724
|
+
PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
1725
|
+
}
|
1726
|
+
return prefetchPos + sequence.matchLength;
|
1727
|
+
}
|
1728
|
+
|
1729
|
+
/* This decoding function employs prefetching
|
1730
|
+
* to reduce latency impact of cache misses.
|
1731
|
+
* It's generally employed when block contains a significant portion of long-distance matches
|
1732
|
+
* or when coupled with a "cold" dictionary */
|
1126
1733
|
FORCE_INLINE_TEMPLATE size_t
|
1127
1734
|
ZSTD_decompressSequencesLong_body(
|
1128
1735
|
ZSTD_DCtx* dctx,
|
1129
1736
|
void* dst, size_t maxDstSize,
|
1130
1737
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1131
|
-
const ZSTD_longOffset_e isLongOffset
|
1132
|
-
const int frame)
|
1738
|
+
const ZSTD_longOffset_e isLongOffset)
|
1133
1739
|
{
|
1134
1740
|
const BYTE* ip = (const BYTE*)seqStart;
|
1135
1741
|
const BYTE* const iend = ip + seqSize;
|
1136
|
-
BYTE* const ostart = (BYTE*
|
1137
|
-
BYTE* const oend = ostart
|
1742
|
+
BYTE* const ostart = (BYTE*)dst;
|
1743
|
+
BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
|
1138
1744
|
BYTE* op = ostart;
|
1139
1745
|
const BYTE* litPtr = dctx->litPtr;
|
1140
|
-
const BYTE*
|
1746
|
+
const BYTE* litBufferEnd = dctx->litBufferEnd;
|
1141
1747
|
const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
|
1142
1748
|
const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
|
1143
1749
|
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
1144
|
-
(void)frame;
|
1145
1750
|
|
1146
1751
|
/* Regen sequences */
|
1147
1752
|
if (nbSeq) {
|
1148
|
-
#define STORED_SEQS
|
1753
|
+
#define STORED_SEQS 8
|
1149
1754
|
#define STORED_SEQS_MASK (STORED_SEQS-1)
|
1150
|
-
#define ADVANCED_SEQS
|
1755
|
+
#define ADVANCED_SEQS STORED_SEQS
|
1151
1756
|
seq_t sequences[STORED_SEQS];
|
1152
1757
|
int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
|
1153
1758
|
seqState_t seqState;
|
1154
1759
|
int seqNb;
|
1760
|
+
size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
|
1761
|
+
|
1155
1762
|
dctx->fseEntropy = 1;
|
1156
1763
|
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
1157
|
-
seqState.prefixStart = prefixStart;
|
1158
|
-
seqState.pos = (size_t)(op-prefixStart);
|
1159
|
-
seqState.dictEnd = dictEnd;
|
1160
1764
|
assert(dst != NULL);
|
1161
1765
|
assert(iend >= ip);
|
1162
1766
|
RETURN_ERROR_IF(
|
@@ -1167,37 +1771,95 @@ ZSTD_decompressSequencesLong_body(
|
|
1167
1771
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
1168
1772
|
|
1169
1773
|
/* prepare in advance */
|
1170
|
-
for (seqNb=0;
|
1171
|
-
|
1172
|
-
|
1774
|
+
for (seqNb=0; seqNb<seqAdvance; seqNb++) {
|
1775
|
+
seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
|
1776
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
1777
|
+
sequences[seqNb] = sequence;
|
1173
1778
|
}
|
1174
|
-
RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
|
1175
1779
|
|
1176
|
-
/*
|
1177
|
-
for (
|
1178
|
-
seq_t
|
1179
|
-
|
1780
|
+
/* decompress without stomping litBuffer */
|
1781
|
+
for (; seqNb < nbSeq; seqNb++) {
|
1782
|
+
seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
|
1783
|
+
|
1784
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
|
1785
|
+
/* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
|
1786
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
1787
|
+
if (leftoverLit)
|
1788
|
+
{
|
1789
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
1790
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
1791
|
+
sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
|
1792
|
+
op += leftoverLit;
|
1793
|
+
}
|
1794
|
+
litPtr = dctx->litExtraBuffer;
|
1795
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1796
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
1797
|
+
{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
1180
1798
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1181
|
-
|
1182
|
-
|
1799
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1800
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
1183
1801
|
#endif
|
1184
|
-
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1802
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1803
|
+
|
1804
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
1805
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
1806
|
+
op += oneSeqSize;
|
1807
|
+
} }
|
1808
|
+
else
|
1809
|
+
{
|
1810
|
+
/* lit buffer is either wholly contained in first or second split, or not split at all*/
|
1811
|
+
size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
1812
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
1813
|
+
ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
1814
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1815
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1816
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
|
1817
|
+
#endif
|
1818
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1819
|
+
|
1820
|
+
prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
|
1821
|
+
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
1822
|
+
op += oneSeqSize;
|
1823
|
+
}
|
1188
1824
|
}
|
1189
|
-
RETURN_ERROR_IF(
|
1825
|
+
RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
|
1190
1826
|
|
1191
1827
|
/* finish queue */
|
1192
1828
|
seqNb -= seqAdvance;
|
1193
1829
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
1194
|
-
|
1830
|
+
seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
|
1831
|
+
if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
|
1832
|
+
const size_t leftoverLit = dctx->litBufferEnd - litPtr;
|
1833
|
+
if (leftoverLit) {
|
1834
|
+
RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
|
1835
|
+
ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
|
1836
|
+
sequence->litLength -= leftoverLit;
|
1837
|
+
op += leftoverLit;
|
1838
|
+
}
|
1839
|
+
litPtr = dctx->litExtraBuffer;
|
1840
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1841
|
+
dctx->litBufferLocation = ZSTD_not_in_dst;
|
1842
|
+
{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
1195
1843
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1196
|
-
|
1197
|
-
|
1844
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1845
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
1198
1846
|
#endif
|
1199
|
-
|
1200
|
-
|
1847
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1848
|
+
op += oneSeqSize;
|
1849
|
+
}
|
1850
|
+
}
|
1851
|
+
else
|
1852
|
+
{
|
1853
|
+
size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
|
1854
|
+
ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
|
1855
|
+
ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
|
1856
|
+
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
|
1857
|
+
assert(!ZSTD_isError(oneSeqSize));
|
1858
|
+
ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
|
1859
|
+
#endif
|
1860
|
+
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
1861
|
+
op += oneSeqSize;
|
1862
|
+
}
|
1201
1863
|
}
|
1202
1864
|
|
1203
1865
|
/* save reps for next block */
|
@@ -1205,25 +1867,34 @@ ZSTD_decompressSequencesLong_body(
|
|
1205
1867
|
}
|
1206
1868
|
|
1207
1869
|
/* last literal segment */
|
1208
|
-
{
|
1870
|
+
if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
|
1871
|
+
size_t const lastLLSize = litBufferEnd - litPtr;
|
1872
|
+
RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
|
1873
|
+
if (op != NULL) {
|
1874
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
1875
|
+
op += lastLLSize;
|
1876
|
+
}
|
1877
|
+
litPtr = dctx->litExtraBuffer;
|
1878
|
+
litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
|
1879
|
+
}
|
1880
|
+
{ size_t const lastLLSize = litBufferEnd - litPtr;
|
1209
1881
|
RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
|
1210
1882
|
if (op != NULL) {
|
1211
|
-
|
1883
|
+
ZSTD_memmove(op, litPtr, lastLLSize);
|
1212
1884
|
op += lastLLSize;
|
1213
1885
|
}
|
1214
1886
|
}
|
1215
1887
|
|
1216
|
-
return op-ostart;
|
1888
|
+
return (size_t)(op - ostart);
|
1217
1889
|
}
|
1218
1890
|
|
1219
1891
|
static size_t
|
1220
1892
|
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
1221
1893
|
void* dst, size_t maxDstSize,
|
1222
1894
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1223
|
-
const ZSTD_longOffset_e isLongOffset
|
1224
|
-
const int frame)
|
1895
|
+
const ZSTD_longOffset_e isLongOffset)
|
1225
1896
|
{
|
1226
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
1897
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1227
1898
|
}
|
1228
1899
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
1229
1900
|
|
@@ -1232,17 +1903,34 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
1232
1903
|
#if DYNAMIC_BMI2
|
1233
1904
|
|
1234
1905
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1906
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
1907
|
+
DONT_VECTORIZE
|
1908
|
+
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
1909
|
+
void* dst, size_t maxDstSize,
|
1910
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
1911
|
+
const ZSTD_longOffset_e isLongOffset)
|
1912
|
+
{
|
1913
|
+
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1914
|
+
}
|
1915
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
1916
|
+
DONT_VECTORIZE
|
1917
|
+
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
|
1918
|
+
void* dst, size_t maxDstSize,
|
1919
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
1920
|
+
const ZSTD_longOffset_e isLongOffset)
|
1921
|
+
{
|
1922
|
+
return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1923
|
+
}
|
1235
1924
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
1236
1925
|
|
1237
1926
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
1238
|
-
static
|
1927
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
1239
1928
|
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
1240
1929
|
void* dst, size_t maxDstSize,
|
1241
1930
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1242
|
-
const ZSTD_longOffset_e isLongOffset
|
1243
|
-
const int frame)
|
1931
|
+
const ZSTD_longOffset_e isLongOffset)
|
1244
1932
|
{
|
1245
|
-
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
1933
|
+
return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1246
1934
|
}
|
1247
1935
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
1248
1936
|
|
@@ -1252,18 +1940,34 @@ typedef size_t (*ZSTD_decompressSequences_t)(
|
|
1252
1940
|
ZSTD_DCtx* dctx,
|
1253
1941
|
void* dst, size_t maxDstSize,
|
1254
1942
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1255
|
-
const ZSTD_longOffset_e isLongOffset
|
1256
|
-
const int frame);
|
1943
|
+
const ZSTD_longOffset_e isLongOffset);
|
1257
1944
|
|
1258
1945
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1259
1946
|
static size_t
|
1260
1947
|
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
1261
1948
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1262
|
-
const ZSTD_longOffset_e isLongOffset
|
1263
|
-
const int frame)
|
1949
|
+
const ZSTD_longOffset_e isLongOffset)
|
1264
1950
|
{
|
1265
1951
|
DEBUGLOG(5, "ZSTD_decompressSequences");
|
1266
|
-
|
1952
|
+
#if DYNAMIC_BMI2
|
1953
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
1954
|
+
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1955
|
+
}
|
1956
|
+
#endif
|
1957
|
+
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1958
|
+
}
|
1959
|
+
static size_t
|
1960
|
+
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
1961
|
+
const void* seqStart, size_t seqSize, int nbSeq,
|
1962
|
+
const ZSTD_longOffset_e isLongOffset)
|
1963
|
+
{
|
1964
|
+
DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
|
1965
|
+
#if DYNAMIC_BMI2
|
1966
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
1967
|
+
return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1968
|
+
}
|
1969
|
+
#endif
|
1970
|
+
return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1267
1971
|
}
|
1268
1972
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
1269
1973
|
|
@@ -1278,69 +1982,114 @@ static size_t
|
|
1278
1982
|
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
1279
1983
|
void* dst, size_t maxDstSize,
|
1280
1984
|
const void* seqStart, size_t seqSize, int nbSeq,
|
1281
|
-
const ZSTD_longOffset_e isLongOffset
|
1282
|
-
const int frame)
|
1985
|
+
const ZSTD_longOffset_e isLongOffset)
|
1283
1986
|
{
|
1284
1987
|
DEBUGLOG(5, "ZSTD_decompressSequencesLong");
|
1285
1988
|
#if DYNAMIC_BMI2
|
1286
|
-
if (dctx
|
1287
|
-
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
1989
|
+
if (ZSTD_DCtx_get_bmi2(dctx)) {
|
1990
|
+
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1288
1991
|
}
|
1289
1992
|
#endif
|
1290
|
-
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset
|
1993
|
+
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
1291
1994
|
}
|
1292
1995
|
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
|
1293
1996
|
|
1294
1997
|
|
1998
|
+
/**
|
1999
|
+
* @returns The total size of the history referenceable by zstd, including
|
2000
|
+
* both the prefix and the extDict. At @p op any offset larger than this
|
2001
|
+
* is invalid.
|
2002
|
+
*/
|
2003
|
+
static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
|
2004
|
+
{
|
2005
|
+
return (size_t)(op - virtualStart);
|
2006
|
+
}
|
1295
2007
|
|
1296
|
-
|
1297
|
-
|
1298
|
-
|
2008
|
+
typedef struct {
|
2009
|
+
unsigned longOffsetShare;
|
2010
|
+
unsigned maxNbAdditionalBits;
|
2011
|
+
} ZSTD_OffsetInfo;
|
2012
|
+
|
2013
|
+
/* ZSTD_getOffsetInfo() :
|
1299
2014
|
* condition : offTable must be valid
|
1300
2015
|
* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
|
1301
|
-
* compared to maximum possible of (1<<OffFSELog)
|
1302
|
-
|
1303
|
-
|
2016
|
+
* compared to maximum possible of (1<<OffFSELog),
|
2017
|
+
* as well as the maximum number of additional bits required.
|
2018
|
+
*/
|
2019
|
+
static ZSTD_OffsetInfo
|
2020
|
+
ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
|
1304
2021
|
{
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1313
|
-
|
1314
|
-
|
2022
|
+
ZSTD_OffsetInfo info = {0, 0};
|
2023
|
+
/* If nbSeq == 0, then the offTable is uninitialized, but we have
|
2024
|
+
* no sequences, so both values should be 0.
|
2025
|
+
*/
|
2026
|
+
if (nbSeq != 0) {
|
2027
|
+
const void* ptr = offTable;
|
2028
|
+
U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
|
2029
|
+
const ZSTD_seqSymbol* table = offTable + 1;
|
2030
|
+
U32 const max = 1 << tableLog;
|
2031
|
+
U32 u;
|
2032
|
+
DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
|
2033
|
+
|
2034
|
+
assert(max <= (1 << OffFSELog)); /* max not too large */
|
2035
|
+
for (u=0; u<max; u++) {
|
2036
|
+
info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
|
2037
|
+
if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
|
2038
|
+
}
|
2039
|
+
|
2040
|
+
assert(tableLog <= OffFSELog);
|
2041
|
+
info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
|
1315
2042
|
}
|
1316
2043
|
|
1317
|
-
|
1318
|
-
|
2044
|
+
return info;
|
2045
|
+
}
|
1319
2046
|
|
1320
|
-
|
2047
|
+
/**
|
2048
|
+
* @returns The maximum offset we can decode in one read of our bitstream, without
|
2049
|
+
* reloading more bits in the middle of the offset bits read. Any offsets larger
|
2050
|
+
* than this must use the long offset decoder.
|
2051
|
+
*/
|
2052
|
+
static size_t ZSTD_maxShortOffset(void)
|
2053
|
+
{
|
2054
|
+
if (MEM_64bits()) {
|
2055
|
+
/* We can decode any offset without reloading bits.
|
2056
|
+
* This might change if the max window size grows.
|
2057
|
+
*/
|
2058
|
+
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
|
2059
|
+
return (size_t)-1;
|
2060
|
+
} else {
|
2061
|
+
/* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
|
2062
|
+
* This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
|
2063
|
+
* Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
|
2064
|
+
*/
|
2065
|
+
size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
|
2066
|
+
size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
|
2067
|
+
assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
|
2068
|
+
return maxOffset;
|
2069
|
+
}
|
1321
2070
|
}
|
1322
|
-
#endif
|
1323
2071
|
|
1324
2072
|
size_t
|
1325
2073
|
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
1326
2074
|
void* dst, size_t dstCapacity,
|
1327
|
-
const void* src, size_t srcSize, const
|
2075
|
+
const void* src, size_t srcSize, const streaming_operation streaming)
|
1328
2076
|
{ /* blockType == blockCompressed */
|
1329
2077
|
const BYTE* ip = (const BYTE*)src;
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
*
|
1334
|
-
*
|
1335
|
-
|
1336
|
-
|
1337
|
-
|
1338
|
-
|
1339
|
-
|
2078
|
+
DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
|
2079
|
+
|
2080
|
+
/* Note : the wording of the specification
|
2081
|
+
* allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
|
2082
|
+
* This generally does not happen, as it makes little sense,
|
2083
|
+
* since an uncompressed block would feature the same size and have no decompression cost.
|
2084
|
+
* Also, note that the decoder from reference libzstd before v1.5.4
|
2085
|
+
* would consider this edge case as an error.
|
2086
|
+
* As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
|
2087
|
+
* for broader compatibility with the deployed ecosystem of zstd decoders */
|
2088
|
+
RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
|
1340
2089
|
|
1341
2090
|
/* Decode literals section */
|
1342
|
-
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
1343
|
-
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock :
|
2091
|
+
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
|
2092
|
+
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
|
1344
2093
|
if (ZSTD_isError(litCSize)) return litCSize;
|
1345
2094
|
ip += litCSize;
|
1346
2095
|
srcSize -= litCSize;
|
@@ -1348,6 +2097,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1348
2097
|
|
1349
2098
|
/* Build Decoding Tables */
|
1350
2099
|
{
|
2100
|
+
/* Compute the maximum block size, which must also work when !frame and fParams are unset.
|
2101
|
+
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
|
2102
|
+
*/
|
2103
|
+
size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
|
2104
|
+
size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
|
2105
|
+
/* isLongOffset must be true if there are long offsets.
|
2106
|
+
* Offsets are long if they are larger than ZSTD_maxShortOffset().
|
2107
|
+
* We don't expect that to be the case in 64-bit mode.
|
2108
|
+
*
|
2109
|
+
* We check here to see if our history is large enough to allow long offsets.
|
2110
|
+
* If it isn't, then we can't possibly have (valid) long offsets. If the offset
|
2111
|
+
* is invalid, then it is okay to read it incorrectly.
|
2112
|
+
*
|
2113
|
+
* If isLongOffset is true, then we will later check our decoding table to see
|
2114
|
+
* if it is even possible to generate long offsets.
|
2115
|
+
*/
|
2116
|
+
ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
|
1351
2117
|
/* These macros control at build-time which decompressor implementation
|
1352
2118
|
* we use. If neither is defined, we do some inspection and dispatch at
|
1353
2119
|
* runtime.
|
@@ -1355,6 +2121,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1355
2121
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
1356
2122
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
1357
2123
|
int usePrefetchDecoder = dctx->ddictIsCold;
|
2124
|
+
#else
|
2125
|
+
/* Set to 1 to avoid computing offset info if we don't need to.
|
2126
|
+
* Otherwise this value is ignored.
|
2127
|
+
*/
|
2128
|
+
int usePrefetchDecoder = 1;
|
1358
2129
|
#endif
|
1359
2130
|
int nbSeq;
|
1360
2131
|
size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
|
@@ -1362,40 +2133,58 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
1362
2133
|
ip += seqHSize;
|
1363
2134
|
srcSize -= seqHSize;
|
1364
2135
|
|
1365
|
-
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
2136
|
+
RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
|
2137
|
+
RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
|
2138
|
+
"invalid dst");
|
1366
2139
|
|
1367
|
-
|
1368
|
-
|
1369
|
-
|
1370
|
-
|
1371
|
-
|
1372
|
-
|
1373
|
-
|
1374
|
-
|
2140
|
+
/* If we could potentially have long offsets, or we might want to use the prefetch decoder,
|
2141
|
+
* compute information about the share of long offsets, and the maximum nbAdditionalBits.
|
2142
|
+
* NOTE: could probably use a larger nbSeq limit
|
2143
|
+
*/
|
2144
|
+
if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
|
2145
|
+
ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
|
2146
|
+
if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
|
2147
|
+
/* If isLongOffset, but the maximum number of additional bits that we see in our table is small
|
2148
|
+
* enough, then we know it is impossible to have too long an offset in this block, so we can
|
2149
|
+
* use the regular offset decoder.
|
2150
|
+
*/
|
2151
|
+
isLongOffset = ZSTD_lo_isRegularOffset;
|
2152
|
+
}
|
2153
|
+
if (!usePrefetchDecoder) {
|
2154
|
+
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
|
2155
|
+
usePrefetchDecoder = (info.longOffsetShare >= minShare);
|
2156
|
+
}
|
1375
2157
|
}
|
1376
|
-
#endif
|
1377
2158
|
|
1378
2159
|
dctx->ddictIsCold = 0;
|
1379
2160
|
|
1380
2161
|
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
|
1381
2162
|
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
|
1382
|
-
if (usePrefetchDecoder)
|
2163
|
+
if (usePrefetchDecoder) {
|
2164
|
+
#else
|
2165
|
+
(void)usePrefetchDecoder;
|
2166
|
+
{
|
1383
2167
|
#endif
|
1384
2168
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
1385
|
-
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset
|
2169
|
+
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
1386
2170
|
#endif
|
2171
|
+
}
|
1387
2172
|
|
1388
2173
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1389
2174
|
/* else */
|
1390
|
-
|
2175
|
+
if (dctx->litBufferLocation == ZSTD_split)
|
2176
|
+
return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
2177
|
+
else
|
2178
|
+
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
1391
2179
|
#endif
|
1392
2180
|
}
|
1393
2181
|
}
|
1394
2182
|
|
1395
2183
|
|
1396
|
-
|
2184
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
2185
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
|
1397
2186
|
{
|
1398
|
-
if (dst != dctx->previousDstEnd) { /* not contiguous */
|
2187
|
+
if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
|
1399
2188
|
dctx->dictEnd = dctx->previousDstEnd;
|
1400
2189
|
dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
|
1401
2190
|
dctx->prefixStart = dst;
|
@@ -1404,15 +2193,26 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
|
|
1404
2193
|
}
|
1405
2194
|
|
1406
2195
|
|
1407
|
-
size_t
|
1408
|
-
|
1409
|
-
|
2196
|
+
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
|
2197
|
+
void* dst, size_t dstCapacity,
|
2198
|
+
const void* src, size_t srcSize)
|
1410
2199
|
{
|
1411
2200
|
size_t dSize;
|
1412
|
-
|
1413
|
-
|
2201
|
+
dctx->isFrameDecompression = 0;
|
2202
|
+
ZSTD_checkContinuity(dctx, dst, dstCapacity);
|
2203
|
+
dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
|
2204
|
+
FORWARD_IF_ERROR(dSize, "");
|
1414
2205
|
dctx->previousDstEnd = (char*)dst + dSize;
|
1415
2206
|
return dSize;
|
1416
2207
|
}
|
1417
2208
|
|
2209
|
+
|
2210
|
+
/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
|
2211
|
+
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
|
2212
|
+
void* dst, size_t dstCapacity,
|
2213
|
+
const void* src, size_t srcSize)
|
2214
|
+
{
|
2215
|
+
return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
|
1418
2216
|
}
|
2217
|
+
|
2218
|
+
} // namespace duckdb_zstd
|