duckdb 1.1.2-dev6.0 → 1.1.4-dev11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +5 -54
- package/binding.gyp +73 -52
- package/package.json +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/avg.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/corr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/covar.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/stddev.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/approx_count.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/arg_min_max.cpp +66 -18
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitagg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitstring_agg.cpp +5 -7
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bool.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/kurtosis.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/product.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/skew.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/string_agg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/sum.cpp +13 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approx_top_k.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approximate_quantile.cpp +51 -15
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mad.cpp +25 -10
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mode.cpp +215 -71
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/quantile.cpp +58 -31
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/binned_histogram.cpp +9 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/histogram.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/list.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_avg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_count.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_intercept.cpp +6 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_r2.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_slope.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxx_syy.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxy.cpp +3 -3
- package/src/duckdb/extension/core_functions/core_functions_extension.cpp +85 -0
- package/src/duckdb/{src → extension}/core_functions/function_list.cpp +30 -51
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/corr.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/distributive_functions.hpp +16 -21
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/histogram_helpers.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/holistic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/nested_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_helpers.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_sort_tree.hpp +140 -58
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_state.hpp +50 -43
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_count.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_slope.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/array_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/bit_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/blob_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/date_functions.hpp +22 -55
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/debug_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/enum_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/generic_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/list_functions.hpp +4 -4
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/map_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/math_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/operators_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/random_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/string_functions.hpp +10 -103
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/struct_functions.hpp +1 -19
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/union_functions.hpp +1 -1
- package/src/duckdb/extension/core_functions/include/core_functions_extension.hpp +22 -0
- package/src/duckdb/{src → extension}/core_functions/lambda_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_functions.cpp +11 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_value.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/bit/bitstring.cpp +12 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/base64.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/encode.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/date/age.cpp +9 -3
- package/src/duckdb/extension/core_functions/scalar/date/current.cpp +29 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_diff.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_part.cpp +42 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_sub.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_trunc.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/epoch.cpp +19 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/date/make_date.cpp +40 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/date/time_bucket.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/to_interval.cpp +54 -28
- package/src/duckdb/{src → extension}/core_functions/scalar/debug/vector_type.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/enum/enum_functions.cpp +2 -7
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/alias.cpp +2 -2
- package/src/duckdb/{src/function → extension/core_functions}/scalar/generic/binning.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/can_implicitly_cast.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/current_setting.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/hash.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/least.cpp +30 -10
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/stats.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/system_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/typeof.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/array_slice.cpp +93 -88
- package/src/duckdb/{src → extension}/core_functions/scalar/list/flatten.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_aggregates.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_distance.cpp +8 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_filter.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_has_any_or_all.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_reduce.cpp +5 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_sort.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_transform.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_value.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/range.cpp +7 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/cardinality.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map.cpp +5 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_concat.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_entries.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_extract.cpp +13 -25
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_from_entries.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_keys_values.cpp +11 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/math/numeric.cpp +83 -37
- package/src/duckdb/{src → extension}/core_functions/scalar/operators/bitwise.cpp +19 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/random/random.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/random/setseed.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/ascii.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/bar.cpp +6 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/chr.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/damerau_levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/format_bytes.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hamming.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hex.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/instr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/jaccard.cpp +1 -1
- package/src/duckdb/extension/core_functions/scalar/string/jaro_winkler.cpp +112 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/string/left_right.cpp +6 -6
- package/src/duckdb/{src → extension}/core_functions/scalar/string/levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/pad.cpp +9 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/string/parse_path.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/printf.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/repeat.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/replace.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/reverse.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/starts_with.cpp +5 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/to_base.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/translate.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/trim.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/unicode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/url_encode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/struct/struct_insert.cpp +25 -31
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_extract.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_tag.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_value.cpp +3 -3
- package/src/duckdb/extension/icu/icu-dateadd.cpp +16 -11
- package/src/duckdb/extension/icu/icu-datefunc.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datepart.cpp +8 -5
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +8 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-makedate.cpp +18 -7
- package/src/duckdb/extension/icu/icu-strptime.cpp +9 -3
- package/src/duckdb/extension/icu/icu-table-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-timebucket.cpp +4 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +67 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +60 -5
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -1
- package/src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/normlzr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/rbbinode.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/schriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/stringtriebuilder.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uchriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/brkiter.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/bytestriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/chariter.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/dtintrv.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/locid.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/messagepattern.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/normlzr.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/parsepos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/rbbi.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/schriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/strenum.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringpiece.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringtriebuilder.h +9 -9
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/ucharstriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uchriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uniset.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/unistr.h +12 -12
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uobject.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unifiedcache.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/uniset.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustr_titlecase_brkiter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustrenum.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/alphaindex.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/coll.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/datefmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dcfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen_impl.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtrule.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/fpositer.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measunit.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measure.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +9 -9
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbtz.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/region.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rulebasedcollator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/selfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/simpletz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/sortkey.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tmutamt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames.cpp +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzrule.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/tztrans.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/alphaindex.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/calendar.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/choicfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coll.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/currpinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/datefmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dcfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/decimfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvfmt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtptngen.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fieldpos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fmtable.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fpositer.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measunit.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measure.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/msgfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/numfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbnf.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbtz.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/region.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/search.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/selfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/simpletz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/smpdtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/sortkey.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/stsearch.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tblcoll.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/timezone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutamt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tznames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzrule.h +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tztrans.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/vtzone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp +2 -2
- package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -1
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
- package/src/duckdb/extension/json/include/json_common.hpp +14 -10
- package/src/duckdb/extension/json/include/json_scan.hpp +48 -7
- package/src/duckdb/extension/json/include/json_structure.hpp +2 -1
- package/src/duckdb/extension/json/include/json_transform.hpp +5 -2
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +57 -20
- package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +7 -6
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +6 -5
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +20 -17
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +48 -17
- package/src/duckdb/extension/json/json_functions/read_json.cpp +83 -34
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -3
- package/src/duckdb/extension/json/json_functions.cpp +14 -16
- package/src/duckdb/extension/json/json_scan.cpp +36 -16
- package/src/duckdb/extension/json/json_serializer.cpp +1 -1
- package/src/duckdb/extension/json/serialize_json.cpp +2 -2
- package/src/duckdb/extension/parquet/column_reader.cpp +136 -116
- package/src/duckdb/extension/parquet/column_writer.cpp +870 -604
- package/src/duckdb/extension/parquet/geo_parquet.cpp +4 -5
- package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +0 -4
- package/src/duckdb/extension/parquet/include/column_reader.hpp +24 -19
- package/src/duckdb/extension/parquet/include/column_writer.hpp +7 -5
- package/src/duckdb/extension/parquet/include/decode_utils.hpp +138 -18
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +4 -3
- package/src/duckdb/extension/parquet/include/null_column_reader.hpp +1 -14
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +45 -0
- package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +101 -90
- package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +179 -0
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +2 -3
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +48 -0
- package/src/duckdb/extension/parquet/include/parquet_extension.hpp +8 -0
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +5 -0
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +22 -18
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -5
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +87 -3
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +30 -16
- package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +0 -8
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +1 -42
- package/src/duckdb/extension/parquet/include/thrift_tools.hpp +13 -1
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +4 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +240 -197
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +138 -6
- package/src/duckdb/extension/parquet/parquet_reader.cpp +155 -79
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +258 -38
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +17 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +65 -34
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +4 -0
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +13 -0
- package/src/duckdb/src/catalog/catalog.cpp +272 -97
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +9 -4
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +145 -95
- package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +9 -3
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +15 -0
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +40 -24
- package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry.cpp +3 -0
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +60 -5
- package/src/duckdb/src/catalog/catalog_search_path.cpp +27 -14
- package/src/duckdb/src/catalog/catalog_set.cpp +75 -31
- package/src/duckdb/src/catalog/default/default_functions.cpp +13 -8
- package/src/duckdb/src/catalog/default/default_views.cpp +1 -0
- package/src/duckdb/src/catalog/dependency_manager.cpp +133 -5
- package/src/duckdb/src/catalog/duck_catalog.cpp +17 -9
- package/src/duckdb/src/common/adbc/adbc.cpp +18 -0
- package/src/duckdb/src/common/allocator.cpp +3 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +30 -9
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +63 -82
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +4 -3
- package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +361 -0
- package/src/duckdb/src/common/arrow/arrow_util.cpp +10 -6
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +6 -2
- package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +2 -1
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +27 -14
- package/src/duckdb/src/common/assert.cpp +1 -2
- package/src/duckdb/src/common/bind_helpers.cpp +1 -1
- package/src/duckdb/src/common/box_renderer.cpp +316 -26
- package/src/duckdb/src/common/cgroups.cpp +7 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
- package/src/duckdb/src/common/enum_util.cpp +2865 -6882
- package/src/duckdb/src/common/enums/compression_type.cpp +12 -0
- package/src/duckdb/src/common/enums/metric_type.cpp +24 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +4 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/error_data.cpp +23 -6
- package/src/duckdb/src/common/exception/binder_exception.cpp +1 -1
- package/src/duckdb/src/common/exception.cpp +20 -28
- package/src/duckdb/src/common/extra_type_info.cpp +85 -20
- package/src/duckdb/src/common/file_buffer.cpp +5 -2
- package/src/duckdb/src/common/file_system.cpp +8 -3
- package/src/duckdb/src/common/fsst.cpp +3 -3
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/local_file_system.cpp +169 -60
- package/src/duckdb/src/common/multi_file_list.cpp +4 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +240 -63
- package/src/duckdb/src/common/opener_file_system.cpp +37 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +77 -11
- package/src/duckdb/src/common/operator/string_cast.cpp +6 -2
- package/src/duckdb/src/common/pipe_file_system.cpp +4 -4
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +25 -14
- package/src/duckdb/src/common/radix_partitioning.cpp +17 -16
- package/src/duckdb/src/common/random_engine.cpp +39 -3
- package/src/duckdb/src/common/render_tree.cpp +3 -19
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -58
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +2 -0
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +20 -19
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
- package/src/duckdb/src/common/serializer/memory_stream.cpp +36 -0
- package/src/duckdb/src/common/sort/comparators.cpp +7 -7
- package/src/duckdb/src/common/sort/partition_state.cpp +2 -2
- package/src/duckdb/src/common/stacktrace.cpp +127 -0
- package/src/duckdb/src/common/string_util.cpp +157 -32
- package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +15 -3
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +4 -0
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +71 -8
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +27 -6
- package/src/duckdb/src/common/types/conflict_manager.cpp +21 -7
- package/src/duckdb/src/common/types/date.cpp +39 -25
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -11
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -7
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +17 -17
- package/src/duckdb/src/common/types/timestamp.cpp +70 -33
- package/src/duckdb/src/common/types/uuid.cpp +11 -0
- package/src/duckdb/src/common/types/validity_mask.cpp +16 -5
- package/src/duckdb/src/common/types/value.cpp +357 -199
- package/src/duckdb/src/common/types/varint.cpp +64 -18
- package/src/duckdb/src/common/types/vector.cpp +78 -38
- package/src/duckdb/src/common/types.cpp +199 -92
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +2 -1
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +32 -5
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +3 -1
- package/src/duckdb/src/execution/adaptive_filter.cpp +6 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +410 -111
- package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +6 -0
- package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +4 -3
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_function.cpp +1 -0
- package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +5 -4
- package/src/duckdb/src/execution/expression_executor.cpp +5 -3
- package/src/duckdb/src/execution/index/art/art.cpp +208 -72
- package/src/duckdb/src/execution/index/art/base_leaf.cpp +1 -1
- package/src/duckdb/src/execution/index/art/leaf.cpp +12 -7
- package/src/duckdb/src/execution/index/art/node.cpp +2 -1
- package/src/duckdb/src/execution/index/art/node256_leaf.cpp +6 -6
- package/src/duckdb/src/execution/index/art/plan_art.cpp +50 -55
- package/src/duckdb/src/execution/index/art/prefix.cpp +7 -13
- package/src/duckdb/src/execution/index/bound_index.cpp +30 -5
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +3 -5
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -9
- package/src/duckdb/src/execution/join_hashtable.cpp +254 -158
- package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
- package/src/duckdb/src/execution/operator/aggregate/physical_partitioned_aggregate.cpp +226 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +77 -70
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +114 -50
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +19 -10
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +22 -15
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +95 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +6 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +75 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +40 -12
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +395 -163
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +115 -49
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +66 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +220 -46
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +43 -32
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +54 -119
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +184 -20
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +83 -21
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_validator.cpp +63 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +7 -4
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +3 -2
- package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +9 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +132 -15
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -55
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +284 -154
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +40 -55
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -3
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +298 -227
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +5 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +3 -4
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +32 -19
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +6 -0
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -19
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +11 -27
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +308 -119
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +105 -55
- package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +6 -2
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -1
- package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +15 -6
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +92 -50
- package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +0 -1
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +8 -4
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +54 -22
- package/src/duckdb/src/execution/operator/set/physical_union.cpp +5 -1
- package/src/duckdb/src/execution/physical_operator.cpp +15 -9
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +101 -12
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +11 -140
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +6 -5
- package/src/duckdb/src/execution/physical_plan/plan_export.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +16 -13
- package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_order.cpp +7 -7
- package/src/duckdb/src/execution/physical_plan/plan_prepare.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_projection.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +8 -3
- package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +3 -2
- package/src/duckdb/src/execution/physical_plan_generator.cpp +0 -22
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +136 -116
- package/src/duckdb/src/execution/sample/base_reservoir_sample.cpp +136 -0
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +930 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +6 -12
- package/src/duckdb/src/function/aggregate/distributive/{first.cpp → first_last_any.cpp} +37 -18
- package/src/duckdb/src/{core_functions → function}/aggregate/distributive/minmax.cpp +19 -12
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +72 -13
- package/src/duckdb/src/function/built_in_functions.cpp +85 -2
- package/src/duckdb/src/function/cast/decimal_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/string_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/struct_cast.cpp +81 -49
- package/src/duckdb/src/function/cast/union/from_struct.cpp +7 -5
- package/src/duckdb/src/function/compression_config.cpp +6 -0
- package/src/duckdb/src/function/encoding_function.cpp +134 -0
- package/src/duckdb/src/function/function.cpp +8 -13
- package/src/duckdb/src/function/function_binder.cpp +100 -21
- package/src/duckdb/src/function/function_list.cpp +178 -0
- package/src/duckdb/src/function/macro_function.cpp +4 -4
- package/src/duckdb/src/function/pragma/pragma_functions.cpp +0 -2
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +0 -4
- package/src/duckdb/src/{core_functions/core_functions.cpp → function/register_function_list.cpp} +12 -8
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +62 -23
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +33 -16
- package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +21 -0
- package/src/duckdb/src/{core_functions/scalar/blob → function/scalar}/create_sort_key.cpp +86 -23
- package/src/duckdb/src/{core_functions → function}/scalar/date/strftime.cpp +6 -4
- package/src/duckdb/src/function/scalar/generic/constant_or_null.cpp +5 -7
- package/src/duckdb/src/{core_functions → function}/scalar/generic/error.cpp +3 -1
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +2 -2
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +1 -7
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +27 -21
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +8 -12
- package/src/duckdb/src/function/scalar/list/list_select.cpp +1 -4
- package/src/duckdb/src/function/scalar/list/list_zip.cpp +6 -6
- package/src/duckdb/src/{core_functions → function}/scalar/map/map_contains.cpp +2 -2
- package/src/duckdb/src/function/scalar/nested_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/{operators → operator}/add.cpp +2 -1
- package/src/duckdb/src/function/scalar/{operators → operator}/arithmetic.cpp +195 -127
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +30 -21
- package/src/duckdb/src/function/scalar/strftime_format.cpp +10 -0
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +11 -41
- package/src/duckdb/src/function/scalar/string/concat.cpp +22 -20
- package/src/duckdb/src/function/scalar/string/concat_ws.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/contains.cpp +16 -19
- package/src/duckdb/src/function/scalar/string/length.cpp +38 -24
- package/src/duckdb/src/function/scalar/string/like.cpp +80 -47
- package/src/duckdb/src/{core_functions → function}/scalar/string/md5.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/prefix.cpp +0 -4
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -1
- package/src/duckdb/src/function/scalar/string/regexp.cpp +17 -7
- package/src/duckdb/src/{core_functions → function}/scalar/string/regexp_escape.cpp +2 -2
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha1.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha256.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/string_split.cpp +4 -5
- package/src/duckdb/src/function/scalar/string/strip_accents.cpp +3 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +14 -13
- package/src/duckdb/src/function/scalar/string/suffix.cpp +0 -4
- package/src/duckdb/src/function/scalar/struct/struct_concat.cpp +115 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +35 -31
- package/src/duckdb/src/{core_functions → function}/scalar/struct/struct_pack.cpp +7 -7
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -8
- package/src/duckdb/src/function/scalar/system/write_log.cpp +170 -0
- package/src/duckdb/src/function/scalar_function.cpp +5 -5
- package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +3 -2
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +287 -1
- package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +6 -6
- package/src/duckdb/src/function/table/arrow.cpp +32 -352
- package/src/duckdb/src/function/table/arrow_conversion.cpp +43 -7
- package/src/duckdb/src/function/table/copy_csv.cpp +38 -23
- package/src/duckdb/src/function/table/glob.cpp +1 -1
- package/src/duckdb/src/function/table/query_function.cpp +12 -7
- package/src/duckdb/src/function/table/read_csv.cpp +114 -46
- package/src/duckdb/src/function/table/read_file.cpp +26 -6
- package/src/duckdb/src/function/table/sniff_csv.cpp +25 -5
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +6 -7
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +141 -16
- package/src/duckdb/src/function/table/system/duckdb_log.cpp +64 -0
- package/src/duckdb/src/function/table/system/duckdb_log_contexts.cpp +65 -0
- package/src/duckdb/src/function/table/system/duckdb_memory.cpp +0 -1
- package/src/duckdb/src/function/table/system/duckdb_settings.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_tables.cpp +1 -13
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +1 -1
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +17 -0
- package/src/duckdb/src/function/table/system/pragma_table_info.cpp +6 -0
- package/src/duckdb/src/function/table/system/pragma_table_sample.cpp +95 -0
- package/src/duckdb/src/function/table/system/test_all_types.cpp +56 -46
- package/src/duckdb/src/function/table/system_functions.cpp +3 -0
- package/src/duckdb/src/function/table/table_scan.cpp +487 -289
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/table_function.cpp +10 -6
- package/src/duckdb/src/function/window/window_aggregate_function.cpp +248 -0
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +48 -0
- package/src/duckdb/src/function/window/window_aggregator.cpp +88 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +854 -0
- package/src/duckdb/src/function/window/window_collection.cpp +146 -0
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +357 -0
- package/src/duckdb/src/function/window/window_custom_aggregator.cpp +146 -0
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +758 -0
- package/src/duckdb/src/function/window/window_executor.cpp +99 -0
- package/src/duckdb/src/function/window/window_index_tree.cpp +63 -0
- package/src/duckdb/src/function/window/window_merge_sort_tree.cpp +275 -0
- package/src/duckdb/src/function/window/window_naive_aggregator.cpp +361 -0
- package/src/duckdb/src/function/window/window_rank_function.cpp +288 -0
- package/src/duckdb/src/function/window/window_rownumber_function.cpp +191 -0
- package/src/duckdb/src/function/window/window_segment_tree.cpp +594 -0
- package/src/duckdb/src/function/window/window_shared_expressions.cpp +50 -0
- package/src/duckdb/src/function/window/window_token_tree.cpp +142 -0
- package/src/duckdb/src/function/window/window_value_function.cpp +566 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +74 -17
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +9 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +4 -10
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +18 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +5 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +21 -18
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +3 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +10 -2
- package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +11 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +26 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +144 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +11 -4
- package/src/duckdb/src/include/duckdb/common/assert.hpp +12 -1
- package/src/duckdb/src/include/duckdb/common/atomic_ptr.hpp +102 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +65 -6
- package/src/duckdb/src/include/duckdb/common/chrono.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/column_index.hpp +72 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +120 -0
- package/src/duckdb/src/include/duckdb/{core_functions/core_functions.hpp → common/enums/collation_type.hpp} +2 -7
- package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/enums/function_errors.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_preservation_type.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions/aggregate → common/enums}/quantile_enum.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/error_data.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/extension_type_info.hpp +37 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +9 -3
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +19 -10
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/fsst.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +12 -2
- package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +147 -27
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +2 -7
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +16 -5
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/platform.hpp +34 -3
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +10 -13
- package/src/duckdb/src/include/duckdb/common/random_engine.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +0 -2
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/stacktrace.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +30 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +13 -2
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +21 -4
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +58 -10
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +43 -16
- package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +63 -21
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +62 -16
- package/src/duckdb/src/include/duckdb/common/types/varint.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +34 -7
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +12 -7
- package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +12 -13
- package/src/duckdb/src/include/duckdb/common/vector_operations/binary_executor.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +111 -4
- package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +48 -10
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +25 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +28 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +23 -16
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +25 -16
- package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +15 -10
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent/physical_fixed_batch_copy.hpp → aggregate/physical_partitioned_aggregate.hpp} +25 -27
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +21 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +38 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +8 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +29 -23
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +15 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +13 -5
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +36 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +21 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +52 -22
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +6 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_validator.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +6 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +16 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +9 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +8 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +55 -10
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +28 -7
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +6 -9
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +17 -16
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +7 -3
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +55 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +10 -9
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +16 -13
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +0 -4
- package/src/duckdb/src/include/duckdb/execution/partition_info.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +20 -9
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +1 -11
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/progress_data.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +160 -31
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_function_utils.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +61 -10
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/minmax_n_helpers.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/sort_key_helpers.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +47 -27
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +3 -10
- package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +13 -6
- package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +15 -0
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +29 -6
- package/src/duckdb/src/include/duckdb/{core_functions → function}/create_sort_key.hpp +4 -1
- package/src/duckdb/src/include/duckdb/function/encoding_function.hpp +78 -0
- package/src/duckdb/src/include/duckdb/function/function.hpp +22 -1
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/function_list.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/function_set.hpp +13 -7
- package/src/duckdb/src/include/duckdb/{core_functions → function}/lambda_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/partition_stats.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/register_function_list_helper.hpp +69 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +154 -23
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_utils.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/date_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_common.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +32 -23
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/list_functions.hpp +156 -0
- package/src/duckdb/src/include/duckdb/function/scalar/map_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +4 -45
- package/src/duckdb/src/include/duckdb/function/scalar/operator_functions.hpp +102 -0
- package/src/duckdb/src/include/duckdb/function/scalar/operators.hpp +2 -16
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +16 -25
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_utils.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_common.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +401 -76
- package/src/duckdb/src/include/duckdb/function/scalar/struct_functions.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/scalar/system_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +17 -8
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +59 -6
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +12 -9
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +18 -13
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -4
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +14 -0
- package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +15 -10
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +94 -18
- package/src/duckdb/src/include/duckdb/{core_functions → function}/to_interval.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +44 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_states.hpp +56 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +194 -0
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +153 -0
- package/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +146 -0
- package/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +32 -0
- package/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +122 -0
- package/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp +42 -0
- package/src/duckdb/src/include/duckdb/function/window/window_merge_sort_tree.hpp +108 -0
- package/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rank_function.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rownumber_function.hpp +43 -0
- package/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/window/window_shared_expressions.hpp +76 -0
- package/src/duckdb/src/include/duckdb/function/window/window_token_tree.hpp +46 -0
- package/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +79 -0
- package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +2 -0
- package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +81 -0
- package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +127 -0
- package/src/duckdb/src/include/duckdb/logging/logger.hpp +287 -0
- package/src/duckdb/src/include/duckdb/logging/logging.hpp +83 -0
- package/src/duckdb/src/include/duckdb/main/appender.hpp +41 -18
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +6 -3
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +317 -231
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +17 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +28 -6
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +8 -3
- package/src/duckdb/src/include/duckdb/main/config.hpp +52 -8
- package/src/duckdb/src/include/duckdb/main/connection.hpp +18 -3
- package/src/duckdb/src/include/duckdb/main/database.hpp +8 -7
- package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +5 -1
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension.hpp +8 -2
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +548 -9
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +12 -7
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +8 -4
- package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/subquery_relation.hpp +1 -4
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +7 -0
- package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation.hpp +45 -9
- package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +20 -22
- package/src/duckdb/src/include/duckdb/main/settings.hpp +613 -378
- package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -4
- package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +1 -3
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +14 -7
- package/src/duckdb/src/include/duckdb/optimizer/common_aggregate_optimizer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/empty_result_pullup.hpp +27 -0
- package/src/duckdb/src/include/duckdb/optimizer/expression_heuristics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +6 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/in_clause_rewriter.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +45 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +23 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +18 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +9 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +33 -11
- package/src/duckdb/src/include/duckdb/optimizer/rule/distinct_aggregate_optimizer.hpp +34 -0
- package/src/duckdb/src/include/duckdb/optimizer/sampling_pushdown.hpp +25 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +3 -1
- package/src/duckdb/src/include/duckdb/optimizer/sum_rewriter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +26 -8
- package/src/duckdb/src/include/duckdb/parallel/thread_context.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +51 -3
- package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +28 -44
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +11 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +12 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +22 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -4
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +16 -12
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +5 -5
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +12 -3
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +17 -57
- package/src/duckdb/src/include/duckdb/parser/qualified_name_set.hpp +19 -3
- package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +12 -9
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +45 -28
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +23 -11
- package/src/duckdb/src/include/duckdb/planner/binding_alias.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +11 -10
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +4 -4
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +9 -4
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/filter/dynamic_filter.hpp +48 -0
- package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +35 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +9 -9
- package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +16 -7
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +5 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +5 -3
- package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +14 -6
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +12 -8
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +82 -26
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +10 -3
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +4 -13
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +14 -15
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +13 -15
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/analyze.hpp +46 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/common.hpp +60 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +61 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/decompression.hpp +50 -0
- package/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp +100 -0
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/appender.hpp +150 -0
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp +618 -0
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +53 -31
- package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -1
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -4
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +16 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/storage_index.hpp +70 -0
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +5 -7
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +23 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +34 -6
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +39 -10
- package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +56 -14
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +35 -29
- package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +19 -6
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +29 -6
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +10 -10
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +26 -19
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +8 -1
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +16 -14
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +228 -61
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +14 -10
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +3 -1
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +3 -2
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +19 -17
- package/src/duckdb/src/include/duckdb/transaction/rollback_state.hpp +5 -2
- package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -2
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +13 -8
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer_allocator.hpp +79 -0
- package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +43 -13
- package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +4 -1
- package/src/duckdb/src/include/duckdb/verification/copied_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/external_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/parsed_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +7 -3
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +11 -5
- package/src/duckdb/src/include/duckdb/verification/unoptimized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb.h +424 -41
- package/src/duckdb/src/include/duckdb_extension.h +301 -195
- package/src/duckdb/src/logging/log_manager.cpp +157 -0
- package/src/duckdb/src/logging/log_storage.cpp +209 -0
- package/src/duckdb/src/logging/logger.cpp +211 -0
- package/src/duckdb/src/logging/logging.cpp +42 -0
- package/src/duckdb/src/main/appender.cpp +187 -45
- package/src/duckdb/src/main/attached_database.cpp +16 -8
- package/src/duckdb/src/main/capi/appender-c.cpp +47 -4
- package/src/duckdb/src/main/capi/arrow-c.cpp +9 -4
- package/src/duckdb/src/main/capi/config-c.cpp +17 -4
- package/src/duckdb/src/main/capi/datetime-c.cpp +15 -0
- package/src/duckdb/src/main/capi/duckdb-c.cpp +54 -13
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +212 -4
- package/src/duckdb/src/main/capi/helper-c.cpp +3 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +26 -7
- package/src/duckdb/src/main/capi/replacement_scan-c.cpp +1 -1
- package/src/duckdb/src/main/capi/result-c.cpp +3 -0
- package/src/duckdb/src/main/capi/table_description-c.cpp +43 -10
- package/src/duckdb/src/main/capi/threading-c.cpp +4 -4
- package/src/duckdb/src/main/client_context.cpp +125 -51
- package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
- package/src/duckdb/src/main/client_context_wrapper.cpp +4 -0
- package/src/duckdb/src/main/client_data.cpp +1 -1
- package/src/duckdb/src/main/client_verify.cpp +39 -20
- package/src/duckdb/src/main/config.cpp +266 -74
- package/src/duckdb/src/main/connection.cpp +53 -13
- package/src/duckdb/src/main/database.cpp +39 -18
- package/src/duckdb/src/main/database_manager.cpp +12 -11
- package/src/duckdb/src/main/db_instance_cache.cpp +14 -7
- package/src/duckdb/src/main/extension/extension_helper.cpp +24 -23
- package/src/duckdb/src/main/extension/extension_install.cpp +19 -7
- package/src/duckdb/src/main/extension/extension_load.cpp +91 -41
- package/src/duckdb/src/main/extension/extension_util.cpp +40 -19
- package/src/duckdb/src/main/extension.cpp +20 -11
- package/src/duckdb/src/main/profiling_info.cpp +19 -5
- package/src/duckdb/src/main/query_profiler.cpp +135 -36
- package/src/duckdb/src/main/query_result.cpp +2 -1
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/create_table_relation.cpp +5 -4
- package/src/duckdb/src/main/relation/create_view_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delete_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delim_get_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/distinct_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/explain_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/filter_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/insert_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation/order_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/projection_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/query_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +58 -20
- package/src/duckdb/src/main/relation/setop_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/subquery_relation.cpp +3 -8
- package/src/duckdb/src/main/relation/table_function_relation.cpp +10 -1
- package/src/duckdb/src/main/relation/table_relation.cpp +19 -3
- package/src/duckdb/src/main/relation/update_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/value_relation.cpp +42 -2
- package/src/duckdb/src/main/relation/view_relation.cpp +8 -2
- package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +49 -28
- package/src/duckdb/src/main/secret/secret_manager.cpp +1 -1
- package/src/duckdb/src/main/secret/secret_storage.cpp +6 -4
- package/src/duckdb/src/main/settings/autogenerated_settings.cpp +1102 -0
- package/src/duckdb/src/main/settings/custom_settings.cpp +1343 -0
- package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +60 -37
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +1 -1
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +126 -72
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +22 -6
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -3
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +2 -2
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +3 -3
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +7 -7
- package/src/duckdb/src/optimizer/deliminator.cpp +6 -5
- package/src/duckdb/src/optimizer/empty_result_pullup.cpp +96 -0
- package/src/duckdb/src/optimizer/expression_heuristics.cpp +11 -3
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +9 -2
- package/src/duckdb/src/optimizer/filter_combiner.cpp +190 -88
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +6 -5
- package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +25 -9
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +170 -72
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -4
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +3 -1
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +7 -7
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -6
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +37 -22
- package/src/duckdb/src/optimizer/late_materialization.cpp +414 -0
- package/src/duckdb/src/optimizer/limit_pushdown.cpp +1 -0
- package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +30 -2
- package/src/duckdb/src/optimizer/optimizer.cpp +67 -7
- package/src/duckdb/src/optimizer/pullup/pullup_filter.cpp +3 -3
- package/src/duckdb/src/optimizer/pullup/pullup_projection.cpp +2 -2
- package/src/duckdb/src/optimizer/pullup/pullup_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +3 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +5 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_unnest.cpp +52 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +2 -2
- package/src/duckdb/src/optimizer/regex_range_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +168 -38
- package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +8 -5
- package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/constant_folding.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/distinct_aggregate_optimizer.cpp +65 -0
- package/src/duckdb/src/optimizer/rule/distributivity.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -1
- package/src/duckdb/src/optimizer/rule/move_constants.cpp +9 -9
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/sampling_pushdown.cpp +24 -0
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +74 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +10 -7
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +3 -0
- package/src/duckdb/src/optimizer/sum_rewriter.cpp +174 -0
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +71 -0
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +5 -5
- package/src/duckdb/src/parallel/event.cpp +4 -0
- package/src/duckdb/src/parallel/executor.cpp +11 -29
- package/src/duckdb/src/parallel/executor_task.cpp +8 -3
- package/src/duckdb/src/parallel/pipeline.cpp +15 -8
- package/src/duckdb/src/parallel/pipeline_executor.cpp +67 -43
- package/src/duckdb/src/parallel/thread_context.cpp +12 -1
- package/src/duckdb/src/parser/column_definition.cpp +3 -3
- package/src/duckdb/src/parser/constraints/unique_constraint.cpp +72 -9
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -3
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +3 -3
- package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/star_expression.cpp +46 -2
- package/src/duckdb/src/parser/expression/window_expression.cpp +24 -1
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +26 -2
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +5 -3
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +29 -1
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +6 -6
- package/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_function_info.cpp +17 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -15
- package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
- package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +4 -4
- package/src/duckdb/src/parser/parsed_data/load_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +31 -1
- package/src/duckdb/src/parser/parsed_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +4 -1
- package/src/duckdb/src/parser/parser.cpp +129 -0
- package/src/duckdb/src/parser/qualified_name.cpp +99 -0
- package/src/duckdb/src/parser/query_error_context.cpp +35 -6
- package/src/duckdb/src/parser/query_node/select_node.cpp +4 -4
- package/src/duckdb/src/parser/statement/delete_statement.cpp +6 -1
- package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -3
- package/src/duckdb/src/parser/statement/update_statement.cpp +6 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +2 -2
- package/src/duckdb/src/parser/tableref.cpp +2 -2
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +16 -24
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +5 -5
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +61 -13
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +10 -4
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -2
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +30 -3
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +25 -6
- package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +10 -3
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +4 -3
- package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +18 -3
- package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +0 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +5 -5
- package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +26 -12
- package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +11 -3
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +3 -3
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +4 -4
- package/src/duckdb/src/parser/transform/statement/transform_set.cpp +2 -2
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +21 -3
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +8 -6
- package/src/duckdb/src/parser/transformer.cpp +2 -2
- package/src/duckdb/src/planner/bind_context.cpp +308 -136
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +30 -31
- package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +4 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +102 -94
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +7 -5
- package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +7 -7
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -10
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -6
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +23 -15
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +97 -19
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +74 -16
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +6 -6
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +49 -15
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +32 -23
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +20 -3
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +3 -0
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +6 -5
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +38 -19
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +2 -12
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +117 -412
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +423 -144
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +5 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +0 -4
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +31 -13
- package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +96 -27
- package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -3
- package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +7 -6
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +36 -9
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +34 -34
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +72 -35
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +99 -18
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +23 -11
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +22 -19
- package/src/duckdb/src/planner/binder.cpp +23 -45
- package/src/duckdb/src/planner/binding_alias.cpp +69 -0
- package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +6 -2
- package/src/duckdb/src/planner/collation_binding.cpp +38 -4
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +17 -5
- package/src/duckdb/src/planner/expression/bound_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -1
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +2 -2
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -4
- package/src/duckdb/src/planner/expression.cpp +7 -1
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +16 -0
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +53 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -8
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder.cpp +7 -7
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -3
- package/src/duckdb/src/planner/filter/constant_filter.cpp +17 -2
- package/src/duckdb/src/planner/filter/dynamic_filter.cpp +68 -0
- package/src/duckdb/src/planner/filter/in_filter.cpp +84 -0
- package/src/duckdb/src/planner/filter/null_filter.cpp +1 -2
- package/src/duckdb/src/planner/filter/optional_filter.cpp +29 -0
- package/src/duckdb/src/planner/filter/struct_filter.cpp +11 -6
- package/src/duckdb/src/planner/joinside.cpp +6 -5
- package/src/duckdb/src/planner/logical_operator.cpp +4 -1
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +68 -2
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +23 -0
- package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -12
- package/src/duckdb/src/planner/operator/logical_filter.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +48 -25
- package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_join.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_order.cpp +4 -11
- package/src/duckdb/src/planner/operator/logical_top_n.cpp +7 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +33 -5
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +2 -2
- package/src/duckdb/src/planner/table_binding.cpp +74 -36
- package/src/duckdb/src/planner/table_filter.cpp +5 -8
- package/src/duckdb/src/storage/arena_allocator.cpp +5 -4
- package/src/duckdb/src/storage/buffer/block_handle.cpp +88 -17
- package/src/duckdb/src/storage/buffer/block_manager.cpp +34 -26
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +70 -49
- package/src/duckdb/src/storage/buffer_manager.cpp +4 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -5
- package/src/duckdb/src/storage/compression/bitpacking.cpp +14 -16
- package/src/duckdb/src/storage/compression/dictionary/analyze.cpp +54 -0
- package/src/duckdb/src/storage/compression/dictionary/common.cpp +90 -0
- package/src/duckdb/src/storage/compression/dictionary/compression.cpp +174 -0
- package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +115 -0
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +53 -545
- package/src/duckdb/src/storage/compression/empty_validity.cpp +15 -0
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +25 -16
- package/src/duckdb/src/storage/compression/fsst.cpp +101 -47
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +92 -2
- package/src/duckdb/src/storage/compression/rle.cpp +216 -46
- package/src/duckdb/src/storage/compression/roaring/analyze.cpp +179 -0
- package/src/duckdb/src/storage/compression/roaring/common.cpp +282 -0
- package/src/duckdb/src/storage/compression/roaring/compress.cpp +481 -0
- package/src/duckdb/src/storage/compression/roaring/metadata.cpp +262 -0
- package/src/duckdb/src/storage/compression/roaring/scan.cpp +364 -0
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +47 -65
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +102 -39
- package/src/duckdb/src/storage/compression/zstd.cpp +1049 -0
- package/src/duckdb/src/storage/data_table.cpp +312 -172
- package/src/duckdb/src/storage/local_storage.cpp +104 -46
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -3
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +138 -58
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +14 -0
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +19 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +43 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +32 -5
- package/src/duckdb/src/storage/single_file_block_manager.cpp +6 -8
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +82 -71
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +18 -17
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +34 -22
- package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -3
- package/src/duckdb/src/storage/storage_info.cpp +72 -10
- package/src/duckdb/src/storage/storage_manager.cpp +41 -47
- package/src/duckdb/src/storage/table/array_column_data.cpp +7 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -9
- package/src/duckdb/src/storage/table/column_data.cpp +105 -43
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +307 -132
- package/src/duckdb/src/storage/table/column_segment.cpp +36 -13
- package/src/duckdb/src/storage/table/list_column_data.cpp +4 -4
- package/src/duckdb/src/storage/table/row_group.cpp +159 -66
- package/src/duckdb/src/storage/table/row_group_collection.cpp +157 -68
- package/src/duckdb/src/storage/table/row_version_manager.cpp +33 -10
- package/src/duckdb/src/storage/table/scan_state.cpp +21 -7
- package/src/duckdb/src/storage/table/standard_column_data.cpp +68 -5
- package/src/duckdb/src/storage/table/struct_column_data.cpp +42 -4
- package/src/duckdb/src/storage/table/table_statistics.cpp +91 -5
- package/src/duckdb/src/storage/table/update_segment.cpp +287 -210
- package/src/duckdb/src/storage/table_index_list.cpp +55 -58
- package/src/duckdb/src/storage/temporary_file_manager.cpp +412 -149
- package/src/duckdb/src/storage/wal_replay.cpp +132 -48
- package/src/duckdb/src/storage/write_ahead_log.cpp +75 -48
- package/src/duckdb/src/transaction/cleanup_state.cpp +0 -1
- package/src/duckdb/src/transaction/commit_state.cpp +23 -14
- package/src/duckdb/src/transaction/duck_transaction.cpp +29 -25
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +18 -6
- package/src/duckdb/src/transaction/meta_transaction.cpp +3 -2
- package/src/duckdb/src/transaction/rollback_state.cpp +5 -2
- package/src/duckdb/src/transaction/transaction_context.cpp +9 -1
- package/src/duckdb/src/transaction/undo_buffer.cpp +35 -27
- package/src/duckdb/src/transaction/undo_buffer_allocator.cpp +72 -0
- package/src/duckdb/src/transaction/wal_write_state.cpp +12 -10
- package/src/duckdb/src/verification/copied_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +7 -5
- package/src/duckdb/src/verification/external_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/fetch_row_verifier.cpp +7 -4
- package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +8 -4
- package/src/duckdb/src/verification/parsed_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -12
- package/src/duckdb/src/verification/statement_verifier.cpp +20 -15
- package/src/duckdb/src/verification/unoptimized_statement_verifier.cpp +7 -4
- package/src/duckdb/third_party/fsst/libfsst.hpp +1 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +15 -22
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +4 -2
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +2 -4
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +14278 -13832
- package/src/duckdb/third_party/parquet/parquet_types.cpp +3410 -1686
- package/src/duckdb/third_party/parquet/parquet_types.h +1585 -1204
- package/src/duckdb/third_party/skiplist/SkipList.h +0 -1
- package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +13 -15
- package/src/duckdb/third_party/zstd/common/debug.cpp +36 -0
- package/src/duckdb/third_party/zstd/common/entropy_common.cpp +173 -49
- package/src/duckdb/third_party/zstd/common/error_private.cpp +11 -3
- package/src/duckdb/third_party/zstd/common/fse_decompress.cpp +126 -97
- package/src/duckdb/third_party/zstd/common/pool.cpp +376 -0
- package/src/duckdb/third_party/zstd/common/threading.cpp +193 -0
- package/src/duckdb/third_party/zstd/common/xxhash.cpp +18 -14
- package/src/duckdb/third_party/zstd/common/zstd_common.cpp +3 -38
- package/src/duckdb/third_party/zstd/compress/fse_compress.cpp +93 -165
- package/src/duckdb/third_party/zstd/compress/hist.cpp +28 -31
- package/src/duckdb/third_party/zstd/compress/huf_compress.cpp +957 -291
- package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3988 -1124
- package/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +120 -43
- package/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +47 -23
- package/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +274 -424
- package/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +403 -153
- package/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +741 -268
- package/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +1339 -278
- package/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +334 -222
- package/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +674 -298
- package/src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp +1885 -0
- package/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +1247 -586
- package/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +18 -17
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +724 -270
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1193 -393
- package/src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp +30 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp +171 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp +80 -0
- package/src/duckdb/third_party/zstd/dict/cover.cpp +1271 -0
- package/src/duckdb/third_party/zstd/dict/divsufsort.cpp +1916 -0
- package/src/duckdb/third_party/zstd/dict/fastcover.cpp +775 -0
- package/src/duckdb/third_party/zstd/dict/zdict.cpp +1139 -0
- package/src/duckdb/third_party/zstd/include/zdict.h +473 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/allocations.h +58 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bits.h +204 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +88 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +243 -47
- package/src/duckdb/third_party/zstd/include/zstd/common/cpu.h +253 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/debug.h +31 -31
- package/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +94 -6
- package/src/duckdb/third_party/zstd/include/zstd/common/fse.h +424 -64
- package/src/duckdb/third_party/zstd/include/zstd/common/huf.h +255 -70
- package/src/duckdb/third_party/zstd/include/zstd/common/mem.h +125 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/pool.h +84 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/portability_macros.h +158 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/threading.h +152 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash.h → xxhash.hpp} +0 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash_static.h → xxhash_static.hpp} +1 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_deps.h +122 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +143 -174
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_trace.h +159 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/clevels.h +136 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +4 -4
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +631 -220
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +17 -7
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +2 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +3 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +256 -153
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +16 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +145 -11
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +14 -6
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm_geartab.h +110 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +33 -9
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstdmt_compress.h +107 -0
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +20 -6
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +88 -16
- package/src/duckdb/third_party/zstd/include/zstd/deprecated/zbuff.h +214 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/cover.h +156 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/divsufsort.h +62 -0
- package/src/duckdb/third_party/zstd/include/zstd.h +2171 -93
- package/src/duckdb/third_party/zstd/include/{zstd/common/zstd_errors.h → zstd_errors.h} +32 -10
- package/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +8 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +12 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_nested.cpp +6 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_array.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_bit.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_date.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_enum.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_generic.cpp +18 -0
- package/src/duckdb/ub_extension_core_functions_scalar_list.cpp +22 -0
- package/src/duckdb/ub_extension_core_functions_scalar_map.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_math.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_operators.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_random.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_string.cpp +48 -0
- package/src/duckdb/ub_extension_core_functions_scalar_struct.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_union.cpp +6 -0
- package/src/duckdb/ub_src_common.cpp +4 -0
- package/src/duckdb/ub_src_common_arrow.cpp +3 -1
- package/src/duckdb/ub_src_execution.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_aggregate.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +2 -0
- package/src/duckdb/ub_src_execution_sample.cpp +4 -0
- package/src/duckdb/ub_src_function.cpp +6 -0
- package/src/duckdb/ub_src_function_aggregate.cpp +0 -2
- package/src/duckdb/ub_src_function_aggregate_distributive.cpp +3 -1
- package/src/duckdb/ub_src_function_scalar.cpp +2 -8
- package/src/duckdb/ub_src_function_scalar_date.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_generic.cpp +2 -2
- package/src/duckdb/ub_src_function_scalar_map.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_operator.cpp +8 -0
- package/src/duckdb/ub_src_function_scalar_string.cpp +10 -0
- package/src/duckdb/ub_src_function_scalar_struct.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_system.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +6 -0
- package/src/duckdb/ub_src_function_window.cpp +36 -0
- package/src/duckdb/ub_src_logging.cpp +8 -0
- package/src/duckdb/ub_src_main_settings.cpp +3 -1
- package/src/duckdb/ub_src_optimizer.cpp +8 -0
- package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
- package/src/duckdb/ub_src_parser.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
- package/src/duckdb/ub_src_planner.cpp +2 -0
- package/src/duckdb/ub_src_planner_filter.cpp +6 -0
- package/src/duckdb/ub_src_storage_compression.cpp +4 -0
- package/src/duckdb/ub_src_storage_compression_dictionary.cpp +8 -0
- package/src/duckdb/ub_src_storage_compression_roaring.cpp +10 -0
- package/src/duckdb/ub_src_transaction.cpp +2 -0
- package/vendor.py +1 -1
- package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +0 -6003
- package/src/duckdb/extension/json/yyjson/yyjson.cpp +0 -8218
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +0 -78
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +0 -91
- package/src/duckdb/src/common/cycle_counter.cpp +0 -76
- package/src/duckdb/src/common/field_writer.cpp +0 -97
- package/src/duckdb/src/common/http_state.cpp +0 -95
- package/src/duckdb/src/common/preserved_error.cpp +0 -87
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
- package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +0 -27
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +0 -36
- package/src/duckdb/src/common/serializer/format_serializer.cpp +0 -15
- package/src/duckdb/src/common/serializer.cpp +0 -24
- package/src/duckdb/src/common/types/chunk_collection.cpp +0 -190
- package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +0 -183
- package/src/duckdb/src/core_functions/scalar/date/current.cpp +0 -54
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +0 -78
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +0 -70
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +0 -412
- package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +0 -28
- package/src/duckdb/src/core_functions/scalar/string/jaro_winkler.cpp +0 -71
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/execution/index/art/node16.cpp +0 -196
- package/src/duckdb/src/execution/index/art/node4.cpp +0 -189
- package/src/duckdb/src/execution/index/unknown_index.cpp +0 -65
- package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +0 -595
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +0 -434
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +0 -89
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +0 -90
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +0 -95
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +0 -494
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +0 -35
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +0 -99
- package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -689
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -242
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +0 -695
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -280
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +0 -666
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +0 -499
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -207
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/execution/physical_plan/plan_limit_percent.cpp +0 -18
- package/src/duckdb/src/execution/physical_plan/plan_show_select.cpp +0 -47
- package/src/duckdb/src/execution/reservoir_sample.cpp +0 -324
- package/src/duckdb/src/execution/window_executor.cpp +0 -1830
- package/src/duckdb/src/execution/window_segment_tree.cpp +0 -2073
- package/src/duckdb/src/extension_forward_decl/icu.cpp +0 -59
- package/src/duckdb/src/function/aggregate/distributive_functions.cpp +0 -15
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +0 -29
- package/src/duckdb/src/function/scalar/generic_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +0 -143
- package/src/duckdb/src/function/scalar/operators.cpp +0 -14
- package/src/duckdb/src/function/scalar/sequence_functions.cpp +0 -10
- package/src/duckdb/src/function/scalar/string_functions.cpp +0 -22
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +0 -173
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +0 -101
- package/src/duckdb/src/include/duckdb/catalog/mapping_value.hpp +0 -92
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +0 -42
- package/src/duckdb/src/include/duckdb/common/cycle_counter.hpp +0 -68
- package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +0 -34
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +0 -113
- package/src/duckdb/src/include/duckdb/common/platform.h +0 -58
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +0 -59
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +0 -192
- package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +0 -63
- package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +0 -253
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +0 -155
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +0 -163
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +0 -35
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +0 -228
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +0 -93
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +0 -197
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +0 -30
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +0 -99
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +0 -87
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +0 -80
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +0 -104
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +0 -79
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -119
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +0 -72
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +0 -110
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_file_handle.hpp +0 -59
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +0 -46
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +0 -210
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +0 -131
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +0 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +0 -167
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +0 -343
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -165
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_optimizer.hpp +0 -45
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +0 -57
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +0 -45
- package/src/duckdb/src/include/duckdb/parser/statement/show_statement.hpp +0 -32
- package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +0 -49
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +0 -42
- package/src/duckdb/src/main/settings/settings.cpp +0 -2056
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +0 -36
- package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +0 -19
- package/src/duckdb/src/parser/statement/show_statement.cpp +0 -15
- package/src/duckdb/src/planner/binder/statement/bind_show.cpp +0 -30
- package/src/duckdb/src/planner/operator/logical_limit_percent.cpp +0 -14
- package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +0 -70
- package/src/duckdb/third_party/fsst/fsst_avx512.cpp +0 -140
- package/src/duckdb/third_party/fsst/fsst_avx512.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll1.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll2.inc +0 -114
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll3.inc +0 -171
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll4.inc +0 -228
- package/src/duckdb/third_party/parquet/parquet_constants.cpp +0 -17
- package/src/duckdb/third_party/parquet/parquet_constants.h +0 -24
- package/src/duckdb/third_party/re2/util/pod_array.h +0 -55
- package/src/duckdb/third_party/re2/util/sparse_array.h +0 -392
- package/src/duckdb/third_party/re2/util/sparse_set.h +0 -264
- package/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +0 -421
- package/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +0 -238
- package/src/duckdb/third_party/zstd/include/zstd_static.h +0 -1070
- package/src/duckdb/ub_src_core_functions.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_algebraic.cpp +0 -8
- package/src/duckdb/ub_src_core_functions_aggregate_distributive.cpp +0 -24
- package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +0 -12
- package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_regression.cpp +0 -14
- package/src/duckdb/ub_src_core_functions_scalar_array.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_bit.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_scalar_date.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_enum.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +0 -18
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_map.cpp +0 -16
- package/src/duckdb/ub_src_core_functions_scalar_math.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_operators.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_random.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +0 -58
- package/src/duckdb/ub_src_core_functions_scalar_struct.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_union.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +0 -18
- package/src/duckdb/ub_src_function_scalar_operators.cpp +0 -8
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/covar.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/stddev.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/sum_helpers.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/array_kernels.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/function_list.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/secret_functions.hpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/multiply.cpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/subtract.cpp +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -10,16 +10,25 @@
|
|
10
10
|
|
11
11
|
#include "zstd/compress/zstd_compress_internal.h"
|
12
12
|
#include "zstd/compress/zstd_lazy.h"
|
13
|
+
#include "zstd/common/bits.h" /* ZSTD_countTrailingZeros64 */
|
14
|
+
|
15
|
+
namespace duckdb_zstd {
|
16
|
+
|
17
|
+
#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
|
18
|
+
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|
19
|
+
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
|
20
|
+
|| !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
|
21
|
+
|
22
|
+
#define kLazySkippingStep 8
|
13
23
|
|
14
24
|
|
15
25
|
/*-*************************************
|
16
26
|
* Binary Tree search
|
17
27
|
***************************************/
|
18
28
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
ZSTD_updateDUBT(ZSTD_matchState_t* ms,
|
29
|
+
static
|
30
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
31
|
+
void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
|
23
32
|
const BYTE* ip, const BYTE* iend,
|
24
33
|
U32 mls)
|
25
34
|
{
|
@@ -60,11 +69,12 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
|
|
60
69
|
|
61
70
|
/** ZSTD_insertDUBT1() :
|
62
71
|
* sort one already inserted but unsorted position
|
63
|
-
* assumption :
|
72
|
+
* assumption : curr >= btlow == (curr - btmask)
|
64
73
|
* doesn't fail */
|
65
|
-
static
|
66
|
-
|
67
|
-
|
74
|
+
static
|
75
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
76
|
+
void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
|
77
|
+
U32 curr, const BYTE* inputEnd,
|
68
78
|
U32 nbCompares, U32 btLow,
|
69
79
|
const ZSTD_dictMode_e dictMode)
|
70
80
|
{
|
@@ -76,41 +86,41 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
|
76
86
|
const BYTE* const base = ms->window.base;
|
77
87
|
const BYTE* const dictBase = ms->window.dictBase;
|
78
88
|
const U32 dictLimit = ms->window.dictLimit;
|
79
|
-
const BYTE* const ip = (
|
80
|
-
const BYTE* const iend = (
|
89
|
+
const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
|
90
|
+
const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
|
81
91
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
82
92
|
const BYTE* const prefixStart = base + dictLimit;
|
83
93
|
const BYTE* match;
|
84
|
-
U32* smallerPtr = bt + 2*(
|
94
|
+
U32* smallerPtr = bt + 2*(curr&btMask);
|
85
95
|
U32* largerPtr = smallerPtr + 1;
|
86
96
|
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
|
87
97
|
U32 dummy32; /* to be nullified at the end */
|
88
98
|
U32 const windowValid = ms->window.lowLimit;
|
89
99
|
U32 const maxDistance = 1U << cParams->windowLog;
|
90
|
-
U32 const windowLow = (
|
100
|
+
U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
|
91
101
|
|
92
102
|
|
93
103
|
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
|
94
|
-
|
95
|
-
assert(
|
104
|
+
curr, dictLimit, windowLow);
|
105
|
+
assert(curr >= btLow);
|
96
106
|
assert(ip < iend); /* condition for ZSTD_count */
|
97
107
|
|
98
|
-
|
108
|
+
for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
|
99
109
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
100
110
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
101
|
-
assert(matchIndex <
|
111
|
+
assert(matchIndex < curr);
|
102
112
|
/* note : all candidates are now supposed sorted,
|
103
113
|
* but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
|
104
114
|
* when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
|
105
115
|
|
106
116
|
if ( (dictMode != ZSTD_extDict)
|
107
117
|
|| (matchIndex+matchLength >= dictLimit) /* both in current segment*/
|
108
|
-
|| (
|
118
|
+
|| (curr < dictLimit) /* both in extDict */) {
|
109
119
|
const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
|
110
120
|
|| (matchIndex+matchLength >= dictLimit)) ?
|
111
121
|
base : dictBase;
|
112
122
|
assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
|
113
|
-
|| (
|
123
|
+
|| (curr < dictLimit) );
|
114
124
|
match = mBase + matchIndex;
|
115
125
|
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
|
116
126
|
} else {
|
@@ -121,7 +131,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
|
121
131
|
}
|
122
132
|
|
123
133
|
DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
|
124
|
-
|
134
|
+
curr, matchIndex, (U32)matchLength);
|
125
135
|
|
126
136
|
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
|
127
137
|
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
|
@@ -151,9 +161,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
|
151
161
|
}
|
152
162
|
|
153
163
|
|
154
|
-
static
|
155
|
-
|
156
|
-
|
164
|
+
static
|
165
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
166
|
+
size_t ZSTD_DUBT_findBetterDictMatch (
|
167
|
+
const ZSTD_matchState_t* ms,
|
157
168
|
const BYTE* const ip, const BYTE* const iend,
|
158
169
|
size_t* offsetPtr,
|
159
170
|
size_t bestLength,
|
@@ -170,7 +181,7 @@ ZSTD_DUBT_findBetterDictMatch (
|
|
170
181
|
|
171
182
|
const BYTE* const base = ms->window.base;
|
172
183
|
const BYTE* const prefixStart = base + ms->window.dictLimit;
|
173
|
-
U32 const
|
184
|
+
U32 const curr = (U32)(ip-base);
|
174
185
|
const BYTE* const dictBase = dms->window.base;
|
175
186
|
const BYTE* const dictEnd = dms->window.nextSrc;
|
176
187
|
U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
|
@@ -187,7 +198,7 @@ ZSTD_DUBT_findBetterDictMatch (
|
|
187
198
|
(void)dictMode;
|
188
199
|
assert(dictMode == ZSTD_dictMatchState);
|
189
200
|
|
190
|
-
|
201
|
+
for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
|
191
202
|
U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
|
192
203
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
193
204
|
const BYTE* match = dictBase + dictMatchIndex;
|
@@ -197,10 +208,10 @@ ZSTD_DUBT_findBetterDictMatch (
|
|
197
208
|
|
198
209
|
if (matchLength > bestLength) {
|
199
210
|
U32 matchIndex = dictMatchIndex + dictIndexDelta;
|
200
|
-
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(
|
211
|
+
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
|
201
212
|
DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
|
202
|
-
|
203
|
-
bestLength = matchLength, *offsetPtr =
|
213
|
+
curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
|
214
|
+
bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
|
204
215
|
}
|
205
216
|
if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
|
206
217
|
break; /* drop, to guarantee consistency (miss a little bit of compression) */
|
@@ -220,19 +231,20 @@ ZSTD_DUBT_findBetterDictMatch (
|
|
220
231
|
}
|
221
232
|
|
222
233
|
if (bestLength >= MINMATCH) {
|
223
|
-
U32 const mIndex =
|
234
|
+
U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex;
|
224
235
|
DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
|
225
|
-
|
236
|
+
curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
|
226
237
|
}
|
227
238
|
return bestLength;
|
228
239
|
|
229
240
|
}
|
230
241
|
|
231
242
|
|
232
|
-
static
|
233
|
-
|
243
|
+
static
|
244
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
245
|
+
size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
234
246
|
const BYTE* const ip, const BYTE* const iend,
|
235
|
-
size_t*
|
247
|
+
size_t* offBasePtr,
|
236
248
|
U32 const mls,
|
237
249
|
const ZSTD_dictMode_e dictMode)
|
238
250
|
{
|
@@ -243,13 +255,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
|
243
255
|
U32 matchIndex = hashTable[h];
|
244
256
|
|
245
257
|
const BYTE* const base = ms->window.base;
|
246
|
-
U32 const
|
247
|
-
U32 const windowLow = ZSTD_getLowestMatchIndex(ms,
|
258
|
+
U32 const curr = (U32)(ip-base);
|
259
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
|
248
260
|
|
249
261
|
U32* const bt = ms->chainTable;
|
250
262
|
U32 const btLog = cParams->chainLog - 1;
|
251
263
|
U32 const btMask = (1 << btLog) - 1;
|
252
|
-
U32 const btLow = (btMask >=
|
264
|
+
U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
|
253
265
|
U32 const unsortLimit = MAX(btLow, windowLow);
|
254
266
|
|
255
267
|
U32* nextCandidate = bt + 2*(matchIndex&btMask);
|
@@ -258,8 +270,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
|
258
270
|
U32 nbCandidates = nbCompares;
|
259
271
|
U32 previousCandidate = 0;
|
260
272
|
|
261
|
-
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ",
|
273
|
+
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
|
262
274
|
assert(ip <= iend-8); /* required for h calculation */
|
275
|
+
assert(dictMode != ZSTD_dedicatedDictSearch);
|
263
276
|
|
264
277
|
/* reach end of unsorted candidates list */
|
265
278
|
while ( (matchIndex > unsortLimit)
|
@@ -301,16 +314,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
|
301
314
|
const U32 dictLimit = ms->window.dictLimit;
|
302
315
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
303
316
|
const BYTE* const prefixStart = base + dictLimit;
|
304
|
-
U32* smallerPtr = bt + 2*(
|
305
|
-
U32* largerPtr = bt + 2*(
|
306
|
-
U32 matchEndIdx =
|
317
|
+
U32* smallerPtr = bt + 2*(curr&btMask);
|
318
|
+
U32* largerPtr = bt + 2*(curr&btMask) + 1;
|
319
|
+
U32 matchEndIdx = curr + 8 + 1;
|
307
320
|
U32 dummy32; /* to be nullified at the end */
|
308
321
|
size_t bestLength = 0;
|
309
322
|
|
310
323
|
matchIndex = hashTable[h];
|
311
|
-
hashTable[h] =
|
324
|
+
hashTable[h] = curr; /* Update Hash Table */
|
312
325
|
|
313
|
-
|
326
|
+
for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
|
314
327
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
315
328
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
316
329
|
const BYTE* match;
|
@@ -328,8 +341,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
|
328
341
|
if (matchLength > bestLength) {
|
329
342
|
if (matchLength > matchEndIdx - matchIndex)
|
330
343
|
matchEndIdx = matchIndex + (U32)matchLength;
|
331
|
-
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(
|
332
|
-
bestLength = matchLength, *
|
344
|
+
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) )
|
345
|
+
bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex);
|
333
346
|
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
|
334
347
|
if (dictMode == ZSTD_dictMatchState) {
|
335
348
|
nbCompares = 0; /* in addition to avoiding checking any
|
@@ -358,19 +371,20 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
|
358
371
|
|
359
372
|
*smallerPtr = *largerPtr = 0;
|
360
373
|
|
374
|
+
assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
|
361
375
|
if (dictMode == ZSTD_dictMatchState && nbCompares) {
|
362
376
|
bestLength = ZSTD_DUBT_findBetterDictMatch(
|
363
377
|
ms, ip, iend,
|
364
|
-
|
378
|
+
offBasePtr, bestLength, nbCompares,
|
365
379
|
mls, dictMode);
|
366
380
|
}
|
367
381
|
|
368
|
-
assert(matchEndIdx >
|
382
|
+
assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
|
369
383
|
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
370
384
|
if (bestLength >= MINMATCH) {
|
371
|
-
U32 const mIndex =
|
385
|
+
U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex;
|
372
386
|
DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
|
373
|
-
|
387
|
+
curr, (U32)bestLength, (U32)*offBasePtr, mIndex);
|
374
388
|
}
|
375
389
|
return bestLength;
|
376
390
|
}
|
@@ -378,69 +392,236 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
|
378
392
|
|
379
393
|
|
380
394
|
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
|
381
|
-
FORCE_INLINE_TEMPLATE
|
382
|
-
|
395
|
+
FORCE_INLINE_TEMPLATE
|
396
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
397
|
+
size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
|
383
398
|
const BYTE* const ip, const BYTE* const iLimit,
|
384
|
-
size_t*
|
399
|
+
size_t* offBasePtr,
|
385
400
|
const U32 mls /* template */,
|
386
401
|
const ZSTD_dictMode_e dictMode)
|
387
402
|
{
|
388
403
|
DEBUGLOG(7, "ZSTD_BtFindBestMatch");
|
389
404
|
if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
|
390
405
|
ZSTD_updateDUBT(ms, ip, iLimit, mls);
|
391
|
-
return ZSTD_DUBT_findBestMatch(ms, ip, iLimit,
|
406
|
+
return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode);
|
392
407
|
}
|
393
408
|
|
409
|
+
/***********************************
|
410
|
+
* Dedicated dict search
|
411
|
+
***********************************/
|
394
412
|
|
395
|
-
|
396
|
-
ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
|
397
|
-
const BYTE* ip, const BYTE* const iLimit,
|
398
|
-
size_t* offsetPtr)
|
413
|
+
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
|
399
414
|
{
|
400
|
-
|
415
|
+
const BYTE* const base = ms->window.base;
|
416
|
+
U32 const target = (U32)(ip - base);
|
417
|
+
U32* const hashTable = ms->hashTable;
|
418
|
+
U32* const chainTable = ms->chainTable;
|
419
|
+
U32 const chainSize = 1 << ms->cParams.chainLog;
|
420
|
+
U32 idx = ms->nextToUpdate;
|
421
|
+
U32 const minChain = chainSize < target - idx ? target - chainSize : idx;
|
422
|
+
U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
|
423
|
+
U32 const cacheSize = bucketSize - 1;
|
424
|
+
U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
|
425
|
+
U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
|
426
|
+
|
427
|
+
/* We know the hashtable is oversized by a factor of `bucketSize`.
|
428
|
+
* We are going to temporarily pretend `bucketSize == 1`, keeping only a
|
429
|
+
* single entry. We will use the rest of the space to construct a temporary
|
430
|
+
* chaintable.
|
431
|
+
*/
|
432
|
+
U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
|
433
|
+
U32* const tmpHashTable = hashTable;
|
434
|
+
U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
|
435
|
+
U32 const tmpChainSize = (U32)((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
|
436
|
+
U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
|
437
|
+
U32 hashIdx;
|
438
|
+
|
439
|
+
assert(ms->cParams.chainLog <= 24);
|
440
|
+
assert(ms->cParams.hashLog > ms->cParams.chainLog);
|
441
|
+
assert(idx != 0);
|
442
|
+
assert(tmpMinChain <= minChain);
|
443
|
+
|
444
|
+
/* fill conventional hash table and conventional chain table */
|
445
|
+
for ( ; idx < target; idx++) {
|
446
|
+
U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
|
447
|
+
if (idx >= tmpMinChain) {
|
448
|
+
tmpChainTable[idx - tmpMinChain] = hashTable[h];
|
449
|
+
}
|
450
|
+
tmpHashTable[h] = idx;
|
451
|
+
}
|
452
|
+
|
453
|
+
/* sort chains into ddss chain table */
|
401
454
|
{
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
455
|
+
U32 chainPos = 0;
|
456
|
+
for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
|
457
|
+
U32 count;
|
458
|
+
U32 countBeyondMinChain = 0;
|
459
|
+
U32 i = tmpHashTable[hashIdx];
|
460
|
+
for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
|
461
|
+
/* skip through the chain to the first position that won't be
|
462
|
+
* in the hash cache bucket */
|
463
|
+
if (i < minChain) {
|
464
|
+
countBeyondMinChain++;
|
465
|
+
}
|
466
|
+
i = tmpChainTable[i - tmpMinChain];
|
467
|
+
}
|
468
|
+
if (count == cacheSize) {
|
469
|
+
for (count = 0; count < chainLimit;) {
|
470
|
+
if (i < minChain) {
|
471
|
+
if (!i || ++countBeyondMinChain > cacheSize) {
|
472
|
+
/* only allow pulling `cacheSize` number of entries
|
473
|
+
* into the cache or chainTable beyond `minChain`,
|
474
|
+
* to replace the entries pulled out of the
|
475
|
+
* chainTable into the cache. This lets us reach
|
476
|
+
* back further without increasing the total number
|
477
|
+
* of entries in the chainTable, guaranteeing the
|
478
|
+
* DDSS chain table will fit into the space
|
479
|
+
* allocated for the regular one. */
|
480
|
+
break;
|
481
|
+
}
|
482
|
+
}
|
483
|
+
chainTable[chainPos++] = i;
|
484
|
+
count++;
|
485
|
+
if (i < tmpMinChain) {
|
486
|
+
break;
|
487
|
+
}
|
488
|
+
i = tmpChainTable[i - tmpMinChain];
|
489
|
+
}
|
490
|
+
} else {
|
491
|
+
count = 0;
|
492
|
+
}
|
493
|
+
if (count) {
|
494
|
+
tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
|
495
|
+
} else {
|
496
|
+
tmpHashTable[hashIdx] = 0;
|
497
|
+
}
|
498
|
+
}
|
499
|
+
assert(chainPos <= chainSize); /* I believe this is guaranteed... */
|
500
|
+
}
|
501
|
+
|
502
|
+
/* move chain pointers into the last entry of each hash bucket */
|
503
|
+
for (hashIdx = (1 << hashLog); hashIdx; ) {
|
504
|
+
U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
|
505
|
+
U32 const chainPackedPointer = tmpHashTable[hashIdx];
|
506
|
+
U32 i;
|
507
|
+
for (i = 0; i < cacheSize; i++) {
|
508
|
+
hashTable[bucketIdx + i] = 0;
|
509
|
+
}
|
510
|
+
hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
|
511
|
+
}
|
512
|
+
|
513
|
+
/* fill the buckets of the hash table */
|
514
|
+
for (idx = ms->nextToUpdate; idx < target; idx++) {
|
515
|
+
U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
|
516
|
+
<< ZSTD_LAZY_DDSS_BUCKET_LOG;
|
517
|
+
U32 i;
|
518
|
+
/* Shift hash cache down 1. */
|
519
|
+
for (i = cacheSize - 1; i; i--)
|
520
|
+
hashTable[h + i] = hashTable[h + i - 1];
|
521
|
+
hashTable[h] = idx;
|
407
522
|
}
|
523
|
+
|
524
|
+
ms->nextToUpdate = target;
|
408
525
|
}
|
409
526
|
|
527
|
+
/* Returns the longest match length found in the dedicated dict search structure.
|
528
|
+
* If none are longer than the argument ml, then ml will be returned.
|
529
|
+
*/
|
530
|
+
FORCE_INLINE_TEMPLATE
|
531
|
+
size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
|
532
|
+
const ZSTD_matchState_t* const dms,
|
533
|
+
const BYTE* const ip, const BYTE* const iLimit,
|
534
|
+
const BYTE* const prefixStart, const U32 curr,
|
535
|
+
const U32 dictLimit, const size_t ddsIdx) {
|
536
|
+
const U32 ddsLowestIndex = dms->window.dictLimit;
|
537
|
+
const BYTE* const ddsBase = dms->window.base;
|
538
|
+
const BYTE* const ddsEnd = dms->window.nextSrc;
|
539
|
+
const U32 ddsSize = (U32)(ddsEnd - ddsBase);
|
540
|
+
const U32 ddsIndexDelta = dictLimit - ddsSize;
|
541
|
+
const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
|
542
|
+
const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
|
543
|
+
U32 ddsAttempt;
|
544
|
+
U32 matchIndex;
|
545
|
+
|
546
|
+
for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
|
547
|
+
PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
|
548
|
+
}
|
410
549
|
|
411
|
-
static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
|
412
|
-
ZSTD_matchState_t* ms,
|
413
|
-
const BYTE* ip, const BYTE* const iLimit,
|
414
|
-
size_t* offsetPtr)
|
415
|
-
{
|
416
|
-
switch(ms->cParams.minMatch)
|
417
550
|
{
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
|
551
|
+
U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
|
552
|
+
U32 const chainIndex = chainPackedPointer >> 8;
|
553
|
+
|
554
|
+
PREFETCH_L1(&dms->chainTable[chainIndex]);
|
423
555
|
}
|
424
|
-
}
|
425
556
|
|
557
|
+
for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
|
558
|
+
size_t currentMl=0;
|
559
|
+
const BYTE* match;
|
560
|
+
matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
|
561
|
+
match = ddsBase + matchIndex;
|
562
|
+
|
563
|
+
if (!matchIndex) {
|
564
|
+
return ml;
|
565
|
+
}
|
566
|
+
|
567
|
+
/* guaranteed by table construction */
|
568
|
+
(void)ddsLowestIndex;
|
569
|
+
assert(matchIndex >= ddsLowestIndex);
|
570
|
+
assert(match+4 <= ddsEnd);
|
571
|
+
if (MEM_read32(match) == MEM_read32(ip)) {
|
572
|
+
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
|
573
|
+
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
|
574
|
+
}
|
575
|
+
|
576
|
+
/* save best solution */
|
577
|
+
if (currentMl > ml) {
|
578
|
+
ml = currentMl;
|
579
|
+
*offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
|
580
|
+
if (ip+currentMl == iLimit) {
|
581
|
+
/* best possible, avoids read overflow on next attempt */
|
582
|
+
return ml;
|
583
|
+
}
|
584
|
+
}
|
585
|
+
}
|
426
586
|
|
427
|
-
static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
|
428
|
-
ZSTD_matchState_t* ms,
|
429
|
-
const BYTE* ip, const BYTE* const iLimit,
|
430
|
-
size_t* offsetPtr)
|
431
|
-
{
|
432
|
-
switch(ms->cParams.minMatch)
|
433
587
|
{
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
588
|
+
U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
|
589
|
+
U32 chainIndex = chainPackedPointer >> 8;
|
590
|
+
U32 const chainLength = chainPackedPointer & 0xFF;
|
591
|
+
U32 const chainAttempts = nbAttempts - ddsAttempt;
|
592
|
+
U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
|
593
|
+
U32 chainAttempt;
|
594
|
+
|
595
|
+
for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
|
596
|
+
PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
|
597
|
+
}
|
598
|
+
|
599
|
+
for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
|
600
|
+
size_t currentMl=0;
|
601
|
+
const BYTE* match;
|
602
|
+
matchIndex = dms->chainTable[chainIndex];
|
603
|
+
match = ddsBase + matchIndex;
|
604
|
+
|
605
|
+
/* guaranteed by table construction */
|
606
|
+
assert(matchIndex >= ddsLowestIndex);
|
607
|
+
assert(match+4 <= ddsEnd);
|
608
|
+
if (MEM_read32(match) == MEM_read32(ip)) {
|
609
|
+
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
|
610
|
+
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
|
611
|
+
}
|
612
|
+
|
613
|
+
/* save best solution */
|
614
|
+
if (currentMl > ml) {
|
615
|
+
ml = currentMl;
|
616
|
+
*offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
|
617
|
+
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
|
618
|
+
}
|
619
|
+
}
|
439
620
|
}
|
621
|
+
return ml;
|
440
622
|
}
|
441
623
|
|
442
624
|
|
443
|
-
|
444
625
|
/* *********************************
|
445
626
|
* Hash Chain
|
446
627
|
***********************************/
|
@@ -448,10 +629,12 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
|
|
448
629
|
|
449
630
|
/* Update chains up to ip (excluded)
|
450
631
|
Assumption : always within prefix (i.e. not within extDict) */
|
451
|
-
|
632
|
+
FORCE_INLINE_TEMPLATE
|
633
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
634
|
+
U32 ZSTD_insertAndFindFirstIndex_internal(
|
452
635
|
ZSTD_matchState_t* ms,
|
453
636
|
const ZSTD_compressionParameters* const cParams,
|
454
|
-
const BYTE* ip, U32 const mls)
|
637
|
+
const BYTE* ip, U32 const mls, U32 const lazySkipping)
|
455
638
|
{
|
456
639
|
U32* const hashTable = ms->hashTable;
|
457
640
|
const U32 hashLog = cParams->hashLog;
|
@@ -466,6 +649,9 @@ static U32 ZSTD_insertAndFindFirstIndex_internal(
|
|
466
649
|
NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
|
467
650
|
hashTable[h] = idx;
|
468
651
|
idx++;
|
652
|
+
/* Stop inserting every position when in the lazy skipping mode. */
|
653
|
+
if (lazySkipping)
|
654
|
+
break;
|
469
655
|
}
|
470
656
|
|
471
657
|
ms->nextToUpdate = target;
|
@@ -474,13 +660,13 @@ static U32 ZSTD_insertAndFindFirstIndex_internal(
|
|
474
660
|
|
475
661
|
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
|
476
662
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
477
|
-
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
|
663
|
+
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
|
478
664
|
}
|
479
665
|
|
480
|
-
|
481
666
|
/* inlining is important to hardwire a hot branch (template emulation) */
|
482
667
|
FORCE_INLINE_TEMPLATE
|
483
|
-
|
668
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
669
|
+
size_t ZSTD_HcFindBestMatch(
|
484
670
|
ZSTD_matchState_t* ms,
|
485
671
|
const BYTE* const ip, const BYTE* const iLimit,
|
486
672
|
size_t* offsetPtr,
|
@@ -495,25 +681,39 @@ size_t ZSTD_HcFindBestMatch_generic (
|
|
495
681
|
const U32 dictLimit = ms->window.dictLimit;
|
496
682
|
const BYTE* const prefixStart = base + dictLimit;
|
497
683
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
498
|
-
const U32
|
684
|
+
const U32 curr = (U32)(ip-base);
|
499
685
|
const U32 maxDistance = 1U << cParams->windowLog;
|
500
686
|
const U32 lowestValid = ms->window.lowLimit;
|
501
|
-
const U32 withinMaxDistance = (
|
687
|
+
const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
502
688
|
const U32 isDictionary = (ms->loadedDictEnd != 0);
|
503
689
|
const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
|
504
|
-
const U32 minChain =
|
690
|
+
const U32 minChain = curr > chainSize ? curr - chainSize : 0;
|
505
691
|
U32 nbAttempts = 1U << cParams->searchLog;
|
506
692
|
size_t ml=4-1;
|
507
693
|
|
694
|
+
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
695
|
+
const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
|
696
|
+
? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
|
697
|
+
const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
|
698
|
+
? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
|
699
|
+
|
700
|
+
U32 matchIndex;
|
701
|
+
|
702
|
+
if (dictMode == ZSTD_dedicatedDictSearch) {
|
703
|
+
const U32* entry = &dms->hashTable[ddsIdx];
|
704
|
+
PREFETCH_L1(entry);
|
705
|
+
}
|
706
|
+
|
508
707
|
/* HC4 match finder */
|
509
|
-
|
708
|
+
matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
|
510
709
|
|
511
|
-
for ( ; (matchIndex
|
710
|
+
for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
|
512
711
|
size_t currentMl=0;
|
513
712
|
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
|
514
713
|
const BYTE* const match = base + matchIndex;
|
515
714
|
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
|
516
|
-
|
715
|
+
/* read 4B starting from (match + ml + 1 - sizeof(U32)) */
|
716
|
+
if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
|
517
717
|
currentMl = ZSTD_count(ip, match, iLimit);
|
518
718
|
} else {
|
519
719
|
const BYTE* const match = dictBase + matchIndex;
|
@@ -525,7 +725,7 @@ size_t ZSTD_HcFindBestMatch_generic (
|
|
525
725
|
/* save best solution */
|
526
726
|
if (currentMl > ml) {
|
527
727
|
ml = currentMl;
|
528
|
-
*offsetPtr =
|
728
|
+
*offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
|
529
729
|
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
|
530
730
|
}
|
531
731
|
|
@@ -533,8 +733,11 @@ size_t ZSTD_HcFindBestMatch_generic (
|
|
533
733
|
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
|
534
734
|
}
|
535
735
|
|
536
|
-
|
537
|
-
|
736
|
+
assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
|
737
|
+
if (dictMode == ZSTD_dedicatedDictSearch) {
|
738
|
+
ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms,
|
739
|
+
ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
|
740
|
+
} else if (dictMode == ZSTD_dictMatchState) {
|
538
741
|
const U32* const dmsChainTable = dms->chainTable;
|
539
742
|
const U32 dmsChainSize = (1 << dms->cParams.chainLog);
|
540
743
|
const U32 dmsChainMask = dmsChainSize - 1;
|
@@ -547,7 +750,7 @@ size_t ZSTD_HcFindBestMatch_generic (
|
|
547
750
|
|
548
751
|
matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
|
549
752
|
|
550
|
-
for ( ; (matchIndex
|
753
|
+
for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
|
551
754
|
size_t currentMl=0;
|
552
755
|
const BYTE* const match = dmsBase + matchIndex;
|
553
756
|
assert(match+4 <= dmsEnd);
|
@@ -557,11 +760,13 @@ size_t ZSTD_HcFindBestMatch_generic (
|
|
557
760
|
/* save best solution */
|
558
761
|
if (currentMl > ml) {
|
559
762
|
ml = currentMl;
|
560
|
-
|
763
|
+
assert(curr > matchIndex + dmsIndexDelta);
|
764
|
+
*offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
|
561
765
|
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
|
562
766
|
}
|
563
767
|
|
564
768
|
if (matchIndex <= dmsMinChain) break;
|
769
|
+
|
565
770
|
matchIndex = dmsChainTable[matchIndex & dmsChainMask];
|
566
771
|
}
|
567
772
|
}
|
@@ -569,62 +774,748 @@ size_t ZSTD_HcFindBestMatch_generic (
|
|
569
774
|
return ml;
|
570
775
|
}
|
571
776
|
|
777
|
+
/* *********************************
|
778
|
+
* (SIMD) Row-based matchfinder
|
779
|
+
***********************************/
|
780
|
+
/* Constants for row-based hash */
|
781
|
+
#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
|
782
|
+
#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
|
572
783
|
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
784
|
+
#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1)
|
785
|
+
|
786
|
+
typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 representing a mask of matches */
|
787
|
+
|
788
|
+
/* ZSTD_VecMask_next():
|
789
|
+
* Starting from the LSB, returns the idx of the next non-zero bit.
|
790
|
+
* Basically counting the nb of trailing zeroes.
|
791
|
+
*/
|
792
|
+
MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
|
793
|
+
return ZSTD_countTrailingZeros64(val);
|
794
|
+
}
|
795
|
+
|
796
|
+
/* ZSTD_row_nextIndex():
|
797
|
+
* Returns the next index to insert at within a tagTable row, and updates the "head"
|
798
|
+
* value to reflect the update. Essentially cycles backwards from [1, {entries per row})
|
799
|
+
*/
|
800
|
+
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
|
801
|
+
U32 next = (*tagRow-1) & rowMask;
|
802
|
+
next += (next == 0) ? rowMask : 0; /* skip first position */
|
803
|
+
*tagRow = (BYTE)next;
|
804
|
+
return next;
|
805
|
+
}
|
806
|
+
|
807
|
+
/* ZSTD_isAligned():
|
808
|
+
* Checks that a pointer is aligned to "align" bytes which must be a power of 2.
|
809
|
+
*/
|
810
|
+
MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
|
811
|
+
assert((align & (align - 1)) == 0);
|
812
|
+
return (((size_t)ptr) & (align - 1)) == 0;
|
813
|
+
}
|
814
|
+
|
815
|
+
/* ZSTD_row_prefetch():
|
816
|
+
* Performs prefetching for the hashTable and tagTable at a given row.
|
817
|
+
*/
|
818
|
+
FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
|
819
|
+
PREFETCH_L1(hashTable + relRow);
|
820
|
+
if (rowLog >= 5) {
|
821
|
+
PREFETCH_L1(hashTable + relRow + 16);
|
822
|
+
/* Note: prefetching more of the hash table does not appear to be beneficial for 128-entry rows */
|
823
|
+
}
|
824
|
+
PREFETCH_L1(tagTable + relRow);
|
825
|
+
if (rowLog == 6) {
|
826
|
+
PREFETCH_L1(tagTable + relRow + 32);
|
827
|
+
}
|
828
|
+
assert(rowLog == 4 || rowLog == 5 || rowLog == 6);
|
829
|
+
assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */
|
830
|
+
assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on correct multiple of bytes (32,64,128) */
|
831
|
+
}
|
832
|
+
|
833
|
+
/* ZSTD_row_fillHashCache():
|
834
|
+
* Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
|
835
|
+
* but not beyond iLimit.
|
836
|
+
*/
|
837
|
+
FORCE_INLINE_TEMPLATE
|
838
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
839
|
+
void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
|
840
|
+
U32 const rowLog, U32 const mls,
|
841
|
+
U32 idx, const BYTE* const iLimit)
|
577
842
|
{
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
843
|
+
U32 const* const hashTable = ms->hashTable;
|
844
|
+
BYTE const* const tagTable = ms->tagTable;
|
845
|
+
U32 const hashLog = ms->rowHashLog;
|
846
|
+
U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
|
847
|
+
U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
|
848
|
+
|
849
|
+
for (; idx < lim; ++idx) {
|
850
|
+
U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
|
851
|
+
U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
|
852
|
+
ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
|
853
|
+
ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
|
585
854
|
}
|
855
|
+
|
856
|
+
DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1],
|
857
|
+
ms->hashCache[2], ms->hashCache[3], ms->hashCache[4],
|
858
|
+
ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]);
|
586
859
|
}
|
587
860
|
|
861
|
+
/* ZSTD_row_nextCachedHash():
|
862
|
+
* Returns the hash of base + idx, and replaces the hash in the hash cache with the hash of
|
863
|
+
* base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
|
864
|
+
*/
|
865
|
+
FORCE_INLINE_TEMPLATE
|
866
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
867
|
+
U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
|
868
|
+
BYTE const* tagTable, BYTE const* base,
|
869
|
+
U32 idx, U32 const hashLog,
|
870
|
+
U32 const rowLog, U32 const mls,
|
871
|
+
U64 const hashSalt)
|
872
|
+
{
|
873
|
+
U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
|
874
|
+
U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
|
875
|
+
ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
|
876
|
+
{ U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
|
877
|
+
cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash;
|
878
|
+
return hash;
|
879
|
+
}
|
880
|
+
}
|
588
881
|
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
882
|
+
/* ZSTD_row_update_internalImpl():
|
883
|
+
* Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
|
884
|
+
*/
|
885
|
+
FORCE_INLINE_TEMPLATE
|
886
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
887
|
+
void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
|
888
|
+
U32 updateStartIdx, U32 const updateEndIdx,
|
889
|
+
U32 const mls, U32 const rowLog,
|
890
|
+
U32 const rowMask, U32 const useCache)
|
593
891
|
{
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
892
|
+
U32* const hashTable = ms->hashTable;
|
893
|
+
BYTE* const tagTable = ms->tagTable;
|
894
|
+
U32 const hashLog = ms->rowHashLog;
|
895
|
+
const BYTE* const base = ms->window.base;
|
896
|
+
|
897
|
+
DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
|
898
|
+
for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
|
899
|
+
U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
|
900
|
+
: (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
|
901
|
+
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
|
902
|
+
U32* const row = hashTable + relRow;
|
903
|
+
BYTE* tagRow = tagTable + relRow;
|
904
|
+
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
|
905
|
+
|
906
|
+
assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
|
907
|
+
tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
|
908
|
+
row[pos] = updateStartIdx;
|
601
909
|
}
|
602
910
|
}
|
603
911
|
|
912
|
+
/* ZSTD_row_update_internal():
|
913
|
+
* Inserts the positions from ms->nextToUpdate up to ip (excluded) into the hash table, and updates ms->nextToUpdate.
|
914
|
+
* Skips sections of long matches as is necessary.
|
915
|
+
*/
|
916
|
+
FORCE_INLINE_TEMPLATE
|
917
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
918
|
+
void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
|
919
|
+
U32 const mls, U32 const rowLog,
|
920
|
+
U32 const rowMask, U32 const useCache)
|
921
|
+
{
|
922
|
+
U32 idx = ms->nextToUpdate;
|
923
|
+
const BYTE* const base = ms->window.base;
|
924
|
+
const U32 target = (U32)(ip - base);
|
925
|
+
const U32 kSkipThreshold = 384;
|
926
|
+
const U32 kMaxMatchStartPositionsToUpdate = 96;
|
927
|
+
const U32 kMaxMatchEndPositionsToUpdate = 32;
|
928
|
+
|
929
|
+
if (useCache) {
|
930
|
+
/* Only skip positions when using hash cache, i.e.
|
931
|
+
* if we are loading a dict, don't skip anything.
|
932
|
+
* If we decide to skip, then we only update a set number
|
933
|
+
* of positions at the beginning and end of the match.
|
934
|
+
*/
|
935
|
+
if (UNLIKELY(target - idx > kSkipThreshold)) {
|
936
|
+
U32 const bound = idx + kMaxMatchStartPositionsToUpdate;
|
937
|
+
ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache);
|
938
|
+
idx = target - kMaxMatchEndPositionsToUpdate;
|
939
|
+
ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1);
|
940
|
+
}
|
941
|
+
}
|
942
|
+
assert(target >= idx);
|
943
|
+
ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache);
|
944
|
+
ms->nextToUpdate = target;
|
945
|
+
}
|
946
|
+
|
947
|
+
/* ZSTD_row_update():
|
948
|
+
* External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
|
949
|
+
* processing.
|
950
|
+
*/
|
951
|
+
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
|
952
|
+
const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
|
953
|
+
const U32 rowMask = (1u << rowLog) - 1;
|
954
|
+
const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
|
955
|
+
|
956
|
+
DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
|
957
|
+
ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
|
958
|
+
}
|
959
|
+
|
960
|
+
/* Returns the number of mask bits used to represent each row entry. Given not all
|
961
|
+
* architectures have an easy movemask instruction, this helps to iterate over
|
962
|
+
* groups of bits easier and faster.
|
963
|
+
*/
|
964
|
+
FORCE_INLINE_TEMPLATE U32
|
965
|
+
ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
|
966
|
+
{
|
967
|
+
assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
|
968
|
+
assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
|
969
|
+
(void)rowEntries;
|
970
|
+
#if defined(ZSTD_ARCH_ARM_NEON)
|
971
|
+
/* NEON path only works for little endian */
|
972
|
+
if (!MEM_isLittleEndian()) {
|
973
|
+
return 1;
|
974
|
+
}
|
975
|
+
if (rowEntries == 16) {
|
976
|
+
return 4;
|
977
|
+
}
|
978
|
+
if (rowEntries == 32) {
|
979
|
+
return 2;
|
980
|
+
}
|
981
|
+
if (rowEntries == 64) {
|
982
|
+
return 1;
|
983
|
+
}
|
984
|
+
#endif
|
985
|
+
return 1;
|
986
|
+
}
|
604
987
|
|
605
|
-
|
988
|
+
#if defined(ZSTD_ARCH_X86_SSE2)
|
989
|
+
FORCE_INLINE_TEMPLATE ZSTD_VecMask
|
990
|
+
ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head)
|
991
|
+
{
|
992
|
+
const __m128i comparisonMask = _mm_set1_epi8((char)tag);
|
993
|
+
int matches[4] = {0};
|
994
|
+
int i;
|
995
|
+
assert(nbChunks == 1 || nbChunks == 2 || nbChunks == 4);
|
996
|
+
for (i=0; i<nbChunks; i++) {
|
997
|
+
const __m128i chunk = _mm_loadu_si128((const __m128i*)(const void*)(src + 16*i));
|
998
|
+
const __m128i equalMask = _mm_cmpeq_epi8(chunk, comparisonMask);
|
999
|
+
matches[i] = _mm_movemask_epi8(equalMask);
|
1000
|
+
}
|
1001
|
+
if (nbChunks == 1) return ZSTD_rotateRight_U16((U16)matches[0], head);
|
1002
|
+
if (nbChunks == 2) return ZSTD_rotateRight_U32((U32)matches[1] << 16 | (U32)matches[0], head);
|
1003
|
+
assert(nbChunks == 4);
|
1004
|
+
return ZSTD_rotateRight_U64((U64)matches[3] << 48 | (U64)matches[2] << 32 | (U64)matches[1] << 16 | (U64)matches[0], head);
|
1005
|
+
}
|
1006
|
+
#endif
|
1007
|
+
|
1008
|
+
#if defined(ZSTD_ARCH_ARM_NEON)
|
1009
|
+
FORCE_INLINE_TEMPLATE ZSTD_VecMask
|
1010
|
+
ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped)
|
1011
|
+
{
|
1012
|
+
assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
|
1013
|
+
if (rowEntries == 16) {
|
1014
|
+
/* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits.
|
1015
|
+
* After that groups of 4 bits represent the equalMask. We lower
|
1016
|
+
* all bits except the highest in these groups by doing AND with
|
1017
|
+
* 0x88 = 0b10001000.
|
1018
|
+
*/
|
1019
|
+
const uint8x16_t chunk = vld1q_u8(src);
|
1020
|
+
const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
|
1021
|
+
const uint8x8_t res = vshrn_n_u16(equalMask, 4);
|
1022
|
+
const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
1023
|
+
return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull;
|
1024
|
+
} else if (rowEntries == 32) {
|
1025
|
+
/* Same idea as with rowEntries == 16 but doing AND with
|
1026
|
+
* 0x55 = 0b01010101.
|
1027
|
+
*/
|
1028
|
+
const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src);
|
1029
|
+
const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
|
1030
|
+
const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
|
1031
|
+
const uint8x16_t dup = vdupq_n_u8(tag);
|
1032
|
+
const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6);
|
1033
|
+
const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6);
|
1034
|
+
const uint8x8_t res = vsli_n_u8(t0, t1, 4);
|
1035
|
+
const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ;
|
1036
|
+
return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull;
|
1037
|
+
} else { /* rowEntries == 64 */
|
1038
|
+
const uint8x16x4_t chunk = vld4q_u8(src);
|
1039
|
+
const uint8x16_t dup = vdupq_n_u8(tag);
|
1040
|
+
const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
|
1041
|
+
const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
|
1042
|
+
const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
|
1043
|
+
const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
|
1044
|
+
|
1045
|
+
const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
|
1046
|
+
const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
|
1047
|
+
const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
|
1048
|
+
const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
|
1049
|
+
const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
|
1050
|
+
const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
|
1051
|
+
return ZSTD_rotateRight_U64(matches, headGrouped);
|
1052
|
+
}
|
1053
|
+
}
|
1054
|
+
#endif
|
1055
|
+
|
1056
|
+
/* Returns a ZSTD_VecMask (U64) that has the nth group (determined by
|
1057
|
+
* ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag"
|
1058
|
+
* matches the hash at the nth position in a row of the tagTable.
|
1059
|
+
* Each row is a circular buffer beginning at the value of "headGrouped". So we
|
1060
|
+
* must rotate the "matches" bitfield to match up with the actual layout of the
|
1061
|
+
* entries within the hashTable */
|
1062
|
+
FORCE_INLINE_TEMPLATE ZSTD_VecMask
|
1063
|
+
ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
|
1064
|
+
{
|
1065
|
+
const BYTE* const src = tagRow;
|
1066
|
+
assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
|
1067
|
+
assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
|
1068
|
+
assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
|
1069
|
+
|
1070
|
+
#if defined(ZSTD_ARCH_X86_SSE2)
|
1071
|
+
|
1072
|
+
return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);
|
1073
|
+
|
1074
|
+
#else /* SW or NEON-LE */
|
1075
|
+
|
1076
|
+
# if defined(ZSTD_ARCH_ARM_NEON)
|
1077
|
+
/* This NEON path only works for little endian - otherwise use SWAR below */
|
1078
|
+
if (MEM_isLittleEndian()) {
|
1079
|
+
return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
|
1080
|
+
}
|
1081
|
+
# endif /* ZSTD_ARCH_ARM_NEON */
|
1082
|
+
/* SWAR */
|
1083
|
+
{ const int chunkSize = sizeof(size_t);
|
1084
|
+
const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
|
1085
|
+
const size_t xFF = ~((size_t)0);
|
1086
|
+
const size_t x01 = xFF / 0xFF;
|
1087
|
+
const size_t x80 = x01 << 7;
|
1088
|
+
const size_t splatChar = tag * x01;
|
1089
|
+
ZSTD_VecMask matches = 0;
|
1090
|
+
int i = rowEntries - chunkSize;
|
1091
|
+
assert((sizeof(size_t) == 4) || (sizeof(size_t) == 8));
|
1092
|
+
if (MEM_isLittleEndian()) { /* runtime check so have two loops */
|
1093
|
+
const size_t extractMagic = (xFF / 0x7F) >> chunkSize;
|
1094
|
+
do {
|
1095
|
+
size_t chunk = MEM_readST(&src[i]);
|
1096
|
+
chunk ^= splatChar;
|
1097
|
+
chunk = (((chunk | x80) - x01) | chunk) & x80;
|
1098
|
+
matches <<= chunkSize;
|
1099
|
+
matches |= (chunk * extractMagic) >> shiftAmount;
|
1100
|
+
i -= chunkSize;
|
1101
|
+
} while (i >= 0);
|
1102
|
+
} else { /* big endian: reverse bits during extraction */
|
1103
|
+
const size_t msb = xFF ^ (xFF >> 1);
|
1104
|
+
const size_t extractMagic = (msb / 0x1FF) | msb;
|
1105
|
+
do {
|
1106
|
+
size_t chunk = MEM_readST(&src[i]);
|
1107
|
+
chunk ^= splatChar;
|
1108
|
+
chunk = (((chunk | x80) - x01) | chunk) & x80;
|
1109
|
+
matches <<= chunkSize;
|
1110
|
+
matches |= ((chunk >> 7) * extractMagic) >> shiftAmount;
|
1111
|
+
i -= chunkSize;
|
1112
|
+
} while (i >= 0);
|
1113
|
+
}
|
1114
|
+
matches = ~matches;
|
1115
|
+
if (rowEntries == 16) {
|
1116
|
+
return ZSTD_rotateRight_U16((U16)matches, headGrouped);
|
1117
|
+
} else if (rowEntries == 32) {
|
1118
|
+
return ZSTD_rotateRight_U32((U32)matches, headGrouped);
|
1119
|
+
} else {
|
1120
|
+
return ZSTD_rotateRight_U64((U64)matches, headGrouped);
|
1121
|
+
}
|
1122
|
+
}
|
1123
|
+
#endif
|
1124
|
+
}
|
1125
|
+
|
1126
|
+
/* The high-level approach of the SIMD row based match finder is as follows:
|
1127
|
+
* - Figure out where to insert the new entry:
|
1128
|
+
* - Generate a hash for the current input position and split it into a one-byte tag and `rowHashLog` bits of index.
|
1129
|
+
* - The hash is salted by a value that changes on every context reset, so when the same table is used
|
1130
|
+
* we will avoid collisions that would otherwise slow us down by introducing phantom matches.
|
1131
|
+
* - The hashTable is effectively split into groups or "rows" of 15, 31 or 63 entries of U32, and the index determines
|
1132
|
+
* which row to insert into.
|
1133
|
+
* - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
|
1134
|
+
* be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16, 32 or 64 bytes
|
1135
|
+
* per row).
|
1136
|
+
* - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
|
1137
|
+
* generate a bitfield that we can cycle through to check the collisions in the hash table.
|
1138
|
+
* - Pick the longest match.
|
1139
|
+
* - Insert the tag into the equivalent row and position in the tagTable.
|
1140
|
+
*/
|
1141
|
+
FORCE_INLINE_TEMPLATE
|
1142
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1143
|
+
size_t ZSTD_RowFindBestMatch(
|
606
1144
|
ZSTD_matchState_t* ms,
|
607
|
-
const BYTE* ip, const BYTE* const iLimit,
|
608
|
-
size_t* offsetPtr
|
1145
|
+
const BYTE* const ip, const BYTE* const iLimit,
|
1146
|
+
size_t* offsetPtr,
|
1147
|
+
const U32 mls, const ZSTD_dictMode_e dictMode,
|
1148
|
+
const U32 rowLog)
|
609
1149
|
{
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
1150
|
+
U32* const hashTable = ms->hashTable;
|
1151
|
+
BYTE* const tagTable = ms->tagTable;
|
1152
|
+
U32* const hashCache = ms->hashCache;
|
1153
|
+
const U32 hashLog = ms->rowHashLog;
|
1154
|
+
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
1155
|
+
const BYTE* const base = ms->window.base;
|
1156
|
+
const BYTE* const dictBase = ms->window.dictBase;
|
1157
|
+
const U32 dictLimit = ms->window.dictLimit;
|
1158
|
+
const BYTE* const prefixStart = base + dictLimit;
|
1159
|
+
const BYTE* const dictEnd = dictBase + dictLimit;
|
1160
|
+
const U32 curr = (U32)(ip-base);
|
1161
|
+
const U32 maxDistance = 1U << cParams->windowLog;
|
1162
|
+
const U32 lowestValid = ms->window.lowLimit;
|
1163
|
+
const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
1164
|
+
const U32 isDictionary = (ms->loadedDictEnd != 0);
|
1165
|
+
const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
|
1166
|
+
const U32 rowEntries = (1U << rowLog);
|
1167
|
+
const U32 rowMask = rowEntries - 1;
|
1168
|
+
const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
|
1169
|
+
const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
|
1170
|
+
const U64 hashSalt = ms->hashSalt;
|
1171
|
+
U32 nbAttempts = 1U << cappedSearchLog;
|
1172
|
+
size_t ml=4-1;
|
1173
|
+
U32 hash;
|
1174
|
+
|
1175
|
+
/* DMS/DDS variables that may be referenced later */
|
1176
|
+
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
1177
|
+
|
1178
|
+
/* Initialize the following variables to satisfy static analyzer */
|
1179
|
+
size_t ddsIdx = 0;
|
1180
|
+
U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
|
1181
|
+
U32 dmsTag = 0;
|
1182
|
+
U32* dmsRow = NULL;
|
1183
|
+
BYTE* dmsTagRow = NULL;
|
1184
|
+
|
1185
|
+
if (dictMode == ZSTD_dedicatedDictSearch) {
|
1186
|
+
const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
|
1187
|
+
{ /* Prefetch DDS hashtable entry */
|
1188
|
+
ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
|
1189
|
+
PREFETCH_L1(&dms->hashTable[ddsIdx]);
|
1190
|
+
}
|
1191
|
+
ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
|
617
1192
|
}
|
1193
|
+
|
1194
|
+
if (dictMode == ZSTD_dictMatchState) {
|
1195
|
+
/* Prefetch DMS rows */
|
1196
|
+
U32* const dmsHashTable = dms->hashTable;
|
1197
|
+
BYTE* const dmsTagTable = dms->tagTable;
|
1198
|
+
U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
|
1199
|
+
U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
|
1200
|
+
dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
|
1201
|
+
dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
|
1202
|
+
dmsRow = dmsHashTable + dmsRelRow;
|
1203
|
+
ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
|
1204
|
+
}
|
1205
|
+
|
1206
|
+
/* Update the hashTable and tagTable up to (but not including) ip */
|
1207
|
+
if (!ms->lazySkipping) {
|
1208
|
+
ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
|
1209
|
+
hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
|
1210
|
+
} else {
|
1211
|
+
/* Stop inserting every position when in the lazy skipping mode.
|
1212
|
+
* The hash cache is also not kept up to date in this mode.
|
1213
|
+
*/
|
1214
|
+
hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
|
1215
|
+
ms->nextToUpdate = curr;
|
1216
|
+
}
|
1217
|
+
ms->hashSaltEntropy += hash; /* collect salt entropy */
|
1218
|
+
|
1219
|
+
{ /* Get the hash for ip, compute the appropriate row */
|
1220
|
+
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
|
1221
|
+
U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
|
1222
|
+
U32* const row = hashTable + relRow;
|
1223
|
+
BYTE* tagRow = (BYTE*)(tagTable + relRow);
|
1224
|
+
U32 const headGrouped = (*tagRow & rowMask) * groupWidth;
|
1225
|
+
U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
|
1226
|
+
size_t numMatches = 0;
|
1227
|
+
size_t currMatch = 0;
|
1228
|
+
ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
|
1229
|
+
|
1230
|
+
/* Cycle through the matches and prefetch */
|
1231
|
+
for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
|
1232
|
+
U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
|
1233
|
+
U32 const matchIndex = row[matchPos];
|
1234
|
+
if(matchPos == 0) continue;
|
1235
|
+
assert(numMatches < rowEntries);
|
1236
|
+
if (matchIndex < lowLimit)
|
1237
|
+
break;
|
1238
|
+
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
|
1239
|
+
PREFETCH_L1(base + matchIndex);
|
1240
|
+
} else {
|
1241
|
+
PREFETCH_L1(dictBase + matchIndex);
|
1242
|
+
}
|
1243
|
+
matchBuffer[numMatches++] = matchIndex;
|
1244
|
+
--nbAttempts;
|
1245
|
+
}
|
1246
|
+
|
1247
|
+
/* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
|
1248
|
+
in ZSTD_row_update_internal() at the next search. */
|
1249
|
+
{
|
1250
|
+
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
|
1251
|
+
tagRow[pos] = (BYTE)tag;
|
1252
|
+
row[pos] = ms->nextToUpdate++;
|
1253
|
+
}
|
1254
|
+
|
1255
|
+
/* Return the longest match */
|
1256
|
+
for (; currMatch < numMatches; ++currMatch) {
|
1257
|
+
U32 const matchIndex = matchBuffer[currMatch];
|
1258
|
+
size_t currentMl=0;
|
1259
|
+
assert(matchIndex < curr);
|
1260
|
+
assert(matchIndex >= lowLimit);
|
1261
|
+
|
1262
|
+
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
|
1263
|
+
const BYTE* const match = base + matchIndex;
|
1264
|
+
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
|
1265
|
+
/* read 4B starting from (match + ml + 1 - sizeof(U32)) */
|
1266
|
+
if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
|
1267
|
+
currentMl = ZSTD_count(ip, match, iLimit);
|
1268
|
+
} else {
|
1269
|
+
const BYTE* const match = dictBase + matchIndex;
|
1270
|
+
assert(match+4 <= dictEnd);
|
1271
|
+
if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
|
1272
|
+
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
|
1273
|
+
}
|
1274
|
+
|
1275
|
+
/* Save best solution */
|
1276
|
+
if (currentMl > ml) {
|
1277
|
+
ml = currentMl;
|
1278
|
+
*offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
|
1279
|
+
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
|
1280
|
+
}
|
1281
|
+
}
|
1282
|
+
}
|
1283
|
+
|
1284
|
+
assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
|
1285
|
+
if (dictMode == ZSTD_dedicatedDictSearch) {
|
1286
|
+
ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms,
|
1287
|
+
ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
|
1288
|
+
} else if (dictMode == ZSTD_dictMatchState) {
|
1289
|
+
/* TODO: Measure and potentially add prefetching to DMS */
|
1290
|
+
const U32 dmsLowestIndex = dms->window.dictLimit;
|
1291
|
+
const BYTE* const dmsBase = dms->window.base;
|
1292
|
+
const BYTE* const dmsEnd = dms->window.nextSrc;
|
1293
|
+
const U32 dmsSize = (U32)(dmsEnd - dmsBase);
|
1294
|
+
const U32 dmsIndexDelta = dictLimit - dmsSize;
|
1295
|
+
|
1296
|
+
{ U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth;
|
1297
|
+
U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
|
1298
|
+
size_t numMatches = 0;
|
1299
|
+
size_t currMatch = 0;
|
1300
|
+
ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
|
1301
|
+
|
1302
|
+
for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
|
1303
|
+
U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
|
1304
|
+
U32 const matchIndex = dmsRow[matchPos];
|
1305
|
+
if(matchPos == 0) continue;
|
1306
|
+
if (matchIndex < dmsLowestIndex)
|
1307
|
+
break;
|
1308
|
+
PREFETCH_L1(dmsBase + matchIndex);
|
1309
|
+
matchBuffer[numMatches++] = matchIndex;
|
1310
|
+
--nbAttempts;
|
1311
|
+
}
|
1312
|
+
|
1313
|
+
/* Return the longest match */
|
1314
|
+
for (; currMatch < numMatches; ++currMatch) {
|
1315
|
+
U32 const matchIndex = matchBuffer[currMatch];
|
1316
|
+
size_t currentMl=0;
|
1317
|
+
assert(matchIndex >= dmsLowestIndex);
|
1318
|
+
assert(matchIndex < curr);
|
1319
|
+
|
1320
|
+
{ const BYTE* const match = dmsBase + matchIndex;
|
1321
|
+
assert(match+4 <= dmsEnd);
|
1322
|
+
if (MEM_read32(match) == MEM_read32(ip))
|
1323
|
+
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
|
1324
|
+
}
|
1325
|
+
|
1326
|
+
if (currentMl > ml) {
|
1327
|
+
ml = currentMl;
|
1328
|
+
assert(curr > matchIndex + dmsIndexDelta);
|
1329
|
+
*offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
|
1330
|
+
if (ip+currentMl == iLimit) break;
|
1331
|
+
}
|
1332
|
+
}
|
1333
|
+
}
|
1334
|
+
}
|
1335
|
+
return ml;
|
618
1336
|
}
|
619
1337
|
|
620
1338
|
|
1339
|
+
/**
|
1340
|
+
* Generate search functions templated on (dictMode, mls, rowLog).
|
1341
|
+
* These functions are outlined for code size & compilation time.
|
1342
|
+
* ZSTD_searchMax() dispatches to the correct implementation function.
|
1343
|
+
*
|
1344
|
+
* TODO: The start of the search function involves loading and calculating a
|
1345
|
+
* bunch of constants from the ZSTD_matchState_t. These computations could be
|
1346
|
+
* done in an initialization function, and saved somewhere in the match state.
|
1347
|
+
* Then we could pass a pointer to the saved state instead of the match state,
|
1348
|
+
* and avoid duplicate computations.
|
1349
|
+
*
|
1350
|
+
* TODO: Move the match re-winding into searchMax. This improves compression
|
1351
|
+
* ratio, and unlocks further simplifications with the next TODO.
|
1352
|
+
*
|
1353
|
+
* TODO: Try moving the repcode search into searchMax. After the re-winding
|
1354
|
+
* and repcode search are in searchMax, there is no more logic in the match
|
1355
|
+
* finder loop that requires knowledge about the dictMode. So we should be
|
1356
|
+
* able to avoid force inlining it, and we can join the extDict loop with
|
1357
|
+
* the single segment loop. It should go in searchMax instead of its own
|
1358
|
+
* function to avoid having multiple virtual function calls per search.
|
1359
|
+
*/
|
1360
|
+
|
1361
|
+
#define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls
|
1362
|
+
#define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls
|
1363
|
+
#define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
|
1364
|
+
|
1365
|
+
#define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
|
1366
|
+
|
1367
|
+
#define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \
|
1368
|
+
ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \
|
1369
|
+
ZSTD_matchState_t* ms, \
|
1370
|
+
const BYTE* ip, const BYTE* const iLimit, \
|
1371
|
+
size_t* offBasePtr) \
|
1372
|
+
{ \
|
1373
|
+
assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
|
1374
|
+
return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \
|
1375
|
+
} \
|
1376
|
+
|
1377
|
+
#define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \
|
1378
|
+
ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \
|
1379
|
+
ZSTD_matchState_t* ms, \
|
1380
|
+
const BYTE* ip, const BYTE* const iLimit, \
|
1381
|
+
size_t* offsetPtr) \
|
1382
|
+
{ \
|
1383
|
+
assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
|
1384
|
+
return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
|
1385
|
+
} \
|
1386
|
+
|
1387
|
+
#define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \
|
1388
|
+
ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \
|
1389
|
+
ZSTD_matchState_t* ms, \
|
1390
|
+
const BYTE* ip, const BYTE* const iLimit, \
|
1391
|
+
size_t* offsetPtr) \
|
1392
|
+
{ \
|
1393
|
+
assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
|
1394
|
+
assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \
|
1395
|
+
return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
|
1396
|
+
} \
|
1397
|
+
|
1398
|
+
#define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \
|
1399
|
+
X(dictMode, mls, 4) \
|
1400
|
+
X(dictMode, mls, 5) \
|
1401
|
+
X(dictMode, mls, 6)
|
1402
|
+
|
1403
|
+
#define ZSTD_FOR_EACH_MLS_ROWLOG(X, dictMode) \
|
1404
|
+
ZSTD_FOR_EACH_ROWLOG(X, dictMode, 4) \
|
1405
|
+
ZSTD_FOR_EACH_ROWLOG(X, dictMode, 5) \
|
1406
|
+
ZSTD_FOR_EACH_ROWLOG(X, dictMode, 6)
|
1407
|
+
|
1408
|
+
#define ZSTD_FOR_EACH_MLS(X, dictMode) \
|
1409
|
+
X(dictMode, 4) \
|
1410
|
+
X(dictMode, 5) \
|
1411
|
+
X(dictMode, 6)
|
1412
|
+
|
1413
|
+
#define ZSTD_FOR_EACH_DICT_MODE(X, ...) \
|
1414
|
+
X(__VA_ARGS__, noDict) \
|
1415
|
+
X(__VA_ARGS__, extDict) \
|
1416
|
+
X(__VA_ARGS__, dictMatchState) \
|
1417
|
+
X(__VA_ARGS__, dedicatedDictSearch)
|
1418
|
+
|
1419
|
+
/* Generate row search fns for each combination of (dictMode, mls, rowLog) */
|
1420
|
+
ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN)
|
1421
|
+
/* Generate binary Tree search fns for each combination of (dictMode, mls) */
|
1422
|
+
ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN)
|
1423
|
+
/* Generate hash chain search fns for each combination of (dictMode, mls) */
|
1424
|
+
ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN)
|
1425
|
+
|
1426
|
+
typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
|
1427
|
+
|
1428
|
+
#define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \
|
1429
|
+
case mls: \
|
1430
|
+
return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
|
1431
|
+
#define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \
|
1432
|
+
case mls: \
|
1433
|
+
return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
|
1434
|
+
#define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \
|
1435
|
+
case rowLog: \
|
1436
|
+
return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
|
1437
|
+
|
1438
|
+
#define ZSTD_SWITCH_MLS(X, dictMode) \
|
1439
|
+
switch (mls) { \
|
1440
|
+
ZSTD_FOR_EACH_MLS(X, dictMode) \
|
1441
|
+
}
|
1442
|
+
|
1443
|
+
#define ZSTD_SWITCH_ROWLOG(dictMode, mls) \
|
1444
|
+
case mls: \
|
1445
|
+
switch (rowLog) { \
|
1446
|
+
ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \
|
1447
|
+
} \
|
1448
|
+
ZSTD_UNREACHABLE; \
|
1449
|
+
break;
|
1450
|
+
|
1451
|
+
#define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \
|
1452
|
+
switch (searchMethod) { \
|
1453
|
+
case search_hashChain: \
|
1454
|
+
ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
|
1455
|
+
break; \
|
1456
|
+
case search_binaryTree: \
|
1457
|
+
ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
|
1458
|
+
break; \
|
1459
|
+
case search_rowHash: \
|
1460
|
+
ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \
|
1461
|
+
break; \
|
1462
|
+
} \
|
1463
|
+
ZSTD_UNREACHABLE;
|
1464
|
+
|
1465
|
+
/**
|
1466
|
+
* Searches for the longest match at @p ip.
|
1467
|
+
* Dispatches to the correct implementation function based on the
|
1468
|
+
* (searchMethod, dictMode, mls, rowLog). We use switch statements
|
1469
|
+
* here instead of using an indirect function call through a function
|
1470
|
+
* pointer because after Spectre and Meltdown mitigations, indirect
|
1471
|
+
* function calls can be very costly, especially in the kernel.
|
1472
|
+
*
|
1473
|
+
* NOTE: dictMode and searchMethod should be templated, so those switch
|
1474
|
+
* statements should be optimized out. Only the mls & rowLog switches
|
1475
|
+
* should be left.
|
1476
|
+
*
|
1477
|
+
* @param ms The match state.
|
1478
|
+
* @param ip The position to search at.
|
1479
|
+
* @param iend The end of the input data.
|
1480
|
+
* @param[out] offsetPtr Stores the match offset into this pointer.
|
1481
|
+
* @param mls The minimum search length, in the range [4, 6].
|
1482
|
+
* @param rowLog The row log (if applicable), in the range [4, 6].
|
1483
|
+
* @param searchMethod The search method to use (templated).
|
1484
|
+
* @param dictMode The dictMode (templated).
|
1485
|
+
*
|
1486
|
+
* @returns The length of the longest match found, or < mls if no match is found.
|
1487
|
+
* If a match is found its offset is stored in @p offsetPtr.
|
1488
|
+
*/
|
1489
|
+
FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
|
1490
|
+
ZSTD_matchState_t* ms,
|
1491
|
+
const BYTE* ip,
|
1492
|
+
const BYTE* iend,
|
1493
|
+
size_t* offsetPtr,
|
1494
|
+
U32 const mls,
|
1495
|
+
U32 const rowLog,
|
1496
|
+
searchMethod_e const searchMethod,
|
1497
|
+
ZSTD_dictMode_e const dictMode)
|
1498
|
+
{
|
1499
|
+
if (dictMode == ZSTD_noDict) {
|
1500
|
+
ZSTD_SWITCH_SEARCH_METHOD(noDict)
|
1501
|
+
} else if (dictMode == ZSTD_extDict) {
|
1502
|
+
ZSTD_SWITCH_SEARCH_METHOD(extDict)
|
1503
|
+
} else if (dictMode == ZSTD_dictMatchState) {
|
1504
|
+
ZSTD_SWITCH_SEARCH_METHOD(dictMatchState)
|
1505
|
+
} else if (dictMode == ZSTD_dedicatedDictSearch) {
|
1506
|
+
ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch)
|
1507
|
+
}
|
1508
|
+
ZSTD_UNREACHABLE;
|
1509
|
+
return 0;
|
1510
|
+
}
|
1511
|
+
|
621
1512
|
/* *******************************
|
622
1513
|
* Common parser - lazy strategy
|
623
1514
|
*********************************/
|
624
|
-
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
|
625
1515
|
|
626
|
-
FORCE_INLINE_TEMPLATE
|
627
|
-
|
1516
|
+
FORCE_INLINE_TEMPLATE
|
1517
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1518
|
+
size_t ZSTD_compressBlock_lazy_generic(
|
628
1519
|
ZSTD_matchState_t* ms, seqStore_t* seqStore,
|
629
1520
|
U32 rep[ZSTD_REP_NUM],
|
630
1521
|
const void* src, size_t srcSize,
|
@@ -635,53 +1526,52 @@ ZSTD_compressBlock_lazy_generic(
|
|
635
1526
|
const BYTE* ip = istart;
|
636
1527
|
const BYTE* anchor = istart;
|
637
1528
|
const BYTE* const iend = istart + srcSize;
|
638
|
-
const BYTE* const ilimit = iend - 8;
|
1529
|
+
const BYTE* const ilimit = (searchMethod == search_rowHash) ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
|
639
1530
|
const BYTE* const base = ms->window.base;
|
640
1531
|
const U32 prefixLowestIndex = ms->window.dictLimit;
|
641
1532
|
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
1533
|
+
const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
|
1534
|
+
const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
|
642
1535
|
|
643
|
-
|
644
|
-
|
645
|
-
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
|
646
|
-
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
|
647
|
-
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
|
648
|
-
: ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
|
649
|
-
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
|
650
|
-
: ZSTD_HcFindBestMatch_selectMLS);
|
651
|
-
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
|
1536
|
+
U32 offset_1 = rep[0], offset_2 = rep[1];
|
1537
|
+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
|
652
1538
|
|
1539
|
+
const int isDMS = dictMode == ZSTD_dictMatchState;
|
1540
|
+
const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
|
1541
|
+
const int isDxS = isDMS || isDDS;
|
653
1542
|
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
654
|
-
const U32 dictLowestIndex =
|
655
|
-
|
656
|
-
const BYTE* const
|
657
|
-
|
658
|
-
const
|
659
|
-
dictBase + dictLowestIndex : NULL;
|
660
|
-
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
661
|
-
dms->window.nextSrc : NULL;
|
662
|
-
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
1543
|
+
const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
|
1544
|
+
const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
|
1545
|
+
const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
|
1546
|
+
const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
|
1547
|
+
const U32 dictIndexDelta = isDxS ?
|
663
1548
|
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
664
1549
|
0;
|
665
1550
|
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
|
666
1551
|
|
667
|
-
DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
|
668
|
-
|
669
|
-
/* init */
|
1552
|
+
DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
|
670
1553
|
ip += (dictAndPrefixLength == 0);
|
671
1554
|
if (dictMode == ZSTD_noDict) {
|
672
|
-
U32 const
|
673
|
-
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms,
|
674
|
-
U32 const maxRep =
|
675
|
-
if (offset_2 > maxRep)
|
676
|
-
if (offset_1 > maxRep)
|
1555
|
+
U32 const curr = (U32)(ip - base);
|
1556
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
|
1557
|
+
U32 const maxRep = curr - windowLow;
|
1558
|
+
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
|
1559
|
+
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
|
677
1560
|
}
|
678
|
-
if (
|
1561
|
+
if (isDxS) {
|
679
1562
|
/* dictMatchState repCode checks don't currently handle repCode == 0
|
680
1563
|
* disabling. */
|
681
1564
|
assert(offset_1 <= dictAndPrefixLength);
|
682
1565
|
assert(offset_2 <= dictAndPrefixLength);
|
683
1566
|
}
|
684
1567
|
|
1568
|
+
/* Reset the lazy skipping state */
|
1569
|
+
ms->lazySkipping = 0;
|
1570
|
+
|
1571
|
+
if (searchMethod == search_rowHash) {
|
1572
|
+
ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
|
1573
|
+
}
|
1574
|
+
|
685
1575
|
/* Match Loop */
|
686
1576
|
#if defined(__GNUC__) && defined(__x86_64__)
|
687
1577
|
/* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
|
@@ -691,13 +1581,14 @@ ZSTD_compressBlock_lazy_generic(
|
|
691
1581
|
#endif
|
692
1582
|
while (ip < ilimit) {
|
693
1583
|
size_t matchLength=0;
|
694
|
-
size_t
|
1584
|
+
size_t offBase = REPCODE1_TO_OFFBASE;
|
695
1585
|
const BYTE* start=ip+1;
|
1586
|
+
DEBUGLOG(7, "search baseline (depth 0)");
|
696
1587
|
|
697
1588
|
/* check repCode */
|
698
|
-
if (
|
1589
|
+
if (isDxS) {
|
699
1590
|
const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
|
700
|
-
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
1591
|
+
const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
|
701
1592
|
&& repIndex < prefixLowestIndex) ?
|
702
1593
|
dictBase + (repIndex - dictIndexDelta) :
|
703
1594
|
base + repIndex;
|
@@ -715,30 +1606,40 @@ ZSTD_compressBlock_lazy_generic(
|
|
715
1606
|
}
|
716
1607
|
|
717
1608
|
/* first search (depth 0) */
|
718
|
-
{ size_t
|
719
|
-
size_t const ml2 =
|
1609
|
+
{ size_t offbaseFound = 999999999;
|
1610
|
+
size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
|
720
1611
|
if (ml2 > matchLength)
|
721
|
-
matchLength = ml2, start = ip,
|
1612
|
+
matchLength = ml2, start = ip, offBase = offbaseFound;
|
722
1613
|
}
|
723
1614
|
|
724
1615
|
if (matchLength < 4) {
|
725
|
-
|
1616
|
+
size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */;
|
1617
|
+
ip += step;
|
1618
|
+
/* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
|
1619
|
+
* In this mode we stop inserting every position into our tables, and only insert
|
1620
|
+
* positions that we search, which is one in step positions.
|
1621
|
+
* The exact cutoff is flexible, I've just chosen a number that is reasonably high,
|
1622
|
+
* so we minimize the compression ratio loss in "normal" scenarios. This mode gets
|
1623
|
+
* triggered once we've gone 2KB without finding any matches.
|
1624
|
+
*/
|
1625
|
+
ms->lazySkipping = step > kLazySkippingStep;
|
726
1626
|
continue;
|
727
1627
|
}
|
728
1628
|
|
729
1629
|
/* let's try to find a better solution */
|
730
1630
|
if (depth>=1)
|
731
1631
|
while (ip<ilimit) {
|
1632
|
+
DEBUGLOG(7, "search depth 1");
|
732
1633
|
ip ++;
|
733
1634
|
if ( (dictMode == ZSTD_noDict)
|
734
|
-
&& (
|
1635
|
+
&& (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
|
735
1636
|
size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
|
736
1637
|
int const gain2 = (int)(mlRep * 3);
|
737
|
-
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)
|
1638
|
+
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
|
738
1639
|
if ((mlRep >= 4) && (gain2 > gain1))
|
739
|
-
matchLength = mlRep,
|
1640
|
+
matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
|
740
1641
|
}
|
741
|
-
if (
|
1642
|
+
if (isDxS) {
|
742
1643
|
const U32 repIndex = (U32)(ip - base) - offset_1;
|
743
1644
|
const BYTE* repMatch = repIndex < prefixLowestIndex ?
|
744
1645
|
dictBase + (repIndex - dictIndexDelta) :
|
@@ -748,32 +1649,33 @@ ZSTD_compressBlock_lazy_generic(
|
|
748
1649
|
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
749
1650
|
size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
750
1651
|
int const gain2 = (int)(mlRep * 3);
|
751
|
-
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)
|
1652
|
+
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
|
752
1653
|
if ((mlRep >= 4) && (gain2 > gain1))
|
753
|
-
matchLength = mlRep,
|
1654
|
+
matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
|
754
1655
|
}
|
755
1656
|
}
|
756
|
-
{ size_t
|
757
|
-
size_t const ml2 =
|
758
|
-
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
|
759
|
-
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
|
1657
|
+
{ size_t ofbCandidate=999999999;
|
1658
|
+
size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
|
1659
|
+
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
|
1660
|
+
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
|
760
1661
|
if ((ml2 >= 4) && (gain2 > gain1)) {
|
761
|
-
matchLength = ml2,
|
1662
|
+
matchLength = ml2, offBase = ofbCandidate, start = ip;
|
762
1663
|
continue; /* search a better one */
|
763
1664
|
} }
|
764
1665
|
|
765
1666
|
/* let's find an even better one */
|
766
1667
|
if ((depth==2) && (ip<ilimit)) {
|
1668
|
+
DEBUGLOG(7, "search depth 2");
|
767
1669
|
ip ++;
|
768
1670
|
if ( (dictMode == ZSTD_noDict)
|
769
|
-
&& (
|
1671
|
+
&& (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
|
770
1672
|
size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
|
771
1673
|
int const gain2 = (int)(mlRep * 4);
|
772
|
-
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
|
1674
|
+
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
|
773
1675
|
if ((mlRep >= 4) && (gain2 > gain1))
|
774
|
-
matchLength = mlRep,
|
1676
|
+
matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
|
775
1677
|
}
|
776
|
-
if (
|
1678
|
+
if (isDxS) {
|
777
1679
|
const U32 repIndex = (U32)(ip - base) - offset_1;
|
778
1680
|
const BYTE* repMatch = repIndex < prefixLowestIndex ?
|
779
1681
|
dictBase + (repIndex - dictIndexDelta) :
|
@@ -783,64 +1685,69 @@ ZSTD_compressBlock_lazy_generic(
|
|
783
1685
|
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
784
1686
|
size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
785
1687
|
int const gain2 = (int)(mlRep * 4);
|
786
|
-
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
|
1688
|
+
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
|
787
1689
|
if ((mlRep >= 4) && (gain2 > gain1))
|
788
|
-
matchLength = mlRep,
|
1690
|
+
matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
|
789
1691
|
}
|
790
1692
|
}
|
791
|
-
{ size_t
|
792
|
-
size_t const ml2 =
|
793
|
-
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
|
794
|
-
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
|
1693
|
+
{ size_t ofbCandidate=999999999;
|
1694
|
+
size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
|
1695
|
+
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
|
1696
|
+
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
|
795
1697
|
if ((ml2 >= 4) && (gain2 > gain1)) {
|
796
|
-
matchLength = ml2,
|
1698
|
+
matchLength = ml2, offBase = ofbCandidate, start = ip;
|
797
1699
|
continue;
|
798
1700
|
} } }
|
799
1701
|
break; /* nothing found : store previous solution */
|
800
1702
|
}
|
801
1703
|
|
802
1704
|
/* NOTE:
|
803
|
-
* start[-
|
804
|
-
*
|
805
|
-
* overflows the pointer, which is undefined behavior.
|
1705
|
+
* Pay attention that `start[-value]` can lead to strange undefined behavior
|
1706
|
+
* notably if `value` is unsigned, resulting in a large positive `-value`.
|
806
1707
|
*/
|
807
1708
|
/* catch up */
|
808
|
-
if (
|
1709
|
+
if (OFFBASE_IS_OFFSET(offBase)) {
|
809
1710
|
if (dictMode == ZSTD_noDict) {
|
810
|
-
while ( ((start > anchor) & (start - (
|
811
|
-
&& (start[-1] == (start-(
|
1711
|
+
while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest))
|
1712
|
+
&& (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) ) /* only search for offset within prefix */
|
812
1713
|
{ start--; matchLength++; }
|
813
1714
|
}
|
814
|
-
if (
|
815
|
-
U32 const matchIndex = (U32)((start-base) - (
|
1715
|
+
if (isDxS) {
|
1716
|
+
U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
|
816
1717
|
const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
|
817
1718
|
const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
|
818
1719
|
while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
|
819
1720
|
}
|
820
|
-
offset_2 = offset_1; offset_1 = (U32)(
|
1721
|
+
offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
|
821
1722
|
}
|
822
1723
|
/* store sequence */
|
823
1724
|
_storeSequence:
|
824
|
-
{ size_t const litLength = start - anchor;
|
825
|
-
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)
|
1725
|
+
{ size_t const litLength = (size_t)(start - anchor);
|
1726
|
+
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
|
826
1727
|
anchor = ip = start + matchLength;
|
827
1728
|
}
|
1729
|
+
if (ms->lazySkipping) {
|
1730
|
+
/* We've found a match, disable lazy skipping mode, and refill the hash cache. */
|
1731
|
+
if (searchMethod == search_rowHash) {
|
1732
|
+
ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
|
1733
|
+
}
|
1734
|
+
ms->lazySkipping = 0;
|
1735
|
+
}
|
828
1736
|
|
829
1737
|
/* check immediate repcode */
|
830
|
-
if (
|
1738
|
+
if (isDxS) {
|
831
1739
|
while (ip <= ilimit) {
|
832
1740
|
U32 const current2 = (U32)(ip-base);
|
833
1741
|
U32 const repIndex = current2 - offset_2;
|
834
|
-
const BYTE* repMatch =
|
835
|
-
&& repIndex < prefixLowestIndex ?
|
1742
|
+
const BYTE* repMatch = repIndex < prefixLowestIndex ?
|
836
1743
|
dictBase - dictIndexDelta + repIndex :
|
837
1744
|
base + repIndex;
|
838
1745
|
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
|
839
1746
|
&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {
|
840
1747
|
const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
|
841
1748
|
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
|
842
|
-
|
843
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
1749
|
+
offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset_2 <=> offset_1 */
|
1750
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
|
844
1751
|
ip += matchLength;
|
845
1752
|
anchor = ip;
|
846
1753
|
continue;
|
@@ -854,36 +1761,72 @@ _storeSequence:
|
|
854
1761
|
&& (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
|
855
1762
|
/* store sequence */
|
856
1763
|
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
857
|
-
|
858
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
1764
|
+
offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */
|
1765
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
|
859
1766
|
ip += matchLength;
|
860
1767
|
anchor = ip;
|
861
1768
|
continue; /* faster when present ... (?) */
|
862
1769
|
} } }
|
863
1770
|
|
864
|
-
/*
|
865
|
-
|
866
|
-
|
1771
|
+
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
|
1772
|
+
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
|
1773
|
+
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
|
1774
|
+
|
1775
|
+
/* save reps for next block */
|
1776
|
+
rep[0] = offset_1 ? offset_1 : offsetSaved1;
|
1777
|
+
rep[1] = offset_2 ? offset_2 : offsetSaved2;
|
867
1778
|
|
868
1779
|
/* Return the last literals size */
|
869
1780
|
return (size_t)(iend - anchor);
|
870
1781
|
}
|
1782
|
+
#endif /* build exclusions */
|
871
1783
|
|
872
1784
|
|
873
|
-
|
1785
|
+
#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
|
1786
|
+
size_t ZSTD_compressBlock_greedy(
|
874
1787
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
875
1788
|
void const* src, size_t srcSize)
|
876
1789
|
{
|
877
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
1790
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
|
878
1791
|
}
|
879
1792
|
|
880
|
-
size_t
|
1793
|
+
size_t ZSTD_compressBlock_greedy_dictMatchState(
|
881
1794
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
882
1795
|
void const* src, size_t srcSize)
|
883
1796
|
{
|
884
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain,
|
1797
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
|
1798
|
+
}
|
1799
|
+
|
1800
|
+
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
|
1801
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1802
|
+
void const* src, size_t srcSize)
|
1803
|
+
{
|
1804
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
|
885
1805
|
}
|
886
1806
|
|
1807
|
+
size_t ZSTD_compressBlock_greedy_row(
|
1808
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1809
|
+
void const* src, size_t srcSize)
|
1810
|
+
{
|
1811
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
|
1812
|
+
}
|
1813
|
+
|
1814
|
+
size_t ZSTD_compressBlock_greedy_dictMatchState_row(
|
1815
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1816
|
+
void const* src, size_t srcSize)
|
1817
|
+
{
|
1818
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
|
1819
|
+
}
|
1820
|
+
|
1821
|
+
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
|
1822
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1823
|
+
void const* src, size_t srcSize)
|
1824
|
+
{
|
1825
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
|
1826
|
+
}
|
1827
|
+
#endif
|
1828
|
+
|
1829
|
+
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
|
887
1830
|
size_t ZSTD_compressBlock_lazy(
|
888
1831
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
889
1832
|
void const* src, size_t srcSize)
|
@@ -891,18 +1834,48 @@ size_t ZSTD_compressBlock_lazy(
|
|
891
1834
|
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
|
892
1835
|
}
|
893
1836
|
|
894
|
-
size_t
|
1837
|
+
size_t ZSTD_compressBlock_lazy_dictMatchState(
|
895
1838
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
896
1839
|
void const* src, size_t srcSize)
|
897
1840
|
{
|
898
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain,
|
1841
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
|
899
1842
|
}
|
900
1843
|
|
901
|
-
size_t
|
1844
|
+
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
|
902
1845
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
903
1846
|
void const* src, size_t srcSize)
|
904
1847
|
{
|
905
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
1848
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
|
1849
|
+
}
|
1850
|
+
|
1851
|
+
size_t ZSTD_compressBlock_lazy_row(
|
1852
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1853
|
+
void const* src, size_t srcSize)
|
1854
|
+
{
|
1855
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
|
1856
|
+
}
|
1857
|
+
|
1858
|
+
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
|
1859
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1860
|
+
void const* src, size_t srcSize)
|
1861
|
+
{
|
1862
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
|
1863
|
+
}
|
1864
|
+
|
1865
|
+
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
|
1866
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1867
|
+
void const* src, size_t srcSize)
|
1868
|
+
{
|
1869
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
|
1870
|
+
}
|
1871
|
+
#endif
|
1872
|
+
|
1873
|
+
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
|
1874
|
+
size_t ZSTD_compressBlock_lazy2(
|
1875
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1876
|
+
void const* src, size_t srcSize)
|
1877
|
+
{
|
1878
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
|
906
1879
|
}
|
907
1880
|
|
908
1881
|
size_t ZSTD_compressBlock_lazy2_dictMatchState(
|
@@ -912,22 +1885,57 @@ size_t ZSTD_compressBlock_lazy2_dictMatchState(
|
|
912
1885
|
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
|
913
1886
|
}
|
914
1887
|
|
915
|
-
size_t
|
1888
|
+
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
|
916
1889
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
917
1890
|
void const* src, size_t srcSize)
|
918
1891
|
{
|
919
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain,
|
1892
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
|
920
1893
|
}
|
921
1894
|
|
922
|
-
size_t
|
1895
|
+
size_t ZSTD_compressBlock_lazy2_row(
|
923
1896
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
924
1897
|
void const* src, size_t srcSize)
|
925
1898
|
{
|
926
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
1899
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
|
1900
|
+
}
|
1901
|
+
|
1902
|
+
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
|
1903
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1904
|
+
void const* src, size_t srcSize)
|
1905
|
+
{
|
1906
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
|
1907
|
+
}
|
1908
|
+
|
1909
|
+
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
|
1910
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1911
|
+
void const* src, size_t srcSize)
|
1912
|
+
{
|
1913
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
|
927
1914
|
}
|
1915
|
+
#endif
|
928
1916
|
|
1917
|
+
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
|
1918
|
+
size_t ZSTD_compressBlock_btlazy2(
|
1919
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1920
|
+
void const* src, size_t srcSize)
|
1921
|
+
{
|
1922
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
|
1923
|
+
}
|
929
1924
|
|
1925
|
+
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
|
1926
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1927
|
+
void const* src, size_t srcSize)
|
1928
|
+
{
|
1929
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
|
1930
|
+
}
|
1931
|
+
#endif
|
1932
|
+
|
1933
|
+
#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
|
1934
|
+
|| !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
|
1935
|
+
|| !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
|
1936
|
+
|| !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
|
930
1937
|
FORCE_INLINE_TEMPLATE
|
1938
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
931
1939
|
size_t ZSTD_compressBlock_lazy_extDict_generic(
|
932
1940
|
ZSTD_matchState_t* ms, seqStore_t* seqStore,
|
933
1941
|
U32 rep[ZSTD_REP_NUM],
|
@@ -938,7 +1946,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
938
1946
|
const BYTE* ip = istart;
|
939
1947
|
const BYTE* anchor = istart;
|
940
1948
|
const BYTE* const iend = istart + srcSize;
|
941
|
-
const BYTE* const ilimit = iend - 8;
|
1949
|
+
const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
|
942
1950
|
const BYTE* const base = ms->window.base;
|
943
1951
|
const U32 dictLimit = ms->window.dictLimit;
|
944
1952
|
const BYTE* const prefixStart = base + dictLimit;
|
@@ -946,18 +1954,21 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
946
1954
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
947
1955
|
const BYTE* const dictStart = dictBase + ms->window.lowLimit;
|
948
1956
|
const U32 windowLog = ms->cParams.windowLog;
|
949
|
-
|
950
|
-
|
951
|
-
ZSTD_matchState_t* ms,
|
952
|
-
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
|
953
|
-
searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
|
1957
|
+
const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
|
1958
|
+
const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
|
954
1959
|
|
955
1960
|
U32 offset_1 = rep[0], offset_2 = rep[1];
|
956
1961
|
|
957
|
-
DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
|
1962
|
+
DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
|
1963
|
+
|
1964
|
+
/* Reset the lazy skipping state */
|
1965
|
+
ms->lazySkipping = 0;
|
958
1966
|
|
959
1967
|
/* init */
|
960
1968
|
ip += (ip == prefixStart);
|
1969
|
+
if (searchMethod == search_rowHash) {
|
1970
|
+
ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
|
1971
|
+
}
|
961
1972
|
|
962
1973
|
/* Match Loop */
|
963
1974
|
#if defined(__GNUC__) && defined(__x86_64__)
|
@@ -968,16 +1979,17 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
968
1979
|
#endif
|
969
1980
|
while (ip < ilimit) {
|
970
1981
|
size_t matchLength=0;
|
971
|
-
size_t
|
1982
|
+
size_t offBase = REPCODE1_TO_OFFBASE;
|
972
1983
|
const BYTE* start=ip+1;
|
973
|
-
U32
|
1984
|
+
U32 curr = (U32)(ip-base);
|
974
1985
|
|
975
1986
|
/* check repCode */
|
976
|
-
{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms,
|
977
|
-
const U32 repIndex = (U32)(
|
1987
|
+
{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
|
1988
|
+
const U32 repIndex = (U32)(curr+1 - offset_1);
|
978
1989
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
979
1990
|
const BYTE* const repMatch = repBase + repIndex;
|
980
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3)
|
1991
|
+
if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
|
1992
|
+
& (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
|
981
1993
|
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
|
982
1994
|
/* repcode detected we should take it */
|
983
1995
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
@@ -986,14 +1998,23 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
986
1998
|
} }
|
987
1999
|
|
988
2000
|
/* first search (depth 0) */
|
989
|
-
{ size_t
|
990
|
-
size_t const ml2 =
|
2001
|
+
{ size_t ofbCandidate = 999999999;
|
2002
|
+
size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
|
991
2003
|
if (ml2 > matchLength)
|
992
|
-
matchLength = ml2, start = ip,
|
2004
|
+
matchLength = ml2, start = ip, offBase = ofbCandidate;
|
993
2005
|
}
|
994
2006
|
|
995
|
-
|
996
|
-
|
2007
|
+
if (matchLength < 4) {
|
2008
|
+
size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
|
2009
|
+
ip += step + 1; /* jump faster over incompressible sections */
|
2010
|
+
/* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
|
2011
|
+
* In this mode we stop inserting every position into our tables, and only insert
|
2012
|
+
* positions that we search, which is one in step positions.
|
2013
|
+
* The exact cutoff is flexible, I've just chosen a number that is reasonably high,
|
2014
|
+
* so we minimize the compression ratio loss in "normal" scenarios. This mode gets
|
2015
|
+
* triggered once we've gone 2KB without finding any matches.
|
2016
|
+
*/
|
2017
|
+
ms->lazySkipping = step > kLazySkippingStep;
|
997
2018
|
continue;
|
998
2019
|
}
|
999
2020
|
|
@@ -1001,82 +2022,91 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
1001
2022
|
if (depth>=1)
|
1002
2023
|
while (ip<ilimit) {
|
1003
2024
|
ip ++;
|
1004
|
-
|
2025
|
+
curr++;
|
1005
2026
|
/* check repCode */
|
1006
|
-
if (
|
1007
|
-
const U32 windowLow = ZSTD_getLowestMatchIndex(ms,
|
1008
|
-
const U32 repIndex = (U32)(
|
2027
|
+
if (offBase) {
|
2028
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
|
2029
|
+
const U32 repIndex = (U32)(curr - offset_1);
|
1009
2030
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
1010
2031
|
const BYTE* const repMatch = repBase + repIndex;
|
1011
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3)
|
2032
|
+
if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
|
2033
|
+
& (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
|
1012
2034
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
1013
2035
|
/* repcode detected */
|
1014
2036
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
1015
2037
|
size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
|
1016
2038
|
int const gain2 = (int)(repLength * 3);
|
1017
|
-
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)
|
2039
|
+
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
|
1018
2040
|
if ((repLength >= 4) && (gain2 > gain1))
|
1019
|
-
matchLength = repLength,
|
2041
|
+
matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
|
1020
2042
|
} }
|
1021
2043
|
|
1022
2044
|
/* search match, depth 1 */
|
1023
|
-
{ size_t
|
1024
|
-
size_t const ml2 =
|
1025
|
-
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
|
1026
|
-
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
|
2045
|
+
{ size_t ofbCandidate = 999999999;
|
2046
|
+
size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
|
2047
|
+
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
|
2048
|
+
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
|
1027
2049
|
if ((ml2 >= 4) && (gain2 > gain1)) {
|
1028
|
-
matchLength = ml2,
|
2050
|
+
matchLength = ml2, offBase = ofbCandidate, start = ip;
|
1029
2051
|
continue; /* search a better one */
|
1030
2052
|
} }
|
1031
2053
|
|
1032
2054
|
/* let's find an even better one */
|
1033
2055
|
if ((depth==2) && (ip<ilimit)) {
|
1034
2056
|
ip ++;
|
1035
|
-
|
2057
|
+
curr++;
|
1036
2058
|
/* check repCode */
|
1037
|
-
if (
|
1038
|
-
const U32 windowLow = ZSTD_getLowestMatchIndex(ms,
|
1039
|
-
const U32 repIndex = (U32)(
|
2059
|
+
if (offBase) {
|
2060
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
|
2061
|
+
const U32 repIndex = (U32)(curr - offset_1);
|
1040
2062
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
1041
2063
|
const BYTE* const repMatch = repBase + repIndex;
|
1042
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3)
|
2064
|
+
if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
|
2065
|
+
& (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
|
1043
2066
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
1044
2067
|
/* repcode detected */
|
1045
2068
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
1046
2069
|
size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
|
1047
2070
|
int const gain2 = (int)(repLength * 4);
|
1048
|
-
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
|
2071
|
+
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
|
1049
2072
|
if ((repLength >= 4) && (gain2 > gain1))
|
1050
|
-
matchLength = repLength,
|
2073
|
+
matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
|
1051
2074
|
} }
|
1052
2075
|
|
1053
2076
|
/* search match, depth 2 */
|
1054
|
-
{ size_t
|
1055
|
-
size_t const ml2 =
|
1056
|
-
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)
|
1057
|
-
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)
|
2077
|
+
{ size_t ofbCandidate = 999999999;
|
2078
|
+
size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
|
2079
|
+
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
|
2080
|
+
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
|
1058
2081
|
if ((ml2 >= 4) && (gain2 > gain1)) {
|
1059
|
-
matchLength = ml2,
|
2082
|
+
matchLength = ml2, offBase = ofbCandidate, start = ip;
|
1060
2083
|
continue;
|
1061
2084
|
} } }
|
1062
2085
|
break; /* nothing found : store previous solution */
|
1063
2086
|
}
|
1064
2087
|
|
1065
2088
|
/* catch up */
|
1066
|
-
if (
|
1067
|
-
U32 const matchIndex = (U32)((start-base) - (
|
2089
|
+
if (OFFBASE_IS_OFFSET(offBase)) {
|
2090
|
+
U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
|
1068
2091
|
const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
|
1069
2092
|
const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
|
1070
2093
|
while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
|
1071
|
-
offset_2 = offset_1; offset_1 = (U32)(
|
2094
|
+
offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
|
1072
2095
|
}
|
1073
2096
|
|
1074
2097
|
/* store sequence */
|
1075
2098
|
_storeSequence:
|
1076
|
-
{ size_t const litLength = start - anchor;
|
1077
|
-
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)
|
2099
|
+
{ size_t const litLength = (size_t)(start - anchor);
|
2100
|
+
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
|
1078
2101
|
anchor = ip = start + matchLength;
|
1079
2102
|
}
|
2103
|
+
if (ms->lazySkipping) {
|
2104
|
+
/* We've found a match, disable lazy skipping mode, and refill the hash cache. */
|
2105
|
+
if (searchMethod == search_rowHash) {
|
2106
|
+
ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
|
2107
|
+
}
|
2108
|
+
ms->lazySkipping = 0;
|
2109
|
+
}
|
1080
2110
|
|
1081
2111
|
/* check immediate repcode */
|
1082
2112
|
while (ip <= ilimit) {
|
@@ -1085,13 +2115,14 @@ _storeSequence:
|
|
1085
2115
|
const U32 repIndex = repCurrent - offset_2;
|
1086
2116
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
1087
2117
|
const BYTE* const repMatch = repBase + repIndex;
|
1088
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3)
|
2118
|
+
if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
|
2119
|
+
& (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
|
1089
2120
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
1090
2121
|
/* repcode detected we should take it */
|
1091
2122
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
1092
2123
|
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
|
1093
|
-
|
1094
|
-
ZSTD_storeSeq(seqStore, 0, anchor, iend,
|
2124
|
+
offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset history */
|
2125
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
|
1095
2126
|
ip += matchLength;
|
1096
2127
|
anchor = ip;
|
1097
2128
|
continue; /* faster when present ... (?) */
|
@@ -1106,8 +2137,9 @@ _storeSequence:
|
|
1106
2137
|
/* Return the last literals size */
|
1107
2138
|
return (size_t)(iend - anchor);
|
1108
2139
|
}
|
2140
|
+
#endif /* build exclusions */
|
1109
2141
|
|
1110
|
-
|
2142
|
+
#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
|
1111
2143
|
size_t ZSTD_compressBlock_greedy_extDict(
|
1112
2144
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1113
2145
|
void const* src, size_t srcSize)
|
@@ -1115,6 +2147,15 @@ size_t ZSTD_compressBlock_greedy_extDict(
|
|
1115
2147
|
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
|
1116
2148
|
}
|
1117
2149
|
|
2150
|
+
size_t ZSTD_compressBlock_greedy_extDict_row(
|
2151
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
2152
|
+
void const* src, size_t srcSize)
|
2153
|
+
{
|
2154
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
|
2155
|
+
}
|
2156
|
+
#endif
|
2157
|
+
|
2158
|
+
#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
|
1118
2159
|
size_t ZSTD_compressBlock_lazy_extDict(
|
1119
2160
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1120
2161
|
void const* src, size_t srcSize)
|
@@ -1123,6 +2164,16 @@ size_t ZSTD_compressBlock_lazy_extDict(
|
|
1123
2164
|
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
|
1124
2165
|
}
|
1125
2166
|
|
2167
|
+
size_t ZSTD_compressBlock_lazy_extDict_row(
|
2168
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
2169
|
+
void const* src, size_t srcSize)
|
2170
|
+
|
2171
|
+
{
|
2172
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
|
2173
|
+
}
|
2174
|
+
#endif
|
2175
|
+
|
2176
|
+
#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
|
1126
2177
|
size_t ZSTD_compressBlock_lazy2_extDict(
|
1127
2178
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1128
2179
|
void const* src, size_t srcSize)
|
@@ -1131,6 +2182,15 @@ size_t ZSTD_compressBlock_lazy2_extDict(
|
|
1131
2182
|
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
|
1132
2183
|
}
|
1133
2184
|
|
2185
|
+
size_t ZSTD_compressBlock_lazy2_extDict_row(
|
2186
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
2187
|
+
void const* src, size_t srcSize)
|
2188
|
+
{
|
2189
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
|
2190
|
+
}
|
2191
|
+
#endif
|
2192
|
+
|
2193
|
+
#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
|
1134
2194
|
size_t ZSTD_compressBlock_btlazy2_extDict(
|
1135
2195
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1136
2196
|
void const* src, size_t srcSize)
|
@@ -1138,5 +2198,6 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
|
|
1138
2198
|
{
|
1139
2199
|
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
|
1140
2200
|
}
|
2201
|
+
#endif
|
1141
2202
|
|
1142
|
-
}
|
2203
|
+
} // namespace duckdb_zstd
|