duckdb 1.1.2-dev6.0 → 1.1.4-dev11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1747):
  1. package/.github/workflows/NodeJS.yml +5 -54
  2. package/binding.gyp +73 -52
  3. package/package.json +2 -2
  4. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/avg.cpp +2 -2
  5. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/corr.cpp +4 -4
  6. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/covar.cpp +2 -2
  7. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/stddev.cpp +2 -2
  8. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/approx_count.cpp +1 -1
  9. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/arg_min_max.cpp +66 -18
  10. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitagg.cpp +1 -1
  11. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitstring_agg.cpp +5 -7
  12. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bool.cpp +3 -1
  13. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/kurtosis.cpp +1 -1
  14. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/product.cpp +1 -1
  15. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/skew.cpp +2 -2
  16. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/string_agg.cpp +1 -1
  17. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/sum.cpp +13 -2
  18. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approx_top_k.cpp +3 -3
  19. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approximate_quantile.cpp +51 -15
  20. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mad.cpp +25 -10
  21. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mode.cpp +215 -71
  22. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/quantile.cpp +58 -31
  23. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -2
  24. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/binned_histogram.cpp +9 -4
  25. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/histogram.cpp +4 -2
  26. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/list.cpp +1 -1
  27. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_avg.cpp +1 -1
  28. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_count.cpp +2 -2
  29. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_intercept.cpp +6 -2
  30. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_r2.cpp +2 -2
  31. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_slope.cpp +2 -2
  32. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxx_syy.cpp +2 -2
  33. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxy.cpp +3 -3
  34. package/src/duckdb/extension/core_functions/core_functions_extension.cpp +85 -0
  35. package/src/duckdb/{src → extension}/core_functions/function_list.cpp +30 -51
  36. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/corr.hpp +3 -7
  37. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic_functions.hpp +1 -1
  38. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/distributive_functions.hpp +16 -21
  39. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/histogram_helpers.hpp +1 -1
  40. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/holistic_functions.hpp +1 -1
  41. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/nested_functions.hpp +1 -1
  42. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_helpers.hpp +2 -2
  43. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_sort_tree.hpp +140 -58
  44. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_state.hpp +50 -43
  45. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_count.hpp +2 -2
  46. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_slope.hpp +3 -7
  47. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression_functions.hpp +1 -1
  48. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/array_functions.hpp +1 -1
  49. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/bit_functions.hpp +1 -1
  50. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/blob_functions.hpp +1 -10
  51. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/date_functions.hpp +22 -55
  52. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/debug_functions.hpp +1 -1
  53. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/enum_functions.hpp +1 -1
  54. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/generic_functions.hpp +1 -10
  55. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/list_functions.hpp +4 -4
  56. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/map_functions.hpp +1 -10
  57. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/math_functions.hpp +1 -1
  58. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/operators_functions.hpp +1 -1
  59. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/random_functions.hpp +1 -1
  60. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/string_functions.hpp +10 -103
  61. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/struct_functions.hpp +1 -19
  62. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/union_functions.hpp +1 -1
  63. package/src/duckdb/extension/core_functions/include/core_functions_extension.hpp +22 -0
  64. package/src/duckdb/{src → extension}/core_functions/lambda_functions.cpp +1 -1
  65. package/src/duckdb/{src → extension}/core_functions/scalar/array/array_functions.cpp +11 -4
  66. package/src/duckdb/{src → extension}/core_functions/scalar/array/array_value.cpp +2 -2
  67. package/src/duckdb/{src → extension}/core_functions/scalar/bit/bitstring.cpp +12 -5
  68. package/src/duckdb/{src → extension}/core_functions/scalar/blob/base64.cpp +4 -2
  69. package/src/duckdb/{src → extension}/core_functions/scalar/blob/encode.cpp +4 -2
  70. package/src/duckdb/{src → extension}/core_functions/scalar/date/age.cpp +9 -3
  71. package/src/duckdb/extension/core_functions/scalar/date/current.cpp +29 -0
  72. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_diff.cpp +1 -1
  73. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_part.cpp +42 -9
  74. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_sub.cpp +1 -1
  75. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_trunc.cpp +4 -1
  76. package/src/duckdb/{src → extension}/core_functions/scalar/date/epoch.cpp +19 -3
  77. package/src/duckdb/{src → extension}/core_functions/scalar/date/make_date.cpp +40 -5
  78. package/src/duckdb/{src → extension}/core_functions/scalar/date/time_bucket.cpp +4 -1
  79. package/src/duckdb/{src → extension}/core_functions/scalar/date/to_interval.cpp +54 -28
  80. package/src/duckdb/{src → extension}/core_functions/scalar/debug/vector_type.cpp +1 -1
  81. package/src/duckdb/{src → extension}/core_functions/scalar/enum/enum_functions.cpp +2 -7
  82. package/src/duckdb/{src → extension}/core_functions/scalar/generic/alias.cpp +2 -2
  83. package/src/duckdb/{src/function → extension/core_functions}/scalar/generic/binning.cpp +4 -3
  84. package/src/duckdb/{src → extension}/core_functions/scalar/generic/can_implicitly_cast.cpp +1 -1
  85. package/src/duckdb/{src → extension}/core_functions/scalar/generic/current_setting.cpp +1 -1
  86. package/src/duckdb/{src → extension}/core_functions/scalar/generic/hash.cpp +1 -1
  87. package/src/duckdb/{src → extension}/core_functions/scalar/generic/least.cpp +30 -10
  88. package/src/duckdb/{src → extension}/core_functions/scalar/generic/stats.cpp +1 -1
  89. package/src/duckdb/{src → extension}/core_functions/scalar/generic/system_functions.cpp +1 -1
  90. package/src/duckdb/{src → extension}/core_functions/scalar/generic/typeof.cpp +1 -1
  91. package/src/duckdb/{src → extension}/core_functions/scalar/list/array_slice.cpp +93 -88
  92. package/src/duckdb/{src → extension}/core_functions/scalar/list/flatten.cpp +1 -1
  93. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_aggregates.cpp +7 -3
  94. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_distance.cpp +8 -2
  95. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_filter.cpp +3 -3
  96. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_has_any_or_all.cpp +3 -3
  97. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_reduce.cpp +5 -5
  98. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_sort.cpp +1 -1
  99. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_transform.cpp +3 -3
  100. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_value.cpp +3 -3
  101. package/src/duckdb/{src → extension}/core_functions/scalar/list/range.cpp +7 -1
  102. package/src/duckdb/{src → extension}/core_functions/scalar/map/cardinality.cpp +1 -1
  103. package/src/duckdb/{src → extension}/core_functions/scalar/map/map.cpp +5 -4
  104. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_concat.cpp +1 -1
  105. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_entries.cpp +1 -1
  106. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_extract.cpp +13 -25
  107. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_from_entries.cpp +2 -1
  108. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_keys_values.cpp +11 -9
  109. package/src/duckdb/{src → extension}/core_functions/scalar/math/numeric.cpp +83 -37
  110. package/src/duckdb/{src → extension}/core_functions/scalar/operators/bitwise.cpp +19 -1
  111. package/src/duckdb/{src → extension}/core_functions/scalar/random/random.cpp +4 -3
  112. package/src/duckdb/{src → extension}/core_functions/scalar/random/setseed.cpp +2 -1
  113. package/src/duckdb/{src → extension}/core_functions/scalar/string/ascii.cpp +1 -1
  114. package/src/duckdb/{src → extension}/core_functions/scalar/string/bar.cpp +6 -4
  115. package/src/duckdb/{src → extension}/core_functions/scalar/string/chr.cpp +1 -1
  116. package/src/duckdb/{src → extension}/core_functions/scalar/string/damerau_levenshtein.cpp +1 -1
  117. package/src/duckdb/{src → extension}/core_functions/scalar/string/format_bytes.cpp +1 -1
  118. package/src/duckdb/{src → extension}/core_functions/scalar/string/hamming.cpp +1 -1
  119. package/src/duckdb/{src → extension}/core_functions/scalar/string/hex.cpp +7 -3
  120. package/src/duckdb/{src → extension}/core_functions/scalar/string/instr.cpp +4 -4
  121. package/src/duckdb/{src → extension}/core_functions/scalar/string/jaccard.cpp +1 -1
  122. package/src/duckdb/extension/core_functions/scalar/string/jaro_winkler.cpp +112 -0
  123. package/src/duckdb/{src → extension}/core_functions/scalar/string/left_right.cpp +6 -6
  124. package/src/duckdb/{src → extension}/core_functions/scalar/string/levenshtein.cpp +1 -1
  125. package/src/duckdb/{src → extension}/core_functions/scalar/string/pad.cpp +9 -5
  126. package/src/duckdb/{src → extension}/core_functions/scalar/string/parse_path.cpp +4 -4
  127. package/src/duckdb/{src → extension}/core_functions/scalar/string/printf.cpp +3 -1
  128. package/src/duckdb/{src → extension}/core_functions/scalar/string/repeat.cpp +4 -1
  129. package/src/duckdb/{src → extension}/core_functions/scalar/string/replace.cpp +1 -1
  130. package/src/duckdb/{src → extension}/core_functions/scalar/string/reverse.cpp +1 -1
  131. package/src/duckdb/{src → extension}/core_functions/scalar/string/starts_with.cpp +5 -3
  132. package/src/duckdb/{src → extension}/core_functions/scalar/string/to_base.cpp +1 -1
  133. package/src/duckdb/{src → extension}/core_functions/scalar/string/translate.cpp +1 -1
  134. package/src/duckdb/{src → extension}/core_functions/scalar/string/trim.cpp +1 -1
  135. package/src/duckdb/{src → extension}/core_functions/scalar/string/unicode.cpp +1 -1
  136. package/src/duckdb/{src → extension}/core_functions/scalar/string/url_encode.cpp +1 -1
  137. package/src/duckdb/{src → extension}/core_functions/scalar/struct/struct_insert.cpp +25 -31
  138. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_extract.cpp +1 -1
  139. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_tag.cpp +1 -1
  140. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_value.cpp +3 -3
  141. package/src/duckdb/extension/icu/icu-dateadd.cpp +16 -11
  142. package/src/duckdb/extension/icu/icu-datefunc.cpp +2 -2
  143. package/src/duckdb/extension/icu/icu-datepart.cpp +8 -5
  144. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  145. package/src/duckdb/extension/icu/icu-datetrunc.cpp +8 -1
  146. package/src/duckdb/extension/icu/icu-list-range.cpp +2 -2
  147. package/src/duckdb/extension/icu/icu-makedate.cpp +18 -7
  148. package/src/duckdb/extension/icu/icu-strptime.cpp +9 -3
  149. package/src/duckdb/extension/icu/icu-table-range.cpp +2 -2
  150. package/src/duckdb/extension/icu/icu-timebucket.cpp +4 -1
  151. package/src/duckdb/extension/icu/icu-timezone.cpp +67 -1
  152. package/src/duckdb/extension/icu/icu_extension.cpp +60 -5
  153. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -1
  154. package/src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp +1 -1
  155. package/src/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp +1 -1
  156. package/src/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp +1 -1
  157. package/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +1 -1
  158. package/src/duckdb/extension/icu/third_party/icu/common/lsr.cpp +1 -1
  159. package/src/duckdb/extension/icu/third_party/icu/common/lsr.h +2 -2
  160. package/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +2 -2
  161. package/src/duckdb/extension/icu/third_party/icu/common/normlzr.cpp +1 -1
  162. package/src/duckdb/extension/icu/third_party/icu/common/rbbinode.h +1 -1
  163. package/src/duckdb/extension/icu/third_party/icu/common/schriter.cpp +1 -1
  164. package/src/duckdb/extension/icu/third_party/icu/common/stringtriebuilder.cpp +8 -8
  165. package/src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp +1 -1
  166. package/src/duckdb/extension/icu/third_party/icu/common/uchriter.cpp +1 -1
  167. package/src/duckdb/extension/icu/third_party/icu/common/unicode/brkiter.h +2 -2
  168. package/src/duckdb/extension/icu/third_party/icu/common/unicode/bytestriebuilder.h +1 -1
  169. package/src/duckdb/extension/icu/third_party/icu/common/unicode/chariter.h +3 -3
  170. package/src/duckdb/extension/icu/third_party/icu/common/unicode/dtintrv.h +3 -3
  171. package/src/duckdb/extension/icu/third_party/icu/common/unicode/locid.h +3 -3
  172. package/src/duckdb/extension/icu/third_party/icu/common/unicode/messagepattern.h +4 -4
  173. package/src/duckdb/extension/icu/third_party/icu/common/unicode/normlzr.h +3 -3
  174. package/src/duckdb/extension/icu/third_party/icu/common/unicode/parsepos.h +4 -4
  175. package/src/duckdb/extension/icu/third_party/icu/common/unicode/rbbi.h +2 -2
  176. package/src/duckdb/extension/icu/third_party/icu/common/unicode/schriter.h +1 -1
  177. package/src/duckdb/extension/icu/third_party/icu/common/unicode/strenum.h +2 -2
  178. package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringpiece.h +1 -1
  179. package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringtriebuilder.h +9 -9
  180. package/src/duckdb/extension/icu/third_party/icu/common/unicode/ucharstriebuilder.h +1 -1
  181. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uchriter.h +1 -1
  182. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uniset.h +3 -3
  183. package/src/duckdb/extension/icu/third_party/icu/common/unicode/unistr.h +12 -12
  184. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uobject.h +2 -2
  185. package/src/duckdb/extension/icu/third_party/icu/common/unifiedcache.h +4 -4
  186. package/src/duckdb/extension/icu/third_party/icu/common/uniset.cpp +1 -1
  187. package/src/duckdb/extension/icu/third_party/icu/common/ustr_titlecase_brkiter.cpp +1 -1
  188. package/src/duckdb/extension/icu/third_party/icu/common/ustrenum.cpp +2 -2
  189. package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
  190. package/src/duckdb/extension/icu/third_party/icu/common/uvector.h +3 -3
  191. package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.cpp +1 -1
  192. package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.h +3 -3
  193. package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.cpp +1 -1
  194. package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.h +3 -3
  195. package/src/duckdb/extension/icu/third_party/icu/i18n/alphaindex.cpp +2 -2
  196. package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +1 -1
  197. package/src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp +1 -1
  198. package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +2 -2
  199. package/src/duckdb/extension/icu/third_party/icu/i18n/coll.cpp +2 -2
  200. package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.cpp +1 -1
  201. package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.h +2 -2
  202. package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.cpp +1 -1
  203. package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.h +2 -2
  204. package/src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp +1 -1
  205. package/src/duckdb/extension/icu/third_party/icu/i18n/datefmt.cpp +2 -2
  206. package/src/duckdb/extension/icu/third_party/icu/i18n/dcfmtsym.cpp +1 -1
  207. package/src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp +1 -1
  208. package/src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp +1 -1
  209. package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp +1 -1
  210. package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvinf.cpp +1 -1
  211. package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp +2 -2
  212. package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen_impl.h +4 -4
  213. package/src/duckdb/extension/icu/third_party/icu/i18n/dtrule.cpp +2 -2
  214. package/src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp +1 -1
  215. package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
  216. package/src/duckdb/extension/icu/third_party/icu/i18n/fpositer.cpp +1 -1
  217. package/src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp +1 -1
  218. package/src/duckdb/extension/icu/third_party/icu/i18n/measunit.cpp +1 -1
  219. package/src/duckdb/extension/icu/third_party/icu/i18n/measure.cpp +1 -1
  220. package/src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp +2 -2
  221. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp +1 -1
  222. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.h +2 -2
  223. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp +1 -1
  224. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.h +2 -2
  225. package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +9 -9
  226. package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.h +2 -2
  227. package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp +1 -1
  228. package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.h +1 -1
  229. package/src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp +1 -1
  230. package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.cpp +1 -1
  231. package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.h +1 -1
  232. package/src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp +2 -2
  233. package/src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp +1 -1
  234. package/src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp +4 -4
  235. package/src/duckdb/extension/icu/third_party/icu/i18n/rbtz.cpp +2 -2
  236. package/src/duckdb/extension/icu/third_party/icu/i18n/region.cpp +2 -2
  237. package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.cpp +1 -1
  238. package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.h +1 -1
  239. package/src/duckdb/extension/icu/third_party/icu/i18n/rulebasedcollator.cpp +1 -1
  240. package/src/duckdb/extension/icu/third_party/icu/i18n/selfmt.cpp +2 -2
  241. package/src/duckdb/extension/icu/third_party/icu/i18n/simpletz.cpp +1 -1
  242. package/src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp +1 -1
  243. package/src/duckdb/extension/icu/third_party/icu/i18n/sortkey.cpp +1 -1
  244. package/src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp +1 -1
  245. package/src/duckdb/extension/icu/third_party/icu/i18n/tmutamt.cpp +1 -1
  246. package/src/duckdb/extension/icu/third_party/icu/i18n/tzfmt.cpp +1 -1
  247. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
  248. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.h +2 -2
  249. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames.cpp +3 -3
  250. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.cpp +2 -2
  251. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.h +2 -2
  252. package/src/duckdb/extension/icu/third_party/icu/i18n/tzrule.cpp +8 -8
  253. package/src/duckdb/extension/icu/third_party/icu/i18n/tztrans.cpp +2 -2
  254. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/alphaindex.h +2 -2
  255. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/calendar.h +2 -2
  256. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/choicfmt.h +1 -1
  257. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +2 -2
  258. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coll.h +2 -2
  259. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/currpinf.h +3 -3
  260. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/datefmt.h +1 -1
  261. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dcfmtsym.h +2 -2
  262. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/decimfmt.h +1 -1
  263. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtfmtsym.h +2 -2
  264. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvfmt.h +3 -3
  265. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvinf.h +3 -3
  266. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtptngen.h +2 -2
  267. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtrule.h +2 -2
  268. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fieldpos.h +4 -4
  269. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fmtable.h +2 -2
  270. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +2 -2
  271. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fpositer.h +2 -2
  272. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measfmt.h +1 -1
  273. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measunit.h +2 -2
  274. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measure.h +1 -1
  275. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/msgfmt.h +2 -2
  276. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/numfmt.h +1 -1
  277. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurfmt.h +2 -2
  278. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurrule.h +2 -2
  279. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbnf.h +1 -1
  280. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbtz.h +2 -2
  281. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/region.h +2 -2
  282. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/search.h +2 -2
  283. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/selfmt.h +2 -2
  284. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/simpletz.h +1 -1
  285. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/smpdtfmt.h +1 -1
  286. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/sortkey.h +3 -3
  287. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/stsearch.h +1 -1
  288. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tblcoll.h +1 -1
  289. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/timezone.h +2 -2
  290. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutamt.h +3 -3
  291. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutfmt.h +2 -2
  292. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzfmt.h +1 -1
  293. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tznames.h +2 -2
  294. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzrule.h +8 -8
  295. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tztrans.h +2 -2
  296. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/vtzone.h +2 -2
  297. package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.cpp +2 -2
  298. package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.h +2 -2
  299. package/src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp +2 -2
  300. package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -1
  301. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
  302. package/src/duckdb/extension/json/include/json_common.hpp +14 -10
  303. package/src/duckdb/extension/json/include/json_scan.hpp +48 -7
  304. package/src/duckdb/extension/json/include/json_structure.hpp +2 -1
  305. package/src/duckdb/extension/json/include/json_transform.hpp +5 -2
  306. package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
  307. package/src/duckdb/extension/json/json_functions/json_create.cpp +57 -20
  308. package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +7 -6
  309. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +6 -5
  310. package/src/duckdb/extension/json/json_functions/json_structure.cpp +20 -17
  311. package/src/duckdb/extension/json/json_functions/json_transform.cpp +48 -17
  312. package/src/duckdb/extension/json/json_functions/read_json.cpp +83 -34
  313. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -3
  314. package/src/duckdb/extension/json/json_functions.cpp +14 -16
  315. package/src/duckdb/extension/json/json_scan.cpp +36 -16
  316. package/src/duckdb/extension/json/json_serializer.cpp +1 -1
  317. package/src/duckdb/extension/json/serialize_json.cpp +2 -2
  318. package/src/duckdb/extension/parquet/column_reader.cpp +136 -116
  319. package/src/duckdb/extension/parquet/column_writer.cpp +870 -604
  320. package/src/duckdb/extension/parquet/geo_parquet.cpp +4 -5
  321. package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +0 -4
  322. package/src/duckdb/extension/parquet/include/column_reader.hpp +24 -19
  323. package/src/duckdb/extension/parquet/include/column_writer.hpp +7 -5
  324. package/src/duckdb/extension/parquet/include/decode_utils.hpp +138 -18
  325. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +4 -3
  326. package/src/duckdb/extension/parquet/include/null_column_reader.hpp +1 -14
  327. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +45 -0
  328. package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +1 -1
  329. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +101 -90
  330. package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +179 -0
  331. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +2 -3
  332. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +48 -0
  333. package/src/duckdb/extension/parquet/include/parquet_extension.hpp +8 -0
  334. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  335. package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +5 -0
  336. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +22 -18
  337. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -5
  338. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +87 -3
  339. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +30 -16
  340. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
  341. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +0 -8
  342. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +1 -1
  343. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +1 -42
  344. package/src/duckdb/extension/parquet/include/thrift_tools.hpp +13 -1
  345. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +4 -0
  346. package/src/duckdb/extension/parquet/parquet_extension.cpp +240 -197
  347. package/src/duckdb/extension/parquet/parquet_metadata.cpp +138 -6
  348. package/src/duckdb/extension/parquet/parquet_reader.cpp +155 -79
  349. package/src/duckdb/extension/parquet/parquet_statistics.cpp +258 -38
  350. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +17 -3
  351. package/src/duckdb/extension/parquet/parquet_writer.cpp +65 -34
  352. package/src/duckdb/extension/parquet/serialize_parquet.cpp +4 -0
  353. package/src/duckdb/extension/parquet/zstd_file_system.cpp +13 -0
  354. package/src/duckdb/src/catalog/catalog.cpp +272 -97
  355. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +9 -4
  356. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -0
  357. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +145 -95
  358. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +9 -3
  359. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +15 -0
  360. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +40 -24
  361. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +1 -1
  362. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +2 -2
  363. package/src/duckdb/src/catalog/catalog_entry.cpp +3 -0
  364. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +60 -5
  365. package/src/duckdb/src/catalog/catalog_search_path.cpp +27 -14
  366. package/src/duckdb/src/catalog/catalog_set.cpp +75 -31
  367. package/src/duckdb/src/catalog/default/default_functions.cpp +13 -8
  368. package/src/duckdb/src/catalog/default/default_views.cpp +1 -0
  369. package/src/duckdb/src/catalog/dependency_manager.cpp +133 -5
  370. package/src/duckdb/src/catalog/duck_catalog.cpp +17 -9
  371. package/src/duckdb/src/common/adbc/adbc.cpp +18 -0
  372. package/src/duckdb/src/common/allocator.cpp +3 -1
  373. package/src/duckdb/src/common/arrow/arrow_appender.cpp +30 -9
  374. package/src/duckdb/src/common/arrow/arrow_converter.cpp +63 -82
  375. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +4 -3
  376. package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +361 -0
  377. package/src/duckdb/src/common/arrow/arrow_util.cpp +10 -6
  378. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +6 -2
  379. package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +2 -1
  380. package/src/duckdb/src/common/arrow/schema_metadata.cpp +27 -14
  381. package/src/duckdb/src/common/assert.cpp +1 -2
  382. package/src/duckdb/src/common/bind_helpers.cpp +1 -1
  383. package/src/duckdb/src/common/box_renderer.cpp +316 -26
  384. package/src/duckdb/src/common/cgroups.cpp +7 -1
  385. package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
  386. package/src/duckdb/src/common/enum_util.cpp +2865 -6882
  387. package/src/duckdb/src/common/enums/compression_type.cpp +12 -0
  388. package/src/duckdb/src/common/enums/metric_type.cpp +24 -0
  389. package/src/duckdb/src/common/enums/optimizer_type.cpp +4 -0
  390. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  391. package/src/duckdb/src/common/error_data.cpp +23 -6
  392. package/src/duckdb/src/common/exception/binder_exception.cpp +1 -1
  393. package/src/duckdb/src/common/exception.cpp +20 -28
  394. package/src/duckdb/src/common/extra_type_info.cpp +85 -20
  395. package/src/duckdb/src/common/file_buffer.cpp +5 -2
  396. package/src/duckdb/src/common/file_system.cpp +8 -3
  397. package/src/duckdb/src/common/fsst.cpp +3 -3
  398. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  399. package/src/duckdb/src/common/local_file_system.cpp +169 -60
  400. package/src/duckdb/src/common/multi_file_list.cpp +4 -1
  401. package/src/duckdb/src/common/multi_file_reader.cpp +240 -63
  402. package/src/duckdb/src/common/opener_file_system.cpp +37 -0
  403. package/src/duckdb/src/common/operator/cast_operators.cpp +77 -11
  404. package/src/duckdb/src/common/operator/string_cast.cpp +6 -2
  405. package/src/duckdb/src/common/pipe_file_system.cpp +4 -4
  406. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +25 -14
  407. package/src/duckdb/src/common/radix_partitioning.cpp +17 -16
  408. package/src/duckdb/src/common/random_engine.cpp +39 -3
  409. package/src/duckdb/src/common/render_tree.cpp +3 -19
  410. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  411. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -58
  412. package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
  413. package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +2 -0
  414. package/src/duckdb/src/common/row_operations/row_scatter.cpp +20 -19
  415. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  416. package/src/duckdb/src/common/serializer/memory_stream.cpp +36 -0
  417. package/src/duckdb/src/common/sort/comparators.cpp +7 -7
  418. package/src/duckdb/src/common/sort/partition_state.cpp +2 -2
  419. package/src/duckdb/src/common/stacktrace.cpp +127 -0
  420. package/src/duckdb/src/common/string_util.cpp +157 -32
  421. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +15 -3
  422. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +4 -0
  423. package/src/duckdb/src/common/types/column/column_data_collection.cpp +71 -8
  424. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +27 -6
  425. package/src/duckdb/src/common/types/conflict_manager.cpp +21 -7
  426. package/src/duckdb/src/common/types/date.cpp +39 -25
  427. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -11
  428. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -7
  429. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -1
  430. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +17 -17
  431. package/src/duckdb/src/common/types/timestamp.cpp +70 -33
  432. package/src/duckdb/src/common/types/uuid.cpp +11 -0
  433. package/src/duckdb/src/common/types/validity_mask.cpp +16 -5
  434. package/src/duckdb/src/common/types/value.cpp +357 -199
  435. package/src/duckdb/src/common/types/varint.cpp +64 -18
  436. package/src/duckdb/src/common/types/vector.cpp +78 -38
  437. package/src/duckdb/src/common/types.cpp +199 -92
  438. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +2 -1
  439. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +32 -5
  440. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +3 -1
  441. package/src/duckdb/src/execution/adaptive_filter.cpp +6 -2
  442. package/src/duckdb/src/execution/aggregate_hashtable.cpp +410 -111
  443. package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -2
  444. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +6 -0
  445. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +4 -3
  446. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  447. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -2
  448. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +1 -0
  449. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +5 -4
  450. package/src/duckdb/src/execution/expression_executor.cpp +5 -3
  451. package/src/duckdb/src/execution/index/art/art.cpp +208 -72
  452. package/src/duckdb/src/execution/index/art/base_leaf.cpp +1 -1
  453. package/src/duckdb/src/execution/index/art/leaf.cpp +12 -7
  454. package/src/duckdb/src/execution/index/art/node.cpp +2 -1
  455. package/src/duckdb/src/execution/index/art/node256_leaf.cpp +6 -6
  456. package/src/duckdb/src/execution/index/art/plan_art.cpp +50 -55
  457. package/src/duckdb/src/execution/index/art/prefix.cpp +7 -13
  458. package/src/duckdb/src/execution/index/bound_index.cpp +30 -5
  459. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +3 -5
  460. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -9
  461. package/src/duckdb/src/execution/join_hashtable.cpp +254 -158
  462. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +1 -1
  463. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
  464. package/src/duckdb/src/execution/operator/aggregate/physical_partitioned_aggregate.cpp +226 -0
  465. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +3 -3
  466. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
  467. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +77 -70
  468. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +114 -50
  469. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  470. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +19 -10
  471. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +22 -15
  472. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +95 -0
  473. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +6 -1
  474. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +75 -2
  475. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +40 -12
  476. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +395 -163
  477. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +20 -23
  478. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +115 -49
  479. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +66 -12
  480. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -23
  481. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +220 -46
  482. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +43 -32
  483. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +54 -119
  484. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +184 -20
  485. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +83 -21
  486. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_validator.cpp +63 -0
  487. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +7 -4
  488. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -1
  489. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +3 -2
  490. package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +9 -1
  491. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +132 -15
  492. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -55
  493. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +284 -154
  494. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +40 -55
  495. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +0 -1
  496. package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -3
  497. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +298 -227
  498. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +5 -2
  499. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +3 -4
  500. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +32 -19
  501. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -0
  502. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +6 -0
  503. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -19
  504. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +11 -27
  505. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +308 -119
  506. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +105 -55
  507. package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +6 -2
  508. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -1
  509. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +15 -6
  510. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +92 -50
  511. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +0 -1
  512. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +8 -4
  513. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +54 -22
  514. package/src/duckdb/src/execution/operator/set/physical_union.cpp +5 -1
  515. package/src/duckdb/src/execution/physical_operator.cpp +15 -9
  516. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +101 -12
  517. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +11 -140
  518. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -13
  519. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
  520. package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
  521. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +1 -1
  522. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +6 -5
  523. package/src/duckdb/src/execution/physical_plan/plan_export.cpp +0 -4
  524. package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +1 -1
  525. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +16 -13
  526. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
  527. package/src/duckdb/src/execution/physical_plan/plan_order.cpp +7 -7
  528. package/src/duckdb/src/execution/physical_plan/plan_prepare.cpp +2 -2
  529. package/src/duckdb/src/execution/physical_plan/plan_projection.cpp +1 -1
  530. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +8 -3
  531. package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -2
  532. package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +1 -2
  533. package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +3 -2
  534. package/src/duckdb/src/execution/physical_plan_generator.cpp +0 -22
  535. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +136 -116
  536. package/src/duckdb/src/execution/sample/base_reservoir_sample.cpp +136 -0
  537. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +930 -0
  538. package/src/duckdb/src/function/aggregate/distributive/count.cpp +6 -12
  539. package/src/duckdb/src/function/aggregate/distributive/{first.cpp → first_last_any.cpp} +37 -18
  540. package/src/duckdb/src/{core_functions → function}/aggregate/distributive/minmax.cpp +19 -12
  541. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +72 -13
  542. package/src/duckdb/src/function/built_in_functions.cpp +85 -2
  543. package/src/duckdb/src/function/cast/decimal_cast.cpp +1 -1
  544. package/src/duckdb/src/function/cast/string_cast.cpp +1 -1
  545. package/src/duckdb/src/function/cast/struct_cast.cpp +81 -49
  546. package/src/duckdb/src/function/cast/union/from_struct.cpp +7 -5
  547. package/src/duckdb/src/function/compression_config.cpp +6 -0
  548. package/src/duckdb/src/function/encoding_function.cpp +134 -0
  549. package/src/duckdb/src/function/function.cpp +8 -13
  550. package/src/duckdb/src/function/function_binder.cpp +100 -21
  551. package/src/duckdb/src/function/function_list.cpp +178 -0
  552. package/src/duckdb/src/function/macro_function.cpp +4 -4
  553. package/src/duckdb/src/function/pragma/pragma_functions.cpp +0 -2
  554. package/src/duckdb/src/function/pragma/pragma_queries.cpp +0 -4
  555. package/src/duckdb/src/{core_functions/core_functions.cpp → function/register_function_list.cpp} +12 -8
  556. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +62 -23
  557. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +33 -16
  558. package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +21 -0
  559. package/src/duckdb/src/{core_functions/scalar/blob → function/scalar}/create_sort_key.cpp +86 -23
  560. package/src/duckdb/src/{core_functions → function}/scalar/date/strftime.cpp +6 -4
  561. package/src/duckdb/src/function/scalar/generic/constant_or_null.cpp +5 -7
  562. package/src/duckdb/src/{core_functions → function}/scalar/generic/error.cpp +3 -1
  563. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +2 -2
  564. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +1 -7
  565. package/src/duckdb/src/function/scalar/list/list_extract.cpp +27 -21
  566. package/src/duckdb/src/function/scalar/list/list_resize.cpp +8 -12
  567. package/src/duckdb/src/function/scalar/list/list_select.cpp +1 -4
  568. package/src/duckdb/src/function/scalar/list/list_zip.cpp +6 -6
  569. package/src/duckdb/src/{core_functions → function}/scalar/map/map_contains.cpp +2 -2
  570. package/src/duckdb/src/function/scalar/nested_functions.cpp +0 -11
  571. package/src/duckdb/src/function/scalar/{operators → operator}/add.cpp +2 -1
  572. package/src/duckdb/src/function/scalar/{operators → operator}/arithmetic.cpp +195 -127
  573. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +30 -21
  574. package/src/duckdb/src/function/scalar/strftime_format.cpp +10 -0
  575. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +11 -41
  576. package/src/duckdb/src/function/scalar/string/concat.cpp +22 -20
  577. package/src/duckdb/src/function/scalar/string/concat_ws.cpp +2 -2
  578. package/src/duckdb/src/function/scalar/string/contains.cpp +16 -19
  579. package/src/duckdb/src/function/scalar/string/length.cpp +38 -24
  580. package/src/duckdb/src/function/scalar/string/like.cpp +80 -47
  581. package/src/duckdb/src/{core_functions → function}/scalar/string/md5.cpp +2 -2
  582. package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +2 -6
  583. package/src/duckdb/src/function/scalar/string/prefix.cpp +0 -4
  584. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -1
  585. package/src/duckdb/src/function/scalar/string/regexp.cpp +17 -7
  586. package/src/duckdb/src/{core_functions → function}/scalar/string/regexp_escape.cpp +2 -2
  587. package/src/duckdb/src/{core_functions → function}/scalar/string/sha1.cpp +1 -1
  588. package/src/duckdb/src/{core_functions → function}/scalar/string/sha256.cpp +1 -1
  589. package/src/duckdb/src/{core_functions → function}/scalar/string/string_split.cpp +4 -5
  590. package/src/duckdb/src/function/scalar/string/strip_accents.cpp +3 -6
  591. package/src/duckdb/src/function/scalar/string/substring.cpp +14 -13
  592. package/src/duckdb/src/function/scalar/string/suffix.cpp +0 -4
  593. package/src/duckdb/src/function/scalar/struct/struct_concat.cpp +115 -0
  594. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +35 -31
  595. package/src/duckdb/src/{core_functions → function}/scalar/struct/struct_pack.cpp +7 -7
  596. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -8
  597. package/src/duckdb/src/function/scalar/system/write_log.cpp +170 -0
  598. package/src/duckdb/src/function/scalar_function.cpp +5 -5
  599. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +3 -2
  600. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +287 -1
  601. package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +6 -6
  602. package/src/duckdb/src/function/table/arrow.cpp +32 -352
  603. package/src/duckdb/src/function/table/arrow_conversion.cpp +43 -7
  604. package/src/duckdb/src/function/table/copy_csv.cpp +38 -23
  605. package/src/duckdb/src/function/table/glob.cpp +1 -1
  606. package/src/duckdb/src/function/table/query_function.cpp +12 -7
  607. package/src/duckdb/src/function/table/read_csv.cpp +114 -46
  608. package/src/duckdb/src/function/table/read_file.cpp +26 -6
  609. package/src/duckdb/src/function/table/sniff_csv.cpp +25 -5
  610. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +1 -1
  611. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
  612. package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +6 -7
  613. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +1 -1
  614. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +141 -16
  615. package/src/duckdb/src/function/table/system/duckdb_log.cpp +64 -0
  616. package/src/duckdb/src/function/table/system/duckdb_log_contexts.cpp +65 -0
  617. package/src/duckdb/src/function/table/system/duckdb_memory.cpp +0 -1
  618. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +1 -1
  619. package/src/duckdb/src/function/table/system/duckdb_tables.cpp +1 -13
  620. package/src/duckdb/src/function/table/system/duckdb_types.cpp +1 -1
  621. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +17 -0
  622. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +6 -0
  623. package/src/duckdb/src/function/table/system/pragma_table_sample.cpp +95 -0
  624. package/src/duckdb/src/function/table/system/test_all_types.cpp +56 -46
  625. package/src/duckdb/src/function/table/system_functions.cpp +3 -0
  626. package/src/duckdb/src/function/table/table_scan.cpp +487 -289
  627. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  628. package/src/duckdb/src/function/table_function.cpp +10 -6
  629. package/src/duckdb/src/function/window/window_aggregate_function.cpp +248 -0
  630. package/src/duckdb/src/function/window/window_aggregate_states.cpp +48 -0
  631. package/src/duckdb/src/function/window/window_aggregator.cpp +88 -0
  632. package/src/duckdb/src/function/window/window_boundaries_state.cpp +854 -0
  633. package/src/duckdb/src/function/window/window_collection.cpp +146 -0
  634. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +357 -0
  635. package/src/duckdb/src/function/window/window_custom_aggregator.cpp +146 -0
  636. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +758 -0
  637. package/src/duckdb/src/function/window/window_executor.cpp +99 -0
  638. package/src/duckdb/src/function/window/window_index_tree.cpp +63 -0
  639. package/src/duckdb/src/function/window/window_merge_sort_tree.cpp +275 -0
  640. package/src/duckdb/src/function/window/window_naive_aggregator.cpp +361 -0
  641. package/src/duckdb/src/function/window/window_rank_function.cpp +288 -0
  642. package/src/duckdb/src/function/window/window_rownumber_function.cpp +191 -0
  643. package/src/duckdb/src/function/window/window_segment_tree.cpp +594 -0
  644. package/src/duckdb/src/function/window/window_shared_expressions.cpp +50 -0
  645. package/src/duckdb/src/function/window/window_token_tree.cpp +142 -0
  646. package/src/duckdb/src/function/window/window_value_function.cpp +566 -0
  647. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +74 -17
  648. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  649. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -0
  650. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +9 -0
  651. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +4 -10
  652. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +1 -1
  653. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +2 -2
  654. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -0
  655. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +18 -3
  656. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +1 -1
  657. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -1
  658. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +5 -2
  659. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +21 -18
  660. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +3 -2
  661. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +10 -2
  662. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +11 -0
  663. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +9 -4
  664. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  665. package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +8 -0
  666. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -1
  667. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +3 -1
  668. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +3 -1
  669. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -1
  670. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +7 -3
  671. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +26 -3
  672. package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +1 -1
  673. package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +144 -0
  674. package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +5 -2
  675. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +2 -0
  676. package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +11 -4
  677. package/src/duckdb/src/include/duckdb/common/assert.hpp +12 -1
  678. package/src/duckdb/src/include/duckdb/common/atomic_ptr.hpp +102 -0
  679. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +65 -6
  680. package/src/duckdb/src/include/duckdb/common/chrono.hpp +1 -0
  681. package/src/duckdb/src/include/duckdb/common/column_index.hpp +72 -0
  682. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +120 -0
  683. package/src/duckdb/src/include/duckdb/{core_functions/core_functions.hpp → common/enums/collation_type.hpp} +2 -7
  684. package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +5 -2
  685. package/src/duckdb/src/include/duckdb/common/enums/function_errors.hpp +18 -0
  686. package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +3 -2
  687. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +7 -2
  688. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +4 -0
  689. package/src/duckdb/src/include/duckdb/common/enums/order_preservation_type.hpp +1 -1
  690. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  691. package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
  692. package/src/duckdb/src/include/duckdb/{core_functions/aggregate → common/enums}/quantile_enum.hpp +3 -1
  693. package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +2 -0
  694. package/src/duckdb/src/include/duckdb/common/error_data.hpp +1 -0
  695. package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
  696. package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -1
  697. package/src/duckdb/src/include/duckdb/common/extension_type_info.hpp +37 -0
  698. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +7 -2
  699. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +9 -3
  700. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +6 -6
  701. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +19 -10
  702. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
  703. package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -1
  704. package/src/duckdb/src/include/duckdb/common/fsst.hpp +2 -2
  705. package/src/duckdb/src/include/duckdb/common/helper.hpp +6 -0
  706. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +10 -0
  707. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +12 -2
  708. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
  709. package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +2 -1
  710. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +147 -27
  711. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +4 -0
  712. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +2 -7
  713. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +16 -5
  714. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +16 -0
  715. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +4 -0
  716. package/src/duckdb/src/include/duckdb/common/platform.hpp +34 -3
  717. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +10 -13
  718. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +8 -3
  719. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +0 -2
  720. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  721. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +7 -0
  722. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
  723. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +1 -0
  724. package/src/duckdb/src/include/duckdb/common/stacktrace.hpp +25 -0
  725. package/src/duckdb/src/include/duckdb/common/string_util.hpp +30 -2
  726. package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +1 -1
  727. package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +1 -1
  728. package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +1 -1
  729. package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +3 -2
  730. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +2 -0
  731. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  732. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +8 -0
  733. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +13 -2
  734. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -1
  735. package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +21 -4
  736. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +4 -1
  737. package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -4
  738. package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +1 -1
  739. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +58 -10
  740. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -4
  741. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
  742. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
  743. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +10 -0
  744. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +43 -16
  745. package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -1
  746. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +63 -21
  747. package/src/duckdb/src/include/duckdb/common/types/value.hpp +62 -16
  748. package/src/duckdb/src/include/duckdb/common/types/varint.hpp +13 -0
  749. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +34 -7
  750. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +15 -0
  751. package/src/duckdb/src/include/duckdb/common/types.hpp +12 -7
  752. package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +10 -0
  753. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +12 -13
  754. package/src/duckdb/src/include/duckdb/common/vector_operations/binary_executor.hpp +27 -0
  755. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +111 -4
  756. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +0 -1
  757. package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +2 -0
  758. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +48 -10
  759. package/src/duckdb/src/include/duckdb/execution/executor.hpp +2 -1
  760. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +0 -1
  761. package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +25 -27
  762. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +28 -18
  763. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -0
  764. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
  765. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +23 -16
  766. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +4 -0
  767. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -2
  768. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +25 -16
  769. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +15 -10
  770. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
  771. package/src/duckdb/src/include/duckdb/execution/operator/{persistent/physical_fixed_batch_copy.hpp → aggregate/physical_partitioned_aggregate.hpp} +25 -27
  772. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -2
  773. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +5 -4
  774. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +21 -1
  775. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +38 -9
  776. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +8 -9
  777. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +7 -1
  778. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +29 -23
  779. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +15 -13
  780. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +13 -5
  781. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +2 -1
  782. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
  783. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +36 -1
  784. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +21 -13
  785. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +52 -22
  786. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +6 -6
  787. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_validator.hpp +58 -0
  788. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +62 -0
  789. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +6 -3
  790. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +16 -6
  791. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +9 -4
  792. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +8 -4
  793. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +55 -10
  794. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
  795. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +2 -2
  796. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -2
  797. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
  798. package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +28 -7
  799. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +6 -9
  800. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +17 -16
  801. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  802. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +7 -3
  803. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +5 -1
  804. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
  805. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  806. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +55 -4
  807. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -0
  808. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -2
  809. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +2 -1
  810. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +10 -9
  811. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +16 -13
  812. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +0 -4
  813. package/src/duckdb/src/include/duckdb/execution/partition_info.hpp +79 -0
  814. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +20 -9
  815. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +1 -11
  816. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
  817. package/src/duckdb/src/include/duckdb/execution/progress_data.hpp +58 -0
  818. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -1
  819. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +160 -31
  820. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_function_utils.hpp +31 -0
  821. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +61 -10
  822. package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/minmax_n_helpers.hpp +1 -1
  823. package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/sort_key_helpers.hpp +2 -2
  824. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +47 -27
  825. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -0
  826. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +3 -10
  827. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +13 -6
  828. package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +15 -0
  829. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +29 -6
  830. package/src/duckdb/src/include/duckdb/{core_functions → function}/create_sort_key.hpp +4 -1
  831. package/src/duckdb/src/include/duckdb/function/encoding_function.hpp +78 -0
  832. package/src/duckdb/src/include/duckdb/function/function.hpp +22 -1
  833. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -0
  834. package/src/duckdb/src/include/duckdb/function/function_list.hpp +39 -0
  835. package/src/duckdb/src/include/duckdb/function/function_set.hpp +13 -7
  836. package/src/duckdb/src/include/duckdb/{core_functions → function}/lambda_functions.hpp +1 -1
  837. package/src/duckdb/src/include/duckdb/function/partition_stats.hpp +36 -0
  838. package/src/duckdb/src/include/duckdb/function/register_function_list_helper.hpp +69 -0
  839. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +154 -23
  840. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_utils.hpp +45 -0
  841. package/src/duckdb/src/include/duckdb/function/scalar/date_functions.hpp +45 -0
  842. package/src/duckdb/src/include/duckdb/function/scalar/generic_common.hpp +36 -0
  843. package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +32 -23
  844. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  845. package/src/duckdb/src/include/duckdb/function/scalar/list_functions.hpp +156 -0
  846. package/src/duckdb/src/include/duckdb/function/scalar/map_functions.hpp +27 -0
  847. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +4 -45
  848. package/src/duckdb/src/include/duckdb/function/scalar/operator_functions.hpp +102 -0
  849. package/src/duckdb/src/include/duckdb/function/scalar/operators.hpp +2 -16
  850. package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +16 -25
  851. package/src/duckdb/src/include/duckdb/function/scalar/sequence_utils.hpp +38 -0
  852. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +1 -0
  853. package/src/duckdb/src/include/duckdb/function/scalar/string_common.hpp +49 -0
  854. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +401 -76
  855. package/src/duckdb/src/include/duckdb/function/scalar/struct_functions.hpp +63 -0
  856. package/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp +33 -0
  857. package/src/duckdb/src/include/duckdb/function/scalar/system_functions.hpp +45 -0
  858. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +17 -8
  859. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +59 -6
  860. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +12 -9
  861. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +2 -0
  862. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +18 -13
  863. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -4
  864. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +14 -0
  865. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +15 -10
  866. package/src/duckdb/src/include/duckdb/function/table_function.hpp +94 -18
  867. package/src/duckdb/src/include/duckdb/{core_functions → function}/to_interval.hpp +1 -1
  868. package/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +44 -0
  869. package/src/duckdb/src/include/duckdb/function/window/window_aggregate_states.hpp +56 -0
  870. package/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +194 -0
  871. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +153 -0
  872. package/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +146 -0
  873. package/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +38 -0
  874. package/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +32 -0
  875. package/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +39 -0
  876. package/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +122 -0
  877. package/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp +42 -0
  878. package/src/duckdb/src/include/duckdb/function/window/window_merge_sort_tree.hpp +108 -0
  879. package/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +33 -0
  880. package/src/duckdb/src/include/duckdb/function/window/window_rank_function.hpp +63 -0
  881. package/src/duckdb/src/include/duckdb/function/window/window_rownumber_function.hpp +43 -0
  882. package/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +31 -0
  883. package/src/duckdb/src/include/duckdb/function/window/window_shared_expressions.hpp +76 -0
  884. package/src/duckdb/src/include/duckdb/function/window/window_token_tree.hpp +46 -0
  885. package/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +79 -0
  886. package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +2 -0
  887. package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +81 -0
  888. package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +127 -0
  889. package/src/duckdb/src/include/duckdb/logging/logger.hpp +287 -0
  890. package/src/duckdb/src/include/duckdb/logging/logging.hpp +83 -0
  891. package/src/duckdb/src/include/duckdb/main/appender.hpp +41 -18
  892. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +6 -3
  893. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
  894. package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +317 -231
  895. package/src/duckdb/src/include/duckdb/main/client_config.hpp +17 -1
  896. package/src/duckdb/src/include/duckdb/main/client_context.hpp +28 -6
  897. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  898. package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +5 -0
  899. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -2
  900. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +8 -3
  901. package/src/duckdb/src/include/duckdb/main/config.hpp +52 -8
  902. package/src/duckdb/src/include/duckdb/main/connection.hpp +18 -3
  903. package/src/duckdb/src/include/duckdb/main/database.hpp +8 -7
  904. package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +5 -1
  905. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
  906. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +1 -0
  907. package/src/duckdb/src/include/duckdb/main/extension.hpp +8 -2
  908. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +548 -9
  909. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
  910. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +12 -7
  911. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +3 -3
  912. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +2 -2
  913. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +8 -4
  914. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -1
  915. package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -2
  916. package/src/duckdb/src/include/duckdb/main/relation/subquery_relation.hpp +1 -4
  917. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +3 -1
  918. package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +3 -0
  919. package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +3 -2
  920. package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +7 -0
  921. package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -0
  922. package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +1 -1
  923. package/src/duckdb/src/include/duckdb/main/relation.hpp +45 -9
  924. package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +20 -22
  925. package/src/duckdb/src/include/duckdb/main/settings.hpp +613 -378
  926. package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -4
  927. package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +1 -3
  928. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +14 -7
  929. package/src/duckdb/src/include/duckdb/optimizer/common_aggregate_optimizer.hpp +2 -2
  930. package/src/duckdb/src/include/duckdb/optimizer/empty_result_pullup.hpp +27 -0
  931. package/src/duckdb/src/include/duckdb/optimizer/expression_heuristics.hpp +1 -1
  932. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +6 -1
  933. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  934. package/src/duckdb/src/include/duckdb/optimizer/in_clause_rewriter.hpp +3 -0
  935. package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +5 -0
  936. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +2 -0
  937. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +2 -2
  938. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +45 -0
  939. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +23 -0
  940. package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +18 -0
  941. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +9 -0
  942. package/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +33 -11
  943. package/src/duckdb/src/include/duckdb/optimizer/rule/distinct_aggregate_optimizer.hpp +34 -0
  944. package/src/duckdb/src/include/duckdb/optimizer/sampling_pushdown.hpp +25 -0
  945. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +3 -1
  946. package/src/duckdb/src/include/duckdb/optimizer/sum_rewriter.hpp +37 -0
  947. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +4 -0
  948. package/src/duckdb/src/include/duckdb/parallel/event.hpp +3 -0
  949. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
  950. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +26 -8
  951. package/src/duckdb/src/include/duckdb/parallel/thread_context.hpp +3 -0
  952. package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +51 -3
  953. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +28 -44
  954. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  955. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -2
  956. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
  957. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
  958. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +6 -6
  959. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +11 -1
  960. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +12 -0
  961. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
  962. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +3 -2
  963. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +22 -1
  964. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -4
  965. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +1 -1
  966. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +16 -12
  967. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +3 -3
  968. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +5 -5
  969. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +12 -3
  970. package/src/duckdb/src/include/duckdb/parser/parser.hpp +3 -0
  971. package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +17 -57
  972. package/src/duckdb/src/include/duckdb/parser/qualified_name_set.hpp +19 -3
  973. package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +2 -1
  974. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +12 -9
  975. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -1
  976. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -2
  977. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +45 -28
  978. package/src/duckdb/src/include/duckdb/planner/binder.hpp +23 -11
  979. package/src/duckdb/src/include/duckdb/planner/binding_alias.hpp +44 -0
  980. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +1 -0
  981. package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +4 -3
  982. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +11 -10
  983. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +2 -0
  984. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -0
  985. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +4 -4
  986. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +6 -0
  987. package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -0
  988. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -0
  989. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +9 -4
  990. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +8 -2
  991. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +1 -2
  992. package/src/duckdb/src/include/duckdb/planner/filter/dynamic_filter.hpp +48 -0
  993. package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +37 -0
  994. package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +35 -0
  995. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +4 -0
  996. package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +3 -0
  997. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
  998. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +9 -9
  999. package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +4 -0
  1000. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +16 -7
  1001. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
  1002. package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +4 -0
  1003. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +5 -1
  1004. package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +5 -3
  1005. package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +14 -6
  1006. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +12 -8
  1007. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
  1008. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -0
  1009. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +82 -26
  1010. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +1 -1
  1011. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +10 -3
  1012. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +2 -1
  1013. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +4 -13
  1014. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +14 -15
  1015. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +1 -1
  1016. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +13 -15
  1017. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +1 -1
  1018. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/analyze.hpp +46 -0
  1019. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/common.hpp +60 -0
  1020. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +61 -0
  1021. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/decompression.hpp +50 -0
  1022. package/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp +100 -0
  1023. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +1 -1
  1024. package/src/duckdb/src/include/duckdb/storage/compression/roaring/appender.hpp +150 -0
  1025. package/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp +618 -0
  1026. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +53 -31
  1027. package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -3
  1028. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
  1029. package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -1
  1030. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -3
  1031. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +1 -1
  1032. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -4
  1033. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +16 -1
  1034. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -1
  1035. package/src/duckdb/src/include/duckdb/storage/storage_index.hpp +70 -0
  1036. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +5 -7
  1037. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -3
  1038. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +23 -0
  1039. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +34 -6
  1040. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +2 -0
  1041. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +2 -2
  1042. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  1043. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +39 -10
  1044. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +56 -14
  1045. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +35 -29
  1046. package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +1 -1
  1047. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  1048. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +7 -1
  1049. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +19 -6
  1050. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  1051. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +29 -6
  1052. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +10 -10
  1053. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +5 -0
  1054. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +26 -19
  1055. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +8 -1
  1056. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +16 -14
  1057. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
  1058. package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
  1059. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
  1060. package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +228 -61
  1061. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +14 -10
  1062. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +3 -1
  1063. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +3 -2
  1064. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +1 -0
  1065. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +19 -17
  1066. package/src/duckdb/src/include/duckdb/transaction/rollback_state.hpp +5 -2
  1067. package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -2
  1068. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +13 -8
  1069. package/src/duckdb/src/include/duckdb/transaction/undo_buffer_allocator.hpp +79 -0
  1070. package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +43 -13
  1071. package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +4 -1
  1072. package/src/duckdb/src/include/duckdb/verification/copied_statement_verifier.hpp +4 -2
  1073. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +4 -2
  1074. package/src/duckdb/src/include/duckdb/verification/external_statement_verifier.hpp +4 -2
  1075. package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +4 -2
  1076. package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +4 -2
  1077. package/src/duckdb/src/include/duckdb/verification/parsed_statement_verifier.hpp +4 -2
  1078. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +7 -3
  1079. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +11 -5
  1080. package/src/duckdb/src/include/duckdb/verification/unoptimized_statement_verifier.hpp +4 -2
  1081. package/src/duckdb/src/include/duckdb.h +424 -41
  1082. package/src/duckdb/src/include/duckdb_extension.h +301 -195
  1083. package/src/duckdb/src/logging/log_manager.cpp +157 -0
  1084. package/src/duckdb/src/logging/log_storage.cpp +209 -0
  1085. package/src/duckdb/src/logging/logger.cpp +211 -0
  1086. package/src/duckdb/src/logging/logging.cpp +42 -0
  1087. package/src/duckdb/src/main/appender.cpp +187 -45
  1088. package/src/duckdb/src/main/attached_database.cpp +16 -8
  1089. package/src/duckdb/src/main/capi/appender-c.cpp +47 -4
  1090. package/src/duckdb/src/main/capi/arrow-c.cpp +9 -4
  1091. package/src/duckdb/src/main/capi/config-c.cpp +17 -4
  1092. package/src/duckdb/src/main/capi/datetime-c.cpp +15 -0
  1093. package/src/duckdb/src/main/capi/duckdb-c.cpp +54 -13
  1094. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +212 -4
  1095. package/src/duckdb/src/main/capi/helper-c.cpp +3 -0
  1096. package/src/duckdb/src/main/capi/prepared-c.cpp +26 -7
  1097. package/src/duckdb/src/main/capi/replacement_scan-c.cpp +1 -1
  1098. package/src/duckdb/src/main/capi/result-c.cpp +3 -0
  1099. package/src/duckdb/src/main/capi/table_description-c.cpp +43 -10
  1100. package/src/duckdb/src/main/capi/threading-c.cpp +4 -4
  1101. package/src/duckdb/src/main/client_context.cpp +125 -51
  1102. package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
  1103. package/src/duckdb/src/main/client_context_wrapper.cpp +4 -0
  1104. package/src/duckdb/src/main/client_data.cpp +1 -1
  1105. package/src/duckdb/src/main/client_verify.cpp +39 -20
  1106. package/src/duckdb/src/main/config.cpp +266 -74
  1107. package/src/duckdb/src/main/connection.cpp +53 -13
  1108. package/src/duckdb/src/main/database.cpp +39 -18
  1109. package/src/duckdb/src/main/database_manager.cpp +12 -11
  1110. package/src/duckdb/src/main/db_instance_cache.cpp +14 -7
  1111. package/src/duckdb/src/main/extension/extension_helper.cpp +24 -23
  1112. package/src/duckdb/src/main/extension/extension_install.cpp +19 -7
  1113. package/src/duckdb/src/main/extension/extension_load.cpp +91 -41
  1114. package/src/duckdb/src/main/extension/extension_util.cpp +40 -19
  1115. package/src/duckdb/src/main/extension.cpp +20 -11
  1116. package/src/duckdb/src/main/profiling_info.cpp +19 -5
  1117. package/src/duckdb/src/main/query_profiler.cpp +135 -36
  1118. package/src/duckdb/src/main/query_result.cpp +2 -1
  1119. package/src/duckdb/src/main/relation/aggregate_relation.cpp +3 -3
  1120. package/src/duckdb/src/main/relation/create_table_relation.cpp +5 -4
  1121. package/src/duckdb/src/main/relation/create_view_relation.cpp +2 -2
  1122. package/src/duckdb/src/main/relation/cross_product_relation.cpp +2 -2
  1123. package/src/duckdb/src/main/relation/delete_relation.cpp +2 -2
  1124. package/src/duckdb/src/main/relation/delim_get_relation.cpp +1 -1
  1125. package/src/duckdb/src/main/relation/distinct_relation.cpp +1 -1
  1126. package/src/duckdb/src/main/relation/explain_relation.cpp +1 -1
  1127. package/src/duckdb/src/main/relation/filter_relation.cpp +1 -1
  1128. package/src/duckdb/src/main/relation/insert_relation.cpp +1 -1
  1129. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  1130. package/src/duckdb/src/main/relation/order_relation.cpp +1 -1
  1131. package/src/duckdb/src/main/relation/projection_relation.cpp +3 -3
  1132. package/src/duckdb/src/main/relation/query_relation.cpp +1 -1
  1133. package/src/duckdb/src/main/relation/read_csv_relation.cpp +58 -20
  1134. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -2
  1135. package/src/duckdb/src/main/relation/subquery_relation.cpp +3 -8
  1136. package/src/duckdb/src/main/relation/table_function_relation.cpp +10 -1
  1137. package/src/duckdb/src/main/relation/table_relation.cpp +19 -3
  1138. package/src/duckdb/src/main/relation/update_relation.cpp +2 -2
  1139. package/src/duckdb/src/main/relation/value_relation.cpp +42 -2
  1140. package/src/duckdb/src/main/relation/view_relation.cpp +8 -2
  1141. package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
  1142. package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
  1143. package/src/duckdb/src/main/relation.cpp +49 -28
  1144. package/src/duckdb/src/main/secret/secret_manager.cpp +1 -1
  1145. package/src/duckdb/src/main/secret/secret_storage.cpp +6 -4
  1146. package/src/duckdb/src/main/settings/autogenerated_settings.cpp +1102 -0
  1147. package/src/duckdb/src/main/settings/custom_settings.cpp +1343 -0
  1148. package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +60 -37
  1149. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +1 -1
  1150. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +126 -72
  1151. package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +22 -6
  1152. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -3
  1153. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +2 -2
  1154. package/src/duckdb/src/optimizer/compressed_materialization.cpp +3 -3
  1155. package/src/duckdb/src/optimizer/cse_optimizer.cpp +7 -7
  1156. package/src/duckdb/src/optimizer/deliminator.cpp +6 -5
  1157. package/src/duckdb/src/optimizer/empty_result_pullup.cpp +96 -0
  1158. package/src/duckdb/src/optimizer/expression_heuristics.cpp +11 -3
  1159. package/src/duckdb/src/optimizer/expression_rewriter.cpp +9 -2
  1160. package/src/duckdb/src/optimizer/filter_combiner.cpp +190 -88
  1161. package/src/duckdb/src/optimizer/filter_pushdown.cpp +6 -5
  1162. package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +25 -9
  1163. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +170 -72
  1164. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -4
  1165. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +3 -1
  1166. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +7 -7
  1167. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -6
  1168. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +37 -22
  1169. package/src/duckdb/src/optimizer/late_materialization.cpp +414 -0
  1170. package/src/duckdb/src/optimizer/limit_pushdown.cpp +1 -0
  1171. package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +30 -2
  1172. package/src/duckdb/src/optimizer/optimizer.cpp +67 -7
  1173. package/src/duckdb/src/optimizer/pullup/pullup_filter.cpp +3 -3
  1174. package/src/duckdb/src/optimizer/pullup/pullup_projection.cpp +2 -2
  1175. package/src/duckdb/src/optimizer/pullup/pullup_set_operation.cpp +1 -1
  1176. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
  1177. package/src/duckdb/src/optimizer/pushdown/pushdown_filter.cpp +1 -1
  1178. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
  1179. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +3 -3
  1180. package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +5 -3
  1181. package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
  1182. package/src/duckdb/src/optimizer/pushdown/pushdown_unnest.cpp +52 -0
  1183. package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +2 -2
  1184. package/src/duckdb/src/optimizer/regex_range_filter.cpp +1 -1
  1185. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
  1186. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +168 -38
  1187. package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +2 -1
  1188. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +8 -5
  1189. package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -2
  1190. package/src/duckdb/src/optimizer/rule/constant_folding.cpp +2 -2
  1191. package/src/duckdb/src/optimizer/rule/distinct_aggregate_optimizer.cpp +65 -0
  1192. package/src/duckdb/src/optimizer/rule/distributivity.cpp +2 -2
  1193. package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +2 -1
  1194. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +4 -3
  1195. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +3 -3
  1196. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -1
  1197. package/src/duckdb/src/optimizer/rule/move_constants.cpp +9 -9
  1198. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +4 -3
  1199. package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +1 -1
  1200. package/src/duckdb/src/optimizer/sampling_pushdown.cpp +24 -0
  1201. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -1
  1202. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
  1203. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +1 -1
  1204. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +74 -0
  1205. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +10 -7
  1206. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +3 -3
  1207. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +3 -3
  1208. package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +3 -0
  1209. package/src/duckdb/src/optimizer/sum_rewriter.cpp +174 -0
  1210. package/src/duckdb/src/optimizer/topn_optimizer.cpp +71 -0
  1211. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +5 -5
  1212. package/src/duckdb/src/parallel/event.cpp +4 -0
  1213. package/src/duckdb/src/parallel/executor.cpp +11 -29
  1214. package/src/duckdb/src/parallel/executor_task.cpp +8 -3
  1215. package/src/duckdb/src/parallel/pipeline.cpp +15 -8
  1216. package/src/duckdb/src/parallel/pipeline_executor.cpp +67 -43
  1217. package/src/duckdb/src/parallel/thread_context.cpp +12 -1
  1218. package/src/duckdb/src/parser/column_definition.cpp +3 -3
  1219. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +72 -9
  1220. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -3
  1221. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +1 -1
  1222. package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
  1223. package/src/duckdb/src/parser/expression/lambda_expression.cpp +3 -3
  1224. package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +1 -1
  1225. package/src/duckdb/src/parser/expression/star_expression.cpp +46 -2
  1226. package/src/duckdb/src/parser/expression/window_expression.cpp +24 -1
  1227. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +26 -2
  1228. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +5 -3
  1229. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +29 -1
  1230. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +6 -6
  1231. package/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +1 -1
  1232. package/src/duckdb/src/parser/parsed_data/create_function_info.cpp +17 -0
  1233. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -15
  1234. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +1 -1
  1235. package/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +1 -1
  1236. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  1237. package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +1 -1
  1238. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +1 -1
  1239. package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +1 -0
  1240. package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +4 -4
  1241. package/src/duckdb/src/parser/parsed_data/load_info.cpp +1 -0
  1242. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +31 -1
  1243. package/src/duckdb/src/parser/parsed_expression.cpp +1 -1
  1244. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +4 -1
  1245. package/src/duckdb/src/parser/parser.cpp +129 -0
  1246. package/src/duckdb/src/parser/qualified_name.cpp +99 -0
  1247. package/src/duckdb/src/parser/query_error_context.cpp +35 -6
  1248. package/src/duckdb/src/parser/query_node/select_node.cpp +4 -4
  1249. package/src/duckdb/src/parser/statement/delete_statement.cpp +6 -1
  1250. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -3
  1251. package/src/duckdb/src/parser/statement/update_statement.cpp +6 -1
  1252. package/src/duckdb/src/parser/tableref/pivotref.cpp +2 -2
  1253. package/src/duckdb/src/parser/tableref.cpp +2 -2
  1254. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +16 -24
  1255. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +1 -1
  1256. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +5 -5
  1257. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +61 -13
  1258. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +10 -4
  1259. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -2
  1260. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +30 -3
  1261. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  1262. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +25 -6
  1263. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +1 -1
  1264. package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +10 -3
  1265. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +4 -3
  1266. package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +18 -3
  1267. package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +1 -1
  1268. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +0 -1
  1269. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +5 -5
  1270. package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +26 -12
  1271. package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +11 -3
  1272. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
  1273. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
  1274. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +3 -3
  1275. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +4 -4
  1276. package/src/duckdb/src/parser/transform/statement/transform_set.cpp +2 -2
  1277. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +21 -3
  1278. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +8 -6
  1279. package/src/duckdb/src/parser/transformer.cpp +2 -2
  1280. package/src/duckdb/src/planner/bind_context.cpp +308 -136
  1281. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +30 -31
  1282. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +4 -2
  1283. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +102 -94
  1284. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +7 -5
  1285. package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +1 -1
  1286. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +7 -7
  1287. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -10
  1288. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -6
  1289. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +23 -15
  1290. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +1 -1
  1291. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +97 -19
  1292. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +74 -16
  1293. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +6 -6
  1294. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +49 -15
  1295. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +32 -23
  1296. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +20 -3
  1297. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +2 -2
  1298. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +3 -0
  1299. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +6 -5
  1300. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +38 -19
  1301. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +2 -12
  1302. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +117 -412
  1303. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +423 -144
  1304. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  1305. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +5 -0
  1306. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  1307. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +0 -4
  1308. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +31 -13
  1309. package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +1 -1
  1310. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +96 -27
  1311. package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +1 -1
  1312. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -3
  1313. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +7 -6
  1314. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +36 -9
  1315. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +34 -34
  1316. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +72 -35
  1317. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +99 -18
  1318. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +23 -11
  1319. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +22 -19
  1320. package/src/duckdb/src/planner/binder.cpp +23 -45
  1321. package/src/duckdb/src/planner/binding_alias.cpp +69 -0
  1322. package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
  1323. package/src/duckdb/src/planner/bound_result_modifier.cpp +6 -2
  1324. package/src/duckdb/src/planner/collation_binding.cpp +38 -4
  1325. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +17 -5
  1326. package/src/duckdb/src/planner/expression/bound_expression.cpp +1 -1
  1327. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -1
  1328. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +2 -2
  1329. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -4
  1330. package/src/duckdb/src/planner/expression.cpp +7 -1
  1331. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
  1332. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -2
  1333. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
  1334. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +16 -0
  1335. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +53 -1
  1336. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
  1337. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -8
  1338. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +1 -1
  1339. package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +2 -2
  1340. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +1 -1
  1341. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
  1342. package/src/duckdb/src/planner/expression_binder.cpp +7 -7
  1343. package/src/duckdb/src/planner/expression_iterator.cpp +6 -3
  1344. package/src/duckdb/src/planner/filter/constant_filter.cpp +17 -2
  1345. package/src/duckdb/src/planner/filter/dynamic_filter.cpp +68 -0
  1346. package/src/duckdb/src/planner/filter/in_filter.cpp +84 -0
  1347. package/src/duckdb/src/planner/filter/null_filter.cpp +1 -2
  1348. package/src/duckdb/src/planner/filter/optional_filter.cpp +29 -0
  1349. package/src/duckdb/src/planner/filter/struct_filter.cpp +11 -6
  1350. package/src/duckdb/src/planner/joinside.cpp +6 -5
  1351. package/src/duckdb/src/planner/logical_operator.cpp +4 -1
  1352. package/src/duckdb/src/planner/logical_operator_visitor.cpp +68 -2
  1353. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +23 -0
  1354. package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -12
  1355. package/src/duckdb/src/planner/operator/logical_filter.cpp +1 -1
  1356. package/src/duckdb/src/planner/operator/logical_get.cpp +48 -25
  1357. package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
  1358. package/src/duckdb/src/planner/operator/logical_join.cpp +1 -1
  1359. package/src/duckdb/src/planner/operator/logical_order.cpp +4 -11
  1360. package/src/duckdb/src/planner/operator/logical_top_n.cpp +7 -0
  1361. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +33 -5
  1362. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +2 -2
  1363. package/src/duckdb/src/planner/table_binding.cpp +74 -36
  1364. package/src/duckdb/src/planner/table_filter.cpp +5 -8
  1365. package/src/duckdb/src/storage/arena_allocator.cpp +5 -4
  1366. package/src/duckdb/src/storage/buffer/block_handle.cpp +88 -17
  1367. package/src/duckdb/src/storage/buffer/block_manager.cpp +34 -26
  1368. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
  1369. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +70 -49
  1370. package/src/duckdb/src/storage/buffer_manager.cpp +4 -0
  1371. package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -5
  1372. package/src/duckdb/src/storage/compression/bitpacking.cpp +14 -16
  1373. package/src/duckdb/src/storage/compression/dictionary/analyze.cpp +54 -0
  1374. package/src/duckdb/src/storage/compression/dictionary/common.cpp +90 -0
  1375. package/src/duckdb/src/storage/compression/dictionary/compression.cpp +174 -0
  1376. package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +115 -0
  1377. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +53 -545
  1378. package/src/duckdb/src/storage/compression/empty_validity.cpp +15 -0
  1379. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +25 -16
  1380. package/src/duckdb/src/storage/compression/fsst.cpp +101 -47
  1381. package/src/duckdb/src/storage/compression/numeric_constant.cpp +92 -2
  1382. package/src/duckdb/src/storage/compression/rle.cpp +216 -46
  1383. package/src/duckdb/src/storage/compression/roaring/analyze.cpp +179 -0
  1384. package/src/duckdb/src/storage/compression/roaring/common.cpp +282 -0
  1385. package/src/duckdb/src/storage/compression/roaring/compress.cpp +481 -0
  1386. package/src/duckdb/src/storage/compression/roaring/metadata.cpp +262 -0
  1387. package/src/duckdb/src/storage/compression/roaring/scan.cpp +364 -0
  1388. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +47 -65
  1389. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +102 -39
  1390. package/src/duckdb/src/storage/compression/zstd.cpp +1049 -0
  1391. package/src/duckdb/src/storage/data_table.cpp +312 -172
  1392. package/src/duckdb/src/storage/local_storage.cpp +104 -46
  1393. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +1 -1
  1394. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -3
  1395. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +138 -58
  1396. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +14 -0
  1397. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +19 -8
  1398. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -0
  1399. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +43 -0
  1400. package/src/duckdb/src/storage/serialization/serialize_types.cpp +32 -5
  1401. package/src/duckdb/src/storage/single_file_block_manager.cpp +6 -8
  1402. package/src/duckdb/src/storage/standard_buffer_manager.cpp +82 -71
  1403. package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
  1404. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +18 -17
  1405. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +34 -22
  1406. package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -3
  1407. package/src/duckdb/src/storage/storage_info.cpp +72 -10
  1408. package/src/duckdb/src/storage/storage_manager.cpp +41 -47
  1409. package/src/duckdb/src/storage/table/array_column_data.cpp +7 -1
  1410. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -9
  1411. package/src/duckdb/src/storage/table/column_data.cpp +105 -43
  1412. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +307 -132
  1413. package/src/duckdb/src/storage/table/column_segment.cpp +36 -13
  1414. package/src/duckdb/src/storage/table/list_column_data.cpp +4 -4
  1415. package/src/duckdb/src/storage/table/row_group.cpp +159 -66
  1416. package/src/duckdb/src/storage/table/row_group_collection.cpp +157 -68
  1417. package/src/duckdb/src/storage/table/row_version_manager.cpp +33 -10
  1418. package/src/duckdb/src/storage/table/scan_state.cpp +21 -7
  1419. package/src/duckdb/src/storage/table/standard_column_data.cpp +68 -5
  1420. package/src/duckdb/src/storage/table/struct_column_data.cpp +42 -4
  1421. package/src/duckdb/src/storage/table/table_statistics.cpp +91 -5
  1422. package/src/duckdb/src/storage/table/update_segment.cpp +287 -210
  1423. package/src/duckdb/src/storage/table_index_list.cpp +55 -58
  1424. package/src/duckdb/src/storage/temporary_file_manager.cpp +412 -149
  1425. package/src/duckdb/src/storage/wal_replay.cpp +132 -48
  1426. package/src/duckdb/src/storage/write_ahead_log.cpp +75 -48
  1427. package/src/duckdb/src/transaction/cleanup_state.cpp +0 -1
  1428. package/src/duckdb/src/transaction/commit_state.cpp +23 -14
  1429. package/src/duckdb/src/transaction/duck_transaction.cpp +29 -25
  1430. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +18 -6
  1431. package/src/duckdb/src/transaction/meta_transaction.cpp +3 -2
  1432. package/src/duckdb/src/transaction/rollback_state.cpp +5 -2
  1433. package/src/duckdb/src/transaction/transaction_context.cpp +9 -1
  1434. package/src/duckdb/src/transaction/undo_buffer.cpp +35 -27
  1435. package/src/duckdb/src/transaction/undo_buffer_allocator.cpp +72 -0
  1436. package/src/duckdb/src/transaction/wal_write_state.cpp +12 -10
  1437. package/src/duckdb/src/verification/copied_statement_verifier.cpp +7 -4
  1438. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +7 -5
  1439. package/src/duckdb/src/verification/external_statement_verifier.cpp +7 -4
  1440. package/src/duckdb/src/verification/fetch_row_verifier.cpp +7 -4
  1441. package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +8 -4
  1442. package/src/duckdb/src/verification/parsed_statement_verifier.cpp +7 -4
  1443. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -12
  1444. package/src/duckdb/src/verification/statement_verifier.cpp +20 -15
  1445. package/src/duckdb/src/verification/unoptimized_statement_verifier.cpp +7 -4
  1446. package/src/duckdb/third_party/fsst/libfsst.hpp +1 -0
  1447. package/src/duckdb/third_party/httplib/httplib.hpp +15 -22
  1448. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +4 -2
  1449. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +2 -4
  1450. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +14278 -13832
  1451. package/src/duckdb/third_party/parquet/parquet_types.cpp +3410 -1686
  1452. package/src/duckdb/third_party/parquet/parquet_types.h +1585 -1204
  1453. package/src/duckdb/third_party/skiplist/SkipList.h +0 -1
  1454. package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +13 -15
  1455. package/src/duckdb/third_party/zstd/common/debug.cpp +36 -0
  1456. package/src/duckdb/third_party/zstd/common/entropy_common.cpp +173 -49
  1457. package/src/duckdb/third_party/zstd/common/error_private.cpp +11 -3
  1458. package/src/duckdb/third_party/zstd/common/fse_decompress.cpp +126 -97
  1459. package/src/duckdb/third_party/zstd/common/pool.cpp +376 -0
  1460. package/src/duckdb/third_party/zstd/common/threading.cpp +193 -0
  1461. package/src/duckdb/third_party/zstd/common/xxhash.cpp +18 -14
  1462. package/src/duckdb/third_party/zstd/common/zstd_common.cpp +3 -38
  1463. package/src/duckdb/third_party/zstd/compress/fse_compress.cpp +93 -165
  1464. package/src/duckdb/third_party/zstd/compress/hist.cpp +28 -31
  1465. package/src/duckdb/third_party/zstd/compress/huf_compress.cpp +957 -291
  1466. package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3988 -1124
  1467. package/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +120 -43
  1468. package/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +47 -23
  1469. package/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +274 -424
  1470. package/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +403 -153
  1471. package/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +741 -268
  1472. package/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +1339 -278
  1473. package/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +334 -222
  1474. package/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +674 -298
  1475. package/src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp +1885 -0
  1476. package/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +1247 -586
  1477. package/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +18 -17
  1478. package/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +724 -270
  1479. package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1193 -393
  1480. package/src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp +30 -0
  1481. package/src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp +171 -0
  1482. package/src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp +80 -0
  1483. package/src/duckdb/third_party/zstd/dict/cover.cpp +1271 -0
  1484. package/src/duckdb/third_party/zstd/dict/divsufsort.cpp +1916 -0
  1485. package/src/duckdb/third_party/zstd/dict/fastcover.cpp +775 -0
  1486. package/src/duckdb/third_party/zstd/dict/zdict.cpp +1139 -0
  1487. package/src/duckdb/third_party/zstd/include/zdict.h +473 -0
  1488. package/src/duckdb/third_party/zstd/include/zstd/common/allocations.h +58 -0
  1489. package/src/duckdb/third_party/zstd/include/zstd/common/bits.h +204 -0
  1490. package/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +88 -85
  1491. package/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +243 -47
  1492. package/src/duckdb/third_party/zstd/include/zstd/common/cpu.h +253 -0
  1493. package/src/duckdb/third_party/zstd/include/zstd/common/debug.h +31 -31
  1494. package/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +94 -6
  1495. package/src/duckdb/third_party/zstd/include/zstd/common/fse.h +424 -64
  1496. package/src/duckdb/third_party/zstd/include/zstd/common/huf.h +255 -70
  1497. package/src/duckdb/third_party/zstd/include/zstd/common/mem.h +125 -85
  1498. package/src/duckdb/third_party/zstd/include/zstd/common/pool.h +84 -0
  1499. package/src/duckdb/third_party/zstd/include/zstd/common/portability_macros.h +158 -0
  1500. package/src/duckdb/third_party/zstd/include/zstd/common/threading.h +152 -0
  1501. package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash.h → xxhash.hpp} +0 -1
  1502. package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash_static.h → xxhash_static.hpp} +1 -1
  1503. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_deps.h +122 -0
  1504. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +143 -174
  1505. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_trace.h +159 -0
  1506. package/src/duckdb/third_party/zstd/include/zstd/compress/clevels.h +136 -0
  1507. package/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +4 -4
  1508. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +631 -220
  1509. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +17 -7
  1510. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +2 -2
  1511. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +3 -2
  1512. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +256 -153
  1513. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +16 -3
  1514. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +4 -3
  1515. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +145 -11
  1516. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +14 -6
  1517. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm_geartab.h +110 -0
  1518. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +33 -9
  1519. package/src/duckdb/third_party/zstd/include/zstd/compress/zstdmt_compress.h +107 -0
  1520. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +4 -3
  1521. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +20 -6
  1522. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +88 -16
  1523. package/src/duckdb/third_party/zstd/include/zstd/deprecated/zbuff.h +214 -0
  1524. package/src/duckdb/third_party/zstd/include/zstd/dict/cover.h +156 -0
  1525. package/src/duckdb/third_party/zstd/include/zstd/dict/divsufsort.h +62 -0
  1526. package/src/duckdb/third_party/zstd/include/zstd.h +2171 -93
  1527. package/src/duckdb/third_party/zstd/include/{zstd/common/zstd_errors.h → zstd_errors.h} +32 -10
  1528. package/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +8 -0
  1529. package/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +20 -0
  1530. package/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +12 -0
  1531. package/src/duckdb/ub_extension_core_functions_aggregate_nested.cpp +6 -0
  1532. package/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +14 -0
  1533. package/src/duckdb/ub_extension_core_functions_scalar_array.cpp +4 -0
  1534. package/src/duckdb/ub_extension_core_functions_scalar_bit.cpp +2 -0
  1535. package/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +4 -0
  1536. package/src/duckdb/ub_extension_core_functions_scalar_date.cpp +20 -0
  1537. package/src/duckdb/ub_extension_core_functions_scalar_debug.cpp +2 -0
  1538. package/src/duckdb/ub_extension_core_functions_scalar_enum.cpp +2 -0
  1539. package/src/duckdb/ub_extension_core_functions_scalar_generic.cpp +18 -0
  1540. package/src/duckdb/ub_extension_core_functions_scalar_list.cpp +22 -0
  1541. package/src/duckdb/ub_extension_core_functions_scalar_map.cpp +14 -0
  1542. package/src/duckdb/ub_extension_core_functions_scalar_math.cpp +2 -0
  1543. package/src/duckdb/ub_extension_core_functions_scalar_operators.cpp +2 -0
  1544. package/src/duckdb/ub_extension_core_functions_scalar_random.cpp +4 -0
  1545. package/src/duckdb/ub_extension_core_functions_scalar_string.cpp +48 -0
  1546. package/src/duckdb/ub_extension_core_functions_scalar_struct.cpp +2 -0
  1547. package/src/duckdb/ub_extension_core_functions_scalar_union.cpp +6 -0
  1548. package/src/duckdb/ub_src_common.cpp +4 -0
  1549. package/src/duckdb/ub_src_common_arrow.cpp +3 -1
  1550. package/src/duckdb/ub_src_execution.cpp +0 -6
  1551. package/src/duckdb/ub_src_execution_operator_aggregate.cpp +2 -0
  1552. package/src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp +2 -0
  1553. package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +2 -0
  1554. package/src/duckdb/ub_src_execution_sample.cpp +4 -0
  1555. package/src/duckdb/ub_src_function.cpp +6 -0
  1556. package/src/duckdb/ub_src_function_aggregate.cpp +0 -2
  1557. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +3 -1
  1558. package/src/duckdb/ub_src_function_scalar.cpp +2 -8
  1559. package/src/duckdb/ub_src_function_scalar_date.cpp +2 -0
  1560. package/src/duckdb/ub_src_function_scalar_generic.cpp +2 -2
  1561. package/src/duckdb/ub_src_function_scalar_map.cpp +2 -0
  1562. package/src/duckdb/ub_src_function_scalar_operator.cpp +8 -0
  1563. package/src/duckdb/ub_src_function_scalar_string.cpp +10 -0
  1564. package/src/duckdb/ub_src_function_scalar_struct.cpp +4 -0
  1565. package/src/duckdb/ub_src_function_scalar_system.cpp +2 -0
  1566. package/src/duckdb/ub_src_function_table_system.cpp +6 -0
  1567. package/src/duckdb/ub_src_function_window.cpp +36 -0
  1568. package/src/duckdb/ub_src_logging.cpp +8 -0
  1569. package/src/duckdb/ub_src_main_settings.cpp +3 -1
  1570. package/src/duckdb/ub_src_optimizer.cpp +8 -0
  1571. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  1572. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  1573. package/src/duckdb/ub_src_parser.cpp +2 -0
  1574. package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
  1575. package/src/duckdb/ub_src_planner.cpp +2 -0
  1576. package/src/duckdb/ub_src_planner_filter.cpp +6 -0
  1577. package/src/duckdb/ub_src_storage_compression.cpp +4 -0
  1578. package/src/duckdb/ub_src_storage_compression_dictionary.cpp +8 -0
  1579. package/src/duckdb/ub_src_storage_compression_roaring.cpp +10 -0
  1580. package/src/duckdb/ub_src_transaction.cpp +2 -0
  1581. package/vendor.py +1 -1
  1582. package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +0 -6003
  1583. package/src/duckdb/extension/json/yyjson/yyjson.cpp +0 -8218
  1584. package/src/duckdb/src/common/arrow/appender/list_data.cpp +0 -78
  1585. package/src/duckdb/src/common/arrow/appender/map_data.cpp +0 -91
  1586. package/src/duckdb/src/common/cycle_counter.cpp +0 -76
  1587. package/src/duckdb/src/common/field_writer.cpp +0 -97
  1588. package/src/duckdb/src/common/http_state.cpp +0 -95
  1589. package/src/duckdb/src/common/preserved_error.cpp +0 -87
  1590. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
  1591. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +0 -27
  1592. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +0 -36
  1593. package/src/duckdb/src/common/serializer/format_serializer.cpp +0 -15
  1594. package/src/duckdb/src/common/serializer.cpp +0 -24
  1595. package/src/duckdb/src/common/types/chunk_collection.cpp +0 -190
  1596. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +0 -183
  1597. package/src/duckdb/src/core_functions/scalar/date/current.cpp +0 -54
  1598. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +0 -78
  1599. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +0 -70
  1600. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +0 -412
  1601. package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +0 -28
  1602. package/src/duckdb/src/core_functions/scalar/string/jaro_winkler.cpp +0 -71
  1603. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  1604. package/src/duckdb/src/execution/index/art/node16.cpp +0 -196
  1605. package/src/duckdb/src/execution/index/art/node4.cpp +0 -189
  1606. package/src/duckdb/src/execution/index/unknown_index.cpp +0 -65
  1607. package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +0 -595
  1608. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +0 -434
  1609. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +0 -89
  1610. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +0 -90
  1611. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +0 -95
  1612. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +0 -494
  1613. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +0 -35
  1614. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +0 -99
  1615. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -689
  1616. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -242
  1617. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +0 -695
  1618. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  1619. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  1620. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  1621. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -280
  1622. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +0 -666
  1623. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +0 -499
  1624. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -207
  1625. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  1626. package/src/duckdb/src/execution/physical_plan/plan_limit_percent.cpp +0 -18
  1627. package/src/duckdb/src/execution/physical_plan/plan_show_select.cpp +0 -47
  1628. package/src/duckdb/src/execution/reservoir_sample.cpp +0 -324
  1629. package/src/duckdb/src/execution/window_executor.cpp +0 -1830
  1630. package/src/duckdb/src/execution/window_segment_tree.cpp +0 -2073
  1631. package/src/duckdb/src/extension_forward_decl/icu.cpp +0 -59
  1632. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +0 -15
  1633. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +0 -29
  1634. package/src/duckdb/src/function/scalar/generic_functions.cpp +0 -11
  1635. package/src/duckdb/src/function/scalar/list/list_concat.cpp +0 -143
  1636. package/src/duckdb/src/function/scalar/operators.cpp +0 -14
  1637. package/src/duckdb/src/function/scalar/sequence_functions.cpp +0 -10
  1638. package/src/duckdb/src/function/scalar/string_functions.cpp +0 -22
  1639. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +0 -173
  1640. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +0 -101
  1641. package/src/duckdb/src/include/duckdb/catalog/mapping_value.hpp +0 -92
  1642. package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +0 -42
  1643. package/src/duckdb/src/include/duckdb/common/cycle_counter.hpp +0 -68
  1644. package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +0 -34
  1645. package/src/duckdb/src/include/duckdb/common/http_state.hpp +0 -113
  1646. package/src/duckdb/src/include/duckdb/common/platform.h +0 -58
  1647. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +0 -59
  1648. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +0 -192
  1649. package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +0 -137
  1650. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +0 -65
  1651. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +0 -63
  1652. package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +0 -65
  1653. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +0 -103
  1654. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +0 -74
  1655. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +0 -60
  1656. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +0 -253
  1657. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +0 -155
  1658. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +0 -163
  1659. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +0 -35
  1660. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +0 -228
  1661. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +0 -70
  1662. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +0 -93
  1663. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +0 -60
  1664. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +0 -197
  1665. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +0 -21
  1666. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +0 -30
  1667. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +0 -99
  1668. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +0 -87
  1669. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +0 -70
  1670. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +0 -80
  1671. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +0 -137
  1672. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +0 -104
  1673. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +0 -79
  1674. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -119
  1675. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +0 -72
  1676. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +0 -110
  1677. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +0 -103
  1678. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_file_handle.hpp +0 -59
  1679. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +0 -46
  1680. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +0 -210
  1681. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +0 -131
  1682. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +0 -28
  1683. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +0 -70
  1684. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +0 -65
  1685. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +0 -167
  1686. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +0 -21
  1687. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +0 -343
  1688. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -165
  1689. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_optimizer.hpp +0 -45
  1690. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +0 -57
  1691. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +0 -45
  1692. package/src/duckdb/src/include/duckdb/parser/statement/show_statement.hpp +0 -32
  1693. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +0 -49
  1694. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +0 -42
  1695. package/src/duckdb/src/main/settings/settings.cpp +0 -2056
  1696. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +0 -36
  1697. package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +0 -19
  1698. package/src/duckdb/src/parser/statement/show_statement.cpp +0 -15
  1699. package/src/duckdb/src/planner/binder/statement/bind_show.cpp +0 -30
  1700. package/src/duckdb/src/planner/operator/logical_limit_percent.cpp +0 -14
  1701. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +0 -70
  1702. package/src/duckdb/third_party/fsst/fsst_avx512.cpp +0 -140
  1703. package/src/duckdb/third_party/fsst/fsst_avx512.inc +0 -57
  1704. package/src/duckdb/third_party/fsst/fsst_avx512_unroll1.inc +0 -57
  1705. package/src/duckdb/third_party/fsst/fsst_avx512_unroll2.inc +0 -114
  1706. package/src/duckdb/third_party/fsst/fsst_avx512_unroll3.inc +0 -171
  1707. package/src/duckdb/third_party/fsst/fsst_avx512_unroll4.inc +0 -228
  1708. package/src/duckdb/third_party/parquet/parquet_constants.cpp +0 -17
  1709. package/src/duckdb/third_party/parquet/parquet_constants.h +0 -24
  1710. package/src/duckdb/third_party/re2/util/pod_array.h +0 -55
  1711. package/src/duckdb/third_party/re2/util/sparse_array.h +0 -392
  1712. package/src/duckdb/third_party/re2/util/sparse_set.h +0 -264
  1713. package/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +0 -421
  1714. package/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +0 -238
  1715. package/src/duckdb/third_party/zstd/include/zstd_static.h +0 -1070
  1716. package/src/duckdb/ub_src_core_functions.cpp +0 -6
  1717. package/src/duckdb/ub_src_core_functions_aggregate_algebraic.cpp +0 -8
  1718. package/src/duckdb/ub_src_core_functions_aggregate_distributive.cpp +0 -24
  1719. package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +0 -12
  1720. package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +0 -6
  1721. package/src/duckdb/ub_src_core_functions_aggregate_regression.cpp +0 -14
  1722. package/src/duckdb/ub_src_core_functions_scalar_array.cpp +0 -4
  1723. package/src/duckdb/ub_src_core_functions_scalar_bit.cpp +0 -2
  1724. package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +0 -6
  1725. package/src/duckdb/ub_src_core_functions_scalar_date.cpp +0 -22
  1726. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +0 -2
  1727. package/src/duckdb/ub_src_core_functions_scalar_enum.cpp +0 -2
  1728. package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +0 -18
  1729. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +0 -22
  1730. package/src/duckdb/ub_src_core_functions_scalar_map.cpp +0 -16
  1731. package/src/duckdb/ub_src_core_functions_scalar_math.cpp +0 -2
  1732. package/src/duckdb/ub_src_core_functions_scalar_operators.cpp +0 -2
  1733. package/src/duckdb/ub_src_core_functions_scalar_random.cpp +0 -4
  1734. package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +0 -2
  1735. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +0 -58
  1736. package/src/duckdb/ub_src_core_functions_scalar_struct.cpp +0 -4
  1737. package/src/duckdb/ub_src_core_functions_scalar_union.cpp +0 -6
  1738. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +0 -18
  1739. package/src/duckdb/ub_src_function_scalar_operators.cpp +0 -8
  1740. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/covar.hpp +0 -0
  1741. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/stddev.hpp +0 -0
  1742. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/sum_helpers.hpp +0 -0
  1743. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/array_kernels.hpp +0 -0
  1744. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/function_list.hpp +0 -0
  1745. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/secret_functions.hpp +0 -0
  1746. /package/src/duckdb/src/function/scalar/{operators → operator}/multiply.cpp +0 -0
  1747. /package/src/duckdb/src/function/scalar/{operators → operator}/subtract.cpp +0 -0
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -10,16 +10,25 @@
10
10
 
11
11
  #include "zstd/compress/zstd_compress_internal.h"
12
12
  #include "zstd/compress/zstd_lazy.h"
13
+ #include "zstd/common/bits.h" /* ZSTD_countTrailingZeros64 */
14
+
15
+ namespace duckdb_zstd {
16
+
17
+ #if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
18
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
19
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
20
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
21
+
22
+ #define kLazySkippingStep 8
13
23
 
14
24
 
15
25
  /*-*************************************
16
26
  * Binary Tree search
17
27
  ***************************************/
18
28
 
19
- namespace duckdb_zstd {
20
-
21
- static void
22
- ZSTD_updateDUBT(ZSTD_matchState_t* ms,
29
+ static
30
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
31
+ void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
23
32
  const BYTE* ip, const BYTE* iend,
24
33
  U32 mls)
25
34
  {
@@ -60,11 +69,12 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
60
69
 
61
70
  /** ZSTD_insertDUBT1() :
62
71
  * sort one already inserted but unsorted position
63
- * assumption : current >= btlow == (current - btmask)
72
+ * assumption : curr >= btlow == (curr - btmask)
64
73
  * doesn't fail */
65
- static void
66
- ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
67
- U32 current, const BYTE* inputEnd,
74
+ static
75
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
76
+ void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
77
+ U32 curr, const BYTE* inputEnd,
68
78
  U32 nbCompares, U32 btLow,
69
79
  const ZSTD_dictMode_e dictMode)
70
80
  {
@@ -76,41 +86,41 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
76
86
  const BYTE* const base = ms->window.base;
77
87
  const BYTE* const dictBase = ms->window.dictBase;
78
88
  const U32 dictLimit = ms->window.dictLimit;
79
- const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
80
- const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
89
+ const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
90
+ const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
81
91
  const BYTE* const dictEnd = dictBase + dictLimit;
82
92
  const BYTE* const prefixStart = base + dictLimit;
83
93
  const BYTE* match;
84
- U32* smallerPtr = bt + 2*(current&btMask);
94
+ U32* smallerPtr = bt + 2*(curr&btMask);
85
95
  U32* largerPtr = smallerPtr + 1;
86
96
  U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
87
97
  U32 dummy32; /* to be nullified at the end */
88
98
  U32 const windowValid = ms->window.lowLimit;
89
99
  U32 const maxDistance = 1U << cParams->windowLog;
90
- U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
100
+ U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
91
101
 
92
102
 
93
103
  DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
94
- current, dictLimit, windowLow);
95
- assert(current >= btLow);
104
+ curr, dictLimit, windowLow);
105
+ assert(curr >= btLow);
96
106
  assert(ip < iend); /* condition for ZSTD_count */
97
107
 
98
- while (nbCompares-- && (matchIndex > windowLow)) {
108
+ for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
99
109
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
100
110
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
101
- assert(matchIndex < current);
111
+ assert(matchIndex < curr);
102
112
  /* note : all candidates are now supposed sorted,
103
113
  * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
104
114
  * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
105
115
 
106
116
  if ( (dictMode != ZSTD_extDict)
107
117
  || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
108
- || (current < dictLimit) /* both in extDict */) {
118
+ || (curr < dictLimit) /* both in extDict */) {
109
119
  const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
110
120
  || (matchIndex+matchLength >= dictLimit)) ?
111
121
  base : dictBase;
112
122
  assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
113
- || (current < dictLimit) );
123
+ || (curr < dictLimit) );
114
124
  match = mBase + matchIndex;
115
125
  matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
116
126
  } else {
@@ -121,7 +131,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
121
131
  }
122
132
 
123
133
  DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
124
- current, matchIndex, (U32)matchLength);
134
+ curr, matchIndex, (U32)matchLength);
125
135
 
126
136
  if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
127
137
  break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
@@ -151,9 +161,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
151
161
  }
152
162
 
153
163
 
154
- static size_t
155
- ZSTD_DUBT_findBetterDictMatch (
156
- ZSTD_matchState_t* ms,
164
+ static
165
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
166
+ size_t ZSTD_DUBT_findBetterDictMatch (
167
+ const ZSTD_matchState_t* ms,
157
168
  const BYTE* const ip, const BYTE* const iend,
158
169
  size_t* offsetPtr,
159
170
  size_t bestLength,
@@ -170,7 +181,7 @@ ZSTD_DUBT_findBetterDictMatch (
170
181
 
171
182
  const BYTE* const base = ms->window.base;
172
183
  const BYTE* const prefixStart = base + ms->window.dictLimit;
173
- U32 const current = (U32)(ip-base);
184
+ U32 const curr = (U32)(ip-base);
174
185
  const BYTE* const dictBase = dms->window.base;
175
186
  const BYTE* const dictEnd = dms->window.nextSrc;
176
187
  U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
@@ -187,7 +198,7 @@ ZSTD_DUBT_findBetterDictMatch (
187
198
  (void)dictMode;
188
199
  assert(dictMode == ZSTD_dictMatchState);
189
200
 
190
- while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
201
+ for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
191
202
  U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
192
203
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
193
204
  const BYTE* match = dictBase + dictMatchIndex;
@@ -197,10 +208,10 @@ ZSTD_DUBT_findBetterDictMatch (
197
208
 
198
209
  if (matchLength > bestLength) {
199
210
  U32 matchIndex = dictMatchIndex + dictIndexDelta;
200
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
211
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
201
212
  DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
202
- current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
203
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
213
+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
214
+ bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
204
215
  }
205
216
  if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
206
217
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -220,19 +231,20 @@ ZSTD_DUBT_findBetterDictMatch (
220
231
  }
221
232
 
222
233
  if (bestLength >= MINMATCH) {
223
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
234
+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex;
224
235
  DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
225
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
236
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
226
237
  }
227
238
  return bestLength;
228
239
 
229
240
  }
230
241
 
231
242
 
232
- static size_t
233
- ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
243
+ static
244
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
245
+ size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
234
246
  const BYTE* const ip, const BYTE* const iend,
235
- size_t* offsetPtr,
247
+ size_t* offBasePtr,
236
248
  U32 const mls,
237
249
  const ZSTD_dictMode_e dictMode)
238
250
  {
@@ -243,13 +255,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
243
255
  U32 matchIndex = hashTable[h];
244
256
 
245
257
  const BYTE* const base = ms->window.base;
246
- U32 const current = (U32)(ip-base);
247
- U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
258
+ U32 const curr = (U32)(ip-base);
259
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
248
260
 
249
261
  U32* const bt = ms->chainTable;
250
262
  U32 const btLog = cParams->chainLog - 1;
251
263
  U32 const btMask = (1 << btLog) - 1;
252
- U32 const btLow = (btMask >= current) ? 0 : current - btMask;
264
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
253
265
  U32 const unsortLimit = MAX(btLow, windowLow);
254
266
 
255
267
  U32* nextCandidate = bt + 2*(matchIndex&btMask);
@@ -258,8 +270,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
258
270
  U32 nbCandidates = nbCompares;
259
271
  U32 previousCandidate = 0;
260
272
 
261
- DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
273
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
262
274
  assert(ip <= iend-8); /* required for h calculation */
275
+ assert(dictMode != ZSTD_dedicatedDictSearch);
263
276
 
264
277
  /* reach end of unsorted candidates list */
265
278
  while ( (matchIndex > unsortLimit)
@@ -301,16 +314,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
301
314
  const U32 dictLimit = ms->window.dictLimit;
302
315
  const BYTE* const dictEnd = dictBase + dictLimit;
303
316
  const BYTE* const prefixStart = base + dictLimit;
304
- U32* smallerPtr = bt + 2*(current&btMask);
305
- U32* largerPtr = bt + 2*(current&btMask) + 1;
306
- U32 matchEndIdx = current + 8 + 1;
317
+ U32* smallerPtr = bt + 2*(curr&btMask);
318
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
319
+ U32 matchEndIdx = curr + 8 + 1;
307
320
  U32 dummy32; /* to be nullified at the end */
308
321
  size_t bestLength = 0;
309
322
 
310
323
  matchIndex = hashTable[h];
311
- hashTable[h] = current; /* Update Hash Table */
324
+ hashTable[h] = curr; /* Update Hash Table */
312
325
 
313
- while (nbCompares-- && (matchIndex > windowLow)) {
326
+ for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
314
327
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
315
328
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
316
329
  const BYTE* match;
@@ -328,8 +341,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
328
341
  if (matchLength > bestLength) {
329
342
  if (matchLength > matchEndIdx - matchIndex)
330
343
  matchEndIdx = matchIndex + (U32)matchLength;
331
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
332
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
344
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) )
345
+ bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex);
333
346
  if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
334
347
  if (dictMode == ZSTD_dictMatchState) {
335
348
  nbCompares = 0; /* in addition to avoiding checking any
@@ -358,19 +371,20 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
358
371
 
359
372
  *smallerPtr = *largerPtr = 0;
360
373
 
374
+ assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
361
375
  if (dictMode == ZSTD_dictMatchState && nbCompares) {
362
376
  bestLength = ZSTD_DUBT_findBetterDictMatch(
363
377
  ms, ip, iend,
364
- offsetPtr, bestLength, nbCompares,
378
+ offBasePtr, bestLength, nbCompares,
365
379
  mls, dictMode);
366
380
  }
367
381
 
368
- assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
382
+ assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
369
383
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
370
384
  if (bestLength >= MINMATCH) {
371
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
385
+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex;
372
386
  DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
373
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
387
+ curr, (U32)bestLength, (U32)*offBasePtr, mIndex);
374
388
  }
375
389
  return bestLength;
376
390
  }
@@ -378,69 +392,236 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
378
392
 
379
393
 
380
394
  /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
381
- FORCE_INLINE_TEMPLATE size_t
382
- ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
395
+ FORCE_INLINE_TEMPLATE
396
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
397
+ size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
383
398
  const BYTE* const ip, const BYTE* const iLimit,
384
- size_t* offsetPtr,
399
+ size_t* offBasePtr,
385
400
  const U32 mls /* template */,
386
401
  const ZSTD_dictMode_e dictMode)
387
402
  {
388
403
  DEBUGLOG(7, "ZSTD_BtFindBestMatch");
389
404
  if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
390
405
  ZSTD_updateDUBT(ms, ip, iLimit, mls);
391
- return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
406
+ return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode);
392
407
  }
393
408
 
409
+ /***********************************
410
+ * Dedicated dict search
411
+ ***********************************/
394
412
 
395
- static size_t
396
- ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
397
- const BYTE* ip, const BYTE* const iLimit,
398
- size_t* offsetPtr)
413
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
399
414
  {
400
- switch(ms->cParams.minMatch)
415
+ const BYTE* const base = ms->window.base;
416
+ U32 const target = (U32)(ip - base);
417
+ U32* const hashTable = ms->hashTable;
418
+ U32* const chainTable = ms->chainTable;
419
+ U32 const chainSize = 1 << ms->cParams.chainLog;
420
+ U32 idx = ms->nextToUpdate;
421
+ U32 const minChain = chainSize < target - idx ? target - chainSize : idx;
422
+ U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
423
+ U32 const cacheSize = bucketSize - 1;
424
+ U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
425
+ U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
426
+
427
+ /* We know the hashtable is oversized by a factor of `bucketSize`.
428
+ * We are going to temporarily pretend `bucketSize == 1`, keeping only a
429
+ * single entry. We will use the rest of the space to construct a temporary
430
+ * chaintable.
431
+ */
432
+ U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
433
+ U32* const tmpHashTable = hashTable;
434
+ U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
435
+ U32 const tmpChainSize = (U32)((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
436
+ U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
437
+ U32 hashIdx;
438
+
439
+ assert(ms->cParams.chainLog <= 24);
440
+ assert(ms->cParams.hashLog > ms->cParams.chainLog);
441
+ assert(idx != 0);
442
+ assert(tmpMinChain <= minChain);
443
+
444
+ /* fill conventional hash table and conventional chain table */
445
+ for ( ; idx < target; idx++) {
446
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
447
+ if (idx >= tmpMinChain) {
448
+ tmpChainTable[idx - tmpMinChain] = hashTable[h];
449
+ }
450
+ tmpHashTable[h] = idx;
451
+ }
452
+
453
+ /* sort chains into ddss chain table */
401
454
  {
402
- default : /* includes case 3 */
403
- case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
404
- case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
405
- case 7 :
406
- case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
455
+ U32 chainPos = 0;
456
+ for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
457
+ U32 count;
458
+ U32 countBeyondMinChain = 0;
459
+ U32 i = tmpHashTable[hashIdx];
460
+ for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
461
+ /* skip through the chain to the first position that won't be
462
+ * in the hash cache bucket */
463
+ if (i < minChain) {
464
+ countBeyondMinChain++;
465
+ }
466
+ i = tmpChainTable[i - tmpMinChain];
467
+ }
468
+ if (count == cacheSize) {
469
+ for (count = 0; count < chainLimit;) {
470
+ if (i < minChain) {
471
+ if (!i || ++countBeyondMinChain > cacheSize) {
472
+ /* only allow pulling `cacheSize` number of entries
473
+ * into the cache or chainTable beyond `minChain`,
474
+ * to replace the entries pulled out of the
475
+ * chainTable into the cache. This lets us reach
476
+ * back further without increasing the total number
477
+ * of entries in the chainTable, guaranteeing the
478
+ * DDSS chain table will fit into the space
479
+ * allocated for the regular one. */
480
+ break;
481
+ }
482
+ }
483
+ chainTable[chainPos++] = i;
484
+ count++;
485
+ if (i < tmpMinChain) {
486
+ break;
487
+ }
488
+ i = tmpChainTable[i - tmpMinChain];
489
+ }
490
+ } else {
491
+ count = 0;
492
+ }
493
+ if (count) {
494
+ tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
495
+ } else {
496
+ tmpHashTable[hashIdx] = 0;
497
+ }
498
+ }
499
+ assert(chainPos <= chainSize); /* I believe this is guaranteed... */
500
+ }
501
+
502
+ /* move chain pointers into the last entry of each hash bucket */
503
+ for (hashIdx = (1 << hashLog); hashIdx; ) {
504
+ U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
505
+ U32 const chainPackedPointer = tmpHashTable[hashIdx];
506
+ U32 i;
507
+ for (i = 0; i < cacheSize; i++) {
508
+ hashTable[bucketIdx + i] = 0;
509
+ }
510
+ hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
511
+ }
512
+
513
+ /* fill the buckets of the hash table */
514
+ for (idx = ms->nextToUpdate; idx < target; idx++) {
515
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
516
+ << ZSTD_LAZY_DDSS_BUCKET_LOG;
517
+ U32 i;
518
+ /* Shift hash cache down 1. */
519
+ for (i = cacheSize - 1; i; i--)
520
+ hashTable[h + i] = hashTable[h + i - 1];
521
+ hashTable[h] = idx;
407
522
  }
523
+
524
+ ms->nextToUpdate = target;
408
525
  }
409
526
 
527
+ /* Returns the longest match length found in the dedicated dict search structure.
528
+ * If none are longer than the argument ml, then ml will be returned.
529
+ */
530
+ FORCE_INLINE_TEMPLATE
531
+ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
532
+ const ZSTD_matchState_t* const dms,
533
+ const BYTE* const ip, const BYTE* const iLimit,
534
+ const BYTE* const prefixStart, const U32 curr,
535
+ const U32 dictLimit, const size_t ddsIdx) {
536
+ const U32 ddsLowestIndex = dms->window.dictLimit;
537
+ const BYTE* const ddsBase = dms->window.base;
538
+ const BYTE* const ddsEnd = dms->window.nextSrc;
539
+ const U32 ddsSize = (U32)(ddsEnd - ddsBase);
540
+ const U32 ddsIndexDelta = dictLimit - ddsSize;
541
+ const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
542
+ const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
543
+ U32 ddsAttempt;
544
+ U32 matchIndex;
545
+
546
+ for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
547
+ PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
548
+ }
410
549
 
411
- static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
412
- ZSTD_matchState_t* ms,
413
- const BYTE* ip, const BYTE* const iLimit,
414
- size_t* offsetPtr)
415
- {
416
- switch(ms->cParams.minMatch)
417
550
  {
418
- default : /* includes case 3 */
419
- case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
420
- case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
421
- case 7 :
422
- case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
551
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
552
+ U32 const chainIndex = chainPackedPointer >> 8;
553
+
554
+ PREFETCH_L1(&dms->chainTable[chainIndex]);
423
555
  }
424
- }
425
556
 
557
+ for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
558
+ size_t currentMl=0;
559
+ const BYTE* match;
560
+ matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
561
+ match = ddsBase + matchIndex;
562
+
563
+ if (!matchIndex) {
564
+ return ml;
565
+ }
566
+
567
+ /* guaranteed by table construction */
568
+ (void)ddsLowestIndex;
569
+ assert(matchIndex >= ddsLowestIndex);
570
+ assert(match+4 <= ddsEnd);
571
+ if (MEM_read32(match) == MEM_read32(ip)) {
572
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
573
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
574
+ }
575
+
576
+ /* save best solution */
577
+ if (currentMl > ml) {
578
+ ml = currentMl;
579
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
580
+ if (ip+currentMl == iLimit) {
581
+ /* best possible, avoids read overflow on next attempt */
582
+ return ml;
583
+ }
584
+ }
585
+ }
426
586
 
427
- static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
428
- ZSTD_matchState_t* ms,
429
- const BYTE* ip, const BYTE* const iLimit,
430
- size_t* offsetPtr)
431
- {
432
- switch(ms->cParams.minMatch)
433
587
  {
434
- default : /* includes case 3 */
435
- case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
436
- case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
437
- case 7 :
438
- case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
588
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
589
+ U32 chainIndex = chainPackedPointer >> 8;
590
+ U32 const chainLength = chainPackedPointer & 0xFF;
591
+ U32 const chainAttempts = nbAttempts - ddsAttempt;
592
+ U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
593
+ U32 chainAttempt;
594
+
595
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
596
+ PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
597
+ }
598
+
599
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
600
+ size_t currentMl=0;
601
+ const BYTE* match;
602
+ matchIndex = dms->chainTable[chainIndex];
603
+ match = ddsBase + matchIndex;
604
+
605
+ /* guaranteed by table construction */
606
+ assert(matchIndex >= ddsLowestIndex);
607
+ assert(match+4 <= ddsEnd);
608
+ if (MEM_read32(match) == MEM_read32(ip)) {
609
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
610
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
611
+ }
612
+
613
+ /* save best solution */
614
+ if (currentMl > ml) {
615
+ ml = currentMl;
616
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
617
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
618
+ }
619
+ }
439
620
  }
621
+ return ml;
440
622
  }
441
623
 
442
624
 
443
-
444
625
  /* *********************************
445
626
  * Hash Chain
446
627
  ***********************************/
@@ -448,10 +629,12 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
448
629
 
449
630
  /* Update chains up to ip (excluded)
450
631
  Assumption : always within prefix (i.e. not within extDict) */
451
- static U32 ZSTD_insertAndFindFirstIndex_internal(
632
+ FORCE_INLINE_TEMPLATE
633
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
634
+ U32 ZSTD_insertAndFindFirstIndex_internal(
452
635
  ZSTD_matchState_t* ms,
453
636
  const ZSTD_compressionParameters* const cParams,
454
- const BYTE* ip, U32 const mls)
637
+ const BYTE* ip, U32 const mls, U32 const lazySkipping)
455
638
  {
456
639
  U32* const hashTable = ms->hashTable;
457
640
  const U32 hashLog = cParams->hashLog;
@@ -466,6 +649,9 @@ static U32 ZSTD_insertAndFindFirstIndex_internal(
466
649
  NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
467
650
  hashTable[h] = idx;
468
651
  idx++;
652
+ /* Stop inserting every position when in the lazy skipping mode. */
653
+ if (lazySkipping)
654
+ break;
469
655
  }
470
656
 
471
657
  ms->nextToUpdate = target;
@@ -474,13 +660,13 @@ static U32 ZSTD_insertAndFindFirstIndex_internal(
474
660
 
475
661
  U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
476
662
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
477
- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
663
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
478
664
  }
479
665
 
480
-
481
666
  /* inlining is important to hardwire a hot branch (template emulation) */
482
667
  FORCE_INLINE_TEMPLATE
483
- size_t ZSTD_HcFindBestMatch_generic (
668
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
669
+ size_t ZSTD_HcFindBestMatch(
484
670
  ZSTD_matchState_t* ms,
485
671
  const BYTE* const ip, const BYTE* const iLimit,
486
672
  size_t* offsetPtr,
@@ -495,25 +681,39 @@ size_t ZSTD_HcFindBestMatch_generic (
495
681
  const U32 dictLimit = ms->window.dictLimit;
496
682
  const BYTE* const prefixStart = base + dictLimit;
497
683
  const BYTE* const dictEnd = dictBase + dictLimit;
498
- const U32 current = (U32)(ip-base);
684
+ const U32 curr = (U32)(ip-base);
499
685
  const U32 maxDistance = 1U << cParams->windowLog;
500
686
  const U32 lowestValid = ms->window.lowLimit;
501
- const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
687
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
502
688
  const U32 isDictionary = (ms->loadedDictEnd != 0);
503
689
  const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
504
- const U32 minChain = current > chainSize ? current - chainSize : 0;
690
+ const U32 minChain = curr > chainSize ? curr - chainSize : 0;
505
691
  U32 nbAttempts = 1U << cParams->searchLog;
506
692
  size_t ml=4-1;
507
693
 
694
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
695
+ const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
696
+ ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
697
+ const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
698
+ ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
699
+
700
+ U32 matchIndex;
701
+
702
+ if (dictMode == ZSTD_dedicatedDictSearch) {
703
+ const U32* entry = &dms->hashTable[ddsIdx];
704
+ PREFETCH_L1(entry);
705
+ }
706
+
508
707
  /* HC4 match finder */
509
- U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
708
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
510
709
 
511
- for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
710
+ for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
512
711
  size_t currentMl=0;
513
712
  if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
514
713
  const BYTE* const match = base + matchIndex;
515
714
  assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
516
- if (match[ml] == ip[ml]) /* potentially better */
715
+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
716
+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
517
717
  currentMl = ZSTD_count(ip, match, iLimit);
518
718
  } else {
519
719
  const BYTE* const match = dictBase + matchIndex;
@@ -525,7 +725,7 @@ size_t ZSTD_HcFindBestMatch_generic (
525
725
  /* save best solution */
526
726
  if (currentMl > ml) {
527
727
  ml = currentMl;
528
- *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
728
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
529
729
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
530
730
  }
531
731
 
@@ -533,8 +733,11 @@ size_t ZSTD_HcFindBestMatch_generic (
533
733
  matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
534
734
  }
535
735
 
536
- if (dictMode == ZSTD_dictMatchState) {
537
- const ZSTD_matchState_t* const dms = ms->dictMatchState;
736
+ assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
737
+ if (dictMode == ZSTD_dedicatedDictSearch) {
738
+ ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms,
739
+ ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
740
+ } else if (dictMode == ZSTD_dictMatchState) {
538
741
  const U32* const dmsChainTable = dms->chainTable;
539
742
  const U32 dmsChainSize = (1 << dms->cParams.chainLog);
540
743
  const U32 dmsChainMask = dmsChainSize - 1;
@@ -547,7 +750,7 @@ size_t ZSTD_HcFindBestMatch_generic (
547
750
 
548
751
  matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
549
752
 
550
- for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
753
+ for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
551
754
  size_t currentMl=0;
552
755
  const BYTE* const match = dmsBase + matchIndex;
553
756
  assert(match+4 <= dmsEnd);
@@ -557,11 +760,13 @@ size_t ZSTD_HcFindBestMatch_generic (
557
760
  /* save best solution */
558
761
  if (currentMl > ml) {
559
762
  ml = currentMl;
560
- *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
763
+ assert(curr > matchIndex + dmsIndexDelta);
764
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
561
765
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
562
766
  }
563
767
 
564
768
  if (matchIndex <= dmsMinChain) break;
769
+
565
770
  matchIndex = dmsChainTable[matchIndex & dmsChainMask];
566
771
  }
567
772
  }
@@ -569,62 +774,748 @@ size_t ZSTD_HcFindBestMatch_generic (
569
774
  return ml;
570
775
  }
571
776
 
777
+ /* *********************************
778
+ * (SIMD) Row-based matchfinder
779
+ ***********************************/
780
+ /* Constants for row-based hash */
781
+ #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
782
+ #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
572
783
 
573
- FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
574
- ZSTD_matchState_t* ms,
575
- const BYTE* ip, const BYTE* const iLimit,
576
- size_t* offsetPtr)
784
+ #define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1)
785
+
786
+ typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 representing a mask of matches */
787
+
788
+ /* ZSTD_VecMask_next():
789
+ * Starting from the LSB, returns the idx of the next non-zero bit.
790
+ * Basically counting the nb of trailing zeroes.
791
+ */
792
+ MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
793
+ return ZSTD_countTrailingZeros64(val);
794
+ }
795
+
796
+ /* ZSTD_row_nextIndex():
797
+ * Returns the next index to insert at within a tagTable row, and updates the "head"
798
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
799
+ */
800
+ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
801
+ U32 next = (*tagRow-1) & rowMask;
802
+ next += (next == 0) ? rowMask : 0; /* skip first position */
803
+ *tagRow = (BYTE)next;
804
+ return next;
805
+ }
806
+
807
+ /* ZSTD_isAligned():
808
+ * Checks that a pointer is aligned to "align" bytes which must be a power of 2.
809
+ */
810
+ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
811
+ assert((align & (align - 1)) == 0);
812
+ return (((size_t)ptr) & (align - 1)) == 0;
813
+ }
814
+
815
+ /* ZSTD_row_prefetch():
816
+ * Performs prefetching for the hashTable and tagTable at a given row.
817
+ */
818
+ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
819
+ PREFETCH_L1(hashTable + relRow);
820
+ if (rowLog >= 5) {
821
+ PREFETCH_L1(hashTable + relRow + 16);
822
+ /* Note: prefetching more of the hash table does not appear to be beneficial for 128-entry rows */
823
+ }
824
+ PREFETCH_L1(tagTable + relRow);
825
+ if (rowLog == 6) {
826
+ PREFETCH_L1(tagTable + relRow + 32);
827
+ }
828
+ assert(rowLog == 4 || rowLog == 5 || rowLog == 6);
829
+ assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */
830
+ assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on correct multiple of bytes (32,64,128) */
831
+ }
832
+
833
+ /* ZSTD_row_fillHashCache():
834
+ * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
835
+ * but not beyond iLimit.
836
+ */
837
+ FORCE_INLINE_TEMPLATE
838
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
839
+ void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
840
+ U32 const rowLog, U32 const mls,
841
+ U32 idx, const BYTE* const iLimit)
577
842
  {
578
- switch(ms->cParams.minMatch)
579
- {
580
- default : /* includes case 3 */
581
- case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
582
- case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
583
- case 7 :
584
- case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
843
+ U32 const* const hashTable = ms->hashTable;
844
+ BYTE const* const tagTable = ms->tagTable;
845
+ U32 const hashLog = ms->rowHashLog;
846
+ U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
847
+ U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
848
+
849
+ for (; idx < lim; ++idx) {
850
+ U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
851
+ U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
852
+ ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
853
+ ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
585
854
  }
855
+
856
+ DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1],
857
+ ms->hashCache[2], ms->hashCache[3], ms->hashCache[4],
858
+ ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]);
586
859
  }
587
860
 
861
+ /* ZSTD_row_nextCachedHash():
862
+ * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
863
+ * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
864
+ */
865
+ FORCE_INLINE_TEMPLATE
866
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
867
+ U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
868
+ BYTE const* tagTable, BYTE const* base,
869
+ U32 idx, U32 const hashLog,
870
+ U32 const rowLog, U32 const mls,
871
+ U64 const hashSalt)
872
+ {
873
+ U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
874
+ U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
875
+ ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
876
+ { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
877
+ cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash;
878
+ return hash;
879
+ }
880
+ }
588
881
 
589
- static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
590
- ZSTD_matchState_t* ms,
591
- const BYTE* ip, const BYTE* const iLimit,
592
- size_t* offsetPtr)
882
+ /* ZSTD_row_update_internalImpl():
883
+ * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
884
+ */
885
+ FORCE_INLINE_TEMPLATE
886
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
887
+ void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
888
+ U32 updateStartIdx, U32 const updateEndIdx,
889
+ U32 const mls, U32 const rowLog,
890
+ U32 const rowMask, U32 const useCache)
593
891
  {
594
- switch(ms->cParams.minMatch)
595
- {
596
- default : /* includes case 3 */
597
- case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
598
- case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
599
- case 7 :
600
- case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
892
+ U32* const hashTable = ms->hashTable;
893
+ BYTE* const tagTable = ms->tagTable;
894
+ U32 const hashLog = ms->rowHashLog;
895
+ const BYTE* const base = ms->window.base;
896
+
897
+ DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
898
+ for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
899
+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
900
+ : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
901
+ U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
902
+ U32* const row = hashTable + relRow;
903
+ BYTE* tagRow = tagTable + relRow;
904
+ U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
905
+
906
+ assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
907
+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
908
+ row[pos] = updateStartIdx;
601
909
  }
602
910
  }
603
911
 
912
+ /* ZSTD_row_update_internal():
913
+ * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
914
+ * Skips sections of long matches as is necessary.
915
+ */
916
+ FORCE_INLINE_TEMPLATE
917
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
918
+ void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
919
+ U32 const mls, U32 const rowLog,
920
+ U32 const rowMask, U32 const useCache)
921
+ {
922
+ U32 idx = ms->nextToUpdate;
923
+ const BYTE* const base = ms->window.base;
924
+ const U32 target = (U32)(ip - base);
925
+ const U32 kSkipThreshold = 384;
926
+ const U32 kMaxMatchStartPositionsToUpdate = 96;
927
+ const U32 kMaxMatchEndPositionsToUpdate = 32;
928
+
929
+ if (useCache) {
930
+ /* Only skip positions when using hash cache, i.e.
931
+ * if we are loading a dict, don't skip anything.
932
+ * If we decide to skip, then we only update a set number
933
+ * of positions at the beginning and end of the match.
934
+ */
935
+ if (UNLIKELY(target - idx > kSkipThreshold)) {
936
+ U32 const bound = idx + kMaxMatchStartPositionsToUpdate;
937
+ ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache);
938
+ idx = target - kMaxMatchEndPositionsToUpdate;
939
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1);
940
+ }
941
+ }
942
+ assert(target >= idx);
943
+ ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache);
944
+ ms->nextToUpdate = target;
945
+ }
946
+
947
+ /* ZSTD_row_update():
948
+ * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
949
+ * processing.
950
+ */
951
+ void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
952
+ const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
953
+ const U32 rowMask = (1u << rowLog) - 1;
954
+ const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
955
+
956
+ DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
957
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
958
+ }
959
+
960
+ /* Returns the mask width of bits group of which will be set to 1. Given not all
961
+ * architectures have easy movemask instruction, this helps to iterate over
962
+ * groups of bits easier and faster.
963
+ */
964
+ FORCE_INLINE_TEMPLATE U32
965
+ ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
966
+ {
967
+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
968
+ assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
969
+ (void)rowEntries;
970
+ #if defined(ZSTD_ARCH_ARM_NEON)
971
+ /* NEON path only works for little endian */
972
+ if (!MEM_isLittleEndian()) {
973
+ return 1;
974
+ }
975
+ if (rowEntries == 16) {
976
+ return 4;
977
+ }
978
+ if (rowEntries == 32) {
979
+ return 2;
980
+ }
981
+ if (rowEntries == 64) {
982
+ return 1;
983
+ }
984
+ #endif
985
+ return 1;
986
+ }
604
987
 
605
- FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
988
+ #if defined(ZSTD_ARCH_X86_SSE2)
989
+ FORCE_INLINE_TEMPLATE ZSTD_VecMask
990
+ ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head)
991
+ {
992
+ const __m128i comparisonMask = _mm_set1_epi8((char)tag);
993
+ int matches[4] = {0};
994
+ int i;
995
+ assert(nbChunks == 1 || nbChunks == 2 || nbChunks == 4);
996
+ for (i=0; i<nbChunks; i++) {
997
+ const __m128i chunk = _mm_loadu_si128((const __m128i*)(const void*)(src + 16*i));
998
+ const __m128i equalMask = _mm_cmpeq_epi8(chunk, comparisonMask);
999
+ matches[i] = _mm_movemask_epi8(equalMask);
1000
+ }
1001
+ if (nbChunks == 1) return ZSTD_rotateRight_U16((U16)matches[0], head);
1002
+ if (nbChunks == 2) return ZSTD_rotateRight_U32((U32)matches[1] << 16 | (U32)matches[0], head);
1003
+ assert(nbChunks == 4);
1004
+ return ZSTD_rotateRight_U64((U64)matches[3] << 48 | (U64)matches[2] << 32 | (U64)matches[1] << 16 | (U64)matches[0], head);
1005
+ }
1006
+ #endif
1007
+
1008
+ #if defined(ZSTD_ARCH_ARM_NEON)
1009
+ FORCE_INLINE_TEMPLATE ZSTD_VecMask
1010
+ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped)
1011
+ {
1012
+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
1013
+ if (rowEntries == 16) {
1014
+ /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits.
1015
+ * After that groups of 4 bits represent the equalMask. We lower
1016
+ * all bits except the highest in these groups by doing AND with
1017
+ * 0x88 = 0b10001000.
1018
+ */
1019
+ const uint8x16_t chunk = vld1q_u8(src);
1020
+ const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
1021
+ const uint8x8_t res = vshrn_n_u16(equalMask, 4);
1022
+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
1023
+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull;
1024
+ } else if (rowEntries == 32) {
1025
+ /* Same idea as with rowEntries == 16 but doing AND with
1026
+ * 0x55 = 0b01010101.
1027
+ */
1028
+ const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src);
1029
+ const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
1030
+ const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
1031
+ const uint8x16_t dup = vdupq_n_u8(tag);
1032
+ const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6);
1033
+ const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6);
1034
+ const uint8x8_t res = vsli_n_u8(t0, t1, 4);
1035
+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ;
1036
+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull;
1037
+ } else { /* rowEntries == 64 */
1038
+ const uint8x16x4_t chunk = vld4q_u8(src);
1039
+ const uint8x16_t dup = vdupq_n_u8(tag);
1040
+ const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
1041
+ const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
1042
+ const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
1043
+ const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
1044
+
1045
+ const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
1046
+ const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
1047
+ const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
1048
+ const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
1049
+ const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
1050
+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
1051
+ return ZSTD_rotateRight_U64(matches, headGrouped);
1052
+ }
1053
+ }
1054
+ #endif
1055
+
1056
+ /* Returns a ZSTD_VecMask (U64) that has the nth group (determined by
1057
+ * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag"
1058
+ * matches the hash at the nth position in a row of the tagTable.
1059
+ * Each row is a circular buffer beginning at the value of "headGrouped". So we
1060
+ * must rotate the "matches" bitfield to match up with the actual layout of the
1061
+ * entries within the hashTable */
1062
+ FORCE_INLINE_TEMPLATE ZSTD_VecMask
1063
+ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
1064
+ {
1065
+ const BYTE* const src = tagRow;
1066
+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
1067
+ assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
1068
+ assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
1069
+
1070
+ #if defined(ZSTD_ARCH_X86_SSE2)
1071
+
1072
+ return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);
1073
+
1074
+ #else /* SW or NEON-LE */
1075
+
1076
+ # if defined(ZSTD_ARCH_ARM_NEON)
1077
+ /* This NEON path only works for little endian - otherwise use SWAR below */
1078
+ if (MEM_isLittleEndian()) {
1079
+ return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
1080
+ }
1081
+ # endif /* ZSTD_ARCH_ARM_NEON */
1082
+ /* SWAR */
1083
+ { const int chunkSize = sizeof(size_t);
1084
+ const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
1085
+ const size_t xFF = ~((size_t)0);
1086
+ const size_t x01 = xFF / 0xFF;
1087
+ const size_t x80 = x01 << 7;
1088
+ const size_t splatChar = tag * x01;
1089
+ ZSTD_VecMask matches = 0;
1090
+ int i = rowEntries - chunkSize;
1091
+ assert((sizeof(size_t) == 4) || (sizeof(size_t) == 8));
1092
+ if (MEM_isLittleEndian()) { /* runtime check so have two loops */
1093
+ const size_t extractMagic = (xFF / 0x7F) >> chunkSize;
1094
+ do {
1095
+ size_t chunk = MEM_readST(&src[i]);
1096
+ chunk ^= splatChar;
1097
+ chunk = (((chunk | x80) - x01) | chunk) & x80;
1098
+ matches <<= chunkSize;
1099
+ matches |= (chunk * extractMagic) >> shiftAmount;
1100
+ i -= chunkSize;
1101
+ } while (i >= 0);
1102
+ } else { /* big endian: reverse bits during extraction */
1103
+ const size_t msb = xFF ^ (xFF >> 1);
1104
+ const size_t extractMagic = (msb / 0x1FF) | msb;
1105
+ do {
1106
+ size_t chunk = MEM_readST(&src[i]);
1107
+ chunk ^= splatChar;
1108
+ chunk = (((chunk | x80) - x01) | chunk) & x80;
1109
+ matches <<= chunkSize;
1110
+ matches |= ((chunk >> 7) * extractMagic) >> shiftAmount;
1111
+ i -= chunkSize;
1112
+ } while (i >= 0);
1113
+ }
1114
+ matches = ~matches;
1115
+ if (rowEntries == 16) {
1116
+ return ZSTD_rotateRight_U16((U16)matches, headGrouped);
1117
+ } else if (rowEntries == 32) {
1118
+ return ZSTD_rotateRight_U32((U32)matches, headGrouped);
1119
+ } else {
1120
+ return ZSTD_rotateRight_U64((U64)matches, headGrouped);
1121
+ }
1122
+ }
1123
+ #endif
1124
+ }
1125
+
1126
+ /* The high-level approach of the SIMD row based match finder is as follows:
1127
+ * - Figure out where to insert the new entry:
1128
+ * - Generate a hash for current input position and split it into one byte of tag and `rowHashLog` bits of index.
1129
+ * - The hash is salted by a value that changes on every context reset, so when the same table is used
1130
+ * we will avoid collisions that would otherwise slow us down by introducing phantom matches.
1131
+ * - The hashTable is effectively split into groups or "rows" of 15, 31 or 63 entries of U32, and the index determines
1132
+ * which row to insert into.
1133
+ * - Determine the correct position within the row to insert the entry into. Each row of 15, 31 or 63 can
1134
+ * be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16, 32 or 64 bytes
1135
+ * per row).
1136
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
1137
+ * generate a bitfield that we can cycle through to check the collisions in the hash table.
1138
+ * - Pick the longest match.
1139
+ * - Insert the tag into the equivalent row and position in the tagTable.
1140
+ */
1141
+ FORCE_INLINE_TEMPLATE
1142
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1143
+ size_t ZSTD_RowFindBestMatch(
606
1144
  ZSTD_matchState_t* ms,
607
- const BYTE* ip, const BYTE* const iLimit,
608
- size_t* offsetPtr)
1145
+ const BYTE* const ip, const BYTE* const iLimit,
1146
+ size_t* offsetPtr,
1147
+ const U32 mls, const ZSTD_dictMode_e dictMode,
1148
+ const U32 rowLog)
609
1149
  {
610
- switch(ms->cParams.minMatch)
611
- {
612
- default : /* includes case 3 */
613
- case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
614
- case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
615
- case 7 :
616
- case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
1150
+ U32* const hashTable = ms->hashTable;
1151
+ BYTE* const tagTable = ms->tagTable;
1152
+ U32* const hashCache = ms->hashCache;
1153
+ const U32 hashLog = ms->rowHashLog;
1154
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
1155
+ const BYTE* const base = ms->window.base;
1156
+ const BYTE* const dictBase = ms->window.dictBase;
1157
+ const U32 dictLimit = ms->window.dictLimit;
1158
+ const BYTE* const prefixStart = base + dictLimit;
1159
+ const BYTE* const dictEnd = dictBase + dictLimit;
1160
+ const U32 curr = (U32)(ip-base);
1161
+ const U32 maxDistance = 1U << cParams->windowLog;
1162
+ const U32 lowestValid = ms->window.lowLimit;
1163
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1164
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
1165
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
1166
+ const U32 rowEntries = (1U << rowLog);
1167
+ const U32 rowMask = rowEntries - 1;
1168
+ const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
1169
+ const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
1170
+ const U64 hashSalt = ms->hashSalt;
1171
+ U32 nbAttempts = 1U << cappedSearchLog;
1172
+ size_t ml=4-1;
1173
+ U32 hash;
1174
+
1175
+ /* DMS/DDS variables that may be referenced later */
1176
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
1177
+
1178
+ /* Initialize the following variables to satisfy static analyzer */
1179
+ size_t ddsIdx = 0;
1180
+ U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
1181
+ U32 dmsTag = 0;
1182
+ U32* dmsRow = NULL;
1183
+ BYTE* dmsTagRow = NULL;
1184
+
1185
+ if (dictMode == ZSTD_dedicatedDictSearch) {
1186
+ const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
1187
+ { /* Prefetch DDS hashtable entry */
1188
+ ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
1189
+ PREFETCH_L1(&dms->hashTable[ddsIdx]);
1190
+ }
1191
+ ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
617
1192
  }
1193
+
1194
+ if (dictMode == ZSTD_dictMatchState) {
1195
+ /* Prefetch DMS rows */
1196
+ U32* const dmsHashTable = dms->hashTable;
1197
+ BYTE* const dmsTagTable = dms->tagTable;
1198
+ U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
1199
+ U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1200
+ dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
1201
+ dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
1202
+ dmsRow = dmsHashTable + dmsRelRow;
1203
+ ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
1204
+ }
1205
+
1206
+ /* Update the hashTable and tagTable up to (but not including) ip */
1207
+ if (!ms->lazySkipping) {
1208
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
1209
+ hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
1210
+ } else {
1211
+ /* Stop inserting every position when in the lazy skipping mode.
1212
+ * The hash cache is also not kept up to date in this mode.
1213
+ */
1214
+ hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
1215
+ ms->nextToUpdate = curr;
1216
+ }
1217
+ ms->hashSaltEntropy += hash; /* collect salt entropy */
1218
+
1219
+ { /* Get the hash for ip, compute the appropriate row */
1220
+ U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1221
+ U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
1222
+ U32* const row = hashTable + relRow;
1223
+ BYTE* tagRow = (BYTE*)(tagTable + relRow);
1224
+ U32 const headGrouped = (*tagRow & rowMask) * groupWidth;
1225
+ U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
1226
+ size_t numMatches = 0;
1227
+ size_t currMatch = 0;
1228
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
1229
+
1230
+ /* Cycle through the matches and prefetch */
1231
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
1232
+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
1233
+ U32 const matchIndex = row[matchPos];
1234
+ if(matchPos == 0) continue;
1235
+ assert(numMatches < rowEntries);
1236
+ if (matchIndex < lowLimit)
1237
+ break;
1238
+ if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
1239
+ PREFETCH_L1(base + matchIndex);
1240
+ } else {
1241
+ PREFETCH_L1(dictBase + matchIndex);
1242
+ }
1243
+ matchBuffer[numMatches++] = matchIndex;
1244
+ --nbAttempts;
1245
+ }
1246
+
1247
+ /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
1248
+ in ZSTD_row_update_internal() at the next search. */
1249
+ {
1250
+ U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
1251
+ tagRow[pos] = (BYTE)tag;
1252
+ row[pos] = ms->nextToUpdate++;
1253
+ }
1254
+
1255
+ /* Return the longest match */
1256
+ for (; currMatch < numMatches; ++currMatch) {
1257
+ U32 const matchIndex = matchBuffer[currMatch];
1258
+ size_t currentMl=0;
1259
+ assert(matchIndex < curr);
1260
+ assert(matchIndex >= lowLimit);
1261
+
1262
+ if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
1263
+ const BYTE* const match = base + matchIndex;
1264
+ assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
1265
+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
1266
+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
1267
+ currentMl = ZSTD_count(ip, match, iLimit);
1268
+ } else {
1269
+ const BYTE* const match = dictBase + matchIndex;
1270
+ assert(match+4 <= dictEnd);
1271
+ if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
1272
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
1273
+ }
1274
+
1275
+ /* Save best solution */
1276
+ if (currentMl > ml) {
1277
+ ml = currentMl;
1278
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
1279
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
1280
+ }
1281
+ }
1282
+ }
1283
+
1284
+ assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
1285
+ if (dictMode == ZSTD_dedicatedDictSearch) {
1286
+ ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms,
1287
+ ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
1288
+ } else if (dictMode == ZSTD_dictMatchState) {
1289
+ /* TODO: Measure and potentially add prefetching to DMS */
1290
+ const U32 dmsLowestIndex = dms->window.dictLimit;
1291
+ const BYTE* const dmsBase = dms->window.base;
1292
+ const BYTE* const dmsEnd = dms->window.nextSrc;
1293
+ const U32 dmsSize = (U32)(dmsEnd - dmsBase);
1294
+ const U32 dmsIndexDelta = dictLimit - dmsSize;
1295
+
1296
+ { U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth;
1297
+ U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
1298
+ size_t numMatches = 0;
1299
+ size_t currMatch = 0;
1300
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
1301
+
1302
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
1303
+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
1304
+ U32 const matchIndex = dmsRow[matchPos];
1305
+ if(matchPos == 0) continue;
1306
+ if (matchIndex < dmsLowestIndex)
1307
+ break;
1308
+ PREFETCH_L1(dmsBase + matchIndex);
1309
+ matchBuffer[numMatches++] = matchIndex;
1310
+ --nbAttempts;
1311
+ }
1312
+
1313
+ /* Return the longest match */
1314
+ for (; currMatch < numMatches; ++currMatch) {
1315
+ U32 const matchIndex = matchBuffer[currMatch];
1316
+ size_t currentMl=0;
1317
+ assert(matchIndex >= dmsLowestIndex);
1318
+ assert(matchIndex < curr);
1319
+
1320
+ { const BYTE* const match = dmsBase + matchIndex;
1321
+ assert(match+4 <= dmsEnd);
1322
+ if (MEM_read32(match) == MEM_read32(ip))
1323
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
1324
+ }
1325
+
1326
+ if (currentMl > ml) {
1327
+ ml = currentMl;
1328
+ assert(curr > matchIndex + dmsIndexDelta);
1329
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
1330
+ if (ip+currentMl == iLimit) break;
1331
+ }
1332
+ }
1333
+ }
1334
+ }
1335
+ return ml;
618
1336
  }
619
1337
 
620
1338
 
1339
+ /**
1340
+ * Generate search functions templated on (dictMode, mls, rowLog).
1341
+ * These functions are outlined for code size & compilation time.
1342
+ * ZSTD_searchMax() dispatches to the correct implementation function.
1343
+ *
1344
+ * TODO: The start of the search function involves loading and calculating a
1345
+ * bunch of constants from the ZSTD_matchState_t. These computations could be
1346
+ * done in an initialization function, and saved somewhere in the match state.
1347
+ * Then we could pass a pointer to the saved state instead of the match state,
1348
+ * and avoid duplicate computations.
1349
+ *
1350
+ * TODO: Move the match re-winding into searchMax. This improves compression
1351
+ * ratio, and unlocks further simplifications with the next TODO.
1352
+ *
1353
+ * TODO: Try moving the repcode search into searchMax. After the re-winding
1354
+ * and repcode search are in searchMax, there is no more logic in the match
1355
+ * finder loop that requires knowledge about the dictMode. So we should be
1356
+ * able to avoid force inlining it, and we can join the extDict loop with
1357
+ * the single segment loop. It should go in searchMax instead of its own
1358
+ * function to avoid having multiple virtual function calls per search.
1359
+ */
1360
+
1361
+ #define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls
1362
+ #define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls
1363
+ #define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
1364
+
1365
+ #define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
1366
+
1367
+ #define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \
1368
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \
1369
+ ZSTD_matchState_t* ms, \
1370
+ const BYTE* ip, const BYTE* const iLimit, \
1371
+ size_t* offBasePtr) \
1372
+ { \
1373
+ assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1374
+ return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \
1375
+ } \
1376
+
1377
+ #define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \
1378
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \
1379
+ ZSTD_matchState_t* ms, \
1380
+ const BYTE* ip, const BYTE* const iLimit, \
1381
+ size_t* offsetPtr) \
1382
+ { \
1383
+ assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1384
+ return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
1385
+ } \
1386
+
1387
+ #define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \
1388
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \
1389
+ ZSTD_matchState_t* ms, \
1390
+ const BYTE* ip, const BYTE* const iLimit, \
1391
+ size_t* offsetPtr) \
1392
+ { \
1393
+ assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1394
+ assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \
1395
+ return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
1396
+ } \
1397
+
1398
+ #define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \
1399
+ X(dictMode, mls, 4) \
1400
+ X(dictMode, mls, 5) \
1401
+ X(dictMode, mls, 6)
1402
+
1403
+ #define ZSTD_FOR_EACH_MLS_ROWLOG(X, dictMode) \
1404
+ ZSTD_FOR_EACH_ROWLOG(X, dictMode, 4) \
1405
+ ZSTD_FOR_EACH_ROWLOG(X, dictMode, 5) \
1406
+ ZSTD_FOR_EACH_ROWLOG(X, dictMode, 6)
1407
+
1408
+ #define ZSTD_FOR_EACH_MLS(X, dictMode) \
1409
+ X(dictMode, 4) \
1410
+ X(dictMode, 5) \
1411
+ X(dictMode, 6)
1412
+
1413
+ #define ZSTD_FOR_EACH_DICT_MODE(X, ...) \
1414
+ X(__VA_ARGS__, noDict) \
1415
+ X(__VA_ARGS__, extDict) \
1416
+ X(__VA_ARGS__, dictMatchState) \
1417
+ X(__VA_ARGS__, dedicatedDictSearch)
1418
+
1419
+ /* Generate row search fns for each combination of (dictMode, mls, rowLog) */
1420
+ ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN)
1421
+ /* Generate binary Tree search fns for each combination of (dictMode, mls) */
1422
+ ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN)
1423
+ /* Generate hash chain search fns for each combination of (dictMode, mls) */
1424
+ ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN)
1425
+
1426
+ typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
1427
+
1428
+ #define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \
1429
+ case mls: \
1430
+ return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
1431
+ #define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \
1432
+ case mls: \
1433
+ return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
1434
+ #define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \
1435
+ case rowLog: \
1436
+ return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
1437
+
1438
+ #define ZSTD_SWITCH_MLS(X, dictMode) \
1439
+ switch (mls) { \
1440
+ ZSTD_FOR_EACH_MLS(X, dictMode) \
1441
+ }
1442
+
1443
+ #define ZSTD_SWITCH_ROWLOG(dictMode, mls) \
1444
+ case mls: \
1445
+ switch (rowLog) { \
1446
+ ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \
1447
+ } \
1448
+ ZSTD_UNREACHABLE; \
1449
+ break;
1450
+
1451
+ #define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \
1452
+ switch (searchMethod) { \
1453
+ case search_hashChain: \
1454
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
1455
+ break; \
1456
+ case search_binaryTree: \
1457
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
1458
+ break; \
1459
+ case search_rowHash: \
1460
+ ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \
1461
+ break; \
1462
+ } \
1463
+ ZSTD_UNREACHABLE;
1464
+
1465
+ /**
1466
+ * Searches for the longest match at @p ip.
1467
+ * Dispatches to the correct implementation function based on the
1468
+ * (searchMethod, dictMode, mls, rowLog). We use switch statements
1469
+ * here instead of using an indirect function call through a function
1470
+ * pointer because after Spectre and Meltdown mitigations, indirect
1471
+ * function calls can be very costly, especially in the kernel.
1472
+ *
1473
+ * NOTE: dictMode and searchMethod should be templated, so those switch
1474
+ * statements should be optimized out. Only the mls & rowLog switches
1475
+ * should be left.
1476
+ *
1477
+ * @param ms The match state.
1478
+ * @param ip The position to search at.
1479
+ * @param iend The end of the input data.
1480
+ * @param[out] offsetPtr Stores the match offset into this pointer.
1481
+ * @param mls The minimum search length, in the range [4, 6].
1482
+ * @param rowLog The row log (if applicable), in the range [4, 6].
1483
+ * @param searchMethod The search method to use (templated).
1484
+ * @param dictMode The dictMode (templated).
1485
+ *
1486
+ * @returns The length of the longest match found, or < mls if no match is found.
1487
+ * If a match is found its offset is stored in @p offsetPtr.
1488
+ */
1489
+ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
1490
+ ZSTD_matchState_t* ms,
1491
+ const BYTE* ip,
1492
+ const BYTE* iend,
1493
+ size_t* offsetPtr,
1494
+ U32 const mls,
1495
+ U32 const rowLog,
1496
+ searchMethod_e const searchMethod,
1497
+ ZSTD_dictMode_e const dictMode)
1498
+ {
1499
+ if (dictMode == ZSTD_noDict) {
1500
+ ZSTD_SWITCH_SEARCH_METHOD(noDict)
1501
+ } else if (dictMode == ZSTD_extDict) {
1502
+ ZSTD_SWITCH_SEARCH_METHOD(extDict)
1503
+ } else if (dictMode == ZSTD_dictMatchState) {
1504
+ ZSTD_SWITCH_SEARCH_METHOD(dictMatchState)
1505
+ } else if (dictMode == ZSTD_dedicatedDictSearch) {
1506
+ ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch)
1507
+ }
1508
+ ZSTD_UNREACHABLE;
1509
+ return 0;
1510
+ }
1511
+
621
1512
  /* *******************************
622
1513
  * Common parser - lazy strategy
623
1514
  *********************************/
624
- typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
625
1515
 
626
- FORCE_INLINE_TEMPLATE size_t
627
- ZSTD_compressBlock_lazy_generic(
1516
+ FORCE_INLINE_TEMPLATE
1517
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1518
+ size_t ZSTD_compressBlock_lazy_generic(
628
1519
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
629
1520
  U32 rep[ZSTD_REP_NUM],
630
1521
  const void* src, size_t srcSize,
@@ -635,53 +1526,52 @@ ZSTD_compressBlock_lazy_generic(
635
1526
  const BYTE* ip = istart;
636
1527
  const BYTE* anchor = istart;
637
1528
  const BYTE* const iend = istart + srcSize;
638
- const BYTE* const ilimit = iend - 8;
1529
+ const BYTE* const ilimit = (searchMethod == search_rowHash) ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
639
1530
  const BYTE* const base = ms->window.base;
640
1531
  const U32 prefixLowestIndex = ms->window.dictLimit;
641
1532
  const BYTE* const prefixLowest = base + prefixLowestIndex;
1533
+ const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
1534
+ const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
642
1535
 
643
- typedef size_t (*searchMax_f)(
644
- ZSTD_matchState_t* ms,
645
- const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
646
- searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
647
- (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
648
- : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
649
- (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
650
- : ZSTD_HcFindBestMatch_selectMLS);
651
- U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
1536
+ U32 offset_1 = rep[0], offset_2 = rep[1];
1537
+ U32 offsetSaved1 = 0, offsetSaved2 = 0;
652
1538
 
1539
+ const int isDMS = dictMode == ZSTD_dictMatchState;
1540
+ const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
1541
+ const int isDxS = isDMS || isDDS;
653
1542
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
654
- const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
655
- dms->window.dictLimit : 0;
656
- const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
657
- dms->window.base : NULL;
658
- const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
659
- dictBase + dictLowestIndex : NULL;
660
- const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
661
- dms->window.nextSrc : NULL;
662
- const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
1543
+ const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
1544
+ const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
1545
+ const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
1546
+ const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
1547
+ const U32 dictIndexDelta = isDxS ?
663
1548
  prefixLowestIndex - (U32)(dictEnd - dictBase) :
664
1549
  0;
665
1550
  const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
666
1551
 
667
- DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
668
-
669
- /* init */
1552
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
670
1553
  ip += (dictAndPrefixLength == 0);
671
1554
  if (dictMode == ZSTD_noDict) {
672
- U32 const current = (U32)(ip - base);
673
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
674
- U32 const maxRep = current - windowLow;
675
- if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
676
- if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
1555
+ U32 const curr = (U32)(ip - base);
1556
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
1557
+ U32 const maxRep = curr - windowLow;
1558
+ if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
1559
+ if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
677
1560
  }
678
- if (dictMode == ZSTD_dictMatchState) {
1561
+ if (isDxS) {
679
1562
  /* dictMatchState repCode checks don't currently handle repCode == 0
680
1563
  * disabling. */
681
1564
  assert(offset_1 <= dictAndPrefixLength);
682
1565
  assert(offset_2 <= dictAndPrefixLength);
683
1566
  }
684
1567
 
1568
+ /* Reset the lazy skipping state */
1569
+ ms->lazySkipping = 0;
1570
+
1571
+ if (searchMethod == search_rowHash) {
1572
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1573
+ }
1574
+
685
1575
  /* Match Loop */
686
1576
  #if defined(__GNUC__) && defined(__x86_64__)
687
1577
  /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
@@ -691,13 +1581,14 @@ ZSTD_compressBlock_lazy_generic(
691
1581
  #endif
692
1582
  while (ip < ilimit) {
693
1583
  size_t matchLength=0;
694
- size_t offset=0;
1584
+ size_t offBase = REPCODE1_TO_OFFBASE;
695
1585
  const BYTE* start=ip+1;
1586
+ DEBUGLOG(7, "search baseline (depth 0)");
696
1587
 
697
1588
  /* check repCode */
698
- if (dictMode == ZSTD_dictMatchState) {
1589
+ if (isDxS) {
699
1590
  const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
700
- const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
1591
+ const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
701
1592
  && repIndex < prefixLowestIndex) ?
702
1593
  dictBase + (repIndex - dictIndexDelta) :
703
1594
  base + repIndex;
@@ -715,30 +1606,40 @@ ZSTD_compressBlock_lazy_generic(
715
1606
  }
716
1607
 
717
1608
  /* first search (depth 0) */
718
- { size_t offsetFound = 999999999;
719
- size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
1609
+ { size_t offbaseFound = 999999999;
1610
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
720
1611
  if (ml2 > matchLength)
721
- matchLength = ml2, start = ip, offset=offsetFound;
1612
+ matchLength = ml2, start = ip, offBase = offbaseFound;
722
1613
  }
723
1614
 
724
1615
  if (matchLength < 4) {
725
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
1616
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */;
1617
+ ip += step;
1618
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
1619
+ * In this mode we stop inserting every position into our tables, and only insert
1620
+ * positions that we search, which is one in step positions.
1621
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
1622
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
1623
+ * triggered once we've gone 2KB without finding any matches.
1624
+ */
1625
+ ms->lazySkipping = step > kLazySkippingStep;
726
1626
  continue;
727
1627
  }
728
1628
 
729
1629
  /* let's try to find a better solution */
730
1630
  if (depth>=1)
731
1631
  while (ip<ilimit) {
1632
+ DEBUGLOG(7, "search depth 1");
732
1633
  ip ++;
733
1634
  if ( (dictMode == ZSTD_noDict)
734
- && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1635
+ && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
735
1636
  size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
736
1637
  int const gain2 = (int)(mlRep * 3);
737
- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1638
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
738
1639
  if ((mlRep >= 4) && (gain2 > gain1))
739
- matchLength = mlRep, offset = 0, start = ip;
1640
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
740
1641
  }
741
- if (dictMode == ZSTD_dictMatchState) {
1642
+ if (isDxS) {
742
1643
  const U32 repIndex = (U32)(ip - base) - offset_1;
743
1644
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
744
1645
  dictBase + (repIndex - dictIndexDelta) :
@@ -748,32 +1649,33 @@ ZSTD_compressBlock_lazy_generic(
748
1649
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
749
1650
  size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
750
1651
  int const gain2 = (int)(mlRep * 3);
751
- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1652
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
752
1653
  if ((mlRep >= 4) && (gain2 > gain1))
753
- matchLength = mlRep, offset = 0, start = ip;
1654
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
754
1655
  }
755
1656
  }
756
- { size_t offset2=999999999;
757
- size_t const ml2 = searchMax(ms, ip, iend, &offset2);
758
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
759
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1657
+ { size_t ofbCandidate=999999999;
1658
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
1659
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
1660
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
760
1661
  if ((ml2 >= 4) && (gain2 > gain1)) {
761
- matchLength = ml2, offset = offset2, start = ip;
1662
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
762
1663
  continue; /* search a better one */
763
1664
  } }
764
1665
 
765
1666
  /* let's find an even better one */
766
1667
  if ((depth==2) && (ip<ilimit)) {
1668
+ DEBUGLOG(7, "search depth 2");
767
1669
  ip ++;
768
1670
  if ( (dictMode == ZSTD_noDict)
769
- && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1671
+ && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
770
1672
  size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
771
1673
  int const gain2 = (int)(mlRep * 4);
772
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1674
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
773
1675
  if ((mlRep >= 4) && (gain2 > gain1))
774
- matchLength = mlRep, offset = 0, start = ip;
1676
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
775
1677
  }
776
- if (dictMode == ZSTD_dictMatchState) {
1678
+ if (isDxS) {
777
1679
  const U32 repIndex = (U32)(ip - base) - offset_1;
778
1680
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
779
1681
  dictBase + (repIndex - dictIndexDelta) :
@@ -783,64 +1685,69 @@ ZSTD_compressBlock_lazy_generic(
783
1685
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
784
1686
  size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
785
1687
  int const gain2 = (int)(mlRep * 4);
786
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1688
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
787
1689
  if ((mlRep >= 4) && (gain2 > gain1))
788
- matchLength = mlRep, offset = 0, start = ip;
1690
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
789
1691
  }
790
1692
  }
791
- { size_t offset2=999999999;
792
- size_t const ml2 = searchMax(ms, ip, iend, &offset2);
793
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
794
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1693
+ { size_t ofbCandidate=999999999;
1694
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
1695
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
1696
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
795
1697
  if ((ml2 >= 4) && (gain2 > gain1)) {
796
- matchLength = ml2, offset = offset2, start = ip;
1698
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
797
1699
  continue;
798
1700
  } } }
799
1701
  break; /* nothing found : store previous solution */
800
1702
  }
801
1703
 
802
1704
  /* NOTE:
803
- * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
804
- * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
805
- * overflows the pointer, which is undefined behavior.
1705
+ * Pay attention that `start[-value]` can lead to strange undefined behavior
1706
+ * notably if `value` is unsigned, resulting in a large positive `-value`.
806
1707
  */
807
1708
  /* catch up */
808
- if (offset) {
1709
+ if (OFFBASE_IS_OFFSET(offBase)) {
809
1710
  if (dictMode == ZSTD_noDict) {
810
- while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
811
- && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
1711
+ while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest))
1712
+ && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) ) /* only search for offset within prefix */
812
1713
  { start--; matchLength++; }
813
1714
  }
814
- if (dictMode == ZSTD_dictMatchState) {
815
- U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
1715
+ if (isDxS) {
1716
+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
816
1717
  const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
817
1718
  const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
818
1719
  while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
819
1720
  }
820
- offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1721
+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
821
1722
  }
822
1723
  /* store sequence */
823
1724
  _storeSequence:
824
- { size_t const litLength = start - anchor;
825
- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1725
+ { size_t const litLength = (size_t)(start - anchor);
1726
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
826
1727
  anchor = ip = start + matchLength;
827
1728
  }
1729
+ if (ms->lazySkipping) {
1730
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
1731
+ if (searchMethod == search_rowHash) {
1732
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1733
+ }
1734
+ ms->lazySkipping = 0;
1735
+ }
828
1736
 
829
1737
  /* check immediate repcode */
830
- if (dictMode == ZSTD_dictMatchState) {
1738
+ if (isDxS) {
831
1739
  while (ip <= ilimit) {
832
1740
  U32 const current2 = (U32)(ip-base);
833
1741
  U32 const repIndex = current2 - offset_2;
834
- const BYTE* repMatch = dictMode == ZSTD_dictMatchState
835
- && repIndex < prefixLowestIndex ?
1742
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
836
1743
  dictBase - dictIndexDelta + repIndex :
837
1744
  base + repIndex;
838
1745
  if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
839
1746
  && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
840
1747
  const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
841
1748
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
842
- offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
843
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1749
+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset_2 <=> offset_1 */
1750
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
844
1751
  ip += matchLength;
845
1752
  anchor = ip;
846
1753
  continue;
@@ -854,36 +1761,72 @@ _storeSequence:
854
1761
  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
855
1762
  /* store sequence */
856
1763
  matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
857
- offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
858
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1764
+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */
1765
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
859
1766
  ip += matchLength;
860
1767
  anchor = ip;
861
1768
  continue; /* faster when present ... (?) */
862
1769
  } } }
863
1770
 
864
- /* Save reps for next block */
865
- rep[0] = offset_1 ? offset_1 : savedOffset;
866
- rep[1] = offset_2 ? offset_2 : savedOffset;
1771
+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
1772
+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
1773
+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
1774
+
1775
+ /* save reps for next block */
1776
+ rep[0] = offset_1 ? offset_1 : offsetSaved1;
1777
+ rep[1] = offset_2 ? offset_2 : offsetSaved2;
867
1778
 
868
1779
  /* Return the last literals size */
869
1780
  return (size_t)(iend - anchor);
870
1781
  }
1782
+ #endif /* build exclusions */
871
1783
 
872
1784
 
873
- size_t ZSTD_compressBlock_btlazy2(
1785
+ #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
1786
+ size_t ZSTD_compressBlock_greedy(
874
1787
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
875
1788
  void const* src, size_t srcSize)
876
1789
  {
877
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
1790
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
878
1791
  }
879
1792
 
880
- size_t ZSTD_compressBlock_lazy2(
1793
+ size_t ZSTD_compressBlock_greedy_dictMatchState(
881
1794
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
882
1795
  void const* src, size_t srcSize)
883
1796
  {
884
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
1797
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
1798
+ }
1799
+
1800
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
1801
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1802
+ void const* src, size_t srcSize)
1803
+ {
1804
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
885
1805
  }
886
1806
 
1807
+ size_t ZSTD_compressBlock_greedy_row(
1808
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1809
+ void const* src, size_t srcSize)
1810
+ {
1811
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
1812
+ }
1813
+
1814
+ size_t ZSTD_compressBlock_greedy_dictMatchState_row(
1815
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1816
+ void const* src, size_t srcSize)
1817
+ {
1818
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
1819
+ }
1820
+
1821
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
1822
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1823
+ void const* src, size_t srcSize)
1824
+ {
1825
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
1826
+ }
1827
+ #endif
1828
+
1829
+ #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
887
1830
  size_t ZSTD_compressBlock_lazy(
888
1831
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
889
1832
  void const* src, size_t srcSize)
@@ -891,18 +1834,48 @@ size_t ZSTD_compressBlock_lazy(
891
1834
  return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
892
1835
  }
893
1836
 
894
- size_t ZSTD_compressBlock_greedy(
1837
+ size_t ZSTD_compressBlock_lazy_dictMatchState(
895
1838
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
896
1839
  void const* src, size_t srcSize)
897
1840
  {
898
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
1841
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
899
1842
  }
900
1843
 
901
- size_t ZSTD_compressBlock_btlazy2_dictMatchState(
1844
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
902
1845
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
903
1846
  void const* src, size_t srcSize)
904
1847
  {
905
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
1848
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
1849
+ }
1850
+
1851
+ size_t ZSTD_compressBlock_lazy_row(
1852
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1853
+ void const* src, size_t srcSize)
1854
+ {
1855
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
1856
+ }
1857
+
1858
+ size_t ZSTD_compressBlock_lazy_dictMatchState_row(
1859
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1860
+ void const* src, size_t srcSize)
1861
+ {
1862
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
1863
+ }
1864
+
1865
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
1866
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1867
+ void const* src, size_t srcSize)
1868
+ {
1869
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
1870
+ }
1871
+ #endif
1872
+
1873
+ #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
1874
+ size_t ZSTD_compressBlock_lazy2(
1875
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1876
+ void const* src, size_t srcSize)
1877
+ {
1878
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
906
1879
  }
907
1880
 
908
1881
  size_t ZSTD_compressBlock_lazy2_dictMatchState(
@@ -912,22 +1885,57 @@ size_t ZSTD_compressBlock_lazy2_dictMatchState(
912
1885
  return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
913
1886
  }
914
1887
 
915
- size_t ZSTD_compressBlock_lazy_dictMatchState(
1888
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
916
1889
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
917
1890
  void const* src, size_t srcSize)
918
1891
  {
919
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
1892
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
920
1893
  }
921
1894
 
922
- size_t ZSTD_compressBlock_greedy_dictMatchState(
1895
+ size_t ZSTD_compressBlock_lazy2_row(
923
1896
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
924
1897
  void const* src, size_t srcSize)
925
1898
  {
926
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
1899
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
1900
+ }
1901
+
1902
+ size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
1903
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1904
+ void const* src, size_t srcSize)
1905
+ {
1906
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
1907
+ }
1908
+
1909
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
1910
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1911
+ void const* src, size_t srcSize)
1912
+ {
1913
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
927
1914
  }
1915
+ #endif
928
1916
 
1917
+ #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
1918
+ size_t ZSTD_compressBlock_btlazy2(
1919
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1920
+ void const* src, size_t srcSize)
1921
+ {
1922
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
1923
+ }
929
1924
 
1925
+ size_t ZSTD_compressBlock_btlazy2_dictMatchState(
1926
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1927
+ void const* src, size_t srcSize)
1928
+ {
1929
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
1930
+ }
1931
+ #endif
1932
+
1933
+ #if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
1934
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
1935
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
1936
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
930
1937
  FORCE_INLINE_TEMPLATE
1938
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
931
1939
  size_t ZSTD_compressBlock_lazy_extDict_generic(
932
1940
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
933
1941
  U32 rep[ZSTD_REP_NUM],
@@ -938,7 +1946,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
938
1946
  const BYTE* ip = istart;
939
1947
  const BYTE* anchor = istart;
940
1948
  const BYTE* const iend = istart + srcSize;
941
- const BYTE* const ilimit = iend - 8;
1949
+ const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
942
1950
  const BYTE* const base = ms->window.base;
943
1951
  const U32 dictLimit = ms->window.dictLimit;
944
1952
  const BYTE* const prefixStart = base + dictLimit;
@@ -946,18 +1954,21 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
946
1954
  const BYTE* const dictEnd = dictBase + dictLimit;
947
1955
  const BYTE* const dictStart = dictBase + ms->window.lowLimit;
948
1956
  const U32 windowLog = ms->cParams.windowLog;
949
-
950
- typedef size_t (*searchMax_f)(
951
- ZSTD_matchState_t* ms,
952
- const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
953
- searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
1957
+ const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
1958
+ const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
954
1959
 
955
1960
  U32 offset_1 = rep[0], offset_2 = rep[1];
956
1961
 
957
- DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
1962
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
1963
+
1964
+ /* Reset the lazy skipping state */
1965
+ ms->lazySkipping = 0;
958
1966
 
959
1967
  /* init */
960
1968
  ip += (ip == prefixStart);
1969
+ if (searchMethod == search_rowHash) {
1970
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1971
+ }
961
1972
 
962
1973
  /* Match Loop */
963
1974
  #if defined(__GNUC__) && defined(__x86_64__)
@@ -968,16 +1979,17 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
968
1979
  #endif
969
1980
  while (ip < ilimit) {
970
1981
  size_t matchLength=0;
971
- size_t offset=0;
1982
+ size_t offBase = REPCODE1_TO_OFFBASE;
972
1983
  const BYTE* start=ip+1;
973
- U32 current = (U32)(ip-base);
1984
+ U32 curr = (U32)(ip-base);
974
1985
 
975
1986
  /* check repCode */
976
- { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
977
- const U32 repIndex = (U32)(current+1 - offset_1);
1987
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
1988
+ const U32 repIndex = (U32)(curr+1 - offset_1);
978
1989
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
979
1990
  const BYTE* const repMatch = repBase + repIndex;
980
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1991
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
1992
+ & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
981
1993
  if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
982
1994
  /* repcode detected we should take it */
983
1995
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -986,14 +1998,23 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
986
1998
  } }
987
1999
 
988
2000
  /* first search (depth 0) */
989
- { size_t offsetFound = 999999999;
990
- size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
2001
+ { size_t ofbCandidate = 999999999;
2002
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
991
2003
  if (ml2 > matchLength)
992
- matchLength = ml2, start = ip, offset=offsetFound;
2004
+ matchLength = ml2, start = ip, offBase = ofbCandidate;
993
2005
  }
994
2006
 
995
- if (matchLength < 4) {
996
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
2007
+ if (matchLength < 4) {
2008
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
2009
+ ip += step + 1; /* jump faster over incompressible sections */
2010
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
2011
+ * In this mode we stop inserting every position into our tables, and only insert
2012
+ * positions that we search, which is one in step positions.
2013
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
2014
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
2015
+ * triggered once we've gone 2KB without finding any matches.
2016
+ */
2017
+ ms->lazySkipping = step > kLazySkippingStep;
997
2018
  continue;
998
2019
  }
999
2020
 
@@ -1001,82 +2022,91 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1001
2022
  if (depth>=1)
1002
2023
  while (ip<ilimit) {
1003
2024
  ip ++;
1004
- current++;
2025
+ curr++;
1005
2026
  /* check repCode */
1006
- if (offset) {
1007
- const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1008
- const U32 repIndex = (U32)(current - offset_1);
2027
+ if (offBase) {
2028
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
2029
+ const U32 repIndex = (U32)(curr - offset_1);
1009
2030
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1010
2031
  const BYTE* const repMatch = repBase + repIndex;
1011
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
2032
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
2033
+ & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
1012
2034
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1013
2035
  /* repcode detected */
1014
2036
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1015
2037
  size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1016
2038
  int const gain2 = (int)(repLength * 3);
1017
- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
2039
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
1018
2040
  if ((repLength >= 4) && (gain2 > gain1))
1019
- matchLength = repLength, offset = 0, start = ip;
2041
+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
1020
2042
  } }
1021
2043
 
1022
2044
  /* search match, depth 1 */
1023
- { size_t offset2=999999999;
1024
- size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1025
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1026
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
2045
+ { size_t ofbCandidate = 999999999;
2046
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
2047
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
2048
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
1027
2049
  if ((ml2 >= 4) && (gain2 > gain1)) {
1028
- matchLength = ml2, offset = offset2, start = ip;
2050
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
1029
2051
  continue; /* search a better one */
1030
2052
  } }
1031
2053
 
1032
2054
  /* let's find an even better one */
1033
2055
  if ((depth==2) && (ip<ilimit)) {
1034
2056
  ip ++;
1035
- current++;
2057
+ curr++;
1036
2058
  /* check repCode */
1037
- if (offset) {
1038
- const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1039
- const U32 repIndex = (U32)(current - offset_1);
2059
+ if (offBase) {
2060
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
2061
+ const U32 repIndex = (U32)(curr - offset_1);
1040
2062
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1041
2063
  const BYTE* const repMatch = repBase + repIndex;
1042
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
2064
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
2065
+ & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
1043
2066
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1044
2067
  /* repcode detected */
1045
2068
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1046
2069
  size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1047
2070
  int const gain2 = (int)(repLength * 4);
1048
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
2071
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
1049
2072
  if ((repLength >= 4) && (gain2 > gain1))
1050
- matchLength = repLength, offset = 0, start = ip;
2073
+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
1051
2074
  } }
1052
2075
 
1053
2076
  /* search match, depth 2 */
1054
- { size_t offset2=999999999;
1055
- size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1056
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1057
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
2077
+ { size_t ofbCandidate = 999999999;
2078
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
2079
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
2080
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
1058
2081
  if ((ml2 >= 4) && (gain2 > gain1)) {
1059
- matchLength = ml2, offset = offset2, start = ip;
2082
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
1060
2083
  continue;
1061
2084
  } } }
1062
2085
  break; /* nothing found : store previous solution */
1063
2086
  }
1064
2087
 
1065
2088
  /* catch up */
1066
- if (offset) {
1067
- U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
2089
+ if (OFFBASE_IS_OFFSET(offBase)) {
2090
+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
1068
2091
  const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
1069
2092
  const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
1070
2093
  while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
1071
- offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
2094
+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
1072
2095
  }
1073
2096
 
1074
2097
  /* store sequence */
1075
2098
  _storeSequence:
1076
- { size_t const litLength = start - anchor;
1077
- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
2099
+ { size_t const litLength = (size_t)(start - anchor);
2100
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
1078
2101
  anchor = ip = start + matchLength;
1079
2102
  }
2103
+ if (ms->lazySkipping) {
2104
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
2105
+ if (searchMethod == search_rowHash) {
2106
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
2107
+ }
2108
+ ms->lazySkipping = 0;
2109
+ }
1080
2110
 
1081
2111
  /* check immediate repcode */
1082
2112
  while (ip <= ilimit) {
@@ -1085,13 +2115,14 @@ _storeSequence:
1085
2115
  const U32 repIndex = repCurrent - offset_2;
1086
2116
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1087
2117
  const BYTE* const repMatch = repBase + repIndex;
1088
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
2118
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
2119
+ & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
1089
2120
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1090
2121
  /* repcode detected we should take it */
1091
2122
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1092
2123
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1093
- offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
1094
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
2124
+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset history */
2125
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
1095
2126
  ip += matchLength;
1096
2127
  anchor = ip;
1097
2128
  continue; /* faster when present ... (?) */
@@ -1106,8 +2137,9 @@ _storeSequence:
1106
2137
  /* Return the last literals size */
1107
2138
  return (size_t)(iend - anchor);
1108
2139
  }
2140
+ #endif /* build exclusions */
1109
2141
 
1110
-
2142
+ #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
1111
2143
  size_t ZSTD_compressBlock_greedy_extDict(
1112
2144
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1113
2145
  void const* src, size_t srcSize)
@@ -1115,6 +2147,15 @@ size_t ZSTD_compressBlock_greedy_extDict(
1115
2147
  return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
1116
2148
  }
1117
2149
 
2150
+ size_t ZSTD_compressBlock_greedy_extDict_row(
2151
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2152
+ void const* src, size_t srcSize)
2153
+ {
2154
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
2155
+ }
2156
+ #endif
2157
+
2158
+ #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
1118
2159
  size_t ZSTD_compressBlock_lazy_extDict(
1119
2160
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1120
2161
  void const* src, size_t srcSize)
@@ -1123,6 +2164,16 @@ size_t ZSTD_compressBlock_lazy_extDict(
1123
2164
  return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
1124
2165
  }
1125
2166
 
2167
+ size_t ZSTD_compressBlock_lazy_extDict_row(
2168
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2169
+ void const* src, size_t srcSize)
2170
+
2171
+ {
2172
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
2173
+ }
2174
+ #endif
2175
+
2176
+ #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
1126
2177
  size_t ZSTD_compressBlock_lazy2_extDict(
1127
2178
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1128
2179
  void const* src, size_t srcSize)
@@ -1131,6 +2182,15 @@ size_t ZSTD_compressBlock_lazy2_extDict(
1131
2182
  return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
1132
2183
  }
1133
2184
 
2185
+ size_t ZSTD_compressBlock_lazy2_extDict_row(
2186
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2187
+ void const* src, size_t srcSize)
2188
+ {
2189
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
2190
+ }
2191
+ #endif
2192
+
2193
+ #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
1134
2194
  size_t ZSTD_compressBlock_btlazy2_extDict(
1135
2195
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1136
2196
  void const* src, size_t srcSize)
@@ -1138,5 +2198,6 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
1138
2198
  {
1139
2199
  return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
1140
2200
  }
2201
+ #endif
1141
2202
 
1142
- }
2203
+ } // namespace duckdb_zstd