duckdb 0.8.1-dev96.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. package/binding.gyp +8 -8
  2. package/package.json +3 -1
  3. package/src/duckdb/extension/icu/icu-datepart.cpp +1 -1
  4. package/src/duckdb/extension/icu/icu-extension.cpp +1 -1
  5. package/src/duckdb/extension/icu/icu-makedate.cpp +5 -4
  6. package/src/duckdb/extension/icu/icu-strptime.cpp +1 -1
  7. package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +0 -2
  8. package/src/duckdb/extension/json/buffered_json_reader.cpp +23 -14
  9. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +6 -6
  10. package/src/duckdb/extension/json/include/json_common.hpp +12 -2
  11. package/src/duckdb/extension/json/include/json_scan.hpp +5 -2
  12. package/src/duckdb/extension/json/json_functions/json_contains.cpp +5 -0
  13. package/src/duckdb/extension/json/json_functions/json_create.cpp +10 -10
  14. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -2
  15. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +2 -2
  16. package/src/duckdb/extension/json/json_functions/json_structure.cpp +5 -3
  17. package/src/duckdb/extension/json/json_functions/json_transform.cpp +11 -11
  18. package/src/duckdb/extension/json/json_functions/read_json.cpp +2 -1
  19. package/src/duckdb/extension/json/json_functions.cpp +6 -3
  20. package/src/duckdb/extension/json/json_scan.cpp +43 -27
  21. package/src/duckdb/extension/parquet/column_reader.cpp +5 -1
  22. package/src/duckdb/extension/parquet/include/decode_utils.hpp +6 -0
  23. package/src/duckdb/extension/parquet/parquet-extension.cpp +26 -1
  24. package/src/duckdb/src/catalog/catalog.cpp +5 -17
  25. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +7 -1
  26. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +121 -0
  27. package/src/duckdb/src/catalog/catalog_search_path.cpp +49 -12
  28. package/src/duckdb/src/catalog/default/default_types.cpp +9 -84
  29. package/src/duckdb/src/common/adbc/adbc.cpp +118 -12
  30. package/src/duckdb/src/common/adbc/driver_manager.cpp +0 -20
  31. package/src/duckdb/src/common/arrow/arrow_converter.cpp +11 -12
  32. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +4 -3
  33. package/src/duckdb/src/common/exception.cpp +4 -1
  34. package/src/duckdb/src/common/exception_format_value.cpp +24 -15
  35. package/src/duckdb/src/common/multi_file_reader.cpp +3 -0
  36. package/src/duckdb/src/common/random_engine.cpp +1 -1
  37. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +5 -4
  38. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +44 -7
  39. package/src/duckdb/src/common/types/time.cpp +2 -8
  40. package/src/duckdb/src/common/types/timestamp.cpp +37 -1
  41. package/src/duckdb/src/common/types/value.cpp +1 -0
  42. package/src/duckdb/src/common/types.cpp +4 -0
  43. package/src/duckdb/src/core_functions/aggregate/algebraic/avg.cpp +30 -33
  44. package/src/duckdb/src/core_functions/aggregate/algebraic/covar.cpp +0 -4
  45. package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +30 -33
  46. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +52 -65
  47. package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +48 -48
  48. package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +39 -40
  49. package/src/duckdb/src/core_functions/aggregate/distributive/bool.cpp +32 -32
  50. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +34 -34
  51. package/src/duckdb/src/core_functions/aggregate/distributive/kurtosis.cpp +30 -31
  52. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +88 -100
  53. package/src/duckdb/src/core_functions/aggregate/distributive/product.cpp +17 -17
  54. package/src/duckdb/src/core_functions/aggregate/distributive/skew.cpp +25 -27
  55. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +37 -38
  56. package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +22 -22
  57. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +44 -80
  58. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +49 -51
  59. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +104 -122
  60. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +57 -93
  61. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +22 -23
  62. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +18 -19
  63. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +16 -18
  64. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +22 -25
  65. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +19 -24
  66. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +18 -23
  67. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +14 -18
  68. package/src/duckdb/src/core_functions/function_list.cpp +1 -0
  69. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +1 -1
  70. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +3 -0
  71. package/src/duckdb/src/core_functions/scalar/generic/system_functions.cpp +14 -0
  72. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +1 -1
  73. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +23 -6
  74. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +1 -2
  75. package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp +3 -0
  76. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +3 -3
  77. package/src/duckdb/src/execution/index/art/art.cpp +80 -7
  78. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +20 -1
  79. package/src/duckdb/src/execution/index/art/leaf.cpp +11 -11
  80. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +10 -0
  81. package/src/duckdb/src/execution/index/art/node.cpp +48 -35
  82. package/src/duckdb/src/execution/index/art/node16.cpp +3 -0
  83. package/src/duckdb/src/execution/index/art/node256.cpp +1 -0
  84. package/src/duckdb/src/execution/index/art/node4.cpp +3 -0
  85. package/src/duckdb/src/execution/index/art/node48.cpp +2 -0
  86. package/src/duckdb/src/execution/index/art/prefix.cpp +2 -0
  87. package/src/duckdb/src/execution/join_hashtable.cpp +2 -0
  88. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +26 -9
  89. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +5 -2
  90. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +5 -1
  91. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +3 -6
  92. package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +1 -0
  93. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +8 -3
  94. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +0 -1
  95. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +29 -3
  96. package/src/duckdb/src/execution/reservoir_sample.cpp +18 -4
  97. package/src/duckdb/src/function/aggregate/distributive/count.cpp +159 -21
  98. package/src/duckdb/src/function/aggregate/distributive/first.cpp +67 -74
  99. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +7 -7
  100. package/src/duckdb/src/function/cast/list_casts.cpp +2 -4
  101. package/src/duckdb/src/function/pragma/pragma_queries.cpp +33 -23
  102. package/src/duckdb/src/function/scalar/list/list_extract.cpp +1 -1
  103. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +1 -1
  104. package/src/duckdb/src/function/scalar/string/regexp/regexp_util.cpp +6 -2
  105. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +2 -2
  106. package/src/duckdb/src/function/table/arrow.cpp +2 -2
  107. package/src/duckdb/src/function/table/checkpoint.cpp +3 -0
  108. package/src/duckdb/src/function/table/read_csv.cpp +15 -17
  109. package/src/duckdb/src/function/table/repeat.cpp +3 -0
  110. package/src/duckdb/src/function/table/repeat_row.cpp +8 -1
  111. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +4 -4
  112. package/src/duckdb/src/function/table/system/test_vector_types.cpp +81 -25
  113. package/src/duckdb/src/function/table/table_scan.cpp +2 -2
  114. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  115. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +0 -3
  116. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +2 -0
  117. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +11 -1
  118. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +8 -2
  119. package/src/duckdb/src/include/duckdb/catalog/default/builtin_types/types.hpp +97 -0
  120. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  121. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +2 -3
  122. package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +8 -1
  123. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +0 -1
  124. package/src/duckdb/src/include/duckdb/common/bit_utils.hpp +16 -22
  125. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  126. package/src/duckdb/src/include/duckdb/common/types/time.hpp +2 -0
  127. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +4 -14
  128. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +92 -57
  129. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +20 -24
  130. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +36 -39
  131. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/stddev.hpp +57 -53
  132. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +8 -9
  133. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +16 -18
  134. package/src/duckdb/src/include/duckdb/core_functions/aggregate/sum_helpers.hpp +7 -8
  135. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +9 -0
  136. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +2 -6
  137. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +16 -36
  138. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +10 -4
  139. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +3 -0
  140. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +1 -1
  141. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +2 -0
  142. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +13 -3
  143. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
  144. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +9 -30
  145. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +95 -0
  146. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +4 -2
  147. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +1 -1
  148. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -1
  149. package/src/duckdb/src/include/duckdb/function/table_function.hpp +3 -2
  150. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +4 -1
  151. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +3 -1
  152. package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
  153. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +1 -0
  154. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +142 -136
  155. package/src/duckdb/src/include/duckdb/main/query_result.hpp +6 -0
  156. package/src/duckdb/src/include/duckdb/main/settings.hpp +19 -0
  157. package/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp +4 -0
  158. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +12 -3
  159. package/src/duckdb/src/include/duckdb/parser/parser.hpp +2 -0
  160. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +2 -2
  162. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +0 -2
  163. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +1 -1
  164. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +3 -0
  165. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -1
  166. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +5 -0
  167. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +6 -2
  168. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +4 -0
  169. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +5 -1
  170. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +5 -1
  171. package/src/duckdb/src/include/duckdb/planner/operator/logical_pragma.hpp +6 -2
  172. package/src/duckdb/src/include/duckdb/planner/operator/logical_prepare.hpp +4 -0
  173. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/byte_reader.hpp +4 -0
  174. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  175. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +13 -13
  176. package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -2
  177. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +0 -6
  178. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  179. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +2 -1
  180. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +2 -2
  181. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +2 -1
  182. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +2 -1
  183. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +2 -1
  184. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +0 -2
  185. package/src/duckdb/src/main/attached_database.cpp +8 -0
  186. package/src/duckdb/src/main/capi/arrow-c.cpp +4 -4
  187. package/src/duckdb/src/main/capi/config-c.cpp +2 -5
  188. package/src/duckdb/src/main/client_context.cpp +4 -3
  189. package/src/duckdb/src/main/config.cpp +2 -0
  190. package/src/duckdb/src/main/database.cpp +1 -0
  191. package/src/duckdb/src/main/database_manager.cpp +21 -0
  192. package/src/duckdb/src/main/query_result.cpp +6 -2
  193. package/src/duckdb/src/main/settings/settings.cpp +41 -6
  194. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +27 -16
  195. package/src/duckdb/src/parallel/executor.cpp +38 -14
  196. package/src/duckdb/src/parallel/meta_pipeline.cpp +17 -3
  197. package/src/duckdb/src/parser/column_definition.cpp +5 -8
  198. package/src/duckdb/src/parser/parsed_data/create_info.cpp +0 -3
  199. package/src/duckdb/src/parser/parser.cpp +95 -35
  200. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +1 -0
  201. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +3 -0
  202. package/src/duckdb/src/parser/transform/helpers/nodetype_to_string.cpp +0 -2
  203. package/src/duckdb/src/parser/transform/statement/transform_drop.cpp +0 -3
  204. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +3 -0
  205. package/src/duckdb/src/parser/transformer.cpp +0 -2
  206. package/src/duckdb/src/planner/bind_context.cpp +3 -4
  207. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +0 -2
  208. package/src/duckdb/src/planner/binder/expression/bind_positional_reference_expression.cpp +8 -3
  209. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +31 -15
  210. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +0 -27
  211. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +18 -1
  212. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +0 -25
  213. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +2 -114
  214. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +6 -1
  215. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +5 -0
  216. package/src/duckdb/src/planner/bound_result_modifier.cpp +14 -0
  217. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +6 -5
  218. package/src/duckdb/src/planner/expression/bound_default_expression.cpp +7 -1
  219. package/src/duckdb/src/planner/expression.cpp +3 -0
  220. package/src/duckdb/src/planner/expression_binder.cpp +3 -2
  221. package/src/duckdb/src/planner/operator/logical_distinct.cpp +5 -4
  222. package/src/duckdb/src/planner/operator/logical_pivot.cpp +14 -2
  223. package/src/duckdb/src/planner/planner.cpp +5 -15
  224. package/src/duckdb/src/storage/data_table.cpp +10 -8
  225. package/src/duckdb/src/storage/index.cpp +13 -0
  226. package/src/duckdb/src/storage/storage_manager.cpp +6 -0
  227. package/src/duckdb/src/storage/table/column_data.cpp +7 -3
  228. package/src/duckdb/src/storage/table/list_column_data.cpp +5 -4
  229. package/src/duckdb/src/storage/table/row_group.cpp +4 -3
  230. package/src/duckdb/src/storage/table/row_group_collection.cpp +6 -3
  231. package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -3
  232. package/src/duckdb/src/storage/table/struct_column_data.cpp +4 -3
  233. package/src/duckdb/src/storage/wal_replay.cpp +4 -5
  234. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +0 -1
  235. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +0 -14
  236. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12828 -12956
  237. package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3 -0
  238. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +4 -0
  239. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
  240. package/src/duckdb/ub_src_parser_transform_statement.cpp +0 -2
  241. package/test/extension.test.ts +11 -0
  242. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_database_info.hpp +0 -46
  243. package/src/duckdb/src/parser/transform/statement/transform_create_database.cpp +0 -27
@@ -69,6 +69,9 @@ void Node::Free(ART &art, Node &node) {
69
69
 
70
70
  // free the prefixes and children of the nodes
71
71
  switch (type) {
72
+ case NType::LEAF_SEGMENT:
73
+ LeafSegment::Free(art, node);
74
+ break;
72
75
  case NType::LEAF:
73
76
  Leaf::Free(art, node);
74
77
  break;
@@ -159,65 +162,57 @@ void Node::DeleteChild(ART &art, Node &node, const uint8_t byte) {
159
162
 
160
163
  optional_ptr<Node> Node::GetChild(ART &art, const uint8_t byte) const {
161
164
 
162
- D_ASSERT(!IsSwizzled());
165
+ D_ASSERT(IsSet() && !IsSwizzled());
163
166
 
164
167
  optional_ptr<Node> child;
165
168
  switch (DecodeARTNodeType()) {
166
- case NType::NODE_4: {
169
+ case NType::NODE_4:
167
170
  child = Node4::Get(art, *this).GetChild(byte);
168
171
  break;
169
- }
170
- case NType::NODE_16: {
172
+ case NType::NODE_16:
171
173
  child = Node16::Get(art, *this).GetChild(byte);
172
174
  break;
173
- }
174
- case NType::NODE_48: {
175
+ case NType::NODE_48:
175
176
  child = Node48::Get(art, *this).GetChild(byte);
176
177
  break;
177
- }
178
- case NType::NODE_256: {
178
+ case NType::NODE_256:
179
179
  child = Node256::Get(art, *this).GetChild(byte);
180
180
  break;
181
- }
182
181
  default:
183
182
  throw InternalException("Invalid node type for GetChild.");
184
183
  }
185
184
 
186
- // unswizzle the ART node before returning it
185
+ // deserialize the ART node before returning it
187
186
  if (child && child->IsSwizzled()) {
188
187
  child->Deserialize(art);
189
188
  }
190
189
  return child;
191
190
  }
192
191
 
193
- optional_ptr<Node> Node::GetNextChild(ART &art, uint8_t &byte) const {
192
+ optional_ptr<Node> Node::GetNextChild(ART &art, uint8_t &byte, const bool deserialize) const {
194
193
 
195
- D_ASSERT(!IsSwizzled());
194
+ D_ASSERT(IsSet() && !IsSwizzled());
196
195
 
197
196
  optional_ptr<Node> child;
198
197
  switch (DecodeARTNodeType()) {
199
- case NType::NODE_4: {
198
+ case NType::NODE_4:
200
199
  child = Node4::Get(art, *this).GetNextChild(byte);
201
200
  break;
202
- }
203
- case NType::NODE_16: {
201
+ case NType::NODE_16:
204
202
  child = Node16::Get(art, *this).GetNextChild(byte);
205
203
  break;
206
- }
207
- case NType::NODE_48: {
204
+ case NType::NODE_48:
208
205
  child = Node48::Get(art, *this).GetNextChild(byte);
209
206
  break;
210
- }
211
- case NType::NODE_256: {
207
+ case NType::NODE_256:
212
208
  child = Node256::Get(art, *this).GetNextChild(byte);
213
209
  break;
214
- }
215
210
  default:
216
211
  throw InternalException("Invalid node type for GetNextChild.");
217
212
  }
218
213
 
219
- // unswizzle the ART node before returning it
220
- if (child && child->IsSwizzled()) {
214
+ // deserialize the ART node before returning it
215
+ if (child && deserialize && child->IsSwizzled()) {
221
216
  child->Deserialize(art);
222
217
  }
223
218
  return child;
@@ -260,10 +255,11 @@ void Node::Deserialize(ART &art) {
260
255
  type = reader.Read<uint8_t>();
261
256
  swizzle_flag = 0;
262
257
 
263
- auto type = DecodeARTNodeType();
264
- SetPtr(Node::GetAllocator(art, type).New());
258
+ auto decoded_type = DecodeARTNodeType();
259
+ SetPtr(Node::GetAllocator(art, decoded_type).New());
260
+ type = (uint8_t)decoded_type;
265
261
 
266
- switch (type) {
262
+ switch (decoded_type) {
267
263
  case NType::LEAF:
268
264
  return Leaf::Get(art, *this).Deserialize(art, reader);
269
265
  case NType::NODE_4:
@@ -283,28 +279,44 @@ void Node::Deserialize(ART &art) {
283
279
  // Utility
284
280
  //===--------------------------------------------------------------------===//
285
281
 
286
- string Node::ToString(ART &art) const {
282
+ string Node::VerifyAndToString(ART &art, const bool only_verify) {
287
283
 
288
- D_ASSERT(!IsSwizzled());
284
+ D_ASSERT(IsSet());
285
+ if (IsSwizzled()) {
286
+ return only_verify ? "" : "swizzled";
287
+ }
289
288
 
290
- if (DecodeARTNodeType() == NType::LEAF) {
291
- return Leaf::Get(art, *this).ToString(art);
289
+ auto type = DecodeARTNodeType();
290
+ if (type == NType::LEAF) {
291
+ auto str = Leaf::Get(art, *this).VerifyAndToString(art, only_verify);
292
+ return only_verify ? "" : "\n" + str;
292
293
  }
293
294
 
294
295
  string str = "Node" + to_string(GetCapacity()) + ": [";
295
296
 
297
+ idx_t child_count = 0;
296
298
  uint8_t byte = 0;
297
- auto child = GetNextChild(art, byte);
299
+ auto child = GetNextChild(art, byte, false);
298
300
  while (child) {
299
- str += "(" + to_string(byte) + ", " + child->ToString(art) + ")";
300
- if (byte == NumericLimits<uint8_t>::Maximum()) {
301
- break;
301
+ child_count++;
302
+ if (child->IsSwizzled()) {
303
+ if (!only_verify) {
304
+ str += "(swizzled)";
305
+ }
306
+ } else {
307
+ str += "(" + to_string(byte) + ", " + child->VerifyAndToString(art, only_verify) + ")";
308
+ if (byte == NumericLimits<uint8_t>::Maximum()) {
309
+ break;
310
+ }
302
311
  }
303
312
  byte++;
304
- child = GetNextChild(art, byte);
313
+ child = GetNextChild(art, byte, false);
305
314
  }
306
315
 
307
- return str + "]";
316
+ (void)child_count;
317
+ // ensure that the child count is at least two
318
+ D_ASSERT(child_count > 1);
319
+ return only_verify ? "" : "\n" + str + "]";
308
320
  }
309
321
 
310
322
  idx_t Node::GetCapacity() const {
@@ -567,6 +579,7 @@ void Node::Vacuum(ART &art, Node &node, const ARTFlags &flags) {
567
579
  needs_vacuum = flags.vacuum_flags[node.type - 1] && allocator.NeedsVacuum(node);
568
580
  if (needs_vacuum) {
569
581
  node.SetPtr(allocator.VacuumPointer(node));
582
+ node.type = (uint8_t)type;
570
583
  }
571
584
 
572
585
  switch (type) {
@@ -60,6 +60,7 @@ Node16 &Node16::ShrinkNode48(ART &art, Node &node16, Node &node48) {
60
60
  n16.prefix.Move(n48.prefix);
61
61
 
62
62
  for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) {
63
+ D_ASSERT(n16.count <= Node::NODE_16_CAPACITY);
63
64
  if (n48.child_index[i] != Node::EMPTY_MARKER) {
64
65
  n16.key[n16.count] = i;
65
66
  n16.children[n16.count] = n48.children[n48.child_index[i]];
@@ -160,6 +161,7 @@ optional_ptr<Node> Node16::GetChild(const uint8_t byte) {
160
161
 
161
162
  for (idx_t i = 0; i < count; i++) {
162
163
  if (key[i] == byte) {
164
+ D_ASSERT(children[i].IsSet());
163
165
  return &children[i];
164
166
  }
165
167
  }
@@ -171,6 +173,7 @@ optional_ptr<Node> Node16::GetNextChild(uint8_t &byte) {
171
173
  for (idx_t i = 0; i < count; i++) {
172
174
  if (key[i] >= byte) {
173
175
  byte = key[i];
176
+ D_ASSERT(children[i].IsSet());
174
177
  return &children[i];
175
178
  }
176
179
  }
@@ -83,6 +83,7 @@ void Node256::InsertChild(ART &art, Node &node, const uint8_t byte, const Node c
83
83
  D_ASSERT(!n256.children[byte].IsSet());
84
84
 
85
85
  n256.count++;
86
+ D_ASSERT(n256.count <= Node::NODE_256_CAPACITY);
86
87
  n256.children[byte] = child;
87
88
  }
88
89
 
@@ -37,6 +37,7 @@ Node4 &Node4::ShrinkNode16(ART &art, Node &node4, Node &node16) {
37
37
  auto &n4 = Node4::New(art, node4);
38
38
  auto &n16 = Node16::Get(art, node16);
39
39
 
40
+ D_ASSERT(n16.count <= Node::NODE_4_CAPACITY);
40
41
  n4.count = n16.count;
41
42
  n4.prefix.Move(n16.prefix);
42
43
 
@@ -145,6 +146,7 @@ optional_ptr<Node> Node4::GetChild(const uint8_t byte) {
145
146
 
146
147
  for (idx_t i = 0; i < count; i++) {
147
148
  if (key[i] == byte) {
149
+ D_ASSERT(children[i].IsSet());
148
150
  return &children[i];
149
151
  }
150
152
  }
@@ -156,6 +158,7 @@ optional_ptr<Node> Node4::GetNextChild(uint8_t &byte) {
156
158
  for (idx_t i = 0; i < count; i++) {
157
159
  if (key[i] >= byte) {
158
160
  byte = key[i];
161
+ D_ASSERT(children[i].IsSet());
159
162
  return &children[i];
160
163
  }
161
164
  }
@@ -85,6 +85,7 @@ Node48 &Node48::ShrinkNode256(ART &art, Node &node48, Node &node256) {
85
85
  n48.prefix.Move(n256.prefix);
86
86
 
87
87
  for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) {
88
+ D_ASSERT(n48.count <= Node::NODE_48_CAPACITY);
88
89
  if (n256.children[i].IsSet()) {
89
90
  n48.child_index[i] = n48.count;
90
91
  n48.children[n48.count] = n256.children[i];
@@ -168,6 +169,7 @@ optional_ptr<Node> Node48::GetNextChild(uint8_t &byte) {
168
169
  for (idx_t i = byte; i < Node::NODE_256_CAPACITY; i++) {
169
170
  if (child_index[i] != Node::EMPTY_MARKER) {
170
171
  byte = i;
172
+ D_ASSERT(children[child_index[i]].IsSet());
171
173
  return &children[child_index[i]];
172
174
  }
173
175
  }
@@ -427,6 +427,7 @@ void Prefix::Vacuum(ART &art) {
427
427
  auto &allocator = Node::GetAllocator(art, NType::PREFIX_SEGMENT);
428
428
  if (allocator.NeedsVacuum(data.ptr)) {
429
429
  data.ptr.SetPtr(allocator.VacuumPointer(data.ptr));
430
+ data.ptr.type = (uint8_t)NType::PREFIX_SEGMENT;
430
431
  }
431
432
 
432
433
  auto ptr = data.ptr;
@@ -435,6 +436,7 @@ void Prefix::Vacuum(ART &art) {
435
436
  ptr = segment.next;
436
437
  if (ptr.IsSet() && allocator.NeedsVacuum(ptr)) {
437
438
  segment.next.SetPtr(allocator.VacuumPointer(ptr));
439
+ segment.next.type = (uint8_t)NType::PREFIX_SEGMENT;
438
440
  ptr = segment.next;
439
441
  }
440
442
  }
@@ -627,6 +627,8 @@ void ScanStructure::NextMarkJoin(DataChunk &keys, DataChunk &input, DataChunk &r
627
627
  ConstructMarkJoinResult(keys, input, result);
628
628
  } else {
629
629
  auto &info = ht.correlated_mark_join_info;
630
+ lock_guard<mutex> mj_lock(info.mj_lock);
631
+
630
632
  // there are correlated columns
631
633
  // first we fetch the counts from the aggregate hashtable corresponding to these entries
632
634
  D_ASSERT(keys.ColumnCount() == info.group_chunk.ColumnCount() + 1);
@@ -1,7 +1,9 @@
1
1
  #include "duckdb/execution/operator/aggregate/physical_window.hpp"
2
2
 
3
+ #include "duckdb/common/operator/add.hpp"
3
4
  #include "duckdb/common/operator/cast_operators.hpp"
4
5
  #include "duckdb/common/operator/comparison_operators.hpp"
6
+ #include "duckdb/common/operator/subtract.hpp"
5
7
  #include "duckdb/common/optional_ptr.hpp"
6
8
  #include "duckdb/common/radix_partitioning.hpp"
7
9
  #include "duckdb/common/row_operations/row_operations.hpp"
@@ -224,12 +226,14 @@ struct WindowInputColumn {
224
226
  }
225
227
 
226
228
  void Append(DataChunk &input_chunk) {
227
- if (input_expr.expr && (!input_expr.scalar || !count)) {
228
- input_expr.Execute(input_chunk);
229
- auto &source = input_expr.chunk.data[0];
230
- const auto source_count = input_expr.chunk.size();
229
+ if (input_expr.expr) {
230
+ const auto source_count = input_chunk.size();
231
231
  D_ASSERT(count + source_count <= capacity);
232
- VectorOperations::Copy(source, *target, source_count, 0, count);
232
+ if (!input_expr.scalar || !count) {
233
+ input_expr.Execute(input_chunk);
234
+ auto &source = input_expr.chunk.data[0];
235
+ VectorOperations::Copy(source, *target, source_count, 0, count);
236
+ }
233
237
  count += source_count;
234
238
  }
235
239
  }
@@ -527,11 +531,17 @@ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range
527
531
  bounds.window_start = bounds.peer_start;
528
532
  break;
529
533
  case WindowBoundary::EXPR_PRECEDING_ROWS: {
530
- bounds.window_start = (int64_t)row_idx - boundary_start.GetCell<int64_t>(expr_idx);
534
+ if (!TrySubtractOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
535
+ bounds.window_start)) {
536
+ throw OutOfRangeException("Overflow computing ROWS PRECEDING start");
537
+ }
531
538
  break;
532
539
  }
533
540
  case WindowBoundary::EXPR_FOLLOWING_ROWS: {
534
- bounds.window_start = row_idx + boundary_start.GetCell<int64_t>(expr_idx);
541
+ if (!TryAddOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
542
+ bounds.window_start)) {
543
+ throw OutOfRangeException("Overflow computing ROWS FOLLOWING start");
544
+ }
535
545
  break;
536
546
  }
537
547
  case WindowBoundary::EXPR_PRECEDING_RANGE: {
@@ -567,10 +577,16 @@ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range
567
577
  bounds.window_end = bounds.partition_end;
568
578
  break;
569
579
  case WindowBoundary::EXPR_PRECEDING_ROWS:
570
- bounds.window_end = (int64_t)row_idx - boundary_end.GetCell<int64_t>(expr_idx) + 1;
580
+ if (!TrySubtractOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
581
+ bounds.window_end)) {
582
+ throw OutOfRangeException("Overflow computing ROWS PRECEDING end");
583
+ }
571
584
  break;
572
585
  case WindowBoundary::EXPR_FOLLOWING_ROWS:
573
- bounds.window_end = row_idx + boundary_end.GetCell<int64_t>(expr_idx) + 1;
586
+ if (!TryAddOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
587
+ bounds.window_end)) {
588
+ throw OutOfRangeException("Overflow computing ROWS FOLLOWING end");
589
+ }
574
590
  break;
575
591
  case WindowBoundary::EXPR_PRECEDING_RANGE: {
576
592
  if (boundary_end.CellIsNull(expr_idx)) {
@@ -1254,6 +1270,7 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1254
1270
  // Overwrite the collections with the sorted data
1255
1271
  hash_group = std::move(gsink.hash_groups[hash_bin]);
1256
1272
  hash_group->ComputeMasks(partition_mask, order_mask);
1273
+ external = hash_group->global_sort->external;
1257
1274
  MaterializeSortedData();
1258
1275
  } else {
1259
1276
  return;
@@ -20,10 +20,14 @@ void PhysicalReset::ResetExtensionVariable(ExecutionContext &context, DBConfig &
20
20
  }
21
21
 
22
22
  SourceResultType PhysicalReset::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
23
+ auto &config = DBConfig::GetConfig(context.client);
24
+ if (config.options.lock_configuration) {
25
+ throw InvalidInputException("Cannot reset configuration option \"%s\" - the configuration has been locked",
26
+ name);
27
+ }
23
28
  auto option = DBConfig::GetOptionByName(name);
24
29
  if (!option) {
25
30
  // check if this is an extra extension variable
26
- auto &config = DBConfig::GetConfig(context.client);
27
31
  auto entry = config.extension_parameters.find(name);
28
32
  if (entry == config.extension_parameters.end()) {
29
33
  throw Catalog::UnrecognizedConfigurationError(context.client, name);
@@ -49,7 +53,6 @@ SourceResultType PhysicalReset::GetData(ExecutionContext &context, DataChunk &ch
49
53
  throw CatalogException("option \"%s\" cannot be reset globally", name);
50
54
  }
51
55
  auto &db = DatabaseInstance::GetDatabase(context.client);
52
- auto &config = DBConfig::GetConfig(context.client);
53
56
  config.ResetOption(&db, *option);
54
57
  break;
55
58
  }
@@ -23,10 +23,14 @@ void PhysicalSet::SetExtensionVariable(ClientContext &context, ExtensionOption &
23
23
  }
24
24
 
25
25
  SourceResultType PhysicalSet::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
26
+ auto &config = DBConfig::GetConfig(context.client);
27
+ if (config.options.lock_configuration) {
28
+ throw InvalidInputException("Cannot change configuration option \"%s\" - the configuration has been locked",
29
+ name);
30
+ }
26
31
  auto option = DBConfig::GetOptionByName(name);
27
32
  if (!option) {
28
33
  // check if this is an extra extension variable
29
- auto &config = DBConfig::GetConfig(context.client);
30
34
  auto entry = config.extension_parameters.find(name);
31
35
  if (entry == config.extension_parameters.end()) {
32
36
  throw Catalog::UnrecognizedConfigurationError(context.client, name);
@@ -1016,19 +1016,16 @@ void PhysicalIEJoin::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipeli
1016
1016
 
1017
1017
  // Create one child meta pipeline that will hold the LHS and RHS pipelines
1018
1018
  auto &child_meta_pipeline = meta_pipeline.CreateChildMetaPipeline(current, *this);
1019
- auto lhs_pipeline = child_meta_pipeline.GetBasePipeline();
1020
- auto rhs_pipeline = child_meta_pipeline.CreatePipeline();
1021
1019
 
1022
1020
  // Build out LHS
1021
+ auto lhs_pipeline = child_meta_pipeline.GetBasePipeline();
1023
1022
  children[0]->BuildPipelines(*lhs_pipeline, child_meta_pipeline);
1024
1023
 
1025
- // RHS depends on everything in LHS
1026
- child_meta_pipeline.AddDependenciesFrom(rhs_pipeline, lhs_pipeline.get(), true);
1027
-
1028
1024
  // Build out RHS
1025
+ auto rhs_pipeline = child_meta_pipeline.CreatePipeline();
1029
1026
  children[1]->BuildPipelines(*rhs_pipeline, child_meta_pipeline);
1030
1027
 
1031
- // Despite having the same sink, RHS needs its own PipelineFinishEvent
1028
+ // Despite having the same sink, RHS and everything created after it need their own (same) PipelineFinishEvent
1032
1029
  child_meta_pipeline.AddFinishEvent(rhs_pipeline);
1033
1030
  }
1034
1031
 
@@ -70,6 +70,7 @@ OperatorResultType PhysicalTableInOutFunction::Execute(ExecutionContext &context
70
70
  return OperatorResultType::NEED_MORE_INPUT;
71
71
  }
72
72
  // we are processing a new row: fetch the data for the current row
73
+ state.input_chunk.Reset();
73
74
  D_ASSERT(input.ColumnCount() == state.input_chunk.ColumnCount());
74
75
  // set up the input data to the table in-out function
75
76
  for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) {
@@ -65,8 +65,8 @@ void UnnestOperatorState::SetLongestListLength() {
65
65
  if (vector_data.validity.RowIsValid(current_idx)) {
66
66
 
67
67
  // check if this list is longer
68
- auto list_data = UnifiedVectorFormat::GetData<list_entry_t>(vector_data);
69
- auto list_entry = list_data[current_idx];
68
+ auto list_data_entries = UnifiedVectorFormat::GetData<list_entry_t>(vector_data);
69
+ auto list_entry = list_data_entries[current_idx];
70
70
  if (list_entry.length > longest_list_length) {
71
71
  longest_list_length = list_entry.length;
72
72
  }
@@ -259,6 +259,11 @@ OperatorResultType PhysicalUnnest::ExecuteInternal(ExecutionContext &context, Da
259
259
  auto &state = state_p.Cast<UnnestOperatorState>();
260
260
 
261
261
  do {
262
+ // reset validities, if previous loop iteration contained UNNEST(NULL)
263
+ if (include_input) {
264
+ chunk.Reset();
265
+ }
266
+
262
267
  // prepare the input data by executing any expressions and getting the
263
268
  // UnifiedVectorFormat of each LIST vector (list_vector_data) and its child vector (list_child_data)
264
269
  if (state.first_fetch) {
@@ -271,7 +276,7 @@ OperatorResultType PhysicalUnnest::ExecuteInternal(ExecutionContext &context, Da
271
276
  return OperatorResultType::NEED_MORE_INPUT;
272
277
  }
273
278
 
274
- // each UNNEST in the select_list contains a list (or NULL) for this row, find longest list
279
+ // each UNNEST in the select_list contains a list (or NULL) for this row, find the longest list
275
280
  // because this length determines how many times we need to repeat for the current row
276
281
  if (state.longest_list_length == DConstants::INVALID_INDEX) {
277
282
  state.SetLongestListLength();
@@ -14,7 +14,6 @@ namespace duckdb {
14
14
  //===--------------------------------------------------------------------===//
15
15
  // Source
16
16
  //===--------------------------------------------------------------------===//
17
-
18
17
  SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &chunk,
19
18
  OperatorSourceInput &input) const {
20
19
  // parse the options
@@ -7,6 +7,8 @@
7
7
  #include "duckdb/storage/storage_manager.hpp"
8
8
  #include "duckdb/main/database_manager.hpp"
9
9
  #include "duckdb/execution/index/art/art_key.hpp"
10
+ #include "duckdb/execution/index/art/node.hpp"
11
+ #include "duckdb/execution/index/art/leaf.hpp"
10
12
 
11
13
  namespace duckdb {
12
14
 
@@ -106,6 +108,28 @@ SinkResultType PhysicalCreateIndex::Sink(ExecutionContext &context, DataChunk &c
106
108
  if (!lstate.local_index->MergeIndexes(*art)) {
107
109
  throw ConstraintException("Data contains duplicates on indexed column(s)");
108
110
  }
111
+
112
+ #ifdef DEBUG
113
+ // ensure that all row IDs of this chunk exist in the ART
114
+ auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
115
+ for (idx_t i = 0; i < lstate.key_chunk.size(); i++) {
116
+ auto leaf_node =
117
+ lstate.local_index->Cast<ART>().Lookup(*lstate.local_index->Cast<ART>().tree, lstate.keys[i], 0);
118
+ D_ASSERT(leaf_node.IsSet());
119
+ auto &leaf = Leaf::Get(lstate.local_index->Cast<ART>(), leaf_node);
120
+
121
+ if (leaf.IsInlined()) {
122
+ D_ASSERT(row_ids[i] == leaf.row_ids.inlined);
123
+ continue;
124
+ }
125
+
126
+ D_ASSERT(leaf.row_ids.ptr.IsSet());
127
+ Node leaf_segment = leaf.row_ids.ptr;
128
+ auto position = leaf.FindRowId(lstate.local_index->Cast<ART>(), leaf_segment, row_ids[i]);
129
+ D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
130
+ }
131
+ #endif
132
+
109
133
  return SinkResultType::NEED_MORE_INPUT;
110
134
  }
111
135
 
@@ -119,6 +143,9 @@ void PhysicalCreateIndex::Combine(ExecutionContext &context, GlobalSinkState &gs
119
143
  if (!gstate.global_index->MergeIndexes(*lstate.local_index)) {
120
144
  throw ConstraintException("Data contains duplicates on indexed column(s)");
121
145
  }
146
+
147
+ // vacuum excess memory
148
+ gstate.global_index->Vacuum();
122
149
  }
123
150
 
124
151
  SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
@@ -127,6 +154,8 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event,
127
154
  // here, we just set the resulting global index as the newly created index of the table
128
155
 
129
156
  auto &state = gstate_p.Cast<CreateIndexGlobalSinkState>();
157
+ D_ASSERT(!state.global_index->VerifyAndToString(true).empty());
158
+
130
159
  auto &storage = table.GetStorage();
131
160
  if (!storage.IsRoot()) {
132
161
  throw TransactionException("Transaction conflict: cannot add an index to a table that has been altered!");
@@ -147,9 +176,6 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event,
147
176
  index.parsed_expressions.push_back(parsed_expr->Copy());
148
177
  }
149
178
 
150
- // vacuum excess memory
151
- state.global_index->Vacuum();
152
-
153
179
  // add index to storage
154
180
  storage.info->indexes.AddIndex(std::move(state.global_index));
155
181
  return SinkFinalizeType::READY;
@@ -104,10 +104,24 @@ void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) {
104
104
  idx_t append_to_next_sample = input.size() - append_to_current_sample_count;
105
105
  if (append_to_current_sample_count > 0) {
106
106
  // we have elements remaining, first add them to the current sample
107
- input.Flatten();
108
-
109
- input.SetCardinality(append_to_current_sample_count);
110
- current_sample->AddToReservoir(input);
107
+ if (append_to_next_sample > 0) {
108
+ // we need to also add to the next sample
109
+ DataChunk new_chunk;
110
+ new_chunk.Initialize(allocator, input.GetTypes());
111
+ SelectionVector sel(append_to_current_sample_count);
112
+ for (idx_t r = 0; r < append_to_current_sample_count; r++) {
113
+ sel.set_index(r, r);
114
+ }
115
+ new_chunk.Slice(sel, append_to_current_sample_count);
116
+ new_chunk.Flatten();
117
+
118
+ current_sample->AddToReservoir(new_chunk);
119
+ } else {
120
+ input.Flatten();
121
+
122
+ input.SetCardinality(append_to_current_sample_count);
123
+ current_sample->AddToReservoir(input);
124
+ }
111
125
  }
112
126
  if (append_to_next_sample > 0) {
113
127
  // slice the input for the remainder