duckdb 1.2.1-dev4.0 → 1.2.1-dev8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/package.json +1 -1
  2. package/src/connection.cpp +57 -35
  3. package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
  4. package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
  5. package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
  6. package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
  8. package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
  9. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
  10. package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
  11. package/src/duckdb/extension/json/json_extension.cpp +8 -3
  12. package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
  13. package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
  14. package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
  15. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
  16. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
  17. package/src/duckdb/src/catalog/catalog.cpp +12 -0
  18. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  19. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
  20. package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
  21. package/src/duckdb/src/common/bind_helpers.cpp +3 -0
  22. package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
  23. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  24. package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
  25. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  26. package/src/duckdb/src/execution/index/art/art.cpp +19 -6
  27. package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
  28. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
  29. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  30. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
  31. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
  32. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
  33. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
  34. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
  35. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
  36. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
  37. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
  39. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
  40. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
  41. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
  42. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
  43. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
  44. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
  45. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
  46. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
  47. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
  48. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
  49. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  50. package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
  51. package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
  52. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
  53. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
  54. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
  56. package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
  58. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
  63. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
  65. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
  66. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
  67. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
  68. package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
  69. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
  71. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
  72. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
  74. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
  75. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
  76. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
  77. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
  78. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
  79. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
  80. package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
  81. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
  82. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
  84. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
  85. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
  86. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
  87. package/src/duckdb/src/include/duckdb.h +495 -480
  88. package/src/duckdb/src/main/attached_database.cpp +1 -1
  89. package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
  90. package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
  91. package/src/duckdb/src/main/config.cpp +7 -1
  92. package/src/duckdb/src/main/database.cpp +8 -8
  93. package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
  94. package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
  95. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  96. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
  97. package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
  98. package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
  99. package/src/duckdb/src/parallel/executor_task.cpp +10 -6
  100. package/src/duckdb/src/parallel/task_executor.cpp +4 -1
  101. package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
  102. package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
  103. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
  104. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
  105. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
  106. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
  107. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
  108. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
  109. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  110. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
  111. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  112. package/src/duckdb/src/storage/storage_manager.cpp +2 -2
  113. package/src/duckdb/src/storage/table/row_group.cpp +5 -6
  114. package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
  115. package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
  116. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
  117. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
  118. package/src/duckdb/ub_src_parallel.cpp +2 -0
@@ -180,9 +180,9 @@ struct OperationCompare : public std::function<bool(T, T)> {
180
180
  };
181
181
 
182
182
  template <typename T, typename OP, bool FROM>
183
- static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, const idx_t order_end,
184
- const WindowBoundary range, WindowInputExpression &boundary, const idx_t chunk_idx,
185
- const FrameBounds &prev) {
183
+ static idx_t FindTypedRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, const idx_t order_begin,
184
+ const idx_t order_end, const WindowBoundary range, WindowInputExpression &boundary,
185
+ const idx_t chunk_idx, const FrameBounds &prev) {
186
186
  D_ASSERT(!boundary.CellIsNull(chunk_idx));
187
187
  const auto val = boundary.GetCell<T>(chunk_idx);
188
188
 
@@ -191,36 +191,43 @@ static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, co
191
191
  // Check that the value we are searching for is in range.
192
192
  if (range == WindowBoundary::EXPR_PRECEDING_RANGE) {
193
193
  // Preceding but value past the current value
194
- const auto cur_val = over.GetCell<T>(0, order_end - 1);
194
+ const auto cur_val = range_hi.GetCell<T>(0, order_end - 1);
195
195
  if (comp(cur_val, val)) {
196
196
  throw OutOfRangeException("Invalid RANGE PRECEDING value");
197
197
  }
198
198
  } else {
199
199
  // Following but value before the current value
200
200
  D_ASSERT(range == WindowBoundary::EXPR_FOLLOWING_RANGE);
201
- const auto cur_val = over.GetCell<T>(0, order_begin);
201
+ const auto cur_val = range_lo.GetCell<T>(0, order_begin);
202
202
  if (comp(val, cur_val)) {
203
203
  throw OutOfRangeException("Invalid RANGE FOLLOWING value");
204
204
  }
205
205
  }
206
-
207
206
  // Try to reuse the previous bounds to restrict the search.
208
207
  // This is only valid if the previous bounds were non-empty
209
208
  // Only inject the comparisons if the previous bounds are a strict subset.
210
- WindowColumnIterator<T> begin(over, order_begin);
211
- WindowColumnIterator<T> end(over, order_end);
209
+ WindowColumnIterator<T> begin(range_lo, order_begin);
210
+ WindowColumnIterator<T> end(range_hi, order_end);
212
211
  if (prev.start < prev.end) {
213
212
  if (order_begin < prev.start && prev.start < order_end) {
214
- const auto first = over.GetCell<T>(0, prev.start);
215
- if (!comp(val, first)) {
216
- // prev.first <= val, so we can start further forward
213
+ const auto first = range_lo.GetCell<T>(0, prev.start);
214
+ if (FROM && !comp(val, first)) {
215
+ // If prev.start == val and we are looking for a lower bound, then we are done
216
+ if (!comp(first, val)) {
217
+ return prev.start;
218
+ }
219
+ // prev.start <= val, so we can start further forward
217
220
  begin += UnsafeNumericCast<int64_t>(prev.start - order_begin);
218
221
  }
219
222
  }
220
223
  if (order_begin < prev.end && prev.end < order_end) {
221
- const auto second = over.GetCell<T>(0, prev.end - 1);
224
+ const auto second = range_hi.GetCell<T>(0, prev.end - 1);
222
225
  if (!comp(second, val)) {
223
- // val <= prev.second, so we can end further back
226
+ // If val == prev.end and we are looking for an upper bound, then we are done
227
+ if (!FROM && !comp(val, second)) {
228
+ return prev.end;
229
+ }
230
+ // val <= prev.end, so we can end further back
224
231
  // (prev.second is the largest peer)
225
232
  end -= UnsafeNumericCast<int64_t>(order_end - prev.end - 1);
226
233
  }
@@ -235,52 +242,65 @@ static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, co
235
242
  }
236
243
 
237
244
  template <typename OP, bool FROM>
238
- static idx_t FindRangeBound(WindowCursor &over, const idx_t order_begin, const idx_t order_end,
239
- const WindowBoundary range, WindowInputExpression &boundary, const idx_t chunk_idx,
240
- const FrameBounds &prev) {
245
+ static idx_t FindRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, const idx_t order_begin,
246
+ const idx_t order_end, const WindowBoundary range, WindowInputExpression &boundary,
247
+ const idx_t chunk_idx, const FrameBounds &prev) {
241
248
  switch (boundary.InternalType()) {
242
249
  case PhysicalType::INT8:
243
- return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
250
+ return FindTypedRangeBound<int8_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
251
+ chunk_idx, prev);
244
252
  case PhysicalType::INT16:
245
- return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
253
+ return FindTypedRangeBound<int16_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
254
+ chunk_idx, prev);
246
255
  case PhysicalType::INT32:
247
- return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
256
+ return FindTypedRangeBound<int32_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
257
+ chunk_idx, prev);
248
258
  case PhysicalType::INT64:
249
- return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
259
+ return FindTypedRangeBound<int64_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
260
+ chunk_idx, prev);
250
261
  case PhysicalType::UINT8:
251
- return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
262
+ return FindTypedRangeBound<uint8_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
263
+ chunk_idx, prev);
252
264
  case PhysicalType::UINT16:
253
- return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
265
+ return FindTypedRangeBound<uint16_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
266
+ chunk_idx, prev);
254
267
  case PhysicalType::UINT32:
255
- return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
268
+ return FindTypedRangeBound<uint32_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
269
+ chunk_idx, prev);
256
270
  case PhysicalType::UINT64:
257
- return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
271
+ return FindTypedRangeBound<uint64_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
272
+ chunk_idx, prev);
258
273
  case PhysicalType::INT128:
259
- return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
274
+ return FindTypedRangeBound<hugeint_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
275
+ chunk_idx, prev);
260
276
  case PhysicalType::UINT128:
261
- return FindTypedRangeBound<uhugeint_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx,
262
- prev);
277
+ return FindTypedRangeBound<uhugeint_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
278
+ chunk_idx, prev);
263
279
  case PhysicalType::FLOAT:
264
- return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
280
+ return FindTypedRangeBound<float, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
281
+ chunk_idx, prev);
265
282
  case PhysicalType::DOUBLE:
266
- return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
283
+ return FindTypedRangeBound<double, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
284
+ chunk_idx, prev);
267
285
  case PhysicalType::INTERVAL:
268
- return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, range, boundary, chunk_idx,
269
- prev);
286
+ return FindTypedRangeBound<interval_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
287
+ chunk_idx, prev);
270
288
  default:
271
289
  throw InternalException("Unsupported column type for RANGE");
272
290
  }
273
291
  }
274
292
 
275
293
  template <bool FROM>
276
- static idx_t FindOrderedRangeBound(WindowCursor &over, const OrderType range_sense, const idx_t order_begin,
277
- const idx_t order_end, const WindowBoundary range, WindowInputExpression &boundary,
278
- const idx_t chunk_idx, const FrameBounds &prev) {
294
+ static idx_t FindOrderedRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, const OrderType range_sense,
295
+ const idx_t order_begin, const idx_t order_end, const WindowBoundary range,
296
+ WindowInputExpression &boundary, const idx_t chunk_idx, const FrameBounds &prev) {
279
297
  switch (range_sense) {
280
298
  case OrderType::ASCENDING:
281
- return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
299
+ return FindRangeBound<LessThan, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary, chunk_idx,
300
+ prev);
282
301
  case OrderType::DESCENDING:
283
- return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, range, boundary, chunk_idx, prev);
302
+ return FindRangeBound<GreaterThan, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary, chunk_idx,
303
+ prev);
284
304
  default:
285
305
  throw InternalException("Unsupported ORDER BY sense for RANGE");
286
306
  }
@@ -686,19 +706,15 @@ void WindowBoundariesState::ValidEnd(DataChunk &bounds, idx_t row_idx, const idx
686
706
  if (!is_same_partition || is_jump) {
687
707
  // Find valid ordering values for the new partition
688
708
  // so we can exclude NULLs from RANGE expression computations
709
+ const auto valid_start = valid_begin_data[chunk_idx];
689
710
  valid_end = partition_end_data[chunk_idx];
690
711
 
691
712
  if ((valid_start < valid_end) && has_following_range) {
692
713
  // Exclude any trailing NULLs
693
- const auto valid_start = valid_begin_data[chunk_idx];
694
714
  if (range->CellIsNull(0, valid_end - 1)) {
695
715
  idx_t n = 1;
696
716
  valid_end = FindPrevStart(order_mask, valid_start, valid_end, n);
697
717
  }
698
-
699
- // Reset range hints
700
- prev.start = valid_start;
701
- prev.end = valid_end;
702
718
  }
703
719
  }
704
720
 
@@ -718,6 +734,18 @@ void WindowBoundariesState::FrameBegin(DataChunk &bounds, idx_t row_idx, const i
718
734
 
719
735
  idx_t window_start = NumericLimits<idx_t>::Maximum();
720
736
 
737
+ // Reset previous range hints
738
+ idx_t prev_partition = partition_begin_data[0];
739
+ prev.start = valid_begin_data[0];
740
+ prev.end = valid_end_data[0];
741
+
742
+ if (has_preceding_range || has_following_range) {
743
+ if (range_lo.get() != range.get()) {
744
+ range_lo = range.get();
745
+ range_hi = range_lo->Copy();
746
+ }
747
+ }
748
+
721
749
  switch (start_boundary) {
722
750
  case WindowBoundary::UNBOUNDED_PRECEDING:
723
751
  bounds.data[FRAME_BEGIN].Reference(bounds.data[PARTITION_BEGIN]);
@@ -766,7 +794,12 @@ void WindowBoundariesState::FrameBegin(DataChunk &bounds, idx_t row_idx, const i
766
794
  } else {
767
795
  const auto valid_start = valid_begin_data[chunk_idx];
768
796
  prev.end = valid_end_data[chunk_idx];
769
- window_start = FindOrderedRangeBound<true>(*range, range_sense, valid_start, row_idx + 1,
797
+ const auto cur_partition = partition_begin_data[chunk_idx];
798
+ if (cur_partition != prev_partition) {
799
+ prev.start = valid_start;
800
+ prev_partition = cur_partition;
801
+ }
802
+ window_start = FindOrderedRangeBound<true>(*range_lo, *range_hi, range_sense, valid_start, row_idx + 1,
770
803
  start_boundary, boundary_begin, chunk_idx, prev);
771
804
  prev.start = window_start;
772
805
  }
@@ -780,8 +813,13 @@ void WindowBoundariesState::FrameBegin(DataChunk &bounds, idx_t row_idx, const i
780
813
  } else {
781
814
  const auto valid_end = valid_end_data[chunk_idx];
782
815
  prev.end = valid_end;
783
- window_start = FindOrderedRangeBound<true>(*range, range_sense, row_idx, valid_end, start_boundary,
784
- boundary_begin, chunk_idx, prev);
816
+ const auto cur_partition = partition_begin_data[chunk_idx];
817
+ if (cur_partition != prev_partition) {
818
+ prev.start = valid_begin_data[chunk_idx];
819
+ prev_partition = cur_partition;
820
+ }
821
+ window_start = FindOrderedRangeBound<true>(*range_lo, *range_hi, range_sense, row_idx, valid_end,
822
+ start_boundary, boundary_begin, chunk_idx, prev);
785
823
  prev.start = window_start;
786
824
  }
787
825
  frame_begin_data[chunk_idx] = window_start;
@@ -852,6 +890,18 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx
852
890
 
853
891
  idx_t window_end = NumericLimits<idx_t>::Maximum();
854
892
 
893
+ // Reset previous range hints
894
+ idx_t prev_partition = partition_begin_data[0];
895
+ prev.start = valid_begin_data[0];
896
+ prev.end = valid_end_data[0];
897
+
898
+ if (has_preceding_range || has_following_range) {
899
+ if (range_lo.get() != range.get()) {
900
+ range_lo = range.get();
901
+ range_hi = range_lo->Copy();
902
+ }
903
+ }
904
+
855
905
  switch (end_boundary) {
856
906
  case WindowBoundary::CURRENT_ROW_ROWS:
857
907
  for (idx_t chunk_idx = 0; chunk_idx < count; ++chunk_idx, ++row_idx) {
@@ -901,8 +951,13 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx
901
951
  } else {
902
952
  const auto valid_start = valid_begin_data[chunk_idx];
903
953
  prev.start = valid_start;
904
- window_end = FindOrderedRangeBound<false>(*range, range_sense, valid_start, row_idx + 1, end_boundary,
905
- boundary_end, chunk_idx, prev);
954
+ const auto cur_partition = partition_begin_data[chunk_idx];
955
+ if (cur_partition != prev_partition) {
956
+ prev.end = valid_end;
957
+ prev_partition = cur_partition;
958
+ }
959
+ window_end = FindOrderedRangeBound<false>(*range_lo, *range_hi, range_sense, valid_start, row_idx + 1,
960
+ end_boundary, boundary_end, chunk_idx, prev);
906
961
  prev.end = window_end;
907
962
  }
908
963
  frame_end_data[chunk_idx] = window_end;
@@ -915,8 +970,13 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx
915
970
  } else {
916
971
  const auto valid_end = valid_end_data[chunk_idx];
917
972
  prev.start = valid_begin_data[chunk_idx];
918
- window_end = FindOrderedRangeBound<false>(*range, range_sense, row_idx, valid_end, end_boundary,
919
- boundary_end, chunk_idx, prev);
973
+ const auto cur_partition = partition_begin_data[chunk_idx];
974
+ if (cur_partition != prev_partition) {
975
+ prev.end = valid_end;
976
+ prev_partition = cur_partition;
977
+ }
978
+ window_end = FindOrderedRangeBound<false>(*range_lo, *range_hi, range_sense, row_idx, valid_end,
979
+ end_boundary, boundary_end, chunk_idx, prev);
920
980
  prev.end = window_end;
921
981
  }
922
982
  frame_end_data[chunk_idx] = window_end;
@@ -18,6 +18,10 @@ public:
18
18
 
19
19
  void Finalize(const FrameStats &stats);
20
20
 
21
+ ~WindowConstantAggregatorGlobalState() override {
22
+ statef.Destroy();
23
+ }
24
+
21
25
  //! Partition starts
22
26
  vector<idx_t> partition_offsets;
23
27
  //! Reused result state container for the window functions
@@ -304,11 +308,7 @@ void WindowConstantAggregator::Finalize(WindowAggregatorState &gstate, WindowAgg
304
308
  lastate.statef.Combine(gastate.statef);
305
309
  lastate.statef.Destroy();
306
310
 
307
- // Last one out turns off the lights!
308
- if (++gastate.finalized == gastate.locals) {
309
- gastate.statef.Finalize(*gastate.results);
310
- gastate.statef.Destroy();
311
- }
311
+ gastate.statef.Finalize(*gastate.results);
312
312
  }
313
313
 
314
314
  unique_ptr<WindowAggregatorState> WindowConstantAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
@@ -190,6 +190,10 @@ class WindowDistinctAggregatorLocalState : public WindowAggregatorLocalState {
190
190
  public:
191
191
  explicit WindowDistinctAggregatorLocalState(const WindowDistinctAggregatorGlobalState &aggregator);
192
192
 
193
+ ~WindowDistinctAggregatorLocalState() override {
194
+ statef.Destroy();
195
+ }
196
+
193
197
  void Sink(DataChunk &sink_chunk, DataChunk &coll_chunk, idx_t input_idx, optional_ptr<SelectionVector> filter_sel,
194
198
  idx_t filtered);
195
199
  void Finalize(WindowAggregatorGlobalState &gastate, CollectionPtr collection) override;
@@ -740,6 +744,8 @@ void WindowDistinctAggregatorLocalState::Evaluate(const WindowDistinctAggregator
740
744
 
741
745
  // Finalise the result aggregates and write to the result
742
746
  statef.Finalize(result);
747
+
748
+ // Destruct any non-POD state
743
749
  statef.Destroy();
744
750
  }
745
751
 
@@ -56,7 +56,7 @@ public:
56
56
  OnEntryNotFound on_entry_not_found = OnEntryNotFound::THROW_EXCEPTION,
57
57
  QueryErrorContext error_context = QueryErrorContext());
58
58
 
59
- CatalogSearchPath &GetSearchPath();
59
+ const CatalogSearchPath &GetSearchPath() const;
60
60
  void SetSearchPath(vector<CatalogSearchEntry> entries);
61
61
 
62
62
  void SetCallback(catalog_entry_callback_t callback);
@@ -48,20 +48,21 @@ public:
48
48
  DUCKDB_API void Set(vector<CatalogSearchEntry> new_paths, CatalogSetPathType set_type);
49
49
  DUCKDB_API void Reset();
50
50
 
51
- DUCKDB_API const vector<CatalogSearchEntry> &Get();
52
- const vector<CatalogSearchEntry> &GetSetPaths() {
51
+ DUCKDB_API const vector<CatalogSearchEntry> &Get() const;
52
+ const vector<CatalogSearchEntry> &GetSetPaths() const {
53
53
  return set_paths;
54
54
  }
55
- DUCKDB_API const CatalogSearchEntry &GetDefault();
55
+ DUCKDB_API const CatalogSearchEntry &GetDefault() const;
56
56
  //! FIXME: this method is deprecated
57
- DUCKDB_API string GetDefaultSchema(const string &catalog);
58
- DUCKDB_API string GetDefaultSchema(ClientContext &context, const string &catalog);
59
- DUCKDB_API string GetDefaultCatalog(const string &schema);
57
+ DUCKDB_API string GetDefaultSchema(const string &catalog) const;
58
+ DUCKDB_API string GetDefaultSchema(ClientContext &context, const string &catalog) const;
59
+ DUCKDB_API string GetDefaultCatalog(const string &schema) const;
60
60
 
61
- DUCKDB_API vector<string> GetSchemasForCatalog(const string &catalog);
62
- DUCKDB_API vector<string> GetCatalogsForSchema(const string &schema);
61
+ DUCKDB_API vector<string> GetSchemasForCatalog(const string &catalog) const;
62
+ DUCKDB_API vector<string> GetCatalogsForSchema(const string &schema) const;
63
63
 
64
- DUCKDB_API bool SchemaInSearchPath(ClientContext &context, const string &catalog_name, const string &schema_name);
64
+ DUCKDB_API bool SchemaInSearchPath(ClientContext &context, const string &catalog_name,
65
+ const string &schema_name) const;
65
66
 
66
67
  private:
67
68
  //! Set paths without checking if they exist
@@ -30,7 +30,7 @@ extern "C" {
30
30
  typedef uint8_t AdbcStatusCode;
31
31
 
32
32
  //! We gotta leak the symbols of the init function
33
- DUCKDB_API AdbcStatusCode duckdb_adbc_init(int version, void *driver, struct AdbcError *error);
33
+ DUCKDB_C_API AdbcStatusCode duckdb_adbc_init(int version, void *driver, struct AdbcError *error);
34
34
 
35
35
  #ifdef __cplusplus
36
36
  }
@@ -168,7 +168,7 @@ struct MultiFileFilterEntry {
168
168
  struct MultiFileConstantEntry {
169
169
  MultiFileConstantEntry(idx_t column_id, Value value_p) : column_id(column_id), value(std::move(value_p)) {
170
170
  }
171
- //! The column id to apply the constant value to
171
+ //! The (global) column id to apply the constant value to
172
172
  idx_t column_id;
173
173
  //! The constant value
174
174
  Value value;
@@ -273,7 +273,7 @@ struct MultiFileReader {
273
273
  const string &initial_file, const MultiFileReaderBindData &options,
274
274
  optional_ptr<MultiFileReaderGlobalState> global_state);
275
275
  //! Populated the filter_map
276
- DUCKDB_API virtual void CreateFilterMap(const vector<MultiFileReaderColumnDefinition> &global_columns,
276
+ DUCKDB_API virtual void CreateFilterMap(const vector<ColumnIndex> &global_column_ids,
277
277
  optional_ptr<TableFilterSet> filters, MultiFileReaderData &reader_data,
278
278
  optional_ptr<MultiFileReaderGlobalState> global_state);
279
279
 
@@ -90,6 +90,8 @@ private:
90
90
  GateStatus status;
91
91
  //! Depth in a nested leaf.
92
92
  uint8_t nested_depth = 0;
93
+ //! True, if we entered a nested leaf to retrieve the next node.
94
+ bool entered_nested_leaf = false;
93
95
 
94
96
  private:
95
97
  //! Goes to the next leaf in the ART and sets it as last_leaf,
@@ -116,7 +116,7 @@ class BaseScanner {
116
116
  public:
117
117
  explicit BaseScanner(shared_ptr<CSVBufferManager> buffer_manager, shared_ptr<CSVStateMachine> state_machine,
118
118
  shared_ptr<CSVErrorHandler> error_handler, bool sniffing = false,
119
- shared_ptr<CSVFileScan> csv_file_scan = nullptr, CSVIterator iterator = {});
119
+ shared_ptr<CSVFileScan> csv_file_scan = nullptr, const CSVIterator &iterator = {});
120
120
 
121
121
  virtual ~BaseScanner() = default;
122
122
 
@@ -45,14 +45,15 @@ class CSVBuffer {
45
45
  public:
46
46
  //! Constructor for Initial Buffer
47
47
  CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle,
48
- idx_t &global_csv_current_position, idx_t file_number);
48
+ const idx_t &global_csv_current_position, idx_t file_number);
49
49
 
50
50
  //! Constructor for `Next()` Buffers
51
51
  CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t buffer_size, idx_t global_csv_current_position,
52
52
  idx_t file_number_p, idx_t buffer_idx);
53
53
 
54
54
  //! Creates a new buffer with the next part of the CSV File
55
- shared_ptr<CSVBuffer> Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number, bool &has_seaked);
55
+ shared_ptr<CSVBuffer> Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number,
56
+ bool &has_seaked) const;
56
57
 
57
58
  //! Gets the buffer actual size
58
59
  idx_t GetBufferSize() const;
@@ -67,12 +68,12 @@ public:
67
68
  //! Wrapper for the Pin Function, if it can seek, it means that the buffer might have been destroyed, hence we must
68
69
  //! Scan it from the disk file again.
69
70
  shared_ptr<CSVBufferHandle> Pin(CSVFileHandle &file_handle, bool &has_seeked);
70
- //! Wrapper for the unpin
71
+ //! Wrapper for unpin
71
72
  void Unpin();
72
73
  char *Ptr() {
73
74
  return char_ptr_cast(handle.Ptr());
74
75
  }
75
- bool IsUnloaded() {
76
+ bool IsUnloaded() const {
76
77
  return block->IsUnloaded();
77
78
  }
78
79
 
@@ -30,7 +30,7 @@ struct CSVOption { // NOLINT: work-around bug in clang-tidy
30
30
  public:
31
31
  CSVOption(T value_p) : value(value_p) { // NOLINT: allow implicit conversion from value
32
32
  }
33
- CSVOption(T value_p, bool set_by_user_p) : value(value_p), set_by_user(set_by_user_p) {
33
+ CSVOption(T value_p, bool set_by_user_p) : set_by_user(set_by_user_p), value(value_p) {
34
34
  }
35
35
 
36
36
  CSVOption() {};
@@ -14,7 +14,7 @@
14
14
  namespace duckdb {
15
15
  //! Basic CSV Column Info
16
16
  struct CSVColumnInfo {
17
- CSVColumnInfo(string &name_p, LogicalType &type_p) : name(name_p), type(type_p) {
17
+ CSVColumnInfo(const string &name_p, const LogicalType &type_p) : name(name_p), type(type_p) {
18
18
  }
19
19
  string name;
20
20
  LogicalType type;
@@ -25,7 +25,7 @@ struct CSVSchema {
25
25
  explicit CSVSchema(const bool empty = false) : empty(empty) {
26
26
  }
27
27
 
28
- CSVSchema(vector<string> &names, vector<LogicalType> &types, const string &file_path, idx_t rows_read,
28
+ CSVSchema(const vector<string> &names, const vector<LogicalType> &types, const string &file_path, idx_t rows_read,
29
29
  const bool empty = false);
30
30
 
31
31
  //! Initializes the schema based on names and types
@@ -46,7 +46,7 @@ private:
46
46
  class CSVEncoder {
47
47
  public:
48
48
  //! Constructor, basically takes an encoding and the output buffer size
49
- CSVEncoder(DBConfig &config, const string &encoding_name, idx_t buffer_size);
49
+ CSVEncoder(const DBConfig &config, const string &encoding_name, idx_t buffer_size);
50
50
  //! Main encode function, it reads the file into an encoded buffer and converts it to the output buffer
51
51
  idx_t Encode(FileHandle &file_handle_input, char *output_buffer, const idx_t decoded_buffer_size);
52
52
  string encoding_name;
@@ -222,7 +222,7 @@ private:
222
222
  //! ------------------ Type Refinement ------------------ //
223
223
  //! ------------------------------------------------------//
224
224
  void RefineTypes();
225
- bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type);
225
+ bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) const;
226
226
  vector<LogicalType> detected_types;
227
227
  //! If true, a SQLNULL type found during type detection is defaulted to varchar
228
228
  const bool default_null_to_varchar;
@@ -121,8 +121,8 @@ private:
121
121
  };
122
122
 
123
123
  struct ParseTypeInfo {
124
- ParseTypeInfo() {};
125
- ParseTypeInfo(const LogicalType &type, bool validate_utf_8_p) : validate_utf8(validate_utf_8_p) {
124
+ ParseTypeInfo() : validate_utf8(false), type_id(), internal_type(), scale(0), width(0) {};
125
+ ParseTypeInfo(const LogicalType &type, const bool validate_utf_8_p) : validate_utf8(validate_utf_8_p) {
126
126
  type_id = type.id();
127
127
  internal_type = type.InternalType();
128
128
  if (type.id() == LogicalTypeId::DECIMAL) {
@@ -19,12 +19,10 @@ public:
19
19
  static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::STREAMING_SAMPLE;
20
20
 
21
21
  public:
22
- PhysicalStreamingSample(vector<LogicalType> types, SampleMethod method, double percentage, int64_t seed,
23
- idx_t estimated_cardinality);
22
+ PhysicalStreamingSample(vector<LogicalType> types, unique_ptr<SampleOptions> options, idx_t estimated_cardinality);
24
23
 
25
- SampleMethod method;
24
+ unique_ptr<SampleOptions> sample_options;
26
25
  double percentage;
27
- int64_t seed;
28
26
 
29
27
  public:
30
28
  // Operator interface
@@ -32,9 +30,7 @@ public:
32
30
  OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
33
31
  GlobalOperatorState &gstate, OperatorState &state) const override;
34
32
 
35
- bool ParallelOperator() const override {
36
- return true;
37
- }
33
+ bool ParallelOperator() const override;
38
34
 
39
35
  InsertionOrderPreservingMap<string> ParamsToString() const override;
40
36
 
@@ -201,7 +201,8 @@ public:
201
201
  void ConvertToReservoirSample();
202
202
 
203
203
  //! Get the capacity of the data chunk reserved for storing samples
204
- idx_t GetReservoirChunkCapacity() const;
204
+ template <typename T>
205
+ T GetReservoirChunkCapacity() const;
205
206
 
206
207
  //! If for_serialization=true then the sample_chunk is not padded with extra spaces for
207
208
  //! future sampling values
@@ -17,7 +17,7 @@
17
17
 
18
18
  namespace duckdb {
19
19
 
20
- struct ListLambdaBindData : public FunctionData {
20
+ struct ListLambdaBindData final : public FunctionData {
21
21
  public:
22
22
  ListLambdaBindData(const LogicalType &return_type, unique_ptr<Expression> lambda_expr, const bool has_index = false)
23
23
  : return_type(return_type), lambda_expr(std::move(lambda_expr)), has_index(has_index) {};
@@ -30,8 +30,16 @@ public:
30
30
  bool has_index;
31
31
 
32
32
  public:
33
- bool Equals(const FunctionData &other_p) const override;
34
- unique_ptr<FunctionData> Copy() const override;
33
+ unique_ptr<FunctionData> Copy() const override {
34
+ auto lambda_expr_copy = lambda_expr ? lambda_expr->Copy() : nullptr;
35
+ return make_uniq<ListLambdaBindData>(return_type, std::move(lambda_expr_copy), has_index);
36
+ }
37
+
38
+ bool Equals(const FunctionData &other_p) const override {
39
+ auto &other = other_p.Cast<ListLambdaBindData>();
40
+ return Expression::Equals(lambda_expr, other.lambda_expr) && return_type == other.return_type &&
41
+ has_index == other.has_index;
42
+ }
35
43
 
36
44
  //! Serializes a lambda function's bind data
37
45
  static void Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
@@ -148,6 +148,10 @@ struct WindowBoundariesState {
148
148
  idx_t valid_end = 0;
149
149
 
150
150
  FrameBounds prev;
151
+
152
+ // Extra range cursor
153
+ optional_ptr<WindowCursor> range_lo;
154
+ unique_ptr<WindowCursor> range_hi;
151
155
  };
152
156
 
153
157
  } // namespace duckdb
@@ -80,6 +80,10 @@ public:
80
80
  }
81
81
  virtual void WriteProfilingInformation(std::ostream &ss) {
82
82
  }
83
+ virtual void OnTaskStart(ClientContext &context) {
84
+ }
85
+ virtual void OnTaskStop(ClientContext &context) {
86
+ }
83
87
 
84
88
  public:
85
89
  template <class TARGET>