duckdb 1.2.1-dev4.0 → 1.2.1-dev8.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/connection.cpp +57 -35
- package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
- package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
- package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
- package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
- package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
- package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
- package/src/duckdb/extension/json/json_extension.cpp +8 -3
- package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
- package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
- package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
- package/src/duckdb/src/catalog/catalog.cpp +12 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
- package/src/duckdb/src/common/bind_helpers.cpp +3 -0
- package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +19 -6
- package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
- package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
- package/src/duckdb/src/include/duckdb.h +495 -480
- package/src/duckdb/src/main/attached_database.cpp +1 -1
- package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
- package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
- package/src/duckdb/src/main/config.cpp +7 -1
- package/src/duckdb/src/main/database.cpp +8 -8
- package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
- package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
- package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
- package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
- package/src/duckdb/src/parallel/executor_task.cpp +10 -6
- package/src/duckdb/src/parallel/task_executor.cpp +4 -1
- package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/storage_manager.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +5 -6
- package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
- package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
- package/src/duckdb/ub_src_parallel.cpp +2 -0
@@ -180,9 +180,9 @@ struct OperationCompare : public std::function<bool(T, T)> {
|
|
180
180
|
};
|
181
181
|
|
182
182
|
template <typename T, typename OP, bool FROM>
|
183
|
-
static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, const idx_t order_end,
|
184
|
-
const WindowBoundary range, WindowInputExpression &boundary, const idx_t chunk_idx,
|
185
|
-
const FrameBounds &prev) {
|
183
|
+
static idx_t FindTypedRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, const idx_t order_begin,
|
184
|
+
const idx_t order_end, const WindowBoundary range, WindowInputExpression &boundary,
|
185
|
+
const idx_t chunk_idx, const FrameBounds &prev) {
|
186
186
|
D_ASSERT(!boundary.CellIsNull(chunk_idx));
|
187
187
|
const auto val = boundary.GetCell<T>(chunk_idx);
|
188
188
|
|
@@ -191,36 +191,43 @@ static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, co
|
|
191
191
|
// Check that the value we are searching for is in range.
|
192
192
|
if (range == WindowBoundary::EXPR_PRECEDING_RANGE) {
|
193
193
|
// Preceding but value past the current value
|
194
|
-
const auto cur_val =
|
194
|
+
const auto cur_val = range_hi.GetCell<T>(0, order_end - 1);
|
195
195
|
if (comp(cur_val, val)) {
|
196
196
|
throw OutOfRangeException("Invalid RANGE PRECEDING value");
|
197
197
|
}
|
198
198
|
} else {
|
199
199
|
// Following but value before the current value
|
200
200
|
D_ASSERT(range == WindowBoundary::EXPR_FOLLOWING_RANGE);
|
201
|
-
const auto cur_val =
|
201
|
+
const auto cur_val = range_lo.GetCell<T>(0, order_begin);
|
202
202
|
if (comp(val, cur_val)) {
|
203
203
|
throw OutOfRangeException("Invalid RANGE FOLLOWING value");
|
204
204
|
}
|
205
205
|
}
|
206
|
-
|
207
206
|
// Try to reuse the previous bounds to restrict the search.
|
208
207
|
// This is only valid if the previous bounds were non-empty
|
209
208
|
// Only inject the comparisons if the previous bounds are a strict subset.
|
210
|
-
WindowColumnIterator<T> begin(
|
211
|
-
WindowColumnIterator<T> end(
|
209
|
+
WindowColumnIterator<T> begin(range_lo, order_begin);
|
210
|
+
WindowColumnIterator<T> end(range_hi, order_end);
|
212
211
|
if (prev.start < prev.end) {
|
213
212
|
if (order_begin < prev.start && prev.start < order_end) {
|
214
|
-
const auto first =
|
215
|
-
if (!comp(val, first)) {
|
216
|
-
//
|
213
|
+
const auto first = range_lo.GetCell<T>(0, prev.start);
|
214
|
+
if (FROM && !comp(val, first)) {
|
215
|
+
// If prev.start == val and we are looking for a lower bound, then we are done
|
216
|
+
if (!comp(first, val)) {
|
217
|
+
return prev.start;
|
218
|
+
}
|
219
|
+
// prev.start <= val, so we can start further forward
|
217
220
|
begin += UnsafeNumericCast<int64_t>(prev.start - order_begin);
|
218
221
|
}
|
219
222
|
}
|
220
223
|
if (order_begin < prev.end && prev.end < order_end) {
|
221
|
-
const auto second =
|
224
|
+
const auto second = range_hi.GetCell<T>(0, prev.end - 1);
|
222
225
|
if (!comp(second, val)) {
|
223
|
-
//
|
226
|
+
// If val == prev.end and we are looking for an upper bound, then we are done
|
227
|
+
if (!FROM && !comp(val, second)) {
|
228
|
+
return prev.end;
|
229
|
+
}
|
230
|
+
// val <= prev.end, so we can end further back
|
224
231
|
// (prev.second is the largest peer)
|
225
232
|
end -= UnsafeNumericCast<int64_t>(order_end - prev.end - 1);
|
226
233
|
}
|
@@ -235,52 +242,65 @@ static idx_t FindTypedRangeBound(WindowCursor &over, const idx_t order_begin, co
|
|
235
242
|
}
|
236
243
|
|
237
244
|
template <typename OP, bool FROM>
|
238
|
-
static idx_t FindRangeBound(WindowCursor &
|
239
|
-
const WindowBoundary range, WindowInputExpression &boundary,
|
240
|
-
const FrameBounds &prev) {
|
245
|
+
static idx_t FindRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, const idx_t order_begin,
|
246
|
+
const idx_t order_end, const WindowBoundary range, WindowInputExpression &boundary,
|
247
|
+
const idx_t chunk_idx, const FrameBounds &prev) {
|
241
248
|
switch (boundary.InternalType()) {
|
242
249
|
case PhysicalType::INT8:
|
243
|
-
return FindTypedRangeBound<int8_t, OP, FROM>(
|
250
|
+
return FindTypedRangeBound<int8_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
251
|
+
chunk_idx, prev);
|
244
252
|
case PhysicalType::INT16:
|
245
|
-
return FindTypedRangeBound<int16_t, OP, FROM>(
|
253
|
+
return FindTypedRangeBound<int16_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
254
|
+
chunk_idx, prev);
|
246
255
|
case PhysicalType::INT32:
|
247
|
-
return FindTypedRangeBound<int32_t, OP, FROM>(
|
256
|
+
return FindTypedRangeBound<int32_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
257
|
+
chunk_idx, prev);
|
248
258
|
case PhysicalType::INT64:
|
249
|
-
return FindTypedRangeBound<int64_t, OP, FROM>(
|
259
|
+
return FindTypedRangeBound<int64_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
260
|
+
chunk_idx, prev);
|
250
261
|
case PhysicalType::UINT8:
|
251
|
-
return FindTypedRangeBound<uint8_t, OP, FROM>(
|
262
|
+
return FindTypedRangeBound<uint8_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
263
|
+
chunk_idx, prev);
|
252
264
|
case PhysicalType::UINT16:
|
253
|
-
return FindTypedRangeBound<uint16_t, OP, FROM>(
|
265
|
+
return FindTypedRangeBound<uint16_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
266
|
+
chunk_idx, prev);
|
254
267
|
case PhysicalType::UINT32:
|
255
|
-
return FindTypedRangeBound<uint32_t, OP, FROM>(
|
268
|
+
return FindTypedRangeBound<uint32_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
269
|
+
chunk_idx, prev);
|
256
270
|
case PhysicalType::UINT64:
|
257
|
-
return FindTypedRangeBound<uint64_t, OP, FROM>(
|
271
|
+
return FindTypedRangeBound<uint64_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
272
|
+
chunk_idx, prev);
|
258
273
|
case PhysicalType::INT128:
|
259
|
-
return FindTypedRangeBound<hugeint_t, OP, FROM>(
|
274
|
+
return FindTypedRangeBound<hugeint_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
275
|
+
chunk_idx, prev);
|
260
276
|
case PhysicalType::UINT128:
|
261
|
-
return FindTypedRangeBound<uhugeint_t, OP, FROM>(
|
262
|
-
prev);
|
277
|
+
return FindTypedRangeBound<uhugeint_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
278
|
+
chunk_idx, prev);
|
263
279
|
case PhysicalType::FLOAT:
|
264
|
-
return FindTypedRangeBound<float, OP, FROM>(
|
280
|
+
return FindTypedRangeBound<float, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
281
|
+
chunk_idx, prev);
|
265
282
|
case PhysicalType::DOUBLE:
|
266
|
-
return FindTypedRangeBound<double, OP, FROM>(
|
283
|
+
return FindTypedRangeBound<double, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
284
|
+
chunk_idx, prev);
|
267
285
|
case PhysicalType::INTERVAL:
|
268
|
-
return FindTypedRangeBound<interval_t, OP, FROM>(
|
269
|
-
prev);
|
286
|
+
return FindTypedRangeBound<interval_t, OP, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary,
|
287
|
+
chunk_idx, prev);
|
270
288
|
default:
|
271
289
|
throw InternalException("Unsupported column type for RANGE");
|
272
290
|
}
|
273
291
|
}
|
274
292
|
|
275
293
|
template <bool FROM>
|
276
|
-
static idx_t FindOrderedRangeBound(WindowCursor &
|
277
|
-
const idx_t order_end, const WindowBoundary range,
|
278
|
-
const idx_t chunk_idx, const FrameBounds &prev) {
|
294
|
+
static idx_t FindOrderedRangeBound(WindowCursor &range_lo, WindowCursor &range_hi, const OrderType range_sense,
|
295
|
+
const idx_t order_begin, const idx_t order_end, const WindowBoundary range,
|
296
|
+
WindowInputExpression &boundary, const idx_t chunk_idx, const FrameBounds &prev) {
|
279
297
|
switch (range_sense) {
|
280
298
|
case OrderType::ASCENDING:
|
281
|
-
return FindRangeBound<LessThan, FROM>(
|
299
|
+
return FindRangeBound<LessThan, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary, chunk_idx,
|
300
|
+
prev);
|
282
301
|
case OrderType::DESCENDING:
|
283
|
-
return FindRangeBound<GreaterThan, FROM>(
|
302
|
+
return FindRangeBound<GreaterThan, FROM>(range_lo, range_hi, order_begin, order_end, range, boundary, chunk_idx,
|
303
|
+
prev);
|
284
304
|
default:
|
285
305
|
throw InternalException("Unsupported ORDER BY sense for RANGE");
|
286
306
|
}
|
@@ -686,19 +706,15 @@ void WindowBoundariesState::ValidEnd(DataChunk &bounds, idx_t row_idx, const idx
|
|
686
706
|
if (!is_same_partition || is_jump) {
|
687
707
|
// Find valid ordering values for the new partition
|
688
708
|
// so we can exclude NULLs from RANGE expression computations
|
709
|
+
const auto valid_start = valid_begin_data[chunk_idx];
|
689
710
|
valid_end = partition_end_data[chunk_idx];
|
690
711
|
|
691
712
|
if ((valid_start < valid_end) && has_following_range) {
|
692
713
|
// Exclude any trailing NULLs
|
693
|
-
const auto valid_start = valid_begin_data[chunk_idx];
|
694
714
|
if (range->CellIsNull(0, valid_end - 1)) {
|
695
715
|
idx_t n = 1;
|
696
716
|
valid_end = FindPrevStart(order_mask, valid_start, valid_end, n);
|
697
717
|
}
|
698
|
-
|
699
|
-
// Reset range hints
|
700
|
-
prev.start = valid_start;
|
701
|
-
prev.end = valid_end;
|
702
718
|
}
|
703
719
|
}
|
704
720
|
|
@@ -718,6 +734,18 @@ void WindowBoundariesState::FrameBegin(DataChunk &bounds, idx_t row_idx, const i
|
|
718
734
|
|
719
735
|
idx_t window_start = NumericLimits<idx_t>::Maximum();
|
720
736
|
|
737
|
+
// Reset previous range hints
|
738
|
+
idx_t prev_partition = partition_begin_data[0];
|
739
|
+
prev.start = valid_begin_data[0];
|
740
|
+
prev.end = valid_end_data[0];
|
741
|
+
|
742
|
+
if (has_preceding_range || has_following_range) {
|
743
|
+
if (range_lo.get() != range.get()) {
|
744
|
+
range_lo = range.get();
|
745
|
+
range_hi = range_lo->Copy();
|
746
|
+
}
|
747
|
+
}
|
748
|
+
|
721
749
|
switch (start_boundary) {
|
722
750
|
case WindowBoundary::UNBOUNDED_PRECEDING:
|
723
751
|
bounds.data[FRAME_BEGIN].Reference(bounds.data[PARTITION_BEGIN]);
|
@@ -766,7 +794,12 @@ void WindowBoundariesState::FrameBegin(DataChunk &bounds, idx_t row_idx, const i
|
|
766
794
|
} else {
|
767
795
|
const auto valid_start = valid_begin_data[chunk_idx];
|
768
796
|
prev.end = valid_end_data[chunk_idx];
|
769
|
-
|
797
|
+
const auto cur_partition = partition_begin_data[chunk_idx];
|
798
|
+
if (cur_partition != prev_partition) {
|
799
|
+
prev.start = valid_start;
|
800
|
+
prev_partition = cur_partition;
|
801
|
+
}
|
802
|
+
window_start = FindOrderedRangeBound<true>(*range_lo, *range_hi, range_sense, valid_start, row_idx + 1,
|
770
803
|
start_boundary, boundary_begin, chunk_idx, prev);
|
771
804
|
prev.start = window_start;
|
772
805
|
}
|
@@ -780,8 +813,13 @@ void WindowBoundariesState::FrameBegin(DataChunk &bounds, idx_t row_idx, const i
|
|
780
813
|
} else {
|
781
814
|
const auto valid_end = valid_end_data[chunk_idx];
|
782
815
|
prev.end = valid_end;
|
783
|
-
|
784
|
-
|
816
|
+
const auto cur_partition = partition_begin_data[chunk_idx];
|
817
|
+
if (cur_partition != prev_partition) {
|
818
|
+
prev.start = valid_begin_data[chunk_idx];
|
819
|
+
prev_partition = cur_partition;
|
820
|
+
}
|
821
|
+
window_start = FindOrderedRangeBound<true>(*range_lo, *range_hi, range_sense, row_idx, valid_end,
|
822
|
+
start_boundary, boundary_begin, chunk_idx, prev);
|
785
823
|
prev.start = window_start;
|
786
824
|
}
|
787
825
|
frame_begin_data[chunk_idx] = window_start;
|
@@ -852,6 +890,18 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx
|
|
852
890
|
|
853
891
|
idx_t window_end = NumericLimits<idx_t>::Maximum();
|
854
892
|
|
893
|
+
// Reset previous range hints
|
894
|
+
idx_t prev_partition = partition_begin_data[0];
|
895
|
+
prev.start = valid_begin_data[0];
|
896
|
+
prev.end = valid_end_data[0];
|
897
|
+
|
898
|
+
if (has_preceding_range || has_following_range) {
|
899
|
+
if (range_lo.get() != range.get()) {
|
900
|
+
range_lo = range.get();
|
901
|
+
range_hi = range_lo->Copy();
|
902
|
+
}
|
903
|
+
}
|
904
|
+
|
855
905
|
switch (end_boundary) {
|
856
906
|
case WindowBoundary::CURRENT_ROW_ROWS:
|
857
907
|
for (idx_t chunk_idx = 0; chunk_idx < count; ++chunk_idx, ++row_idx) {
|
@@ -901,8 +951,13 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx
|
|
901
951
|
} else {
|
902
952
|
const auto valid_start = valid_begin_data[chunk_idx];
|
903
953
|
prev.start = valid_start;
|
904
|
-
|
905
|
-
|
954
|
+
const auto cur_partition = partition_begin_data[chunk_idx];
|
955
|
+
if (cur_partition != prev_partition) {
|
956
|
+
prev.end = valid_end;
|
957
|
+
prev_partition = cur_partition;
|
958
|
+
}
|
959
|
+
window_end = FindOrderedRangeBound<false>(*range_lo, *range_hi, range_sense, valid_start, row_idx + 1,
|
960
|
+
end_boundary, boundary_end, chunk_idx, prev);
|
906
961
|
prev.end = window_end;
|
907
962
|
}
|
908
963
|
frame_end_data[chunk_idx] = window_end;
|
@@ -915,8 +970,13 @@ void WindowBoundariesState::FrameEnd(DataChunk &bounds, idx_t row_idx, const idx
|
|
915
970
|
} else {
|
916
971
|
const auto valid_end = valid_end_data[chunk_idx];
|
917
972
|
prev.start = valid_begin_data[chunk_idx];
|
918
|
-
|
919
|
-
|
973
|
+
const auto cur_partition = partition_begin_data[chunk_idx];
|
974
|
+
if (cur_partition != prev_partition) {
|
975
|
+
prev.end = valid_end;
|
976
|
+
prev_partition = cur_partition;
|
977
|
+
}
|
978
|
+
window_end = FindOrderedRangeBound<false>(*range_lo, *range_hi, range_sense, row_idx, valid_end,
|
979
|
+
end_boundary, boundary_end, chunk_idx, prev);
|
920
980
|
prev.end = window_end;
|
921
981
|
}
|
922
982
|
frame_end_data[chunk_idx] = window_end;
|
@@ -18,6 +18,10 @@ public:
|
|
18
18
|
|
19
19
|
void Finalize(const FrameStats &stats);
|
20
20
|
|
21
|
+
~WindowConstantAggregatorGlobalState() override {
|
22
|
+
statef.Destroy();
|
23
|
+
}
|
24
|
+
|
21
25
|
//! Partition starts
|
22
26
|
vector<idx_t> partition_offsets;
|
23
27
|
//! Reused result state container for the window functions
|
@@ -304,11 +308,7 @@ void WindowConstantAggregator::Finalize(WindowAggregatorState &gstate, WindowAgg
|
|
304
308
|
lastate.statef.Combine(gastate.statef);
|
305
309
|
lastate.statef.Destroy();
|
306
310
|
|
307
|
-
|
308
|
-
if (++gastate.finalized == gastate.locals) {
|
309
|
-
gastate.statef.Finalize(*gastate.results);
|
310
|
-
gastate.statef.Destroy();
|
311
|
-
}
|
311
|
+
gastate.statef.Finalize(*gastate.results);
|
312
312
|
}
|
313
313
|
|
314
314
|
unique_ptr<WindowAggregatorState> WindowConstantAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
|
@@ -190,6 +190,10 @@ class WindowDistinctAggregatorLocalState : public WindowAggregatorLocalState {
|
|
190
190
|
public:
|
191
191
|
explicit WindowDistinctAggregatorLocalState(const WindowDistinctAggregatorGlobalState &aggregator);
|
192
192
|
|
193
|
+
~WindowDistinctAggregatorLocalState() override {
|
194
|
+
statef.Destroy();
|
195
|
+
}
|
196
|
+
|
193
197
|
void Sink(DataChunk &sink_chunk, DataChunk &coll_chunk, idx_t input_idx, optional_ptr<SelectionVector> filter_sel,
|
194
198
|
idx_t filtered);
|
195
199
|
void Finalize(WindowAggregatorGlobalState &gastate, CollectionPtr collection) override;
|
@@ -740,6 +744,8 @@ void WindowDistinctAggregatorLocalState::Evaluate(const WindowDistinctAggregator
|
|
740
744
|
|
741
745
|
// Finalise the result aggregates and write to the result
|
742
746
|
statef.Finalize(result);
|
747
|
+
|
748
|
+
// Destruct any non-POD state
|
743
749
|
statef.Destroy();
|
744
750
|
}
|
745
751
|
|
@@ -56,7 +56,7 @@ public:
|
|
56
56
|
OnEntryNotFound on_entry_not_found = OnEntryNotFound::THROW_EXCEPTION,
|
57
57
|
QueryErrorContext error_context = QueryErrorContext());
|
58
58
|
|
59
|
-
CatalogSearchPath &GetSearchPath();
|
59
|
+
const CatalogSearchPath &GetSearchPath() const;
|
60
60
|
void SetSearchPath(vector<CatalogSearchEntry> entries);
|
61
61
|
|
62
62
|
void SetCallback(catalog_entry_callback_t callback);
|
@@ -48,20 +48,21 @@ public:
|
|
48
48
|
DUCKDB_API void Set(vector<CatalogSearchEntry> new_paths, CatalogSetPathType set_type);
|
49
49
|
DUCKDB_API void Reset();
|
50
50
|
|
51
|
-
DUCKDB_API const vector<CatalogSearchEntry> &Get();
|
52
|
-
const vector<CatalogSearchEntry> &GetSetPaths() {
|
51
|
+
DUCKDB_API const vector<CatalogSearchEntry> &Get() const;
|
52
|
+
const vector<CatalogSearchEntry> &GetSetPaths() const {
|
53
53
|
return set_paths;
|
54
54
|
}
|
55
|
-
DUCKDB_API const CatalogSearchEntry &GetDefault();
|
55
|
+
DUCKDB_API const CatalogSearchEntry &GetDefault() const;
|
56
56
|
//! FIXME: this method is deprecated
|
57
|
-
DUCKDB_API string GetDefaultSchema(const string &catalog);
|
58
|
-
DUCKDB_API string GetDefaultSchema(ClientContext &context, const string &catalog);
|
59
|
-
DUCKDB_API string GetDefaultCatalog(const string &schema);
|
57
|
+
DUCKDB_API string GetDefaultSchema(const string &catalog) const;
|
58
|
+
DUCKDB_API string GetDefaultSchema(ClientContext &context, const string &catalog) const;
|
59
|
+
DUCKDB_API string GetDefaultCatalog(const string &schema) const;
|
60
60
|
|
61
|
-
DUCKDB_API vector<string> GetSchemasForCatalog(const string &catalog);
|
62
|
-
DUCKDB_API vector<string> GetCatalogsForSchema(const string &schema);
|
61
|
+
DUCKDB_API vector<string> GetSchemasForCatalog(const string &catalog) const;
|
62
|
+
DUCKDB_API vector<string> GetCatalogsForSchema(const string &schema) const;
|
63
63
|
|
64
|
-
DUCKDB_API bool SchemaInSearchPath(ClientContext &context, const string &catalog_name,
|
64
|
+
DUCKDB_API bool SchemaInSearchPath(ClientContext &context, const string &catalog_name,
|
65
|
+
const string &schema_name) const;
|
65
66
|
|
66
67
|
private:
|
67
68
|
//! Set paths without checking if they exist
|
@@ -30,7 +30,7 @@ extern "C" {
|
|
30
30
|
typedef uint8_t AdbcStatusCode;
|
31
31
|
|
32
32
|
//! We gotta leak the symbols of the init function
|
33
|
-
|
33
|
+
DUCKDB_C_API AdbcStatusCode duckdb_adbc_init(int version, void *driver, struct AdbcError *error);
|
34
34
|
|
35
35
|
#ifdef __cplusplus
|
36
36
|
}
|
@@ -168,7 +168,7 @@ struct MultiFileFilterEntry {
|
|
168
168
|
struct MultiFileConstantEntry {
|
169
169
|
MultiFileConstantEntry(idx_t column_id, Value value_p) : column_id(column_id), value(std::move(value_p)) {
|
170
170
|
}
|
171
|
-
//! The column id to apply the constant value to
|
171
|
+
//! The (global) column id to apply the constant value to
|
172
172
|
idx_t column_id;
|
173
173
|
//! The constant value
|
174
174
|
Value value;
|
@@ -273,7 +273,7 @@ struct MultiFileReader {
|
|
273
273
|
const string &initial_file, const MultiFileReaderBindData &options,
|
274
274
|
optional_ptr<MultiFileReaderGlobalState> global_state);
|
275
275
|
//! Populated the filter_map
|
276
|
-
DUCKDB_API virtual void CreateFilterMap(const vector<
|
276
|
+
DUCKDB_API virtual void CreateFilterMap(const vector<ColumnIndex> &global_column_ids,
|
277
277
|
optional_ptr<TableFilterSet> filters, MultiFileReaderData &reader_data,
|
278
278
|
optional_ptr<MultiFileReaderGlobalState> global_state);
|
279
279
|
|
@@ -90,6 +90,8 @@ private:
|
|
90
90
|
GateStatus status;
|
91
91
|
//! Depth in a nested leaf.
|
92
92
|
uint8_t nested_depth = 0;
|
93
|
+
//! True, if we entered a nested leaf to retrieve the next node.
|
94
|
+
bool entered_nested_leaf = false;
|
93
95
|
|
94
96
|
private:
|
95
97
|
//! Goes to the next leaf in the ART and sets it as last_leaf,
|
@@ -116,7 +116,7 @@ class BaseScanner {
|
|
116
116
|
public:
|
117
117
|
explicit BaseScanner(shared_ptr<CSVBufferManager> buffer_manager, shared_ptr<CSVStateMachine> state_machine,
|
118
118
|
shared_ptr<CSVErrorHandler> error_handler, bool sniffing = false,
|
119
|
-
shared_ptr<CSVFileScan> csv_file_scan = nullptr, CSVIterator iterator = {});
|
119
|
+
shared_ptr<CSVFileScan> csv_file_scan = nullptr, const CSVIterator &iterator = {});
|
120
120
|
|
121
121
|
virtual ~BaseScanner() = default;
|
122
122
|
|
@@ -45,14 +45,15 @@ class CSVBuffer {
|
|
45
45
|
public:
|
46
46
|
//! Constructor for Initial Buffer
|
47
47
|
CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle,
|
48
|
-
idx_t &global_csv_current_position, idx_t file_number);
|
48
|
+
const idx_t &global_csv_current_position, idx_t file_number);
|
49
49
|
|
50
50
|
//! Constructor for `Next()` Buffers
|
51
51
|
CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t buffer_size, idx_t global_csv_current_position,
|
52
52
|
idx_t file_number_p, idx_t buffer_idx);
|
53
53
|
|
54
54
|
//! Creates a new buffer with the next part of the CSV File
|
55
|
-
shared_ptr<CSVBuffer> Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number,
|
55
|
+
shared_ptr<CSVBuffer> Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number,
|
56
|
+
bool &has_seaked) const;
|
56
57
|
|
57
58
|
//! Gets the buffer actual size
|
58
59
|
idx_t GetBufferSize() const;
|
@@ -67,12 +68,12 @@ public:
|
|
67
68
|
//! Wrapper for the Pin Function, if it can seek, it means that the buffer might have been destroyed, hence we must
|
68
69
|
//! Scan it from the disk file again.
|
69
70
|
shared_ptr<CSVBufferHandle> Pin(CSVFileHandle &file_handle, bool &has_seeked);
|
70
|
-
//! Wrapper for
|
71
|
+
//! Wrapper for unpin
|
71
72
|
void Unpin();
|
72
73
|
char *Ptr() {
|
73
74
|
return char_ptr_cast(handle.Ptr());
|
74
75
|
}
|
75
|
-
bool IsUnloaded() {
|
76
|
+
bool IsUnloaded() const {
|
76
77
|
return block->IsUnloaded();
|
77
78
|
}
|
78
79
|
|
@@ -30,7 +30,7 @@ struct CSVOption { // NOLINT: work-around bug in clang-tidy
|
|
30
30
|
public:
|
31
31
|
CSVOption(T value_p) : value(value_p) { // NOLINT: allow implicit conversion from value
|
32
32
|
}
|
33
|
-
CSVOption(T value_p, bool set_by_user_p) :
|
33
|
+
CSVOption(T value_p, bool set_by_user_p) : set_by_user(set_by_user_p), value(value_p) {
|
34
34
|
}
|
35
35
|
|
36
36
|
CSVOption() {};
|
@@ -14,7 +14,7 @@
|
|
14
14
|
namespace duckdb {
|
15
15
|
//! Basic CSV Column Info
|
16
16
|
struct CSVColumnInfo {
|
17
|
-
CSVColumnInfo(string &name_p, LogicalType &type_p) : name(name_p), type(type_p) {
|
17
|
+
CSVColumnInfo(const string &name_p, const LogicalType &type_p) : name(name_p), type(type_p) {
|
18
18
|
}
|
19
19
|
string name;
|
20
20
|
LogicalType type;
|
@@ -25,7 +25,7 @@ struct CSVSchema {
|
|
25
25
|
explicit CSVSchema(const bool empty = false) : empty(empty) {
|
26
26
|
}
|
27
27
|
|
28
|
-
CSVSchema(vector<string> &names, vector<LogicalType> &types, const string &file_path, idx_t rows_read,
|
28
|
+
CSVSchema(const vector<string> &names, const vector<LogicalType> &types, const string &file_path, idx_t rows_read,
|
29
29
|
const bool empty = false);
|
30
30
|
|
31
31
|
//! Initializes the schema based on names and types
|
@@ -46,7 +46,7 @@ private:
|
|
46
46
|
class CSVEncoder {
|
47
47
|
public:
|
48
48
|
//! Constructor, basically takes an encoding and the output buffer size
|
49
|
-
CSVEncoder(DBConfig &config, const string &encoding_name, idx_t buffer_size);
|
49
|
+
CSVEncoder(const DBConfig &config, const string &encoding_name, idx_t buffer_size);
|
50
50
|
//! Main encode function, it reads the file into an encoded buffer and converts it to the output buffer
|
51
51
|
idx_t Encode(FileHandle &file_handle_input, char *output_buffer, const idx_t decoded_buffer_size);
|
52
52
|
string encoding_name;
|
package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp
CHANGED
@@ -222,7 +222,7 @@ private:
|
|
222
222
|
//! ------------------ Type Refinement ------------------ //
|
223
223
|
//! ------------------------------------------------------//
|
224
224
|
void RefineTypes();
|
225
|
-
bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type);
|
225
|
+
bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) const;
|
226
226
|
vector<LogicalType> detected_types;
|
227
227
|
//! Whether a SQLNULL type found during type detection is defaulted to varchar
|
228
228
|
const bool default_null_to_varchar;
|
package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
CHANGED
@@ -121,8 +121,8 @@ private:
|
|
121
121
|
};
|
122
122
|
|
123
123
|
struct ParseTypeInfo {
|
124
|
-
ParseTypeInfo() {};
|
125
|
-
ParseTypeInfo(const LogicalType &type, bool validate_utf_8_p) : validate_utf8(validate_utf_8_p) {
|
124
|
+
ParseTypeInfo() : validate_utf8(false), type_id(), internal_type(), scale(0), width(0) {};
|
125
|
+
ParseTypeInfo(const LogicalType &type, const bool validate_utf_8_p) : validate_utf8(validate_utf_8_p) {
|
126
126
|
type_id = type.id();
|
127
127
|
internal_type = type.InternalType();
|
128
128
|
if (type.id() == LogicalTypeId::DECIMAL) {
|
package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp
CHANGED
@@ -19,12 +19,10 @@ public:
|
|
19
19
|
static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::STREAMING_SAMPLE;
|
20
20
|
|
21
21
|
public:
|
22
|
-
PhysicalStreamingSample(vector<LogicalType> types,
|
23
|
-
idx_t estimated_cardinality);
|
22
|
+
PhysicalStreamingSample(vector<LogicalType> types, unique_ptr<SampleOptions> options, idx_t estimated_cardinality);
|
24
23
|
|
25
|
-
|
24
|
+
unique_ptr<SampleOptions> sample_options;
|
26
25
|
double percentage;
|
27
|
-
int64_t seed;
|
28
26
|
|
29
27
|
public:
|
30
28
|
// Operator interface
|
@@ -32,9 +30,7 @@ public:
|
|
32
30
|
OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
|
33
31
|
GlobalOperatorState &gstate, OperatorState &state) const override;
|
34
32
|
|
35
|
-
bool ParallelOperator() const override
|
36
|
-
return true;
|
37
|
-
}
|
33
|
+
bool ParallelOperator() const override;
|
38
34
|
|
39
35
|
InsertionOrderPreservingMap<string> ParamsToString() const override;
|
40
36
|
|
@@ -201,7 +201,8 @@ public:
|
|
201
201
|
void ConvertToReservoirSample();
|
202
202
|
|
203
203
|
//! Get the capacity of the data chunk reserved for storing samples
|
204
|
-
|
204
|
+
template <typename T>
|
205
|
+
T GetReservoirChunkCapacity() const;
|
205
206
|
|
206
207
|
//! If for_serialization=true then the sample_chunk is not padded with extra spaces for
|
207
208
|
//! future sampling values
|
@@ -17,7 +17,7 @@
|
|
17
17
|
|
18
18
|
namespace duckdb {
|
19
19
|
|
20
|
-
struct ListLambdaBindData : public FunctionData {
|
20
|
+
struct ListLambdaBindData final : public FunctionData {
|
21
21
|
public:
|
22
22
|
ListLambdaBindData(const LogicalType &return_type, unique_ptr<Expression> lambda_expr, const bool has_index = false)
|
23
23
|
: return_type(return_type), lambda_expr(std::move(lambda_expr)), has_index(has_index) {};
|
@@ -30,8 +30,16 @@ public:
|
|
30
30
|
bool has_index;
|
31
31
|
|
32
32
|
public:
|
33
|
-
|
34
|
-
|
33
|
+
unique_ptr<FunctionData> Copy() const override {
|
34
|
+
auto lambda_expr_copy = lambda_expr ? lambda_expr->Copy() : nullptr;
|
35
|
+
return make_uniq<ListLambdaBindData>(return_type, std::move(lambda_expr_copy), has_index);
|
36
|
+
}
|
37
|
+
|
38
|
+
bool Equals(const FunctionData &other_p) const override {
|
39
|
+
auto &other = other_p.Cast<ListLambdaBindData>();
|
40
|
+
return Expression::Equals(lambda_expr, other.lambda_expr) && return_type == other.return_type &&
|
41
|
+
has_index == other.has_index;
|
42
|
+
}
|
35
43
|
|
36
44
|
//! Serializes a lambda function's bind data
|
37
45
|
static void Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
|