duckdb 0.8.2-dev1791.0 → 0.8.2-dev1862.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/src/common/constants.cpp +2 -1
  4. package/src/duckdb/src/common/enum_util.cpp +5 -5
  5. package/src/duckdb/src/core_functions/function_list.cpp +2 -0
  6. package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
  7. package/src/duckdb/src/execution/index/art/art.cpp +49 -108
  8. package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
  9. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +10 -14
  10. package/src/duckdb/src/execution/index/art/iterator.cpp +13 -19
  11. package/src/duckdb/src/execution/index/art/leaf.cpp +290 -241
  12. package/src/duckdb/src/execution/index/art/node.cpp +104 -95
  13. package/src/duckdb/src/execution/index/art/node16.cpp +6 -6
  14. package/src/duckdb/src/execution/index/art/node256.cpp +6 -6
  15. package/src/duckdb/src/execution/index/art/node4.cpp +6 -6
  16. package/src/duckdb/src/execution/index/art/node48.cpp +6 -6
  17. package/src/duckdb/src/execution/index/art/prefix.cpp +49 -39
  18. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +34 -1175
  19. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +4 -14
  20. package/src/duckdb/src/execution/window_executor.cpp +1280 -0
  21. package/src/duckdb/src/execution/window_segment_tree.cpp +224 -117
  22. package/src/duckdb/src/function/table/read_csv.cpp +4 -3
  23. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  24. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  25. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  26. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
  27. package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
  28. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -1
  29. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
  30. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
  31. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -2
  32. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +43 -40
  33. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +119 -40
  34. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +1 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +1 -0
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +1 -0
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
  38. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +4 -2
  39. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
  40. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +60 -53
  41. package/src/duckdb/src/storage/compression/rle.cpp +52 -12
  42. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
  43. package/src/duckdb/ub_src_execution.cpp +2 -0
  44. package/src/duckdb/ub_src_execution_index_art.cpp +0 -4
  45. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
  46. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
  47. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
  48. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
@@ -9,34 +9,20 @@
9
9
  namespace duckdb {
10
10
 
11
11
  //===--------------------------------------------------------------------===//
12
- // WindowAggregateState
12
+ // WindowAggregator
13
13
  //===--------------------------------------------------------------------===//
14
- WindowAggregateState::WindowAggregateState(AggregateObject aggr, const LogicalType &result_type_p,
15
- idx_t partition_count_p)
16
- : aggr(std::move(aggr)), result_type(result_type_p), partition_count(partition_count_p),
17
- state_size(this->aggr.function.state_size()), state(state_size),
18
- statef(Value::POINTER(CastPointerToValue(state.data()))), filter_pos(0),
19
- allocator(Allocator::DefaultAllocator()) {
20
- statef.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
14
+ WindowAggregatorState::WindowAggregatorState() : allocator(Allocator::DefaultAllocator()) {
21
15
  }
22
16
 
23
- WindowAggregateState::~WindowAggregateState() {
17
+ WindowAggregator::WindowAggregator(AggregateObject aggr, const LogicalType &result_type_p, idx_t partition_count_p)
18
+ : aggr(std::move(aggr)), result_type(result_type_p), partition_count(partition_count_p),
19
+ state_size(aggr.function.state_size()), filter_pos(0) {
24
20
  }
25
21
 
26
- void WindowAggregateState::AggregateInit() {
27
- aggr.function.initialize(state.data());
22
+ WindowAggregator::~WindowAggregator() {
28
23
  }
29
24
 
30
- void WindowAggregateState::AggegateFinal(Vector &result, idx_t rid) {
31
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
32
- aggr.function.finalize(statef, aggr_input_data, result, 1, rid);
33
-
34
- if (aggr.function.destructor) {
35
- aggr.function.destructor(statef, aggr_input_data, 1);
36
- }
37
- }
38
-
39
- void WindowAggregateState::Sink(DataChunk &payload_chunk, SelectionVector *filter_sel, idx_t filtered) {
25
+ void WindowAggregator::Sink(DataChunk &payload_chunk, SelectionVector *filter_sel, idx_t filtered) {
40
26
  if (!inputs.ColumnCount() && payload_chunk.ColumnCount()) {
41
27
  inputs.Initialize(Allocator::DefaultAllocator(), payload_chunk.GetTypes());
42
28
  }
@@ -57,34 +43,19 @@ void WindowAggregateState::Sink(DataChunk &payload_chunk, SelectionVector *filte
57
43
  }
58
44
  }
59
45
 
60
- void WindowAggregateState::Finalize() {
61
- }
62
-
63
- void WindowAggregateState::Compute(Vector &result, idx_t rid, idx_t start, idx_t end) {
64
- }
65
-
66
- void WindowAggregateState::Evaluate(const idx_t *begins, const idx_t *ends, Vector &result, idx_t count) {
67
- auto &rmask = FlatVector::Validity(result);
68
- for (idx_t i = 0; i < count; ++i) {
69
- const auto begin = begins[i];
70
- const auto end = ends[i];
71
- if (begin >= end) {
72
- rmask.SetInvalid(i);
73
- continue;
74
- }
75
- Compute(result, i, begin, end);
76
- }
46
+ void WindowAggregator::Finalize() {
77
47
  }
78
48
 
79
49
  //===--------------------------------------------------------------------===//
80
50
  // WindowConstantAggregate
81
51
  //===--------------------------------------------------------------------===//
52
+ WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const LogicalType &result_type,
53
+ const ValidityMask &partition_mask, const idx_t count)
54
+ : WindowAggregator(std::move(aggr), result_type, count), partition(0), row(0), state(state_size),
55
+ statep(Value::POINTER(CastPointerToValue(state.data()))),
56
+ statef(Value::POINTER(CastPointerToValue(state.data()))) {
82
57
 
83
- WindowConstantAggregate::WindowConstantAggregate(AggregateObject aggr, const LogicalType &result_type,
84
- const ValidityMask &partition_mask, const idx_t count)
85
- : WindowAggregateState(std::move(aggr), result_type, count), partition(0), row(0),
86
- statep(Value::POINTER(CastPointerToValue(state.data()))) {
87
- matches.Initialize();
58
+ statef.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
88
59
 
89
60
  // Locate the partition boundaries
90
61
  if (partition_mask.AllValid()) {
@@ -116,11 +87,27 @@ WindowConstantAggregate::WindowConstantAggregate(AggregateObject aggr, const Log
116
87
  results = make_uniq<Vector>(result_type, partition_offsets.size());
117
88
  partition_offsets.emplace_back(count);
118
89
 
90
+ // Create an aggregate state for intermediate aggregates
91
+ gstate = make_uniq<WindowAggregatorState>();
92
+
119
93
  // Start the first aggregate
120
94
  AggregateInit();
121
95
  }
122
96
 
123
- void WindowConstantAggregate::Sink(DataChunk &payload_chunk, SelectionVector *filter_sel, idx_t filtered) {
97
+ void WindowConstantAggregator::AggregateInit() {
98
+ aggr.function.initialize(state.data());
99
+ }
100
+
101
+ void WindowConstantAggregator::AggegateFinal(Vector &result, idx_t rid) {
102
+ AggregateInputData aggr_input_data(aggr.GetFunctionData(), gstate->allocator);
103
+ aggr.function.finalize(statef, aggr_input_data, result, 1, rid);
104
+
105
+ if (aggr.function.destructor) {
106
+ aggr.function.destructor(statef, aggr_input_data, 1);
107
+ }
108
+ }
109
+
110
+ void WindowConstantAggregator::Sink(DataChunk &payload_chunk, SelectionVector *filter_sel, idx_t filtered) {
124
111
  const auto chunk_begin = row;
125
112
  const auto chunk_end = chunk_begin + payload_chunk.size();
126
113
 
@@ -128,7 +115,7 @@ void WindowConstantAggregate::Sink(DataChunk &payload_chunk, SelectionVector *fi
128
115
  inputs.Initialize(Allocator::DefaultAllocator(), payload_chunk.GetTypes());
129
116
  }
130
117
 
131
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
118
+ AggregateInputData aggr_input_data(aggr.GetFunctionData(), gstate->allocator);
132
119
  idx_t begin = 0;
133
120
  idx_t filter_idx = 0;
134
121
  auto partition_end = partition_offsets[partition + 1];
@@ -193,64 +180,127 @@ void WindowConstantAggregate::Sink(DataChunk &payload_chunk, SelectionVector *fi
193
180
  }
194
181
  }
195
182
 
196
- void WindowConstantAggregate::Finalize() {
183
+ void WindowConstantAggregator::Finalize() {
197
184
  AggegateFinal(*results, partition++);
185
+ }
198
186
 
199
- partition = 0;
200
- row = 0;
187
+ class WindowConstantAggregatorState : public WindowAggregatorState {
188
+ public:
189
+ WindowConstantAggregatorState() : partition(0) {
190
+ matches.Initialize();
191
+ }
192
+ ~WindowConstantAggregatorState() override {
193
+ }
194
+
195
+ public:
196
+ //! The current result partition being read
197
+ idx_t partition;
198
+ //! Shared SV for evaluation
199
+ SelectionVector matches;
200
+ };
201
+
202
+ unique_ptr<WindowAggregatorState> WindowConstantAggregator::GetLocalState() const {
203
+ return make_uniq<WindowConstantAggregatorState>();
201
204
  }
202
205
 
203
- void WindowConstantAggregate::Evaluate(const idx_t *begins, const idx_t *ends, Vector &target, idx_t count) {
206
+ void WindowConstantAggregator::Evaluate(WindowAggregatorState &lstate, const idx_t *begins, const idx_t *ends,
207
+ Vector &target, idx_t count) const {
204
208
  // Chunk up the constants and copy them one at a time
209
+ auto &lcstate = lstate.Cast<WindowConstantAggregatorState>();
205
210
  idx_t matched = 0;
206
211
  idx_t target_offset = 0;
207
212
  for (idx_t i = 0; i < count; ++i) {
208
213
  const auto begin = begins[i];
209
214
  // Find the partition containing [begin, end)
210
- while (partition_offsets[partition + 1] <= begin) {
215
+ while (partition_offsets[lcstate.partition + 1] <= begin) {
211
216
  // Flush the previous partition's data
212
217
  if (matched) {
213
- VectorOperations::Copy(*results, target, matches, matched, 0, target_offset);
218
+ VectorOperations::Copy(*results, target, lcstate.matches, matched, 0, target_offset);
214
219
  target_offset += matched;
215
220
  matched = 0;
216
221
  }
217
- ++partition;
222
+ ++lcstate.partition;
218
223
  }
219
224
 
220
- matches.set_index(matched++, partition);
225
+ lcstate.matches.set_index(matched++, lcstate.partition);
221
226
  }
222
227
 
223
228
  // Flush the last partition
224
229
  if (matched) {
225
- VectorOperations::Copy(*results, target, matches, matched, 0, target_offset);
230
+ VectorOperations::Copy(*results, target, lcstate.matches, matched, 0, target_offset);
226
231
  }
227
232
  }
228
233
 
229
234
  //===--------------------------------------------------------------------===//
230
- // WindowCustomAggregate
235
+ // WindowCustomAggregator
231
236
  //===--------------------------------------------------------------------===//
232
- WindowCustomAggregate::WindowCustomAggregate(AggregateObject aggr, const LogicalType &result_type, idx_t count)
233
- : WindowAggregateState(std::move(aggr), result_type, count) {
237
+ WindowCustomAggregator::WindowCustomAggregator(AggregateObject aggr, const LogicalType &result_type, idx_t count)
238
+ : WindowAggregator(std::move(aggr), result_type, count) {
239
+ }
240
+
241
+ WindowCustomAggregator::~WindowCustomAggregator() {
242
+ }
243
+
244
+ class WindowCustomAggregatorState : public WindowAggregatorState {
245
+ public:
246
+ explicit WindowCustomAggregatorState(const AggregateObject &aggr, DataChunk &inputs);
247
+ ~WindowCustomAggregatorState() override;
248
+
249
+ public:
250
+ //! The aggregate function
251
+ const AggregateObject &aggr;
252
+ //! The aggregate function
253
+ DataChunk &inputs;
254
+ //! Data pointer that contains a single state, shared by all the custom evaluators
255
+ vector<data_t> state;
256
+ //! Reused result state container for the window functions
257
+ Vector statef;
258
+ //! The frame boundaries, used for the window functions
259
+ FrameBounds frame;
260
+ };
261
+
262
+ WindowCustomAggregatorState::WindowCustomAggregatorState(const AggregateObject &aggr, DataChunk &inputs)
263
+ : aggr(aggr), inputs(inputs), state(aggr.function.state_size()),
264
+ statef(Value::POINTER(CastPointerToValue(state.data()))), frame(0, 0) {
234
265
  // if we have a frame-by-frame method, share the single state
235
- AggregateInit();
266
+ aggr.function.initialize(state.data());
236
267
  }
237
268
 
238
- WindowCustomAggregate::~WindowCustomAggregate() {
269
+ WindowCustomAggregatorState::~WindowCustomAggregatorState() {
239
270
  if (aggr.function.destructor) {
240
271
  AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
241
272
  aggr.function.destructor(statef, aggr_input_data, 1);
242
273
  }
243
274
  }
244
275
 
245
- void WindowCustomAggregate::Compute(Vector &result, idx_t rid, idx_t begin, idx_t end) {
246
- // Frame boundaries
247
- auto prev = frame;
248
- frame = FrameBounds(begin, end);
276
+ unique_ptr<WindowAggregatorState> WindowCustomAggregator::GetLocalState() const {
277
+ return make_uniq<WindowCustomAggregatorState>(aggr, const_cast<DataChunk &>(inputs));
278
+ }
249
279
 
250
- // Extract the range
251
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
252
- aggr.function.window(inputs.data.data(), filter_mask, aggr_input_data, inputs.ColumnCount(), state.data(), frame,
253
- prev, result, rid, 0);
280
+ void WindowCustomAggregator::Evaluate(WindowAggregatorState &lstate, const idx_t *begins, const idx_t *ends,
281
+ Vector &result, idx_t count) const {
282
+ // TODO: window should take a const Vector*
283
+ auto &lcstate = lstate.Cast<WindowCustomAggregatorState>();
284
+ auto &frame = lcstate.frame;
285
+ auto params = lcstate.inputs.data.data();
286
+ auto &rmask = FlatVector::Validity(result);
287
+ for (idx_t i = 0; i < count; ++i) {
288
+ const auto begin = begins[i];
289
+ const auto end = ends[i];
290
+ if (begin >= end) {
291
+ rmask.SetInvalid(i);
292
+ continue;
293
+ }
294
+
295
+ // Frame boundaries
296
+ auto prev = frame;
297
+ frame = FrameBounds(begin, end);
298
+
299
+ // Extract the range
300
+ AggregateInputData aggr_input_data(aggr.GetFunctionData(), lstate.allocator);
301
+ aggr.function.window(params, filter_mask, aggr_input_data, inputs.ColumnCount(), lcstate.state.data(), frame,
302
+ prev, result, i, 0);
303
+ }
254
304
  }
255
305
 
256
306
  //===--------------------------------------------------------------------===//
@@ -258,28 +308,12 @@ void WindowCustomAggregate::Compute(Vector &result, idx_t rid, idx_t begin, idx_
258
308
  //===--------------------------------------------------------------------===//
259
309
  WindowSegmentTree::WindowSegmentTree(AggregateObject aggr, const LogicalType &result_type, idx_t count,
260
310
  WindowAggregationMode mode_p)
261
- : WindowAggregateState(std::move(aggr), result_type, count),
262
- statep(Value::POINTER(CastPointerToValue(state.data()))), frame(0, 0), statel(LogicalType::POINTER),
263
- flush_count(0), internal_nodes(0), mode(mode_p), allocator(Allocator::DefaultAllocator()) {
264
- state.resize(state_size * STANDARD_VECTOR_SIZE);
265
- statep.Flatten(STANDARD_VECTOR_SIZE);
266
-
267
- // Build the finalise vector that just points to the result states
268
- data_ptr_t state_ptr = state.data();
269
- D_ASSERT(statef.GetVectorType() == VectorType::FLAT_VECTOR);
270
- statef.SetVectorType(VectorType::CONSTANT_VECTOR);
271
- statef.Flatten(STANDARD_VECTOR_SIZE);
272
- auto fdata = FlatVector::GetData<data_ptr_t>(statef);
273
- for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; ++i) {
274
- fdata[i] = state_ptr;
275
- state_ptr += state_size;
276
- }
311
+ : WindowAggregator(std::move(aggr), result_type, count), internal_nodes(0), mode(mode_p) {
277
312
  }
278
313
 
279
314
  void WindowSegmentTree::Finalize() {
315
+ gstate = GetLocalState();
280
316
  if (inputs.ColumnCount() > 0) {
281
- leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes());
282
- filter_sel.Initialize();
283
317
  if (aggr.function.combine && UseCombineAPI()) {
284
318
  ConstructTree();
285
319
  }
@@ -291,7 +325,7 @@ WindowSegmentTree::~WindowSegmentTree() {
291
325
  // nothing to destroy
292
326
  return;
293
327
  }
294
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
328
+ AggregateInputData aggr_input_data(aggr.GetFunctionData(), gstate->allocator);
295
329
  // call the destructor for all the intermediate states
296
330
  data_ptr_t address_data[STANDARD_VECTOR_SIZE];
297
331
  Vector addresses(LogicalType::POINTER, data_ptr_cast(address_data));
@@ -308,7 +342,72 @@ WindowSegmentTree::~WindowSegmentTree() {
308
342
  }
309
343
  }
310
344
 
311
- void WindowSegmentTree::FlushStates(bool combining) {
345
+ class WindowSegmentTreeState : public WindowAggregatorState {
346
+ public:
347
+ WindowSegmentTreeState(const AggregateObject &aggr, DataChunk &inputs, const ValidityMask &filter_mask);
348
+ ~WindowSegmentTreeState() override;
349
+
350
+ void FlushStates(bool combining);
351
+ void ExtractFrame(idx_t begin, idx_t end, data_ptr_t current_state);
352
+ void WindowSegmentValue(const WindowSegmentTree &tree, idx_t l_idx, idx_t begin, idx_t end,
353
+ data_ptr_t current_state);
354
+ void Finalize(Vector &result, idx_t count);
355
+
356
+ public:
357
+ //! The aggregate function
358
+ const AggregateObject &aggr;
359
+ //! The aggregate function
360
+ DataChunk &inputs;
361
+ //! The filtered rows in inputs
362
+ const ValidityMask &filter_mask;
363
+ //! The size of a single aggregate state
364
+ const idx_t state_size;
365
+ //! Data pointer that contains a single state, used for intermediate window segment aggregation
366
+ vector<data_t> state;
367
+ //! Input data chunk, used for leaf segment aggregation
368
+ DataChunk leaves;
369
+ //! The filtered rows in inputs.
370
+ SelectionVector filter_sel;
371
+ //! A vector of pointers to "state", used for intermediate window segment aggregation
372
+ Vector statep;
373
+ //! Reused state pointers for combining segment tree levels
374
+ Vector statel;
375
+ //! Reused result state container for the window functions
376
+ Vector statef;
377
+ //! Count of buffered values
378
+ idx_t flush_count;
379
+ };
380
+
381
+ WindowSegmentTreeState::WindowSegmentTreeState(const AggregateObject &aggr, DataChunk &inputs,
382
+ const ValidityMask &filter_mask)
383
+ : aggr(aggr), inputs(inputs), filter_mask(filter_mask), state_size(aggr.function.state_size()),
384
+ state(state_size * STANDARD_VECTOR_SIZE), statep(LogicalType::POINTER), statel(LogicalType::POINTER),
385
+ statef(LogicalType::POINTER), flush_count(0) {
386
+ if (inputs.ColumnCount() > 0) {
387
+ leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes());
388
+ filter_sel.Initialize();
389
+ }
390
+
391
+ // Build the finalise vector that just points to the result states
392
+ data_ptr_t state_ptr = state.data();
393
+ D_ASSERT(statef.GetVectorType() == VectorType::FLAT_VECTOR);
394
+ statef.SetVectorType(VectorType::CONSTANT_VECTOR);
395
+ statef.Flatten(STANDARD_VECTOR_SIZE);
396
+ auto fdata = FlatVector::GetData<data_ptr_t>(statef);
397
+ for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; ++i) {
398
+ fdata[i] = state_ptr;
399
+ state_ptr += state_size;
400
+ }
401
+ }
402
+
403
+ WindowSegmentTreeState::~WindowSegmentTreeState() {
404
+ }
405
+
406
+ unique_ptr<WindowAggregatorState> WindowSegmentTree::GetLocalState() const {
407
+ return make_uniq<WindowSegmentTreeState>(aggr, const_cast<DataChunk &>(inputs), filter_mask);
408
+ }
409
+
410
+ void WindowSegmentTreeState::FlushStates(bool combining) {
312
411
  if (!flush_count) {
313
412
  return;
314
413
  }
@@ -326,9 +425,8 @@ void WindowSegmentTree::FlushStates(bool combining) {
326
425
  flush_count = 0;
327
426
  }
328
427
 
329
- void WindowSegmentTree::ExtractFrame(idx_t begin, idx_t end, data_ptr_t state_ptr) {
428
+ void WindowSegmentTreeState::ExtractFrame(idx_t begin, idx_t end, data_ptr_t state_ptr) {
330
429
  const auto count = end - begin;
331
- D_ASSERT(count <= TREE_FANOUT);
332
430
 
333
431
  // If we are not filtering,
334
432
  // just update the shared dictionary selection to the range
@@ -355,7 +453,8 @@ void WindowSegmentTree::ExtractFrame(idx_t begin, idx_t end, data_ptr_t state_pt
355
453
  }
356
454
  }
357
455
 
358
- void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end, data_ptr_t state_ptr) {
456
+ void WindowSegmentTreeState::WindowSegmentValue(const WindowSegmentTree &tree, idx_t l_idx, idx_t begin, idx_t end,
457
+ data_ptr_t state_ptr) {
359
458
  D_ASSERT(begin <= end);
360
459
  if (begin == end || inputs.ColumnCount() == 0) {
361
460
  return;
@@ -366,7 +465,7 @@ void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end,
366
465
  ExtractFrame(begin, end, state_ptr);
367
466
  } else {
368
467
  // find out where the states begin
369
- data_ptr_t begin_ptr = levels_flat_native.get() + state_size * (begin + levels_flat_start[l_idx - 1]);
468
+ auto begin_ptr = tree.levels_flat_native.get() + state_size * (begin + tree.levels_flat_start[l_idx - 1]);
370
469
  // set up a vector of pointers that point towards the set of states
371
470
  auto ldata = FlatVector::GetData<data_ptr_t>(statel);
372
471
  auto pdata = FlatVector::GetData<data_ptr_t>(statep);
@@ -380,10 +479,23 @@ void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end,
380
479
  }
381
480
  }
382
481
  }
482
+ void WindowSegmentTreeState::Finalize(Vector &result, idx_t count) {
483
+ // Finalise the result aggregates
484
+ AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
485
+ aggr.function.finalize(statef, aggr_input_data, result, count, 0);
486
+
487
+ // Destruct the result aggregates
488
+ if (aggr.function.destructor) {
489
+ aggr.function.destructor(statef, aggr_input_data, count);
490
+ }
491
+ }
383
492
 
384
493
  void WindowSegmentTree::ConstructTree() {
385
494
  D_ASSERT(inputs.ColumnCount() > 0);
386
495
 
496
+ // Use a temporary scan state to build the tree
497
+ auto &gtstate = gstate->Cast<WindowSegmentTreeState>();
498
+
387
499
  // compute space required to store internal nodes of segment tree
388
500
  internal_nodes = 0;
389
501
  idx_t level_nodes = inputs.size();
@@ -405,8 +517,8 @@ void WindowSegmentTree::ConstructTree() {
405
517
  // compute the aggregate for this entry in the segment tree
406
518
  data_ptr_t state_ptr = levels_flat_native.get() + (levels_flat_offset * state_size);
407
519
  aggr.function.initialize(state_ptr);
408
- WindowSegmentValue(level_current, pos, MinValue(level_size, pos + TREE_FANOUT), state_ptr);
409
- FlushStates(level_current > 0);
520
+ gtstate.WindowSegmentValue(*this, level_current, pos, MinValue(level_size, pos + TREE_FANOUT), state_ptr);
521
+ gtstate.FlushStates(level_current > 0);
410
522
 
411
523
  levels_flat_offset++;
412
524
  }
@@ -421,17 +533,19 @@ void WindowSegmentTree::ConstructTree() {
421
533
  }
422
534
  }
423
535
 
424
- void WindowSegmentTree::Evaluate(const idx_t *begins, const idx_t *ends, Vector &result, idx_t count) {
536
+ void WindowSegmentTree::Evaluate(WindowAggregatorState &lstate, const idx_t *begins, const idx_t *ends, Vector &result,
537
+ idx_t count) const {
538
+ auto &ltstate = lstate.Cast<WindowSegmentTreeState>();
425
539
  const auto cant_combine = (!aggr.function.combine || !UseCombineAPI());
426
- auto fdata = FlatVector::GetData<data_ptr_t>(statef);
540
+ auto fdata = FlatVector::GetData<data_ptr_t>(ltstate.statef);
427
541
 
428
542
  // First pass: aggregate the segment tree nodes
429
543
  // Share adjacent identical states
430
544
  // We do this first because we want to share only tree aggregations
431
545
  idx_t prev_begin = 1;
432
546
  idx_t prev_end = 0;
433
- auto ldata = FlatVector::GetData<data_ptr_t>(statel);
434
- auto pdata = FlatVector::GetData<data_ptr_t>(statep);
547
+ auto ldata = FlatVector::GetData<data_ptr_t>(ltstate.statel);
548
+ auto pdata = FlatVector::GetData<data_ptr_t>(ltstate.statep);
435
549
  data_ptr_t prev_state = nullptr;
436
550
  for (idx_t rid = 0; rid < count; ++rid) {
437
551
  auto state_ptr = fdata[rid];
@@ -455,10 +569,10 @@ void WindowSegmentTree::Evaluate(const idx_t *begins, const idx_t *ends, Vector
455
569
  idx_t parent_end = end / TREE_FANOUT;
456
570
  if (prev_state && l_idx == 1 && begin == prev_begin && end == prev_end) {
457
571
  // Just combine the previous top level result
458
- ldata[flush_count] = prev_state;
459
- pdata[flush_count] = state_ptr;
460
- if (++flush_count >= STANDARD_VECTOR_SIZE) {
461
- FlushStates(true);
572
+ ldata[ltstate.flush_count] = prev_state;
573
+ pdata[ltstate.flush_count] = state_ptr;
574
+ if (++ltstate.flush_count >= STANDARD_VECTOR_SIZE) {
575
+ ltstate.FlushStates(true);
462
576
  }
463
577
  break;
464
578
  }
@@ -471,28 +585,28 @@ void WindowSegmentTree::Evaluate(const idx_t *begins, const idx_t *ends, Vector
471
585
 
472
586
  if (parent_begin == parent_end) {
473
587
  if (l_idx) {
474
- WindowSegmentValue(l_idx, begin, end, state_ptr);
588
+ ltstate.WindowSegmentValue(*this, l_idx, begin, end, state_ptr);
475
589
  }
476
590
  break;
477
591
  }
478
592
  idx_t group_begin = parent_begin * TREE_FANOUT;
479
593
  if (begin != group_begin) {
480
594
  if (l_idx) {
481
- WindowSegmentValue(l_idx, begin, group_begin + TREE_FANOUT, state_ptr);
595
+ ltstate.WindowSegmentValue(*this, l_idx, begin, group_begin + TREE_FANOUT, state_ptr);
482
596
  }
483
597
  parent_begin++;
484
598
  }
485
599
  idx_t group_end = parent_end * TREE_FANOUT;
486
600
  if (end != group_end) {
487
601
  if (l_idx) {
488
- WindowSegmentValue(l_idx, group_end, end, state_ptr);
602
+ ltstate.WindowSegmentValue(*this, l_idx, group_end, end, state_ptr);
489
603
  }
490
604
  }
491
605
  begin = parent_begin;
492
606
  end = parent_end;
493
607
  }
494
608
  }
495
- FlushStates(true);
609
+ ltstate.FlushStates(true);
496
610
 
497
611
  // Second pass: aggregate the ragged leaves
498
612
  // (or everything if we can't combine)
@@ -509,30 +623,23 @@ void WindowSegmentTree::Evaluate(const idx_t *begins, const idx_t *ends, Vector
509
623
  idx_t parent_begin = begin / TREE_FANOUT;
510
624
  idx_t parent_end = end / TREE_FANOUT;
511
625
  if (parent_begin == parent_end || cant_combine) {
512
- WindowSegmentValue(0, begin, end, state_ptr);
626
+ ltstate.WindowSegmentValue(*this, 0, begin, end, state_ptr);
513
627
  continue;
514
628
  }
515
629
 
516
630
  idx_t group_begin = parent_begin * TREE_FANOUT;
517
631
  if (begin != group_begin) {
518
- WindowSegmentValue(0, begin, group_begin + TREE_FANOUT, state_ptr);
632
+ ltstate.WindowSegmentValue(*this, 0, begin, group_begin + TREE_FANOUT, state_ptr);
519
633
  parent_begin++;
520
634
  }
521
635
  idx_t group_end = parent_end * TREE_FANOUT;
522
636
  if (end != group_end) {
523
- WindowSegmentValue(0, group_end, end, state_ptr);
637
+ ltstate.WindowSegmentValue(*this, 0, group_end, end, state_ptr);
524
638
  }
525
639
  }
526
- FlushStates(false);
527
-
528
- // Finalise the result aggregates
529
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
530
- aggr.function.finalize(statef, aggr_input_data, result, count, 0);
640
+ ltstate.FlushStates(false);
531
641
 
532
- // Destruct the result aggregates
533
- if (aggr.function.destructor) {
534
- aggr.function.destructor(statef, aggr_input_data, count);
535
- }
642
+ ltstate.Finalize(result, count);
536
643
 
537
644
  // Set the validity mask on the invalid rows
538
645
  auto &rmask = FlatVector::Validity(result);
@@ -343,7 +343,8 @@ public:
343
343
  line_info.lines_read[0][0]++;
344
344
  }
345
345
  }
346
- ParallelCSVGlobalState() : line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
346
+ explicit ParallelCSVGlobalState(idx_t system_threads_p)
347
+ : system_threads(system_threads_p), line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
347
348
  running_threads = MaxThreads();
348
349
  }
349
350
 
@@ -405,7 +406,7 @@ private:
405
406
  //! How many bytes we should execute per local state
406
407
  idx_t bytes_per_local_state;
407
408
  //! Size of first file
408
- idx_t first_file_size;
409
+ idx_t first_file_size = 0;
409
410
  //! Whether or not this is an on-disk file
410
411
  bool on_disk_file = true;
411
412
  //! Basically max number of threads in DuckDB
@@ -691,7 +692,7 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext
691
692
  auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
692
693
  if (bind_data.files.empty()) {
693
694
  // This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
694
- return make_uniq<ParallelCSVGlobalState>();
695
+ return make_uniq<ParallelCSVGlobalState>(context.db->NumberOfThreads());
695
696
  }
696
697
  unique_ptr<CSVFileHandle> file_handle;
697
698
 
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev1791"
2
+ #define DUCKDB_VERSION "0.8.2-dev1862"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "ecae3d0c87"
5
+ #define DUCKDB_SOURCE_ID "9b0a6350ab"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -39,6 +39,8 @@ DUCKDB_API bool IsRowIdColumnId(column_t column_id);
39
39
 
40
40
  //! The maximum row identifier used in tables
41
41
  extern const row_t MAX_ROW_ID;
42
+ //! Transaction-local row IDs start at MAX_ROW_ID
43
+ extern const row_t MAX_ROW_ID_LOCAL;
42
44
 
43
45
  extern const transaction_t TRANSACTION_ID_START;
44
46
  extern const transaction_t MAX_TRANSACTION_ID;
@@ -1,3 +1,11 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/type_util.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
1
9
  #pragma once
2
10
 
3
11
  #include "duckdb/common/types.hpp"
@@ -1,3 +1,11 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/typedefs.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
1
9
  #pragma once
2
10
 
3
11
  #include <cstdint>
@@ -0,0 +1,27 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/core_functions/scalar/debug_functions.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+ // This file is automatically generated by scripts/generate_functions.py
9
+ // Do not edit this file manually, your changes will be overwritten
10
+ //===----------------------------------------------------------------------===//
11
+
12
+ #pragma once
13
+
14
+ #include "duckdb/function/function_set.hpp"
15
+
16
+ namespace duckdb {
17
+
18
+ struct VectorTypeFun {
19
+ static constexpr const char *Name = "vector_type";
20
+ static constexpr const char *Parameters = "col";
21
+ static constexpr const char *Description = "Returns the VectorType of a given column";
22
+ static constexpr const char *Example = "vector_type(col)";
23
+
24
+ static ScalarFunction GetFunction();
25
+ };
26
+
27
+ } // namespace duckdb
@@ -105,7 +105,7 @@ public:
105
105
 
106
106
  private:
107
107
  //! Insert a row ID into a leaf
108
- bool InsertToLeaf(Node &leaf_node, const row_t &row_id);
108
+ bool InsertToLeaf(Node &leaf, const row_t &row_id);
109
109
  //! Insert a key into the tree
110
110
  bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
111
111
  //! Erase a key from the tree (if a leaf has more than one value) or erase the leaf itself
@@ -59,7 +59,6 @@ public:
59
59
  return data[i];
60
60
  }
61
61
  bool operator>(const ARTKey &k) const;
62
- bool operator<(const ARTKey &k) const;
63
62
  bool operator>=(const ARTKey &k) const;
64
63
  bool operator==(const ARTKey &k) const;
65
64