duckdb 0.8.2-dev1791.0 → 0.8.2-dev1859.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/src/common/constants.cpp +2 -1
- package/src/duckdb/src/common/enum_util.cpp +5 -5
- package/src/duckdb/src/core_functions/function_list.cpp +2 -0
- package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
- package/src/duckdb/src/execution/index/art/art.cpp +49 -108
- package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +10 -14
- package/src/duckdb/src/execution/index/art/iterator.cpp +13 -19
- package/src/duckdb/src/execution/index/art/leaf.cpp +290 -241
- package/src/duckdb/src/execution/index/art/node.cpp +104 -95
- package/src/duckdb/src/execution/index/art/node16.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node256.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node4.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node48.cpp +6 -6
- package/src/duckdb/src/execution/index/art/prefix.cpp +49 -39
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +34 -1175
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +4 -14
- package/src/duckdb/src/execution/window_executor.cpp +1280 -0
- package/src/duckdb/src/execution/window_segment_tree.cpp +224 -117
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +43 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +119 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +60 -53
- package/src/duckdb/src/storage/compression/rle.cpp +52 -12
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_src_execution.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -4
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
@@ -15,6 +15,7 @@
|
|
15
15
|
#include "duckdb/common/windows_undefs.hpp"
|
16
16
|
#include "duckdb/execution/expression_executor.hpp"
|
17
17
|
#include "duckdb/execution/partitionable_hashtable.hpp"
|
18
|
+
#include "duckdb/execution/window_executor.hpp"
|
18
19
|
#include "duckdb/execution/window_segment_tree.hpp"
|
19
20
|
#include "duckdb/main/client_config.hpp"
|
20
21
|
#include "duckdb/main/config.hpp"
|
@@ -78,1187 +79,38 @@ PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expr
|
|
78
79
|
}
|
79
80
|
}
|
80
81
|
|
81
|
-
static
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
return start;
|
86
|
-
}
|
87
|
-
|
88
|
-
while (l < r) {
|
89
|
-
// If l is aligned with the start of a block, and the block is blank, then skip forward one block.
|
90
|
-
idx_t entry_idx;
|
91
|
-
idx_t shift;
|
92
|
-
mask.GetEntryIndex(l, entry_idx, shift);
|
93
|
-
|
94
|
-
const auto block = mask.GetValidityEntry(entry_idx);
|
95
|
-
if (mask.NoneValid(block) && !shift) {
|
96
|
-
l += ValidityMask::BITS_PER_VALUE;
|
97
|
-
continue;
|
98
|
-
}
|
99
|
-
|
100
|
-
// Loop over the block
|
101
|
-
for (; shift < ValidityMask::BITS_PER_VALUE && l < r; ++shift, ++l) {
|
102
|
-
if (mask.RowIsValid(block, shift) && --n == 0) {
|
103
|
-
return MinValue(l, r);
|
104
|
-
}
|
105
|
-
}
|
106
|
-
}
|
107
|
-
|
108
|
-
// Didn't find a start so return the end of the range
|
109
|
-
return r;
|
110
|
-
}
|
111
|
-
|
112
|
-
static idx_t FindPrevStart(const ValidityMask &mask, const idx_t l, idx_t r, idx_t &n) {
|
113
|
-
if (mask.AllValid()) {
|
114
|
-
auto start = (r <= l + n) ? l : r - n;
|
115
|
-
n -= r - start;
|
116
|
-
return start;
|
117
|
-
}
|
118
|
-
|
119
|
-
while (l < r) {
|
120
|
-
// If r is aligned with the start of a block, and the previous block is blank,
|
121
|
-
// then skip backwards one block.
|
122
|
-
idx_t entry_idx;
|
123
|
-
idx_t shift;
|
124
|
-
mask.GetEntryIndex(r - 1, entry_idx, shift);
|
125
|
-
|
126
|
-
const auto block = mask.GetValidityEntry(entry_idx);
|
127
|
-
if (mask.NoneValid(block) && (shift + 1 == ValidityMask::BITS_PER_VALUE)) {
|
128
|
-
// r is nonzero (> l) and word aligned, so this will not underflow.
|
129
|
-
r -= ValidityMask::BITS_PER_VALUE;
|
130
|
-
continue;
|
131
|
-
}
|
132
|
-
|
133
|
-
// Loop backwards over the block
|
134
|
-
// shift is probing r-1 >= l >= 0
|
135
|
-
for (++shift; shift-- > 0; --r) {
|
136
|
-
if (mask.RowIsValid(block, shift) && --n == 0) {
|
137
|
-
return MaxValue(l, r - 1);
|
138
|
-
}
|
139
|
-
}
|
140
|
-
}
|
141
|
-
|
142
|
-
// Didn't find a start so return the start of the range
|
143
|
-
return l;
|
144
|
-
}
|
145
|
-
|
146
|
-
static void PrepareInputExpressions(vector<unique_ptr<Expression>> &exprs, ExpressionExecutor &executor,
|
147
|
-
DataChunk &chunk) {
|
148
|
-
if (exprs.empty()) {
|
149
|
-
return;
|
150
|
-
}
|
151
|
-
|
152
|
-
vector<LogicalType> types;
|
153
|
-
for (idx_t expr_idx = 0; expr_idx < exprs.size(); ++expr_idx) {
|
154
|
-
types.push_back(exprs[expr_idx]->return_type);
|
155
|
-
executor.AddExpression(*exprs[expr_idx]);
|
156
|
-
}
|
157
|
-
|
158
|
-
if (!types.empty()) {
|
159
|
-
auto &allocator = executor.GetAllocator();
|
160
|
-
chunk.Initialize(allocator, types);
|
161
|
-
}
|
162
|
-
}
|
163
|
-
|
164
|
-
static void PrepareInputExpression(Expression &expr, ExpressionExecutor &executor, DataChunk &chunk) {
|
165
|
-
vector<LogicalType> types;
|
166
|
-
types.push_back(expr.return_type);
|
167
|
-
executor.AddExpression(expr);
|
168
|
-
|
169
|
-
auto &allocator = executor.GetAllocator();
|
170
|
-
chunk.Initialize(allocator, types);
|
171
|
-
}
|
172
|
-
|
173
|
-
struct WindowInputExpression {
|
174
|
-
WindowInputExpression(optional_ptr<Expression> expr_p, ClientContext &context)
|
175
|
-
: expr(expr_p), ptype(PhysicalType::INVALID), scalar(true), executor(context) {
|
176
|
-
if (expr) {
|
177
|
-
PrepareInputExpression(*expr, executor, chunk);
|
178
|
-
ptype = expr->return_type.InternalType();
|
179
|
-
scalar = expr->IsScalar();
|
180
|
-
}
|
181
|
-
}
|
182
|
-
|
183
|
-
void Execute(DataChunk &input_chunk) {
|
184
|
-
if (expr) {
|
185
|
-
chunk.Reset();
|
186
|
-
executor.Execute(input_chunk, chunk);
|
187
|
-
chunk.Verify();
|
188
|
-
}
|
189
|
-
}
|
190
|
-
|
191
|
-
template <typename T>
|
192
|
-
inline T GetCell(idx_t i) const {
|
193
|
-
D_ASSERT(!chunk.data.empty());
|
194
|
-
const auto data = FlatVector::GetData<T>(chunk.data[0]);
|
195
|
-
return data[scalar ? 0 : i];
|
196
|
-
}
|
197
|
-
|
198
|
-
inline bool CellIsNull(idx_t i) const {
|
199
|
-
D_ASSERT(!chunk.data.empty());
|
200
|
-
if (chunk.data[0].GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
201
|
-
return ConstantVector::IsNull(chunk.data[0]);
|
202
|
-
}
|
203
|
-
return FlatVector::IsNull(chunk.data[0], i);
|
204
|
-
}
|
205
|
-
|
206
|
-
inline void CopyCell(Vector &target, idx_t target_offset) const {
|
207
|
-
D_ASSERT(!chunk.data.empty());
|
208
|
-
auto &source = chunk.data[0];
|
209
|
-
auto source_offset = scalar ? 0 : target_offset;
|
210
|
-
VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset);
|
211
|
-
}
|
212
|
-
|
213
|
-
optional_ptr<Expression> expr;
|
214
|
-
PhysicalType ptype;
|
215
|
-
bool scalar;
|
216
|
-
ExpressionExecutor executor;
|
217
|
-
DataChunk chunk;
|
218
|
-
};
|
219
|
-
|
220
|
-
struct WindowInputColumn {
|
221
|
-
WindowInputColumn(Expression *expr_p, ClientContext &context, idx_t capacity_p)
|
222
|
-
: input_expr(expr_p, context), count(0), capacity(capacity_p) {
|
223
|
-
if (input_expr.expr) {
|
224
|
-
target = make_uniq<Vector>(input_expr.chunk.data[0].GetType(), capacity);
|
225
|
-
}
|
226
|
-
}
|
227
|
-
|
228
|
-
void Append(DataChunk &input_chunk) {
|
229
|
-
if (input_expr.expr) {
|
230
|
-
const auto source_count = input_chunk.size();
|
231
|
-
D_ASSERT(count + source_count <= capacity);
|
232
|
-
if (!input_expr.scalar || !count) {
|
233
|
-
input_expr.Execute(input_chunk);
|
234
|
-
auto &source = input_expr.chunk.data[0];
|
235
|
-
VectorOperations::Copy(source, *target, source_count, 0, count);
|
236
|
-
}
|
237
|
-
count += source_count;
|
238
|
-
}
|
239
|
-
}
|
240
|
-
|
241
|
-
inline bool CellIsNull(idx_t i) {
|
242
|
-
D_ASSERT(target);
|
243
|
-
D_ASSERT(i < count);
|
244
|
-
return FlatVector::IsNull(*target, input_expr.scalar ? 0 : i);
|
245
|
-
}
|
246
|
-
|
247
|
-
template <typename T>
|
248
|
-
inline T GetCell(idx_t i) const {
|
249
|
-
D_ASSERT(target);
|
250
|
-
D_ASSERT(i < count);
|
251
|
-
const auto data = FlatVector::GetData<T>(*target);
|
252
|
-
return data[input_expr.scalar ? 0 : i];
|
253
|
-
}
|
254
|
-
|
255
|
-
WindowInputExpression input_expr;
|
256
|
-
|
257
|
-
private:
|
258
|
-
unique_ptr<Vector> target;
|
259
|
-
idx_t count;
|
260
|
-
idx_t capacity;
|
261
|
-
};
|
262
|
-
|
263
|
-
static inline bool BoundaryNeedsPeer(const WindowBoundary &boundary) {
|
264
|
-
switch (boundary) {
|
265
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
266
|
-
case WindowBoundary::EXPR_PRECEDING_RANGE:
|
267
|
-
case WindowBoundary::EXPR_FOLLOWING_RANGE:
|
268
|
-
return true;
|
269
|
-
default:
|
270
|
-
return false;
|
271
|
-
}
|
272
|
-
}
|
273
|
-
|
274
|
-
enum WindowBounds : uint8_t { PARTITION_BEGIN, PARTITION_END, PEER_BEGIN, PEER_END, WINDOW_BEGIN, WINDOW_END };
|
275
|
-
|
276
|
-
struct WindowBoundariesState {
|
277
|
-
static inline bool IsScalar(const unique_ptr<Expression> &expr) {
|
278
|
-
return expr ? expr->IsScalar() : true;
|
279
|
-
}
|
280
|
-
|
281
|
-
WindowBoundariesState(BoundWindowExpression &wexpr, const idx_t input_size)
|
282
|
-
: type(wexpr.type), input_size(input_size), start_boundary(wexpr.start), end_boundary(wexpr.end),
|
283
|
-
partition_count(wexpr.partitions.size()), order_count(wexpr.orders.size()),
|
284
|
-
range_sense(wexpr.orders.empty() ? OrderType::INVALID : wexpr.orders[0].type),
|
285
|
-
has_preceding_range(wexpr.start == WindowBoundary::EXPR_PRECEDING_RANGE ||
|
286
|
-
wexpr.end == WindowBoundary::EXPR_PRECEDING_RANGE),
|
287
|
-
has_following_range(wexpr.start == WindowBoundary::EXPR_FOLLOWING_RANGE ||
|
288
|
-
wexpr.end == WindowBoundary::EXPR_FOLLOWING_RANGE),
|
289
|
-
needs_peer(BoundaryNeedsPeer(wexpr.end) || wexpr.type == ExpressionType::WINDOW_CUME_DIST) {
|
290
|
-
}
|
291
|
-
|
292
|
-
void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t chunk_idx,
|
293
|
-
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
294
|
-
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
295
|
-
|
296
|
-
void Bounds(DataChunk &bounds, idx_t row_idx, WindowInputColumn &range, const idx_t count,
|
297
|
-
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
298
|
-
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
299
|
-
|
300
|
-
// Cached lookups
|
301
|
-
const ExpressionType type;
|
302
|
-
const idx_t input_size;
|
303
|
-
const WindowBoundary start_boundary;
|
304
|
-
const WindowBoundary end_boundary;
|
305
|
-
const size_t partition_count;
|
306
|
-
const size_t order_count;
|
307
|
-
const OrderType range_sense;
|
308
|
-
const bool has_preceding_range;
|
309
|
-
const bool has_following_range;
|
310
|
-
const bool needs_peer;
|
311
|
-
|
312
|
-
idx_t partition_start = 0;
|
313
|
-
idx_t partition_end = 0;
|
314
|
-
idx_t peer_start = 0;
|
315
|
-
idx_t peer_end = 0;
|
316
|
-
idx_t valid_start = 0;
|
317
|
-
idx_t valid_end = 0;
|
318
|
-
int64_t window_start = -1;
|
319
|
-
int64_t window_end = -1;
|
320
|
-
FrameBounds prev;
|
321
|
-
};
|
322
|
-
|
323
|
-
template <typename T>
|
324
|
-
static T GetCell(DataChunk &chunk, idx_t column, idx_t index) {
|
325
|
-
D_ASSERT(chunk.ColumnCount() > column);
|
326
|
-
auto &source = chunk.data[column];
|
327
|
-
const auto data = FlatVector::GetData<T>(source);
|
328
|
-
return data[index];
|
329
|
-
}
|
330
|
-
|
331
|
-
static bool CellIsNull(DataChunk &chunk, idx_t column, idx_t index) {
|
332
|
-
D_ASSERT(chunk.ColumnCount() > column);
|
333
|
-
auto &source = chunk.data[column];
|
334
|
-
return FlatVector::IsNull(source, index);
|
335
|
-
}
|
336
|
-
|
337
|
-
static void CopyCell(DataChunk &chunk, idx_t column, idx_t index, Vector &target, idx_t target_offset) {
|
338
|
-
D_ASSERT(chunk.ColumnCount() > column);
|
339
|
-
auto &source = chunk.data[column];
|
340
|
-
VectorOperations::Copy(source, target, index + 1, index, target_offset);
|
341
|
-
}
|
342
|
-
|
343
|
-
template <typename T>
|
344
|
-
struct WindowColumnIterator {
|
345
|
-
using iterator = WindowColumnIterator<T>;
|
346
|
-
using iterator_category = std::random_access_iterator_tag;
|
347
|
-
using difference_type = std::ptrdiff_t;
|
348
|
-
using value_type = T;
|
349
|
-
using reference = T;
|
350
|
-
using pointer = idx_t;
|
351
|
-
|
352
|
-
explicit WindowColumnIterator(WindowInputColumn &coll_p, pointer pos_p = 0) : coll(&coll_p), pos(pos_p) {
|
353
|
-
}
|
354
|
-
|
355
|
-
// Forward iterator
|
356
|
-
inline reference operator*() const {
|
357
|
-
return coll->GetCell<T>(pos);
|
358
|
-
}
|
359
|
-
inline explicit operator pointer() const {
|
360
|
-
return pos;
|
361
|
-
}
|
362
|
-
|
363
|
-
inline iterator &operator++() {
|
364
|
-
++pos;
|
365
|
-
return *this;
|
366
|
-
}
|
367
|
-
inline iterator operator++(int) {
|
368
|
-
auto result = *this;
|
369
|
-
++(*this);
|
370
|
-
return result;
|
371
|
-
}
|
372
|
-
|
373
|
-
// Bidirectional iterator
|
374
|
-
inline iterator &operator--() {
|
375
|
-
--pos;
|
376
|
-
return *this;
|
377
|
-
}
|
378
|
-
inline iterator operator--(int) {
|
379
|
-
auto result = *this;
|
380
|
-
--(*this);
|
381
|
-
return result;
|
382
|
-
}
|
383
|
-
|
384
|
-
// Random Access
|
385
|
-
inline iterator &operator+=(difference_type n) {
|
386
|
-
pos += n;
|
387
|
-
return *this;
|
388
|
-
}
|
389
|
-
inline iterator &operator-=(difference_type n) {
|
390
|
-
pos -= n;
|
391
|
-
return *this;
|
392
|
-
}
|
393
|
-
|
394
|
-
inline reference operator[](difference_type m) const {
|
395
|
-
return coll->GetCell<T>(pos + m);
|
396
|
-
}
|
397
|
-
|
398
|
-
friend inline iterator operator+(const iterator &a, difference_type n) {
|
399
|
-
return iterator(a.coll, a.pos + n);
|
400
|
-
}
|
401
|
-
|
402
|
-
friend inline iterator operator-(const iterator &a, difference_type n) {
|
403
|
-
return iterator(a.coll, a.pos - n);
|
404
|
-
}
|
405
|
-
|
406
|
-
friend inline iterator operator+(difference_type n, const iterator &a) {
|
407
|
-
return a + n;
|
408
|
-
}
|
409
|
-
friend inline difference_type operator-(const iterator &a, const iterator &b) {
|
410
|
-
return difference_type(a.pos - b.pos);
|
411
|
-
}
|
412
|
-
|
413
|
-
friend inline bool operator==(const iterator &a, const iterator &b) {
|
414
|
-
return a.pos == b.pos;
|
415
|
-
}
|
416
|
-
friend inline bool operator!=(const iterator &a, const iterator &b) {
|
417
|
-
return a.pos != b.pos;
|
418
|
-
}
|
419
|
-
friend inline bool operator<(const iterator &a, const iterator &b) {
|
420
|
-
return a.pos < b.pos;
|
421
|
-
}
|
422
|
-
friend inline bool operator<=(const iterator &a, const iterator &b) {
|
423
|
-
return a.pos <= b.pos;
|
424
|
-
}
|
425
|
-
friend inline bool operator>(const iterator &a, const iterator &b) {
|
426
|
-
return a.pos > b.pos;
|
427
|
-
}
|
428
|
-
friend inline bool operator>=(const iterator &a, const iterator &b) {
|
429
|
-
return a.pos >= b.pos;
|
430
|
-
}
|
431
|
-
|
432
|
-
private:
|
433
|
-
optional_ptr<WindowInputColumn> coll;
|
434
|
-
pointer pos;
|
435
|
-
};
|
436
|
-
|
437
|
-
template <typename T, typename OP>
|
438
|
-
struct OperationCompare : public std::function<bool(T, T)> {
|
439
|
-
inline bool operator()(const T &lhs, const T &val) const {
|
440
|
-
return OP::template Operation(lhs, val);
|
441
|
-
}
|
442
|
-
};
|
443
|
-
|
444
|
-
template <typename T, typename OP, bool FROM>
|
445
|
-
static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
|
446
|
-
WindowInputExpression &boundary, const idx_t chunk_idx, const FrameBounds &prev) {
|
447
|
-
D_ASSERT(!boundary.CellIsNull(chunk_idx));
|
448
|
-
const auto val = boundary.GetCell<T>(chunk_idx);
|
449
|
-
|
450
|
-
OperationCompare<T, OP> comp;
|
451
|
-
WindowColumnIterator<T> begin(over, order_begin);
|
452
|
-
WindowColumnIterator<T> end(over, order_end);
|
453
|
-
|
454
|
-
if (order_begin < prev.first && prev.first < order_end) {
|
455
|
-
const auto first = over.GetCell<T>(prev.first);
|
456
|
-
if (!comp(val, first)) {
|
457
|
-
// prev.first <= val, so we can start further forward
|
458
|
-
begin += (prev.first - order_begin);
|
459
|
-
}
|
460
|
-
}
|
461
|
-
if (order_begin <= prev.second && prev.second < order_end) {
|
462
|
-
const auto second = over.GetCell<T>(prev.second);
|
463
|
-
if (!comp(second, val)) {
|
464
|
-
// val <= prev.second, so we can end further back
|
465
|
-
// (prev.second is the largest peer)
|
466
|
-
end -= (order_end - prev.second - 1);
|
467
|
-
}
|
468
|
-
}
|
469
|
-
|
470
|
-
if (FROM) {
|
471
|
-
return idx_t(std::lower_bound(begin, end, val, comp));
|
472
|
-
} else {
|
473
|
-
return idx_t(std::upper_bound(begin, end, val, comp));
|
474
|
-
}
|
475
|
-
}
|
476
|
-
|
477
|
-
template <typename OP, bool FROM>
|
478
|
-
static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
|
479
|
-
WindowInputExpression &boundary, const idx_t chunk_idx, const FrameBounds &prev) {
|
480
|
-
D_ASSERT(boundary.chunk.ColumnCount() == 1);
|
481
|
-
D_ASSERT(boundary.chunk.data[0].GetType().InternalType() == over.input_expr.ptype);
|
482
|
-
|
483
|
-
switch (over.input_expr.ptype) {
|
484
|
-
case PhysicalType::INT8:
|
485
|
-
return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
486
|
-
case PhysicalType::INT16:
|
487
|
-
return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
488
|
-
case PhysicalType::INT32:
|
489
|
-
return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
490
|
-
case PhysicalType::INT64:
|
491
|
-
return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
492
|
-
case PhysicalType::UINT8:
|
493
|
-
return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
494
|
-
case PhysicalType::UINT16:
|
495
|
-
return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
496
|
-
case PhysicalType::UINT32:
|
497
|
-
return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
498
|
-
case PhysicalType::UINT64:
|
499
|
-
return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
500
|
-
case PhysicalType::INT128:
|
501
|
-
return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
502
|
-
case PhysicalType::FLOAT:
|
503
|
-
return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
504
|
-
case PhysicalType::DOUBLE:
|
505
|
-
return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
506
|
-
case PhysicalType::INTERVAL:
|
507
|
-
return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
508
|
-
default:
|
509
|
-
throw InternalException("Unsupported column type for RANGE");
|
510
|
-
}
|
511
|
-
}
|
512
|
-
|
513
|
-
template <bool FROM>
|
514
|
-
static idx_t FindOrderedRangeBound(WindowInputColumn &over, const OrderType range_sense, const idx_t order_begin,
|
515
|
-
const idx_t order_end, WindowInputExpression &boundary, const idx_t chunk_idx,
|
516
|
-
const FrameBounds &prev) {
|
517
|
-
switch (range_sense) {
|
518
|
-
case OrderType::ASCENDING:
|
519
|
-
return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
520
|
-
case OrderType::DESCENDING:
|
521
|
-
return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
|
522
|
-
default:
|
523
|
-
throw InternalException("Unsupported ORDER BY sense for RANGE");
|
524
|
-
}
|
525
|
-
}
|
526
|
-
|
527
|
-
void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t chunk_idx,
|
528
|
-
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
529
|
-
const ValidityMask &partition_mask, const ValidityMask &order_mask) {
|
530
|
-
|
531
|
-
if (partition_count + order_count > 0) {
|
532
|
-
|
533
|
-
// determine partition and peer group boundaries to ultimately figure out window size
|
534
|
-
const auto is_same_partition = !partition_mask.RowIsValidUnsafe(row_idx);
|
535
|
-
const auto is_peer = !order_mask.RowIsValidUnsafe(row_idx);
|
536
|
-
|
537
|
-
// when the partition changes, recompute the boundaries
|
538
|
-
if (!is_same_partition) {
|
539
|
-
partition_start = row_idx;
|
540
|
-
peer_start = row_idx;
|
541
|
-
|
542
|
-
// find end of partition
|
543
|
-
partition_end = input_size;
|
544
|
-
if (partition_count) {
|
545
|
-
idx_t n = 1;
|
546
|
-
partition_end = FindNextStart(partition_mask, partition_start + 1, input_size, n);
|
547
|
-
}
|
548
|
-
|
549
|
-
// Find valid ordering values for the new partition
|
550
|
-
// so we can exclude NULLs from RANGE expression computations
|
551
|
-
valid_start = partition_start;
|
552
|
-
valid_end = partition_end;
|
553
|
-
|
554
|
-
if ((valid_start < valid_end) && has_preceding_range) {
|
555
|
-
// Exclude any leading NULLs
|
556
|
-
if (range_collection.CellIsNull(valid_start)) {
|
557
|
-
idx_t n = 1;
|
558
|
-
valid_start = FindNextStart(order_mask, valid_start + 1, valid_end, n);
|
559
|
-
}
|
560
|
-
}
|
561
|
-
|
562
|
-
if ((valid_start < valid_end) && has_following_range) {
|
563
|
-
// Exclude any trailing NULLs
|
564
|
-
if (range_collection.CellIsNull(valid_end - 1)) {
|
565
|
-
idx_t n = 1;
|
566
|
-
valid_end = FindPrevStart(order_mask, valid_start, valid_end, n);
|
567
|
-
}
|
568
|
-
|
569
|
-
// Reset range hints
|
570
|
-
prev.first = valid_start;
|
571
|
-
prev.second = valid_end;
|
572
|
-
}
|
573
|
-
} else if (!is_peer) {
|
574
|
-
peer_start = row_idx;
|
575
|
-
}
|
576
|
-
|
577
|
-
if (needs_peer) {
|
578
|
-
peer_end = partition_end;
|
579
|
-
if (order_count) {
|
580
|
-
idx_t n = 1;
|
581
|
-
peer_end = FindNextStart(order_mask, peer_start + 1, partition_end, n);
|
582
|
-
}
|
583
|
-
}
|
584
|
-
|
585
|
-
} else {
|
586
|
-
// OVER()
|
587
|
-
partition_end = input_size;
|
588
|
-
peer_end = partition_end;
|
589
|
-
}
|
590
|
-
|
591
|
-
// determine window boundaries depending on the type of expression
|
592
|
-
window_start = -1;
|
593
|
-
window_end = -1;
|
594
|
-
|
595
|
-
switch (start_boundary) {
|
596
|
-
case WindowBoundary::UNBOUNDED_PRECEDING:
|
597
|
-
window_start = partition_start;
|
598
|
-
break;
|
599
|
-
case WindowBoundary::CURRENT_ROW_ROWS:
|
600
|
-
window_start = row_idx;
|
601
|
-
break;
|
602
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
603
|
-
window_start = peer_start;
|
604
|
-
break;
|
605
|
-
case WindowBoundary::EXPR_PRECEDING_ROWS: {
|
606
|
-
if (!TrySubtractOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(chunk_idx),
|
607
|
-
window_start)) {
|
608
|
-
throw OutOfRangeException("Overflow computing ROWS PRECEDING start");
|
609
|
-
}
|
610
|
-
break;
|
611
|
-
}
|
612
|
-
case WindowBoundary::EXPR_FOLLOWING_ROWS: {
|
613
|
-
if (!TryAddOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(chunk_idx), window_start)) {
|
614
|
-
throw OutOfRangeException("Overflow computing ROWS FOLLOWING start");
|
615
|
-
}
|
616
|
-
break;
|
617
|
-
}
|
618
|
-
case WindowBoundary::EXPR_PRECEDING_RANGE: {
|
619
|
-
if (boundary_start.CellIsNull(chunk_idx)) {
|
620
|
-
window_start = peer_start;
|
621
|
-
} else {
|
622
|
-
prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, valid_start, row_idx,
|
623
|
-
boundary_start, chunk_idx, prev);
|
624
|
-
window_start = prev.first;
|
625
|
-
}
|
626
|
-
break;
|
627
|
-
}
|
628
|
-
case WindowBoundary::EXPR_FOLLOWING_RANGE: {
|
629
|
-
if (boundary_start.CellIsNull(chunk_idx)) {
|
630
|
-
window_start = peer_start;
|
631
|
-
} else {
|
632
|
-
prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, row_idx, valid_end, boundary_start,
|
633
|
-
chunk_idx, prev);
|
634
|
-
window_start = prev.first;
|
635
|
-
}
|
636
|
-
break;
|
637
|
-
}
|
638
|
-
default:
|
639
|
-
throw InternalException("Unsupported window start boundary");
|
640
|
-
}
|
641
|
-
|
642
|
-
switch (end_boundary) {
|
643
|
-
case WindowBoundary::CURRENT_ROW_ROWS:
|
644
|
-
window_end = row_idx + 1;
|
645
|
-
break;
|
646
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
647
|
-
window_end = peer_end;
|
648
|
-
break;
|
649
|
-
case WindowBoundary::UNBOUNDED_FOLLOWING:
|
650
|
-
window_end = partition_end;
|
651
|
-
break;
|
652
|
-
case WindowBoundary::EXPR_PRECEDING_ROWS:
|
653
|
-
if (!TrySubtractOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(chunk_idx),
|
654
|
-
window_end)) {
|
655
|
-
throw OutOfRangeException("Overflow computing ROWS PRECEDING end");
|
656
|
-
}
|
657
|
-
break;
|
658
|
-
case WindowBoundary::EXPR_FOLLOWING_ROWS:
|
659
|
-
if (!TryAddOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(chunk_idx), window_end)) {
|
660
|
-
throw OutOfRangeException("Overflow computing ROWS FOLLOWING end");
|
661
|
-
}
|
662
|
-
break;
|
663
|
-
case WindowBoundary::EXPR_PRECEDING_RANGE: {
|
664
|
-
if (boundary_end.CellIsNull(chunk_idx)) {
|
665
|
-
window_end = peer_end;
|
666
|
-
} else {
|
667
|
-
prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, valid_start, row_idx,
|
668
|
-
boundary_end, chunk_idx, prev);
|
669
|
-
window_end = prev.second;
|
670
|
-
}
|
671
|
-
break;
|
672
|
-
}
|
673
|
-
case WindowBoundary::EXPR_FOLLOWING_RANGE: {
|
674
|
-
if (boundary_end.CellIsNull(chunk_idx)) {
|
675
|
-
window_end = peer_end;
|
676
|
-
} else {
|
677
|
-
prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, row_idx, valid_end, boundary_end,
|
678
|
-
chunk_idx, prev);
|
679
|
-
window_end = prev.second;
|
680
|
-
}
|
681
|
-
break;
|
682
|
-
}
|
683
|
-
default:
|
684
|
-
throw InternalException("Unsupported window end boundary");
|
685
|
-
}
|
686
|
-
|
687
|
-
// clamp windows to partitions if they should exceed
|
688
|
-
if (window_start < (int64_t)partition_start) {
|
689
|
-
window_start = partition_start;
|
690
|
-
}
|
691
|
-
if (window_start > (int64_t)partition_end) {
|
692
|
-
window_start = partition_end;
|
693
|
-
}
|
694
|
-
if (window_end < (int64_t)partition_start) {
|
695
|
-
window_end = partition_start;
|
696
|
-
}
|
697
|
-
if (window_end > (int64_t)partition_end) {
|
698
|
-
window_end = partition_end;
|
699
|
-
}
|
700
|
-
|
701
|
-
if (window_start < 0 || window_end < 0) {
|
702
|
-
throw InternalException("Failed to compute window boundaries");
|
703
|
-
}
|
704
|
-
}
|
705
|
-
|
706
|
-
void WindowBoundariesState::Bounds(DataChunk &bounds, idx_t row_idx, WindowInputColumn &range, const idx_t count,
|
707
|
-
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
708
|
-
const ValidityMask &partition_mask, const ValidityMask &order_mask) {
|
709
|
-
bounds.Reset();
|
710
|
-
D_ASSERT(bounds.ColumnCount() == 6);
|
711
|
-
auto partition_begin_data = FlatVector::GetData<idx_t>(bounds.data[PARTITION_BEGIN]);
|
712
|
-
auto partition_end_data = FlatVector::GetData<idx_t>(bounds.data[PARTITION_END]);
|
713
|
-
auto peer_begin_data = FlatVector::GetData<idx_t>(bounds.data[PEER_BEGIN]);
|
714
|
-
auto peer_end_data = FlatVector::GetData<idx_t>(bounds.data[PEER_END]);
|
715
|
-
auto window_begin_data = FlatVector::GetData<int64_t>(bounds.data[WINDOW_BEGIN]);
|
716
|
-
auto window_end_data = FlatVector::GetData<int64_t>(bounds.data[WINDOW_END]);
|
717
|
-
for (idx_t chunk_idx = 0; chunk_idx < count; ++chunk_idx, ++row_idx) {
|
718
|
-
Update(row_idx, range, chunk_idx, boundary_start, boundary_end, partition_mask, order_mask);
|
719
|
-
*partition_begin_data++ = partition_start;
|
720
|
-
*partition_end_data++ = partition_end;
|
721
|
-
if (needs_peer) {
|
722
|
-
*peer_begin_data++ = peer_start;
|
723
|
-
*peer_end_data++ = peer_end;
|
724
|
-
}
|
725
|
-
*window_begin_data++ = window_start;
|
726
|
-
*window_end_data++ = window_end;
|
727
|
-
}
|
728
|
-
bounds.SetCardinality(count);
|
729
|
-
}
|
730
|
-
|
731
|
-
struct WindowExecutor {
|
732
|
-
bool IsConstantAggregate();
|
733
|
-
bool IsCustomAggregate();
|
734
|
-
|
735
|
-
WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
|
736
|
-
const idx_t count, WindowAggregationMode mode);
|
737
|
-
|
738
|
-
void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count);
|
739
|
-
void Finalize();
|
740
|
-
|
741
|
-
void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
|
742
|
-
const ValidityMask &order_mask);
|
743
|
-
|
744
|
-
// The function
|
745
|
-
BoundWindowExpression &wexpr;
|
746
|
-
const WindowAggregationMode mode;
|
747
|
-
|
748
|
-
// Frame management
|
749
|
-
WindowBoundariesState state;
|
750
|
-
DataChunk bounds;
|
751
|
-
uint64_t dense_rank = 1;
|
752
|
-
uint64_t rank_equal = 0;
|
753
|
-
uint64_t rank = 1;
|
754
|
-
|
755
|
-
// Expression collections
|
756
|
-
DataChunk payload_collection;
|
757
|
-
ExpressionExecutor payload_executor;
|
758
|
-
DataChunk payload_chunk;
|
759
|
-
|
760
|
-
ExpressionExecutor filter_executor;
|
761
|
-
SelectionVector filter_sel;
|
762
|
-
|
763
|
-
// LEAD/LAG Evaluation
|
764
|
-
WindowInputExpression leadlag_offset;
|
765
|
-
WindowInputExpression leadlag_default;
|
766
|
-
|
767
|
-
// evaluate boundaries if present. Parser has checked boundary types.
|
768
|
-
WindowInputExpression boundary_start;
|
769
|
-
WindowInputExpression boundary_end;
|
770
|
-
|
771
|
-
// evaluate RANGE expressions, if needed
|
772
|
-
WindowInputColumn range;
|
773
|
-
|
774
|
-
// IGNORE NULLS
|
775
|
-
ValidityMask ignore_nulls;
|
776
|
-
|
777
|
-
// aggregate computation algorithm
|
778
|
-
unique_ptr<WindowAggregateState> aggregate_state = nullptr;
|
779
|
-
|
780
|
-
protected:
|
781
|
-
void NextRank(idx_t partition_begin, idx_t peer_begin, idx_t row_idx);
|
782
|
-
void Aggregate(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
783
|
-
void RowNumber(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
784
|
-
void Rank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
785
|
-
void DenseRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
786
|
-
void PercentRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
787
|
-
void CumeDist(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
788
|
-
void Ntile(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
789
|
-
void LeadLag(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
790
|
-
void FirstValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
791
|
-
void LastValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
792
|
-
void NthValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
793
|
-
};
|
794
|
-
|
795
|
-
bool WindowExecutor::IsConstantAggregate() {
|
796
|
-
if (!wexpr.aggregate) {
|
797
|
-
return false;
|
798
|
-
}
|
799
|
-
|
800
|
-
// COUNT(*) is already handled efficiently by segment trees.
|
801
|
-
if (wexpr.children.empty()) {
|
802
|
-
return false;
|
803
|
-
}
|
804
|
-
|
805
|
-
/*
|
806
|
-
The default framing option is RANGE UNBOUNDED PRECEDING, which
|
807
|
-
is the same as RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT
|
808
|
-
ROW; it sets the frame to be all rows from the partition start
|
809
|
-
up through the current row's last peer (a row that the window's
|
810
|
-
ORDER BY clause considers equivalent to the current row; all
|
811
|
-
rows are peers if there is no ORDER BY). In general, UNBOUNDED
|
812
|
-
PRECEDING means that the frame starts with the first row of the
|
813
|
-
partition, and similarly UNBOUNDED FOLLOWING means that the
|
814
|
-
frame ends with the last row of the partition, regardless of
|
815
|
-
RANGE, ROWS or GROUPS mode. In ROWS mode, CURRENT ROW means that
|
816
|
-
the frame starts or ends with the current row; but in RANGE or
|
817
|
-
GROUPS mode it means that the frame starts or ends with the
|
818
|
-
current row's first or last peer in the ORDER BY ordering. The
|
819
|
-
offset PRECEDING and offset FOLLOWING options vary in meaning
|
820
|
-
depending on the frame mode.
|
821
|
-
*/
|
822
|
-
switch (wexpr.start) {
|
823
|
-
case WindowBoundary::UNBOUNDED_PRECEDING:
|
824
|
-
break;
|
825
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
826
|
-
if (!wexpr.orders.empty()) {
|
827
|
-
return false;
|
828
|
-
}
|
829
|
-
break;
|
830
|
-
default:
|
831
|
-
return false;
|
832
|
-
}
|
833
|
-
|
834
|
-
switch (wexpr.end) {
|
835
|
-
case WindowBoundary::UNBOUNDED_FOLLOWING:
|
836
|
-
break;
|
837
|
-
case WindowBoundary::CURRENT_ROW_RANGE:
|
838
|
-
if (!wexpr.orders.empty()) {
|
839
|
-
return false;
|
840
|
-
}
|
841
|
-
break;
|
842
|
-
default:
|
843
|
-
return false;
|
844
|
-
}
|
845
|
-
|
846
|
-
return true;
|
847
|
-
}
|
848
|
-
|
849
|
-
bool WindowExecutor::IsCustomAggregate() {
|
850
|
-
if (!wexpr.aggregate) {
|
851
|
-
return false;
|
852
|
-
}
|
853
|
-
|
854
|
-
if (!AggregateObject(wexpr).function.window) {
|
855
|
-
return false;
|
856
|
-
}
|
857
|
-
|
858
|
-
return (mode < WindowAggregationMode::COMBINE);
|
859
|
-
}
|
860
|
-
|
861
|
-
WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
|
862
|
-
const idx_t count, WindowAggregationMode mode)
|
863
|
-
: wexpr(wexpr), mode(mode), state(wexpr, count), payload_collection(), payload_executor(context),
|
864
|
-
filter_executor(context), leadlag_offset(wexpr.offset_expr.get(), context),
|
865
|
-
leadlag_default(wexpr.default_expr.get(), context), boundary_start(wexpr.start_expr.get(), context),
|
866
|
-
boundary_end(wexpr.end_expr.get(), context),
|
867
|
-
range((state.has_preceding_range || state.has_following_range) ? wexpr.orders[0].expression.get() : nullptr,
|
868
|
-
context, count)
|
869
|
-
|
870
|
-
{
|
871
|
-
// TODO we could evaluate those expressions in parallel
|
872
|
-
|
873
|
-
// Check for constant aggregate
|
874
|
-
if (IsConstantAggregate()) {
|
875
|
-
aggregate_state =
|
876
|
-
make_uniq<WindowConstantAggregate>(AggregateObject(wexpr), wexpr.return_type, partition_mask, count);
|
877
|
-
} else if (IsCustomAggregate()) {
|
878
|
-
aggregate_state = make_uniq<WindowCustomAggregate>(AggregateObject(wexpr), wexpr.return_type, count);
|
879
|
-
} else if (wexpr.aggregate) {
|
880
|
-
// build a segment tree for frame-adhering aggregates
|
881
|
-
// see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
|
882
|
-
aggregate_state = make_uniq<WindowSegmentTree>(AggregateObject(wexpr), wexpr.return_type, count, mode);
|
883
|
-
}
|
884
|
-
|
885
|
-
// evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
|
886
|
-
if (wexpr.filter_expr) {
|
887
|
-
filter_executor.AddExpression(*wexpr.filter_expr);
|
888
|
-
filter_sel.Initialize(STANDARD_VECTOR_SIZE);
|
889
|
-
}
|
890
|
-
|
891
|
-
// TODO: child may be a scalar, don't need to materialize the whole collection then
|
892
|
-
|
893
|
-
// evaluate inner expressions of window functions, could be more complex
|
894
|
-
PrepareInputExpressions(wexpr.children, payload_executor, payload_chunk);
|
895
|
-
|
896
|
-
auto types = payload_chunk.GetTypes();
|
897
|
-
if (!types.empty()) {
|
898
|
-
payload_collection.Initialize(Allocator::Get(context), types);
|
899
|
-
}
|
900
|
-
|
901
|
-
vector<LogicalType> bounds_types(6, LogicalType(LogicalTypeId::UBIGINT));
|
902
|
-
bounds.Initialize(Allocator::Get(context), bounds_types);
|
903
|
-
}
|
904
|
-
|
905
|
-
void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
|
906
|
-
// Single pass over the input to produce the global data.
|
907
|
-
// Vectorisation for the win...
|
908
|
-
|
909
|
-
// Set up a validity mask for IGNORE NULLS
|
910
|
-
bool check_nulls = false;
|
911
|
-
if (wexpr.ignore_nulls) {
|
912
|
-
switch (wexpr.type) {
|
913
|
-
case ExpressionType::WINDOW_LEAD:
|
914
|
-
case ExpressionType::WINDOW_LAG:
|
915
|
-
case ExpressionType::WINDOW_FIRST_VALUE:
|
916
|
-
case ExpressionType::WINDOW_LAST_VALUE:
|
917
|
-
case ExpressionType::WINDOW_NTH_VALUE:
|
918
|
-
check_nulls = true;
|
919
|
-
break;
|
920
|
-
default:
|
921
|
-
break;
|
922
|
-
}
|
923
|
-
}
|
924
|
-
|
925
|
-
const auto count = input_chunk.size();
|
926
|
-
|
927
|
-
idx_t filtered = 0;
|
928
|
-
SelectionVector *filtering = nullptr;
|
929
|
-
if (wexpr.filter_expr) {
|
930
|
-
filtering = &filter_sel;
|
931
|
-
filtered = filter_executor.SelectExpression(input_chunk, filter_sel);
|
932
|
-
}
|
933
|
-
|
934
|
-
if (!wexpr.children.empty()) {
|
935
|
-
payload_chunk.Reset();
|
936
|
-
payload_executor.Execute(input_chunk, payload_chunk);
|
937
|
-
payload_chunk.Verify();
|
938
|
-
if (aggregate_state) {
|
939
|
-
aggregate_state->Sink(payload_chunk, filtering, filtered);
|
940
|
-
} else {
|
941
|
-
payload_collection.Append(payload_chunk, true);
|
942
|
-
}
|
943
|
-
|
944
|
-
// process payload chunks while they are still piping hot
|
945
|
-
if (check_nulls) {
|
946
|
-
UnifiedVectorFormat vdata;
|
947
|
-
payload_chunk.data[0].ToUnifiedFormat(count, vdata);
|
948
|
-
if (!vdata.validity.AllValid()) {
|
949
|
-
// Lazily materialise the contents when we find the first NULL
|
950
|
-
if (ignore_nulls.AllValid()) {
|
951
|
-
ignore_nulls.Initialize(total_count);
|
952
|
-
}
|
953
|
-
// Write to the current position
|
954
|
-
if (input_idx % ValidityMask::BITS_PER_VALUE == 0) {
|
955
|
-
// If we are at the edge of an output entry, just copy the entries
|
956
|
-
auto dst = ignore_nulls.GetData() + ignore_nulls.EntryCount(input_idx);
|
957
|
-
auto src = vdata.validity.GetData();
|
958
|
-
for (auto entry_count = vdata.validity.EntryCount(count); entry_count-- > 0;) {
|
959
|
-
*dst++ = *src++;
|
960
|
-
}
|
961
|
-
} else {
|
962
|
-
// If not, we have ragged data and need to copy one bit at a time.
|
963
|
-
for (idx_t i = 0; i < count; ++i) {
|
964
|
-
ignore_nulls.Set(input_idx + i, vdata.validity.RowIsValid(i));
|
965
|
-
}
|
966
|
-
}
|
967
|
-
}
|
968
|
-
}
|
969
|
-
} else if (aggregate_state) {
|
970
|
-
// Zero-argument aggregate (e.g., COUNT(*)
|
971
|
-
payload_chunk.SetCardinality(input_chunk);
|
972
|
-
aggregate_state->Sink(payload_chunk, filtering, filtered);
|
973
|
-
}
|
974
|
-
|
975
|
-
range.Append(input_chunk);
|
976
|
-
}
|
977
|
-
|
978
|
-
void WindowExecutor::Finalize() {
|
979
|
-
if (aggregate_state) {
|
980
|
-
aggregate_state->Finalize();
|
981
|
-
}
|
982
|
-
}
|
983
|
-
|
984
|
-
void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
|
985
|
-
const ValidityMask &order_mask) {
|
986
|
-
// Evaluate the row-level arguments
|
987
|
-
boundary_start.Execute(input_chunk);
|
988
|
-
boundary_end.Execute(input_chunk);
|
989
|
-
|
990
|
-
leadlag_offset.Execute(input_chunk);
|
991
|
-
leadlag_default.Execute(input_chunk);
|
992
|
-
|
993
|
-
const auto count = input_chunk.size();
|
994
|
-
bounds.Reset();
|
995
|
-
state.Bounds(bounds, row_idx, range, input_chunk.size(), boundary_start, boundary_end, partition_mask, order_mask);
|
996
|
-
|
82
|
+
static unique_ptr<WindowExecutor> WindowExecutorFactory(BoundWindowExpression &wexpr, ClientContext &context,
|
83
|
+
const ValidityMask &partition_mask,
|
84
|
+
const ValidityMask &order_mask, const idx_t payload_count,
|
85
|
+
WindowAggregationMode mode) {
|
997
86
|
switch (wexpr.type) {
|
998
87
|
case ExpressionType::WINDOW_AGGREGATE:
|
999
|
-
|
1000
|
-
break;
|
88
|
+
return make_uniq<WindowAggregateExecutor>(wexpr, context, payload_count, partition_mask, order_mask, mode);
|
1001
89
|
case ExpressionType::WINDOW_ROW_NUMBER:
|
1002
|
-
|
1003
|
-
break;
|
90
|
+
return make_uniq<WindowRowNumberExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1004
91
|
case ExpressionType::WINDOW_RANK_DENSE:
|
1005
|
-
|
1006
|
-
break;
|
92
|
+
return make_uniq<WindowDenseRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1007
93
|
case ExpressionType::WINDOW_RANK:
|
1008
|
-
|
1009
|
-
break;
|
94
|
+
return make_uniq<WindowRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1010
95
|
case ExpressionType::WINDOW_PERCENT_RANK:
|
1011
|
-
|
1012
|
-
break;
|
96
|
+
return make_uniq<WindowPercentRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1013
97
|
case ExpressionType::WINDOW_CUME_DIST:
|
1014
|
-
|
1015
|
-
break;
|
98
|
+
return make_uniq<WindowCumeDistExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1016
99
|
case ExpressionType::WINDOW_NTILE:
|
1017
|
-
|
1018
|
-
break;
|
100
|
+
return make_uniq<WindowNtileExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1019
101
|
case ExpressionType::WINDOW_LEAD:
|
1020
102
|
case ExpressionType::WINDOW_LAG:
|
1021
|
-
|
1022
|
-
break;
|
103
|
+
return make_uniq<WindowLeadLagExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1023
104
|
case ExpressionType::WINDOW_FIRST_VALUE:
|
1024
|
-
|
1025
|
-
break;
|
105
|
+
return make_uniq<WindowFirstValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1026
106
|
case ExpressionType::WINDOW_LAST_VALUE:
|
1027
|
-
|
1028
|
-
break;
|
107
|
+
return make_uniq<WindowLastValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1029
108
|
case ExpressionType::WINDOW_NTH_VALUE:
|
1030
|
-
|
109
|
+
return make_uniq<WindowNthValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
|
1031
110
|
break;
|
1032
111
|
default:
|
1033
112
|
throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
|
1034
113
|
}
|
1035
|
-
|
1036
|
-
result.Verify(count);
|
1037
|
-
}
|
1038
|
-
|
1039
|
-
void WindowExecutor::NextRank(idx_t partition_begin, idx_t peer_begin, idx_t row_idx) {
|
1040
|
-
if (partition_begin == row_idx) {
|
1041
|
-
dense_rank = 1;
|
1042
|
-
rank = 1;
|
1043
|
-
rank_equal = 0;
|
1044
|
-
} else if (peer_begin == row_idx) {
|
1045
|
-
dense_rank++;
|
1046
|
-
rank += rank_equal;
|
1047
|
-
rank_equal = 0;
|
1048
|
-
}
|
1049
|
-
rank_equal++;
|
1050
|
-
}
|
1051
|
-
|
1052
|
-
void WindowExecutor::Aggregate(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1053
|
-
D_ASSERT(aggregate_state);
|
1054
|
-
auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
1055
|
-
auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
|
1056
|
-
aggregate_state->Evaluate(window_begin, window_end, result, count);
|
1057
|
-
}
|
1058
|
-
|
1059
|
-
void WindowExecutor::RowNumber(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1060
|
-
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1061
|
-
auto rdata = FlatVector::GetData<int64_t>(result);
|
1062
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1063
|
-
rdata[i] = row_idx - partition_begin[i] + 1;
|
1064
|
-
}
|
1065
|
-
}
|
1066
|
-
|
1067
|
-
void WindowExecutor::Rank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1068
|
-
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1069
|
-
auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
|
1070
|
-
auto rdata = FlatVector::GetData<int64_t>(result);
|
1071
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1072
|
-
NextRank(partition_begin[i], peer_begin[i], row_idx);
|
1073
|
-
rdata[i] = rank;
|
1074
|
-
}
|
1075
|
-
}
|
1076
|
-
|
1077
|
-
void WindowExecutor::DenseRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1078
|
-
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1079
|
-
auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
|
1080
|
-
auto rdata = FlatVector::GetData<int64_t>(result);
|
1081
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1082
|
-
NextRank(partition_begin[i], peer_begin[i], row_idx);
|
1083
|
-
rdata[i] = dense_rank;
|
1084
|
-
}
|
1085
|
-
}
|
1086
|
-
|
1087
|
-
void WindowExecutor::PercentRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1088
|
-
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1089
|
-
auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
|
1090
|
-
auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
|
1091
|
-
auto rdata = FlatVector::GetData<double>(result);
|
1092
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1093
|
-
NextRank(partition_begin[i], peer_begin[i], row_idx);
|
1094
|
-
int64_t denom = partition_end[i] - partition_begin[i] - 1;
|
1095
|
-
double percent_rank = denom > 0 ? ((double)rank - 1) / denom : 0;
|
1096
|
-
rdata[i] = percent_rank;
|
1097
|
-
}
|
1098
|
-
}
|
1099
|
-
|
1100
|
-
void WindowExecutor::CumeDist(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1101
|
-
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1102
|
-
auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
|
1103
|
-
auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
|
1104
|
-
auto peer_end = FlatVector::GetData<const idx_t>(bounds.data[PEER_END]);
|
1105
|
-
auto rdata = FlatVector::GetData<double>(result);
|
1106
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1107
|
-
NextRank(partition_begin[i], peer_begin[i], row_idx);
|
1108
|
-
int64_t denom = partition_end[i] - partition_begin[i];
|
1109
|
-
double cume_dist = denom > 0 ? ((double)(peer_end[i] - partition_begin[i])) / denom : 0;
|
1110
|
-
rdata[i] = cume_dist;
|
1111
|
-
}
|
1112
|
-
}
|
1113
|
-
|
1114
|
-
void WindowExecutor::Ntile(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1115
|
-
D_ASSERT(payload_collection.ColumnCount() == 1);
|
1116
|
-
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1117
|
-
auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
|
1118
|
-
auto rdata = FlatVector::GetData<int64_t>(result);
|
1119
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1120
|
-
if (CellIsNull(payload_collection, 0, row_idx)) {
|
1121
|
-
FlatVector::SetNull(result, i, true);
|
1122
|
-
} else {
|
1123
|
-
auto n_param = GetCell<int64_t>(payload_collection, 0, row_idx);
|
1124
|
-
if (n_param < 1) {
|
1125
|
-
throw InvalidInputException("Argument for ntile must be greater than zero");
|
1126
|
-
}
|
1127
|
-
// With thanks from SQLite's ntileValueFunc()
|
1128
|
-
int64_t n_total = partition_end[i] - partition_begin[i];
|
1129
|
-
if (n_param > n_total) {
|
1130
|
-
// more groups allowed than we have values
|
1131
|
-
// map every entry to a unique group
|
1132
|
-
n_param = n_total;
|
1133
|
-
}
|
1134
|
-
int64_t n_size = (n_total / n_param);
|
1135
|
-
// find the row idx within the group
|
1136
|
-
D_ASSERT(row_idx >= partition_begin[i]);
|
1137
|
-
int64_t adjusted_row_idx = row_idx - partition_begin[i];
|
1138
|
-
// now compute the ntile
|
1139
|
-
int64_t n_large = n_total - n_param * n_size;
|
1140
|
-
int64_t i_small = n_large * (n_size + 1);
|
1141
|
-
int64_t result_ntile;
|
1142
|
-
|
1143
|
-
D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total);
|
1144
|
-
|
1145
|
-
if (adjusted_row_idx < i_small) {
|
1146
|
-
result_ntile = 1 + adjusted_row_idx / (n_size + 1);
|
1147
|
-
} else {
|
1148
|
-
result_ntile = 1 + n_large + (adjusted_row_idx - i_small) / n_size;
|
1149
|
-
}
|
1150
|
-
// result has to be between [1, NTILE]
|
1151
|
-
D_ASSERT(result_ntile >= 1 && result_ntile <= n_param);
|
1152
|
-
rdata[i] = result_ntile;
|
1153
|
-
}
|
1154
|
-
}
|
1155
|
-
}
|
1156
|
-
|
1157
|
-
void WindowExecutor::LeadLag(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1158
|
-
auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
|
1159
|
-
auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
|
1160
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1161
|
-
int64_t offset = 1;
|
1162
|
-
if (wexpr.offset_expr) {
|
1163
|
-
offset = leadlag_offset.GetCell<int64_t>(i);
|
1164
|
-
}
|
1165
|
-
int64_t val_idx = (int64_t)row_idx;
|
1166
|
-
if (wexpr.type == ExpressionType::WINDOW_LEAD) {
|
1167
|
-
val_idx += offset;
|
1168
|
-
} else {
|
1169
|
-
val_idx -= offset;
|
1170
|
-
}
|
1171
|
-
|
1172
|
-
idx_t delta = 0;
|
1173
|
-
if (val_idx < (int64_t)row_idx) {
|
1174
|
-
// Count backwards
|
1175
|
-
delta = idx_t(row_idx - val_idx);
|
1176
|
-
val_idx = FindPrevStart(ignore_nulls, partition_begin[i], row_idx, delta);
|
1177
|
-
} else if (val_idx > (int64_t)row_idx) {
|
1178
|
-
delta = idx_t(val_idx - row_idx);
|
1179
|
-
val_idx = FindNextStart(ignore_nulls, row_idx + 1, partition_end[i], delta);
|
1180
|
-
}
|
1181
|
-
// else offset is zero, so don't move.
|
1182
|
-
|
1183
|
-
if (!delta) {
|
1184
|
-
CopyCell(payload_collection, 0, val_idx, result, i);
|
1185
|
-
} else if (wexpr.default_expr) {
|
1186
|
-
leadlag_default.CopyCell(result, i);
|
1187
|
-
} else {
|
1188
|
-
FlatVector::SetNull(result, i, true);
|
1189
|
-
}
|
1190
|
-
}
|
1191
|
-
}
|
1192
|
-
|
1193
|
-
void WindowExecutor::FirstValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1194
|
-
auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
1195
|
-
auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
|
1196
|
-
auto &rmask = FlatVector::Validity(result);
|
1197
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1198
|
-
if (window_begin[i] >= window_end[i]) {
|
1199
|
-
rmask.SetInvalid(i);
|
1200
|
-
continue;
|
1201
|
-
}
|
1202
|
-
// Same as NTH_VALUE(..., 1)
|
1203
|
-
idx_t n = 1;
|
1204
|
-
const auto first_idx = FindNextStart(ignore_nulls, window_begin[i], window_end[i], n);
|
1205
|
-
if (!n) {
|
1206
|
-
CopyCell(payload_collection, 0, first_idx, result, i);
|
1207
|
-
} else {
|
1208
|
-
FlatVector::SetNull(result, i, true);
|
1209
|
-
}
|
1210
|
-
}
|
1211
|
-
}
|
1212
|
-
|
1213
|
-
void WindowExecutor::LastValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1214
|
-
auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
1215
|
-
auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
|
1216
|
-
auto &rmask = FlatVector::Validity(result);
|
1217
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1218
|
-
if (window_begin[i] >= window_end[i]) {
|
1219
|
-
rmask.SetInvalid(i);
|
1220
|
-
continue;
|
1221
|
-
}
|
1222
|
-
idx_t n = 1;
|
1223
|
-
const auto last_idx = FindPrevStart(ignore_nulls, window_begin[i], window_end[i], n);
|
1224
|
-
if (!n) {
|
1225
|
-
CopyCell(payload_collection, 0, last_idx, result, i);
|
1226
|
-
} else {
|
1227
|
-
FlatVector::SetNull(result, i, true);
|
1228
|
-
}
|
1229
|
-
}
|
1230
|
-
}
|
1231
|
-
|
1232
|
-
void WindowExecutor::NthValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
1233
|
-
D_ASSERT(payload_collection.ColumnCount() == 2);
|
1234
|
-
|
1235
|
-
auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
1236
|
-
auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
|
1237
|
-
auto &rmask = FlatVector::Validity(result);
|
1238
|
-
for (idx_t i = 0; i < count; ++i, ++row_idx) {
|
1239
|
-
if (window_begin[i] >= window_end[i]) {
|
1240
|
-
rmask.SetInvalid(i);
|
1241
|
-
continue;
|
1242
|
-
}
|
1243
|
-
// Returns value evaluated at the row that is the n'th row of the window frame (counting from 1);
|
1244
|
-
// returns NULL if there is no such row.
|
1245
|
-
if (CellIsNull(payload_collection, 1, row_idx)) {
|
1246
|
-
FlatVector::SetNull(result, i, true);
|
1247
|
-
} else {
|
1248
|
-
auto n_param = GetCell<int64_t>(payload_collection, 1, row_idx);
|
1249
|
-
if (n_param < 1) {
|
1250
|
-
FlatVector::SetNull(result, i, true);
|
1251
|
-
} else {
|
1252
|
-
auto n = idx_t(n_param);
|
1253
|
-
const auto nth_index = FindNextStart(ignore_nulls, window_begin[i], window_end[i], n);
|
1254
|
-
if (!n) {
|
1255
|
-
CopyCell(payload_collection, 0, nth_index, result, i);
|
1256
|
-
} else {
|
1257
|
-
FlatVector::SetNull(result, i, true);
|
1258
|
-
}
|
1259
|
-
}
|
1260
|
-
}
|
1261
|
-
}
|
1262
114
|
}
|
1263
115
|
|
1264
116
|
//===--------------------------------------------------------------------===//
|
@@ -1347,8 +199,10 @@ public:
|
|
1347
199
|
class WindowLocalSourceState : public LocalSourceState {
|
1348
200
|
public:
|
1349
201
|
using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
|
1350
|
-
using
|
1351
|
-
using
|
202
|
+
using ExecutorPtr = unique_ptr<WindowExecutor>;
|
203
|
+
using Executors = vector<ExecutorPtr>;
|
204
|
+
using LocalStatePtr = unique_ptr<WindowExecutorState>;
|
205
|
+
using LocalStates = vector<LocalStatePtr>;
|
1352
206
|
|
1353
207
|
WindowLocalSourceState(const PhysicalWindow &op_p, ExecutionContext &context, WindowGlobalSourceState &gsource)
|
1354
208
|
: context(context.client), op(op_p), gsink(gsource.gsink) {
|
@@ -1387,7 +241,8 @@ public:
|
|
1387
241
|
vector<validity_t> order_bits;
|
1388
242
|
ValidityMask order_mask;
|
1389
243
|
//! The current execution functions
|
1390
|
-
|
244
|
+
Executors executors;
|
245
|
+
LocalStates local_states;
|
1391
246
|
|
1392
247
|
//! The read partition
|
1393
248
|
idx_t hash_bin;
|
@@ -1489,12 +344,13 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1489
344
|
}
|
1490
345
|
|
1491
346
|
// Create the executors for each function
|
1492
|
-
|
347
|
+
local_states.clear();
|
348
|
+
executors.clear();
|
1493
349
|
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
1494
350
|
D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
1495
351
|
auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
|
1496
|
-
auto wexec =
|
1497
|
-
|
352
|
+
auto wexec = WindowExecutorFactory(wexpr, context, partition_mask, order_mask, count, gstate.mode);
|
353
|
+
executors.emplace_back(std::move(wexec));
|
1498
354
|
}
|
1499
355
|
|
1500
356
|
// First pass over the input without flushing
|
@@ -1509,15 +365,16 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1509
365
|
}
|
1510
366
|
|
1511
367
|
// TODO: Parallelization opportunity
|
1512
|
-
for (auto &wexec :
|
368
|
+
for (auto &wexec : executors) {
|
1513
369
|
wexec->Sink(input_chunk, input_idx, scanner->Count());
|
1514
370
|
}
|
1515
371
|
input_idx += input_chunk.size();
|
1516
372
|
}
|
1517
373
|
|
1518
374
|
// TODO: Parallelization opportunity
|
1519
|
-
for (auto &wexec :
|
375
|
+
for (auto &wexec : executors) {
|
1520
376
|
wexec->Finalize();
|
377
|
+
local_states.emplace_back(wexec->GetExecutorState());
|
1521
378
|
}
|
1522
379
|
|
1523
380
|
// External scanning assumes all blocks are swizzled.
|
@@ -1538,9 +395,11 @@ void WindowLocalSourceState::Scan(DataChunk &result) {
|
|
1538
395
|
scanner->Scan(input_chunk);
|
1539
396
|
|
1540
397
|
output_chunk.Reset();
|
1541
|
-
for (idx_t expr_idx = 0; expr_idx <
|
1542
|
-
auto &executor = *
|
1543
|
-
|
398
|
+
for (idx_t expr_idx = 0; expr_idx < executors.size(); ++expr_idx) {
|
399
|
+
auto &executor = *executors[expr_idx];
|
400
|
+
auto &lstate = *local_states[expr_idx];
|
401
|
+
auto &result = output_chunk.data[expr_idx];
|
402
|
+
executor.Evaluate(position, input_chunk, result, lstate);
|
1544
403
|
}
|
1545
404
|
output_chunk.SetCardinality(input_chunk);
|
1546
405
|
output_chunk.Verify();
|