duckdb 0.4.1-dev829.0 → 0.4.1-dev833.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +169 -120
- package/src/duckdb.hpp +2 -2
- package/src/parquet-amalgamation.cpp +37435 -37435
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -60907,8 +60907,8 @@ static idx_t FindPrevStart(const ValidityMask &mask, const idx_t l, idx_t r, idx
|
|
|
60907
60907
|
return l;
|
|
60908
60908
|
}
|
|
60909
60909
|
|
|
60910
|
-
static void PrepareInputExpressions(Expression **exprs, idx_t expr_count,
|
|
60911
|
-
|
|
60910
|
+
static void PrepareInputExpressions(Expression **exprs, idx_t expr_count, ExpressionExecutor &executor,
|
|
60911
|
+
DataChunk &chunk) {
|
|
60912
60912
|
if (expr_count == 0) {
|
|
60913
60913
|
return;
|
|
60914
60914
|
}
|
|
@@ -60924,36 +60924,67 @@ static void PrepareInputExpressions(Expression **exprs, idx_t expr_count, ChunkC
|
|
|
60924
60924
|
}
|
|
60925
60925
|
}
|
|
60926
60926
|
|
|
60927
|
-
static void PrepareInputExpression(Expression *expr,
|
|
60928
|
-
|
|
60929
|
-
PrepareInputExpressions(&expr, 1, output, executor, chunk);
|
|
60927
|
+
static void PrepareInputExpression(Expression *expr, ExpressionExecutor &executor, DataChunk &chunk) {
|
|
60928
|
+
PrepareInputExpressions(&expr, 1, executor, chunk);
|
|
60930
60929
|
}
|
|
60931
60930
|
|
|
60932
60931
|
struct WindowInputExpression {
|
|
60933
|
-
WindowInputExpression(Expression *expr_p, Allocator &allocator)
|
|
60934
|
-
: expr(expr_p), scalar(false), collection(allocator), executor(allocator) {
|
|
60932
|
+
WindowInputExpression(Expression *expr_p, Allocator &allocator) : expr(expr_p), scalar(true), executor(allocator) {
|
|
60935
60933
|
if (expr) {
|
|
60936
|
-
PrepareInputExpression(expr,
|
|
60934
|
+
PrepareInputExpression(expr, executor, chunk);
|
|
60937
60935
|
scalar = expr->IsScalar();
|
|
60938
60936
|
}
|
|
60939
60937
|
}
|
|
60940
60938
|
|
|
60941
60939
|
void Execute(DataChunk &input_chunk) {
|
|
60942
|
-
if (expr
|
|
60940
|
+
if (expr) {
|
|
60943
60941
|
chunk.Reset();
|
|
60944
60942
|
executor.Execute(input_chunk, chunk);
|
|
60945
60943
|
chunk.Verify();
|
|
60946
|
-
collection.Append(chunk);
|
|
60947
60944
|
}
|
|
60948
60945
|
}
|
|
60949
60946
|
|
|
60947
|
+
template <typename T>
|
|
60948
|
+
inline T GetCell(idx_t i) const {
|
|
60949
|
+
D_ASSERT(!chunk.data.empty());
|
|
60950
|
+
const auto data = FlatVector::GetData<T>(chunk.data[0]);
|
|
60951
|
+
return data[scalar ? 0 : i];
|
|
60952
|
+
}
|
|
60953
|
+
|
|
60954
|
+
inline bool CellIsNull(idx_t i) const {
|
|
60955
|
+
D_ASSERT(!chunk.data.empty());
|
|
60956
|
+
return FlatVector::IsNull(chunk.data[0], scalar ? 0 : i);
|
|
60957
|
+
}
|
|
60958
|
+
|
|
60959
|
+
inline void CopyCell(Vector &target, idx_t target_offset) const {
|
|
60960
|
+
D_ASSERT(!chunk.data.empty());
|
|
60961
|
+
auto &source = chunk.data[0];
|
|
60962
|
+
auto source_offset = scalar ? 0 : target_offset;
|
|
60963
|
+
VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset);
|
|
60964
|
+
}
|
|
60965
|
+
|
|
60950
60966
|
Expression *expr;
|
|
60951
60967
|
bool scalar;
|
|
60952
|
-
ChunkCollection collection;
|
|
60953
60968
|
ExpressionExecutor executor;
|
|
60954
60969
|
DataChunk chunk;
|
|
60955
60970
|
};
|
|
60956
60971
|
|
|
60972
|
+
struct WindowInputCollection {
|
|
60973
|
+
WindowInputCollection(Expression *expr_p, Allocator &allocator)
|
|
60974
|
+
: input_expr(expr_p, allocator), collection(allocator) {
|
|
60975
|
+
}
|
|
60976
|
+
|
|
60977
|
+
void Append(DataChunk &input_chunk) {
|
|
60978
|
+
if (input_expr.expr && (!input_expr.scalar || collection.Count() == 0)) {
|
|
60979
|
+
input_expr.Execute(input_chunk);
|
|
60980
|
+
collection.Append(input_expr.chunk);
|
|
60981
|
+
}
|
|
60982
|
+
}
|
|
60983
|
+
|
|
60984
|
+
WindowInputExpression input_expr;
|
|
60985
|
+
ChunkCollection collection;
|
|
60986
|
+
};
|
|
60987
|
+
|
|
60957
60988
|
static void ScanRowCollection(RowDataCollection &rows, RowDataCollection &heap, ChunkCollection &cols,
|
|
60958
60989
|
const vector<LogicalType> &types) {
|
|
60959
60990
|
auto &allocator = cols.GetAllocator();
|
|
@@ -61022,11 +61053,10 @@ struct WindowBoundariesState {
|
|
|
61022
61053
|
return expr ? expr->IsScalar() : true;
|
|
61023
61054
|
}
|
|
61024
61055
|
|
|
61025
|
-
|
|
61026
|
-
: type(wexpr->type), start_boundary(wexpr->start), end_boundary(wexpr->end),
|
|
61056
|
+
WindowBoundariesState(BoundWindowExpression *wexpr, const idx_t input_size)
|
|
61057
|
+
: type(wexpr->type), input_size(input_size), start_boundary(wexpr->start), end_boundary(wexpr->end),
|
|
61027
61058
|
partition_count(wexpr->partitions.size()), order_count(wexpr->orders.size()),
|
|
61028
61059
|
range_sense(wexpr->orders.empty() ? OrderType::INVALID : wexpr->orders[0].type),
|
|
61029
|
-
scalar_start(IsScalar(wexpr->start_expr)), scalar_end(IsScalar(wexpr->end_expr)),
|
|
61030
61060
|
has_preceding_range(wexpr->start == WindowBoundary::EXPR_PRECEDING_RANGE ||
|
|
61031
61061
|
wexpr->end == WindowBoundary::EXPR_PRECEDING_RANGE),
|
|
61032
61062
|
has_following_range(wexpr->start == WindowBoundary::EXPR_FOLLOWING_RANGE ||
|
|
@@ -61034,15 +61064,18 @@ struct WindowBoundariesState {
|
|
|
61034
61064
|
needs_peer(BoundaryNeedsPeer(wexpr->end) || wexpr->type == ExpressionType::WINDOW_CUME_DIST) {
|
|
61035
61065
|
}
|
|
61036
61066
|
|
|
61067
|
+
void Update(const idx_t row_idx, ChunkCollection &range_collection, const idx_t source_offset,
|
|
61068
|
+
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
|
61069
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
|
61070
|
+
|
|
61037
61071
|
// Cached lookups
|
|
61038
61072
|
const ExpressionType type;
|
|
61073
|
+
const idx_t input_size;
|
|
61039
61074
|
const WindowBoundary start_boundary;
|
|
61040
61075
|
const WindowBoundary end_boundary;
|
|
61041
|
-
const
|
|
61042
|
-
const
|
|
61076
|
+
const size_t partition_count;
|
|
61077
|
+
const size_t order_count;
|
|
61043
61078
|
const OrderType range_sense;
|
|
61044
|
-
const bool scalar_start;
|
|
61045
|
-
const bool scalar_end;
|
|
61046
61079
|
const bool has_preceding_range;
|
|
61047
61080
|
const bool has_following_range;
|
|
61048
61081
|
const bool needs_peer;
|
|
@@ -61134,9 +61167,9 @@ struct OperationCompare : public std::function<bool(T, T)> {
|
|
|
61134
61167
|
|
|
61135
61168
|
template <typename T, typename OP, bool FROM>
|
|
61136
61169
|
static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin,
|
|
61137
|
-
const idx_t order_end,
|
|
61138
|
-
D_ASSERT(!CellIsNull(
|
|
61139
|
-
const auto val = GetCell<T>(
|
|
61170
|
+
const idx_t order_end, WindowInputExpression &boundary, const idx_t boundary_row) {
|
|
61171
|
+
D_ASSERT(!boundary.CellIsNull(boundary_row));
|
|
61172
|
+
const auto val = boundary.GetCell<T>(boundary_row);
|
|
61140
61173
|
|
|
61141
61174
|
OperationCompare<T, OP> comp;
|
|
61142
61175
|
ChunkCollectionIterator<T> begin(over, order_col, order_begin);
|
|
@@ -61150,11 +61183,11 @@ static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, c
|
|
|
61150
61183
|
|
|
61151
61184
|
template <typename OP, bool FROM>
|
|
61152
61185
|
static idx_t FindRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin,
|
|
61153
|
-
const idx_t order_end,
|
|
61186
|
+
const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
|
|
61154
61187
|
const auto &over_types = over.Types();
|
|
61155
61188
|
D_ASSERT(over_types.size() > order_col);
|
|
61156
|
-
D_ASSERT(boundary.
|
|
61157
|
-
D_ASSERT(boundary.
|
|
61189
|
+
D_ASSERT(boundary.chunk.ColumnCount() == 1);
|
|
61190
|
+
D_ASSERT(boundary.chunk.data[0].GetType() == over_types[order_col]);
|
|
61158
61191
|
|
|
61159
61192
|
switch (over_types[order_col].InternalType()) {
|
|
61160
61193
|
case PhysicalType::INT8:
|
|
@@ -61188,7 +61221,7 @@ static idx_t FindRangeBound(ChunkCollection &over, const idx_t order_col, const
|
|
|
61188
61221
|
|
|
61189
61222
|
template <bool FROM>
|
|
61190
61223
|
static idx_t FindOrderedRangeBound(ChunkCollection &over, const idx_t order_col, const OrderType range_sense,
|
|
61191
|
-
const idx_t order_begin, const idx_t order_end,
|
|
61224
|
+
const idx_t order_begin, const idx_t order_end, WindowInputExpression &boundary,
|
|
61192
61225
|
const idx_t expr_idx) {
|
|
61193
61226
|
switch (range_sense) {
|
|
61194
61227
|
case OrderType::ASCENDING:
|
|
@@ -61200,11 +61233,11 @@ static idx_t FindOrderedRangeBound(ChunkCollection &over, const idx_t order_col,
|
|
|
61200
61233
|
}
|
|
61201
61234
|
}
|
|
61202
61235
|
|
|
61203
|
-
|
|
61204
|
-
|
|
61205
|
-
|
|
61206
|
-
const ValidityMask &order_mask) {
|
|
61236
|
+
void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_collection, const idx_t expr_idx,
|
|
61237
|
+
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
|
61238
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask) {
|
|
61207
61239
|
|
|
61240
|
+
auto &bounds = *this;
|
|
61208
61241
|
if (bounds.partition_count + bounds.order_count > 0) {
|
|
61209
61242
|
|
|
61210
61243
|
// determine partition and peer group boundaries to ultimately figure out window size
|
|
@@ -61217,10 +61250,10 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61217
61250
|
bounds.peer_start = row_idx;
|
|
61218
61251
|
|
|
61219
61252
|
// find end of partition
|
|
61220
|
-
bounds.partition_end = input_size;
|
|
61253
|
+
bounds.partition_end = bounds.input_size;
|
|
61221
61254
|
if (bounds.partition_count) {
|
|
61222
61255
|
idx_t n = 1;
|
|
61223
|
-
bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, input_size, n);
|
|
61256
|
+
bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, bounds.input_size, n);
|
|
61224
61257
|
}
|
|
61225
61258
|
|
|
61226
61259
|
// Find valid ordering values for the new partition
|
|
@@ -61259,7 +61292,7 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61259
61292
|
} else {
|
|
61260
61293
|
bounds.is_same_partition = false;
|
|
61261
61294
|
bounds.is_peer = true;
|
|
61262
|
-
bounds.partition_end = input_size;
|
|
61295
|
+
bounds.partition_end = bounds.input_size;
|
|
61263
61296
|
bounds.peer_end = bounds.partition_end;
|
|
61264
61297
|
}
|
|
61265
61298
|
|
|
@@ -61278,33 +61311,28 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61278
61311
|
bounds.window_start = bounds.peer_start;
|
|
61279
61312
|
break;
|
|
61280
61313
|
case WindowBoundary::EXPR_PRECEDING_ROWS: {
|
|
61281
|
-
bounds.window_start =
|
|
61282
|
-
(int64_t)row_idx - GetCell<int64_t>(boundary_start_collection, 0, bounds.scalar_start ? 0 : row_idx);
|
|
61314
|
+
bounds.window_start = (int64_t)row_idx - boundary_start.GetCell<int64_t>(expr_idx);
|
|
61283
61315
|
break;
|
|
61284
61316
|
}
|
|
61285
61317
|
case WindowBoundary::EXPR_FOLLOWING_ROWS: {
|
|
61286
|
-
bounds.window_start =
|
|
61287
|
-
row_idx + GetCell<int64_t>(boundary_start_collection, 0, bounds.scalar_start ? 0 : row_idx);
|
|
61318
|
+
bounds.window_start = row_idx + boundary_start.GetCell<int64_t>(expr_idx);
|
|
61288
61319
|
break;
|
|
61289
61320
|
}
|
|
61290
61321
|
case WindowBoundary::EXPR_PRECEDING_RANGE: {
|
|
61291
|
-
|
|
61292
|
-
if (CellIsNull(boundary_start_collection, 0, expr_idx)) {
|
|
61322
|
+
if (boundary_start.CellIsNull(expr_idx)) {
|
|
61293
61323
|
bounds.window_start = bounds.peer_start;
|
|
61294
61324
|
} else {
|
|
61295
|
-
bounds.window_start =
|
|
61296
|
-
|
|
61297
|
-
boundary_start_collection, expr_idx);
|
|
61325
|
+
bounds.window_start = FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense,
|
|
61326
|
+
bounds.valid_start, row_idx, boundary_start, expr_idx);
|
|
61298
61327
|
}
|
|
61299
61328
|
break;
|
|
61300
61329
|
}
|
|
61301
61330
|
case WindowBoundary::EXPR_FOLLOWING_RANGE: {
|
|
61302
|
-
|
|
61303
|
-
if (CellIsNull(boundary_start_collection, 0, expr_idx)) {
|
|
61331
|
+
if (boundary_start.CellIsNull(expr_idx)) {
|
|
61304
61332
|
bounds.window_start = bounds.peer_start;
|
|
61305
61333
|
} else {
|
|
61306
61334
|
bounds.window_start = FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense, row_idx,
|
|
61307
|
-
bounds.valid_end,
|
|
61335
|
+
bounds.valid_end, boundary_start, expr_idx);
|
|
61308
61336
|
}
|
|
61309
61337
|
break;
|
|
61310
61338
|
}
|
|
@@ -61323,30 +61351,26 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61323
61351
|
bounds.window_end = bounds.partition_end;
|
|
61324
61352
|
break;
|
|
61325
61353
|
case WindowBoundary::EXPR_PRECEDING_ROWS:
|
|
61326
|
-
bounds.window_end =
|
|
61327
|
-
(int64_t)row_idx - GetCell<int64_t>(boundary_end_collection, 0, bounds.scalar_end ? 0 : row_idx) + 1;
|
|
61354
|
+
bounds.window_end = (int64_t)row_idx - boundary_end.GetCell<int64_t>(expr_idx) + 1;
|
|
61328
61355
|
break;
|
|
61329
61356
|
case WindowBoundary::EXPR_FOLLOWING_ROWS:
|
|
61330
|
-
bounds.window_end = row_idx + GetCell<int64_t>(
|
|
61357
|
+
bounds.window_end = row_idx + boundary_end.GetCell<int64_t>(expr_idx) + 1;
|
|
61331
61358
|
break;
|
|
61332
61359
|
case WindowBoundary::EXPR_PRECEDING_RANGE: {
|
|
61333
|
-
|
|
61334
|
-
if (CellIsNull(boundary_end_collection, 0, expr_idx)) {
|
|
61360
|
+
if (boundary_end.CellIsNull(expr_idx)) {
|
|
61335
61361
|
bounds.window_end = bounds.peer_end;
|
|
61336
61362
|
} else {
|
|
61337
|
-
bounds.window_end =
|
|
61338
|
-
|
|
61339
|
-
boundary_end_collection, expr_idx);
|
|
61363
|
+
bounds.window_end = FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense,
|
|
61364
|
+
bounds.valid_start, row_idx, boundary_end, expr_idx);
|
|
61340
61365
|
}
|
|
61341
61366
|
break;
|
|
61342
61367
|
}
|
|
61343
61368
|
case WindowBoundary::EXPR_FOLLOWING_RANGE: {
|
|
61344
|
-
|
|
61345
|
-
if (CellIsNull(boundary_end_collection, 0, expr_idx)) {
|
|
61369
|
+
if (boundary_end.CellIsNull(expr_idx)) {
|
|
61346
61370
|
bounds.window_end = bounds.peer_end;
|
|
61347
61371
|
} else {
|
|
61348
61372
|
bounds.window_end = FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense, row_idx,
|
|
61349
|
-
bounds.valid_end,
|
|
61373
|
+
bounds.valid_end, boundary_end, expr_idx);
|
|
61350
61374
|
}
|
|
61351
61375
|
break;
|
|
61352
61376
|
}
|
|
@@ -61373,37 +61397,62 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
|
|
|
61373
61397
|
}
|
|
61374
61398
|
}
|
|
61375
61399
|
|
|
61376
|
-
|
|
61377
|
-
|
|
61378
|
-
WindowAggregationMode mode) {
|
|
61400
|
+
struct WindowExecutor {
|
|
61401
|
+
WindowExecutor(BoundWindowExpression *wexpr, ChunkCollection &input, WindowAggregationMode mode);
|
|
61379
61402
|
|
|
61380
|
-
|
|
61381
|
-
|
|
61382
|
-
uint64_t dense_rank = 1, rank_equal = 0, rank = 1;
|
|
61403
|
+
void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
|
|
61404
|
+
const ValidityMask &order_mask);
|
|
61383
61405
|
|
|
61384
|
-
//
|
|
61385
|
-
|
|
61406
|
+
// The function
|
|
61407
|
+
BoundWindowExpression *wexpr;
|
|
61386
61408
|
|
|
61387
|
-
|
|
61409
|
+
// Frame management
|
|
61410
|
+
WindowBoundariesState bounds;
|
|
61411
|
+
uint64_t dense_rank;
|
|
61412
|
+
uint64_t rank_equal;
|
|
61413
|
+
uint64_t rank;
|
|
61388
61414
|
|
|
61389
|
-
//
|
|
61390
|
-
ChunkCollection payload_collection
|
|
61391
|
-
vector<Expression *> exprs;
|
|
61392
|
-
for (auto &child : wexpr->children) {
|
|
61393
|
-
exprs.push_back(child.get());
|
|
61394
|
-
}
|
|
61415
|
+
// Expression collections
|
|
61416
|
+
ChunkCollection payload_collection;
|
|
61395
61417
|
|
|
61396
|
-
|
|
61397
|
-
|
|
61398
|
-
DataChunk payload_chunk;
|
|
61399
|
-
PrepareInputExpressions(exprs.data(), exprs.size(), payload_collection, payload_executor, payload_chunk);
|
|
61418
|
+
WindowInputExpression leadlag_offset;
|
|
61419
|
+
WindowInputExpression leadlag_default;
|
|
61400
61420
|
|
|
61401
|
-
|
|
61402
|
-
WindowInputExpression
|
|
61421
|
+
// evaluate boundaries if present. Parser has checked boundary types.
|
|
61422
|
+
WindowInputExpression boundary_start;
|
|
61423
|
+
WindowInputExpression boundary_end;
|
|
61403
61424
|
|
|
61404
|
-
// evaluate
|
|
61425
|
+
// evaluate RANGE expressions, if needed
|
|
61426
|
+
WindowInputCollection range;
|
|
61427
|
+
|
|
61428
|
+
// IGNORE NULLS
|
|
61429
|
+
ValidityMask ignore_nulls;
|
|
61430
|
+
|
|
61431
|
+
// build a segment tree for frame-adhering aggregates
|
|
61432
|
+
// see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
|
|
61405
61433
|
ValidityMask filter_mask;
|
|
61406
61434
|
vector<validity_t> filter_bits;
|
|
61435
|
+
unique_ptr<WindowSegmentTree> segment_tree = nullptr;
|
|
61436
|
+
};
|
|
61437
|
+
|
|
61438
|
+
WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, ChunkCollection &input, WindowAggregationMode mode)
|
|
61439
|
+
: wexpr(wexpr), bounds(wexpr, input.Count()), dense_rank(1), rank_equal(0), rank(1),
|
|
61440
|
+
payload_collection(input.GetAllocator()), leadlag_offset(wexpr->offset_expr.get(), input.GetAllocator()),
|
|
61441
|
+
leadlag_default(wexpr->default_expr.get(), input.GetAllocator()),
|
|
61442
|
+
boundary_start(wexpr->start_expr.get(), input.GetAllocator()),
|
|
61443
|
+
boundary_end(wexpr->end_expr.get(), input.GetAllocator()),
|
|
61444
|
+
range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr->orders[0].expression.get() : nullptr,
|
|
61445
|
+
input.GetAllocator())
|
|
61446
|
+
|
|
61447
|
+
{
|
|
61448
|
+
auto &allocator = input.GetAllocator();
|
|
61449
|
+
|
|
61450
|
+
// TODO we could evaluate those expressions in parallel
|
|
61451
|
+
|
|
61452
|
+
// Single pass over the input to produce the payload columns.
|
|
61453
|
+
// Vectorisation for the win...
|
|
61454
|
+
|
|
61455
|
+
// evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
|
|
61407
61456
|
ExpressionExecutor filter_executor(allocator);
|
|
61408
61457
|
SelectionVector filter_sel;
|
|
61409
61458
|
if (wexpr->filter_expr) {
|
|
@@ -61414,20 +61463,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61414
61463
|
filter_sel.Initialize(STANDARD_VECTOR_SIZE);
|
|
61415
61464
|
}
|
|
61416
61465
|
|
|
61417
|
-
// evaluate boundaries if present. Parser has checked boundary types.
|
|
61418
|
-
WindowInputExpression boundary_start(wexpr->start_expr.get(), allocator);
|
|
61419
|
-
WindowInputExpression boundary_end(wexpr->end_expr.get(), allocator);
|
|
61420
|
-
|
|
61421
|
-
// evaluate RANGE expressions, if needed
|
|
61422
|
-
Expression *range_expr = nullptr;
|
|
61423
|
-
if (bounds.has_preceding_range || bounds.has_following_range) {
|
|
61424
|
-
D_ASSERT(wexpr->orders.size() == 1);
|
|
61425
|
-
range_expr = wexpr->orders[0].expression.get();
|
|
61426
|
-
}
|
|
61427
|
-
WindowInputExpression range(range_expr, allocator);
|
|
61428
|
-
|
|
61429
61466
|
// Set up a validity mask for IGNORE NULLS
|
|
61430
|
-
ValidityMask ignore_nulls;
|
|
61431
61467
|
bool check_nulls = false;
|
|
61432
61468
|
if (wexpr->ignore_nulls) {
|
|
61433
61469
|
switch (wexpr->type) {
|
|
@@ -61443,7 +61479,18 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61443
61479
|
}
|
|
61444
61480
|
}
|
|
61445
61481
|
|
|
61446
|
-
//
|
|
61482
|
+
// TODO: child may be a scalar, don't need to materialize the whole collection then
|
|
61483
|
+
ExpressionExecutor payload_executor(allocator);
|
|
61484
|
+
DataChunk payload_chunk;
|
|
61485
|
+
|
|
61486
|
+
// evaluate inner expressions of window functions, could be more complex
|
|
61487
|
+
vector<Expression *> exprs;
|
|
61488
|
+
for (auto &child : wexpr->children) {
|
|
61489
|
+
exprs.push_back(child.get());
|
|
61490
|
+
}
|
|
61491
|
+
PrepareInputExpressions(exprs.data(), exprs.size(), payload_executor, payload_chunk);
|
|
61492
|
+
|
|
61493
|
+
// Single pass over the input to produce the global data.
|
|
61447
61494
|
// Vectorisation for the win...
|
|
61448
61495
|
idx_t input_idx = 0;
|
|
61449
61496
|
for (auto &input_chunk : input.Chunks()) {
|
|
@@ -61475,9 +61522,6 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61475
61522
|
}
|
|
61476
61523
|
}
|
|
61477
61524
|
|
|
61478
|
-
leadlag_offset.Execute(*input_chunk);
|
|
61479
|
-
leadlag_default.Execute(*input_chunk);
|
|
61480
|
-
|
|
61481
61525
|
if (wexpr->filter_expr) {
|
|
61482
61526
|
const auto filtered = filter_executor.SelectExpression(*input_chunk, filter_sel);
|
|
61483
61527
|
for (idx_t f = 0; f < filtered; ++f) {
|
|
@@ -61485,40 +61529,34 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61485
61529
|
}
|
|
61486
61530
|
}
|
|
61487
61531
|
|
|
61488
|
-
|
|
61489
|
-
boundary_end.Execute(*input_chunk);
|
|
61490
|
-
|
|
61491
|
-
range.Execute(*input_chunk);
|
|
61532
|
+
range.Append(*input_chunk);
|
|
61492
61533
|
|
|
61493
61534
|
input_idx += count;
|
|
61494
61535
|
}
|
|
61495
61536
|
|
|
61496
61537
|
// build a segment tree for frame-adhering aggregates
|
|
61497
61538
|
// see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
|
|
61498
|
-
unique_ptr<WindowSegmentTree> segment_tree = nullptr;
|
|
61499
61539
|
|
|
61500
61540
|
if (wexpr->aggregate) {
|
|
61501
61541
|
segment_tree = make_unique<WindowSegmentTree>(*(wexpr->aggregate), wexpr->bind_info.get(), wexpr->return_type,
|
|
61502
61542
|
&payload_collection, filter_mask, mode);
|
|
61503
61543
|
}
|
|
61544
|
+
}
|
|
61504
61545
|
|
|
61505
|
-
|
|
61506
|
-
|
|
61507
|
-
|
|
61508
|
-
|
|
61509
|
-
|
|
61510
|
-
// Grow the chunk if necessary.
|
|
61511
|
-
const auto output_offset = row_idx % STANDARD_VECTOR_SIZE;
|
|
61512
|
-
if (output_offset == 0) {
|
|
61513
|
-
output.Append(output_chunk);
|
|
61514
|
-
output_chunk.Reset();
|
|
61515
|
-
output_chunk.SetCardinality(MinValue(idx_t(STANDARD_VECTOR_SIZE), input.Count() - row_idx));
|
|
61516
|
-
}
|
|
61517
|
-
auto &result = output_chunk.data[0];
|
|
61546
|
+
void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
|
|
61547
|
+
const ValidityMask &order_mask) {
|
|
61548
|
+
// Evaluate the row-level arguments
|
|
61549
|
+
boundary_start.Execute(input_chunk);
|
|
61550
|
+
boundary_end.Execute(input_chunk);
|
|
61518
61551
|
|
|
61552
|
+
leadlag_offset.Execute(input_chunk);
|
|
61553
|
+
leadlag_default.Execute(input_chunk);
|
|
61554
|
+
|
|
61555
|
+
// this is the main loop, go through all sorted rows and compute window function result
|
|
61556
|
+
for (idx_t output_offset = 0; output_offset < input_chunk.size(); ++output_offset, ++row_idx) {
|
|
61519
61557
|
// special case, OVER (), aggregate over everything
|
|
61520
|
-
|
|
61521
|
-
|
|
61558
|
+
bounds.Update(row_idx, range.collection, output_offset, boundary_start, boundary_end, partition_mask,
|
|
61559
|
+
order_mask);
|
|
61522
61560
|
if (WindowNeedsRank(wexpr)) {
|
|
61523
61561
|
if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
|
|
61524
61562
|
dense_rank = 1;
|
|
@@ -61615,7 +61653,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61615
61653
|
case ExpressionType::WINDOW_LAG: {
|
|
61616
61654
|
int64_t offset = 1;
|
|
61617
61655
|
if (wexpr->offset_expr) {
|
|
61618
|
-
offset = GetCell<int64_t>(
|
|
61656
|
+
offset = leadlag_offset.GetCell<int64_t>(output_offset);
|
|
61619
61657
|
}
|
|
61620
61658
|
int64_t val_idx = (int64_t)row_idx;
|
|
61621
61659
|
if (wexpr->type == ExpressionType::WINDOW_LEAD) {
|
|
@@ -61638,8 +61676,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61638
61676
|
if (!delta) {
|
|
61639
61677
|
payload_collection.CopyCell(0, val_idx, result, output_offset);
|
|
61640
61678
|
} else if (wexpr->default_expr) {
|
|
61641
|
-
|
|
61642
|
-
leadlag_default.collection.CopyCell(0, source_row, result, output_offset);
|
|
61679
|
+
leadlag_default.CopyCell(result, output_offset);
|
|
61643
61680
|
} else {
|
|
61644
61681
|
FlatVector::SetNull(result, output_offset, true);
|
|
61645
61682
|
}
|
|
@@ -61684,8 +61721,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
61684
61721
|
}
|
|
61685
61722
|
}
|
|
61686
61723
|
|
|
61687
|
-
|
|
61688
|
-
output.Append(output_chunk);
|
|
61724
|
+
result.Verify(input_chunk.size());
|
|
61689
61725
|
}
|
|
61690
61726
|
|
|
61691
61727
|
using WindowExpressions = vector<BoundWindowExpression *>;
|
|
@@ -61699,8 +61735,21 @@ static void ComputeWindowExpressions(WindowExpressions &window_exprs, ChunkColle
|
|
|
61699
61735
|
}
|
|
61700
61736
|
// Compute the functions columnwise
|
|
61701
61737
|
for (idx_t expr_idx = 0; expr_idx < window_exprs.size(); ++expr_idx) {
|
|
61738
|
+
auto wexpr = window_exprs[expr_idx];
|
|
61739
|
+
WindowExecutor executor(wexpr, input, mode);
|
|
61702
61740
|
ChunkCollection output(input.GetAllocator());
|
|
61703
|
-
|
|
61741
|
+
DataChunk output_chunk;
|
|
61742
|
+
const vector<LogicalType> output_types(1, wexpr->return_type);
|
|
61743
|
+
output_chunk.Initialize(input.GetAllocator(), output_types);
|
|
61744
|
+
idx_t row_idx = 0;
|
|
61745
|
+
for (auto &input_chunk : input.Chunks()) {
|
|
61746
|
+
output_chunk.Reset();
|
|
61747
|
+
executor.Evaluate(row_idx, *input_chunk, output_chunk.data[0], partition_mask, order_mask);
|
|
61748
|
+
output_chunk.SetCardinality(*input_chunk);
|
|
61749
|
+
output_chunk.Verify();
|
|
61750
|
+
output.Append(output_chunk);
|
|
61751
|
+
row_idx += input_chunk->size();
|
|
61752
|
+
}
|
|
61704
61753
|
window_results.Fuse(output);
|
|
61705
61754
|
}
|
|
61706
61755
|
}
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "6281f2b2e"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev833"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|