duckdb 0.4.1-dev829.0 → 0.4.1-dev833.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.4.1-dev829.0",
4
+ "version": "0.4.1-dev833.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -60907,8 +60907,8 @@ static idx_t FindPrevStart(const ValidityMask &mask, const idx_t l, idx_t r, idx
60907
60907
  return l;
60908
60908
  }
60909
60909
 
60910
- static void PrepareInputExpressions(Expression **exprs, idx_t expr_count, ChunkCollection &output,
60911
- ExpressionExecutor &executor, DataChunk &chunk) {
60910
+ static void PrepareInputExpressions(Expression **exprs, idx_t expr_count, ExpressionExecutor &executor,
60911
+ DataChunk &chunk) {
60912
60912
  if (expr_count == 0) {
60913
60913
  return;
60914
60914
  }
@@ -60924,36 +60924,67 @@ static void PrepareInputExpressions(Expression **exprs, idx_t expr_count, ChunkC
60924
60924
  }
60925
60925
  }
60926
60926
 
60927
- static void PrepareInputExpression(Expression *expr, ChunkCollection &output, ExpressionExecutor &executor,
60928
- DataChunk &chunk) {
60929
- PrepareInputExpressions(&expr, 1, output, executor, chunk);
60927
+ static void PrepareInputExpression(Expression *expr, ExpressionExecutor &executor, DataChunk &chunk) {
60928
+ PrepareInputExpressions(&expr, 1, executor, chunk);
60930
60929
  }
60931
60930
 
60932
60931
  struct WindowInputExpression {
60933
- WindowInputExpression(Expression *expr_p, Allocator &allocator)
60934
- : expr(expr_p), scalar(false), collection(allocator), executor(allocator) {
60932
+ WindowInputExpression(Expression *expr_p, Allocator &allocator) : expr(expr_p), scalar(true), executor(allocator) {
60935
60933
  if (expr) {
60936
- PrepareInputExpression(expr, collection, executor, chunk);
60934
+ PrepareInputExpression(expr, executor, chunk);
60937
60935
  scalar = expr->IsScalar();
60938
60936
  }
60939
60937
  }
60940
60938
 
60941
60939
  void Execute(DataChunk &input_chunk) {
60942
- if (expr && (!scalar || collection.Count() == 0)) {
60940
+ if (expr) {
60943
60941
  chunk.Reset();
60944
60942
  executor.Execute(input_chunk, chunk);
60945
60943
  chunk.Verify();
60946
- collection.Append(chunk);
60947
60944
  }
60948
60945
  }
60949
60946
 
60947
+ template <typename T>
60948
+ inline T GetCell(idx_t i) const {
60949
+ D_ASSERT(!chunk.data.empty());
60950
+ const auto data = FlatVector::GetData<T>(chunk.data[0]);
60951
+ return data[scalar ? 0 : i];
60952
+ }
60953
+
60954
+ inline bool CellIsNull(idx_t i) const {
60955
+ D_ASSERT(!chunk.data.empty());
60956
+ return FlatVector::IsNull(chunk.data[0], scalar ? 0 : i);
60957
+ }
60958
+
60959
+ inline void CopyCell(Vector &target, idx_t target_offset) const {
60960
+ D_ASSERT(!chunk.data.empty());
60961
+ auto &source = chunk.data[0];
60962
+ auto source_offset = scalar ? 0 : target_offset;
60963
+ VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset);
60964
+ }
60965
+
60950
60966
  Expression *expr;
60951
60967
  bool scalar;
60952
- ChunkCollection collection;
60953
60968
  ExpressionExecutor executor;
60954
60969
  DataChunk chunk;
60955
60970
  };
60956
60971
 
60972
+ struct WindowInputCollection {
60973
+ WindowInputCollection(Expression *expr_p, Allocator &allocator)
60974
+ : input_expr(expr_p, allocator), collection(allocator) {
60975
+ }
60976
+
60977
+ void Append(DataChunk &input_chunk) {
60978
+ if (input_expr.expr && (!input_expr.scalar || collection.Count() == 0)) {
60979
+ input_expr.Execute(input_chunk);
60980
+ collection.Append(input_expr.chunk);
60981
+ }
60982
+ }
60983
+
60984
+ WindowInputExpression input_expr;
60985
+ ChunkCollection collection;
60986
+ };
60987
+
60957
60988
  static void ScanRowCollection(RowDataCollection &rows, RowDataCollection &heap, ChunkCollection &cols,
60958
60989
  const vector<LogicalType> &types) {
60959
60990
  auto &allocator = cols.GetAllocator();
@@ -61022,11 +61053,10 @@ struct WindowBoundariesState {
61022
61053
  return expr ? expr->IsScalar() : true;
61023
61054
  }
61024
61055
 
61025
- explicit WindowBoundariesState(BoundWindowExpression *wexpr)
61026
- : type(wexpr->type), start_boundary(wexpr->start), end_boundary(wexpr->end),
61056
+ WindowBoundariesState(BoundWindowExpression *wexpr, const idx_t input_size)
61057
+ : type(wexpr->type), input_size(input_size), start_boundary(wexpr->start), end_boundary(wexpr->end),
61027
61058
  partition_count(wexpr->partitions.size()), order_count(wexpr->orders.size()),
61028
61059
  range_sense(wexpr->orders.empty() ? OrderType::INVALID : wexpr->orders[0].type),
61029
- scalar_start(IsScalar(wexpr->start_expr)), scalar_end(IsScalar(wexpr->end_expr)),
61030
61060
  has_preceding_range(wexpr->start == WindowBoundary::EXPR_PRECEDING_RANGE ||
61031
61061
  wexpr->end == WindowBoundary::EXPR_PRECEDING_RANGE),
61032
61062
  has_following_range(wexpr->start == WindowBoundary::EXPR_FOLLOWING_RANGE ||
@@ -61034,15 +61064,18 @@ struct WindowBoundariesState {
61034
61064
  needs_peer(BoundaryNeedsPeer(wexpr->end) || wexpr->type == ExpressionType::WINDOW_CUME_DIST) {
61035
61065
  }
61036
61066
 
61067
+ void Update(const idx_t row_idx, ChunkCollection &range_collection, const idx_t source_offset,
61068
+ WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
61069
+ const ValidityMask &partition_mask, const ValidityMask &order_mask);
61070
+
61037
61071
  // Cached lookups
61038
61072
  const ExpressionType type;
61073
+ const idx_t input_size;
61039
61074
  const WindowBoundary start_boundary;
61040
61075
  const WindowBoundary end_boundary;
61041
- const idx_t partition_count;
61042
- const idx_t order_count;
61076
+ const size_t partition_count;
61077
+ const size_t order_count;
61043
61078
  const OrderType range_sense;
61044
- const bool scalar_start;
61045
- const bool scalar_end;
61046
61079
  const bool has_preceding_range;
61047
61080
  const bool has_following_range;
61048
61081
  const bool needs_peer;
@@ -61134,9 +61167,9 @@ struct OperationCompare : public std::function<bool(T, T)> {
61134
61167
 
61135
61168
  template <typename T, typename OP, bool FROM>
61136
61169
  static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin,
61137
- const idx_t order_end, ChunkCollection &boundary, const idx_t boundary_row) {
61138
- D_ASSERT(!CellIsNull(boundary, 0, boundary_row));
61139
- const auto val = GetCell<T>(boundary, 0, boundary_row);
61170
+ const idx_t order_end, WindowInputExpression &boundary, const idx_t boundary_row) {
61171
+ D_ASSERT(!boundary.CellIsNull(boundary_row));
61172
+ const auto val = boundary.GetCell<T>(boundary_row);
61140
61173
 
61141
61174
  OperationCompare<T, OP> comp;
61142
61175
  ChunkCollectionIterator<T> begin(over, order_col, order_begin);
@@ -61150,11 +61183,11 @@ static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, c
61150
61183
 
61151
61184
  template <typename OP, bool FROM>
61152
61185
  static idx_t FindRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin,
61153
- const idx_t order_end, ChunkCollection &boundary, const idx_t expr_idx) {
61186
+ const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
61154
61187
  const auto &over_types = over.Types();
61155
61188
  D_ASSERT(over_types.size() > order_col);
61156
- D_ASSERT(boundary.Types().size() == 1);
61157
- D_ASSERT(boundary.Types()[0] == over_types[order_col]);
61189
+ D_ASSERT(boundary.chunk.ColumnCount() == 1);
61190
+ D_ASSERT(boundary.chunk.data[0].GetType() == over_types[order_col]);
61158
61191
 
61159
61192
  switch (over_types[order_col].InternalType()) {
61160
61193
  case PhysicalType::INT8:
@@ -61188,7 +61221,7 @@ static idx_t FindRangeBound(ChunkCollection &over, const idx_t order_col, const
61188
61221
 
61189
61222
  template <bool FROM>
61190
61223
  static idx_t FindOrderedRangeBound(ChunkCollection &over, const idx_t order_col, const OrderType range_sense,
61191
- const idx_t order_begin, const idx_t order_end, ChunkCollection &boundary,
61224
+ const idx_t order_begin, const idx_t order_end, WindowInputExpression &boundary,
61192
61225
  const idx_t expr_idx) {
61193
61226
  switch (range_sense) {
61194
61227
  case OrderType::ASCENDING:
@@ -61200,11 +61233,11 @@ static idx_t FindOrderedRangeBound(ChunkCollection &over, const idx_t order_col,
61200
61233
  }
61201
61234
  }
61202
61235
 
61203
- static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t input_size, const idx_t row_idx,
61204
- ChunkCollection &range_collection, ChunkCollection &boundary_start_collection,
61205
- ChunkCollection &boundary_end_collection, const ValidityMask &partition_mask,
61206
- const ValidityMask &order_mask) {
61236
+ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_collection, const idx_t expr_idx,
61237
+ WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
61238
+ const ValidityMask &partition_mask, const ValidityMask &order_mask) {
61207
61239
 
61240
+ auto &bounds = *this;
61208
61241
  if (bounds.partition_count + bounds.order_count > 0) {
61209
61242
 
61210
61243
  // determine partition and peer group boundaries to ultimately figure out window size
@@ -61217,10 +61250,10 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
61217
61250
  bounds.peer_start = row_idx;
61218
61251
 
61219
61252
  // find end of partition
61220
- bounds.partition_end = input_size;
61253
+ bounds.partition_end = bounds.input_size;
61221
61254
  if (bounds.partition_count) {
61222
61255
  idx_t n = 1;
61223
- bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, input_size, n);
61256
+ bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, bounds.input_size, n);
61224
61257
  }
61225
61258
 
61226
61259
  // Find valid ordering values for the new partition
@@ -61259,7 +61292,7 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
61259
61292
  } else {
61260
61293
  bounds.is_same_partition = false;
61261
61294
  bounds.is_peer = true;
61262
- bounds.partition_end = input_size;
61295
+ bounds.partition_end = bounds.input_size;
61263
61296
  bounds.peer_end = bounds.partition_end;
61264
61297
  }
61265
61298
 
@@ -61278,33 +61311,28 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
61278
61311
  bounds.window_start = bounds.peer_start;
61279
61312
  break;
61280
61313
  case WindowBoundary::EXPR_PRECEDING_ROWS: {
61281
- bounds.window_start =
61282
- (int64_t)row_idx - GetCell<int64_t>(boundary_start_collection, 0, bounds.scalar_start ? 0 : row_idx);
61314
+ bounds.window_start = (int64_t)row_idx - boundary_start.GetCell<int64_t>(expr_idx);
61283
61315
  break;
61284
61316
  }
61285
61317
  case WindowBoundary::EXPR_FOLLOWING_ROWS: {
61286
- bounds.window_start =
61287
- row_idx + GetCell<int64_t>(boundary_start_collection, 0, bounds.scalar_start ? 0 : row_idx);
61318
+ bounds.window_start = row_idx + boundary_start.GetCell<int64_t>(expr_idx);
61288
61319
  break;
61289
61320
  }
61290
61321
  case WindowBoundary::EXPR_PRECEDING_RANGE: {
61291
- const auto expr_idx = bounds.scalar_start ? 0 : row_idx;
61292
- if (CellIsNull(boundary_start_collection, 0, expr_idx)) {
61322
+ if (boundary_start.CellIsNull(expr_idx)) {
61293
61323
  bounds.window_start = bounds.peer_start;
61294
61324
  } else {
61295
- bounds.window_start =
61296
- FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense, bounds.valid_start, row_idx,
61297
- boundary_start_collection, expr_idx);
61325
+ bounds.window_start = FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense,
61326
+ bounds.valid_start, row_idx, boundary_start, expr_idx);
61298
61327
  }
61299
61328
  break;
61300
61329
  }
61301
61330
  case WindowBoundary::EXPR_FOLLOWING_RANGE: {
61302
- const auto expr_idx = bounds.scalar_start ? 0 : row_idx;
61303
- if (CellIsNull(boundary_start_collection, 0, expr_idx)) {
61331
+ if (boundary_start.CellIsNull(expr_idx)) {
61304
61332
  bounds.window_start = bounds.peer_start;
61305
61333
  } else {
61306
61334
  bounds.window_start = FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense, row_idx,
61307
- bounds.valid_end, boundary_start_collection, expr_idx);
61335
+ bounds.valid_end, boundary_start, expr_idx);
61308
61336
  }
61309
61337
  break;
61310
61338
  }
@@ -61323,30 +61351,26 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
61323
61351
  bounds.window_end = bounds.partition_end;
61324
61352
  break;
61325
61353
  case WindowBoundary::EXPR_PRECEDING_ROWS:
61326
- bounds.window_end =
61327
- (int64_t)row_idx - GetCell<int64_t>(boundary_end_collection, 0, bounds.scalar_end ? 0 : row_idx) + 1;
61354
+ bounds.window_end = (int64_t)row_idx - boundary_end.GetCell<int64_t>(expr_idx) + 1;
61328
61355
  break;
61329
61356
  case WindowBoundary::EXPR_FOLLOWING_ROWS:
61330
- bounds.window_end = row_idx + GetCell<int64_t>(boundary_end_collection, 0, bounds.scalar_end ? 0 : row_idx) + 1;
61357
+ bounds.window_end = row_idx + boundary_end.GetCell<int64_t>(expr_idx) + 1;
61331
61358
  break;
61332
61359
  case WindowBoundary::EXPR_PRECEDING_RANGE: {
61333
- const auto expr_idx = bounds.scalar_end ? 0 : row_idx;
61334
- if (CellIsNull(boundary_end_collection, 0, expr_idx)) {
61360
+ if (boundary_end.CellIsNull(expr_idx)) {
61335
61361
  bounds.window_end = bounds.peer_end;
61336
61362
  } else {
61337
- bounds.window_end =
61338
- FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense, bounds.valid_start, row_idx,
61339
- boundary_end_collection, expr_idx);
61363
+ bounds.window_end = FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense,
61364
+ bounds.valid_start, row_idx, boundary_end, expr_idx);
61340
61365
  }
61341
61366
  break;
61342
61367
  }
61343
61368
  case WindowBoundary::EXPR_FOLLOWING_RANGE: {
61344
- const auto expr_idx = bounds.scalar_end ? 0 : row_idx;
61345
- if (CellIsNull(boundary_end_collection, 0, expr_idx)) {
61369
+ if (boundary_end.CellIsNull(expr_idx)) {
61346
61370
  bounds.window_end = bounds.peer_end;
61347
61371
  } else {
61348
61372
  bounds.window_end = FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense, row_idx,
61349
- bounds.valid_end, boundary_end_collection, expr_idx);
61373
+ bounds.valid_end, boundary_end, expr_idx);
61350
61374
  }
61351
61375
  break;
61352
61376
  }
@@ -61373,37 +61397,62 @@ static void UpdateWindowBoundaries(WindowBoundariesState &bounds, const idx_t in
61373
61397
  }
61374
61398
  }
61375
61399
 
61376
- static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollection &input, ChunkCollection &output,
61377
- const ValidityMask &partition_mask, const ValidityMask &order_mask,
61378
- WindowAggregationMode mode) {
61400
+ struct WindowExecutor {
61401
+ WindowExecutor(BoundWindowExpression *wexpr, ChunkCollection &input, WindowAggregationMode mode);
61379
61402
 
61380
- // TODO we could evaluate those expressions in parallel
61381
- WindowBoundariesState bounds(wexpr);
61382
- uint64_t dense_rank = 1, rank_equal = 0, rank = 1;
61403
+ void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
61404
+ const ValidityMask &order_mask);
61383
61405
 
61384
- // Single pass over the input to produce the payload columns.
61385
- // Vectorisation for the win...
61406
+ // The function
61407
+ BoundWindowExpression *wexpr;
61386
61408
 
61387
- auto &allocator = input.GetAllocator();
61409
+ // Frame management
61410
+ WindowBoundariesState bounds;
61411
+ uint64_t dense_rank;
61412
+ uint64_t rank_equal;
61413
+ uint64_t rank;
61388
61414
 
61389
- // evaluate inner expressions of window functions, could be more complex
61390
- ChunkCollection payload_collection(allocator);
61391
- vector<Expression *> exprs;
61392
- for (auto &child : wexpr->children) {
61393
- exprs.push_back(child.get());
61394
- }
61415
+ // Expression collections
61416
+ ChunkCollection payload_collection;
61395
61417
 
61396
- // TODO: child may be a scalar, don't need to materialize the whole collection then
61397
- ExpressionExecutor payload_executor(allocator);
61398
- DataChunk payload_chunk;
61399
- PrepareInputExpressions(exprs.data(), exprs.size(), payload_collection, payload_executor, payload_chunk);
61418
+ WindowInputExpression leadlag_offset;
61419
+ WindowInputExpression leadlag_default;
61400
61420
 
61401
- WindowInputExpression leadlag_offset(wexpr->offset_expr.get(), allocator);
61402
- WindowInputExpression leadlag_default(wexpr->default_expr.get(), allocator);
61421
+ // evaluate boundaries if present. Parser has checked boundary types.
61422
+ WindowInputExpression boundary_start;
61423
+ WindowInputExpression boundary_end;
61403
61424
 
61404
- // evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
61425
+ // evaluate RANGE expressions, if needed
61426
+ WindowInputCollection range;
61427
+
61428
+ // IGNORE NULLS
61429
+ ValidityMask ignore_nulls;
61430
+
61431
+ // build a segment tree for frame-adhering aggregates
61432
+ // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
61405
61433
  ValidityMask filter_mask;
61406
61434
  vector<validity_t> filter_bits;
61435
+ unique_ptr<WindowSegmentTree> segment_tree = nullptr;
61436
+ };
61437
+
61438
+ WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, ChunkCollection &input, WindowAggregationMode mode)
61439
+ : wexpr(wexpr), bounds(wexpr, input.Count()), dense_rank(1), rank_equal(0), rank(1),
61440
+ payload_collection(input.GetAllocator()), leadlag_offset(wexpr->offset_expr.get(), input.GetAllocator()),
61441
+ leadlag_default(wexpr->default_expr.get(), input.GetAllocator()),
61442
+ boundary_start(wexpr->start_expr.get(), input.GetAllocator()),
61443
+ boundary_end(wexpr->end_expr.get(), input.GetAllocator()),
61444
+ range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr->orders[0].expression.get() : nullptr,
61445
+ input.GetAllocator())
61446
+
61447
+ {
61448
+ auto &allocator = input.GetAllocator();
61449
+
61450
+ // TODO we could evaluate those expressions in parallel
61451
+
61452
+ // Single pass over the input to produce the payload columns.
61453
+ // Vectorisation for the win...
61454
+
61455
+ // evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
61407
61456
  ExpressionExecutor filter_executor(allocator);
61408
61457
  SelectionVector filter_sel;
61409
61458
  if (wexpr->filter_expr) {
@@ -61414,20 +61463,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
61414
61463
  filter_sel.Initialize(STANDARD_VECTOR_SIZE);
61415
61464
  }
61416
61465
 
61417
- // evaluate boundaries if present. Parser has checked boundary types.
61418
- WindowInputExpression boundary_start(wexpr->start_expr.get(), allocator);
61419
- WindowInputExpression boundary_end(wexpr->end_expr.get(), allocator);
61420
-
61421
- // evaluate RANGE expressions, if needed
61422
- Expression *range_expr = nullptr;
61423
- if (bounds.has_preceding_range || bounds.has_following_range) {
61424
- D_ASSERT(wexpr->orders.size() == 1);
61425
- range_expr = wexpr->orders[0].expression.get();
61426
- }
61427
- WindowInputExpression range(range_expr, allocator);
61428
-
61429
61466
  // Set up a validity mask for IGNORE NULLS
61430
- ValidityMask ignore_nulls;
61431
61467
  bool check_nulls = false;
61432
61468
  if (wexpr->ignore_nulls) {
61433
61469
  switch (wexpr->type) {
@@ -61443,7 +61479,18 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
61443
61479
  }
61444
61480
  }
61445
61481
 
61446
- // Single pass over the input to produce the payload columns.
61482
+ // TODO: child may be a scalar, don't need to materialize the whole collection then
61483
+ ExpressionExecutor payload_executor(allocator);
61484
+ DataChunk payload_chunk;
61485
+
61486
+ // evaluate inner expressions of window functions, could be more complex
61487
+ vector<Expression *> exprs;
61488
+ for (auto &child : wexpr->children) {
61489
+ exprs.push_back(child.get());
61490
+ }
61491
+ PrepareInputExpressions(exprs.data(), exprs.size(), payload_executor, payload_chunk);
61492
+
61493
+ // Single pass over the input to produce the global data.
61447
61494
  // Vectorisation for the win...
61448
61495
  idx_t input_idx = 0;
61449
61496
  for (auto &input_chunk : input.Chunks()) {
@@ -61475,9 +61522,6 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
61475
61522
  }
61476
61523
  }
61477
61524
 
61478
- leadlag_offset.Execute(*input_chunk);
61479
- leadlag_default.Execute(*input_chunk);
61480
-
61481
61525
  if (wexpr->filter_expr) {
61482
61526
  const auto filtered = filter_executor.SelectExpression(*input_chunk, filter_sel);
61483
61527
  for (idx_t f = 0; f < filtered; ++f) {
@@ -61485,40 +61529,34 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
61485
61529
  }
61486
61530
  }
61487
61531
 
61488
- boundary_start.Execute(*input_chunk);
61489
- boundary_end.Execute(*input_chunk);
61490
-
61491
- range.Execute(*input_chunk);
61532
+ range.Append(*input_chunk);
61492
61533
 
61493
61534
  input_idx += count;
61494
61535
  }
61495
61536
 
61496
61537
  // build a segment tree for frame-adhering aggregates
61497
61538
  // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
61498
- unique_ptr<WindowSegmentTree> segment_tree = nullptr;
61499
61539
 
61500
61540
  if (wexpr->aggregate) {
61501
61541
  segment_tree = make_unique<WindowSegmentTree>(*(wexpr->aggregate), wexpr->bind_info.get(), wexpr->return_type,
61502
61542
  &payload_collection, filter_mask, mode);
61503
61543
  }
61544
+ }
61504
61545
 
61505
- // this is the main loop, go through all sorted rows and compute window function result
61506
- const vector<LogicalType> output_types(1, wexpr->return_type);
61507
- DataChunk output_chunk;
61508
- output_chunk.Initialize(allocator, output_types);
61509
- for (idx_t row_idx = 0; row_idx < input.Count(); row_idx++) {
61510
- // Grow the chunk if necessary.
61511
- const auto output_offset = row_idx % STANDARD_VECTOR_SIZE;
61512
- if (output_offset == 0) {
61513
- output.Append(output_chunk);
61514
- output_chunk.Reset();
61515
- output_chunk.SetCardinality(MinValue(idx_t(STANDARD_VECTOR_SIZE), input.Count() - row_idx));
61516
- }
61517
- auto &result = output_chunk.data[0];
61546
+ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
61547
+ const ValidityMask &order_mask) {
61548
+ // Evaluate the row-level arguments
61549
+ boundary_start.Execute(input_chunk);
61550
+ boundary_end.Execute(input_chunk);
61518
61551
 
61552
+ leadlag_offset.Execute(input_chunk);
61553
+ leadlag_default.Execute(input_chunk);
61554
+
61555
+ // this is the main loop, go through all sorted rows and compute window function result
61556
+ for (idx_t output_offset = 0; output_offset < input_chunk.size(); ++output_offset, ++row_idx) {
61519
61557
  // special case, OVER (), aggregate over everything
61520
- UpdateWindowBoundaries(bounds, input.Count(), row_idx, range.collection, boundary_start.collection,
61521
- boundary_end.collection, partition_mask, order_mask);
61558
+ bounds.Update(row_idx, range.collection, output_offset, boundary_start, boundary_end, partition_mask,
61559
+ order_mask);
61522
61560
  if (WindowNeedsRank(wexpr)) {
61523
61561
  if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
61524
61562
  dense_rank = 1;
@@ -61615,7 +61653,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
61615
61653
  case ExpressionType::WINDOW_LAG: {
61616
61654
  int64_t offset = 1;
61617
61655
  if (wexpr->offset_expr) {
61618
- offset = GetCell<int64_t>(leadlag_offset.collection, 0, wexpr->offset_expr->IsScalar() ? 0 : row_idx);
61656
+ offset = leadlag_offset.GetCell<int64_t>(output_offset);
61619
61657
  }
61620
61658
  int64_t val_idx = (int64_t)row_idx;
61621
61659
  if (wexpr->type == ExpressionType::WINDOW_LEAD) {
@@ -61638,8 +61676,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
61638
61676
  if (!delta) {
61639
61677
  payload_collection.CopyCell(0, val_idx, result, output_offset);
61640
61678
  } else if (wexpr->default_expr) {
61641
- const auto source_row = wexpr->default_expr->IsScalar() ? 0 : row_idx;
61642
- leadlag_default.collection.CopyCell(0, source_row, result, output_offset);
61679
+ leadlag_default.CopyCell(result, output_offset);
61643
61680
  } else {
61644
61681
  FlatVector::SetNull(result, output_offset, true);
61645
61682
  }
@@ -61684,8 +61721,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
61684
61721
  }
61685
61722
  }
61686
61723
 
61687
- // Push the last chunk
61688
- output.Append(output_chunk);
61724
+ result.Verify(input_chunk.size());
61689
61725
  }
61690
61726
 
61691
61727
  using WindowExpressions = vector<BoundWindowExpression *>;
@@ -61699,8 +61735,21 @@ static void ComputeWindowExpressions(WindowExpressions &window_exprs, ChunkColle
61699
61735
  }
61700
61736
  // Compute the functions columnwise
61701
61737
  for (idx_t expr_idx = 0; expr_idx < window_exprs.size(); ++expr_idx) {
61738
+ auto wexpr = window_exprs[expr_idx];
61739
+ WindowExecutor executor(wexpr, input, mode);
61702
61740
  ChunkCollection output(input.GetAllocator());
61703
- ComputeWindowExpression(window_exprs[expr_idx], input, output, partition_mask, order_mask, mode);
61741
+ DataChunk output_chunk;
61742
+ const vector<LogicalType> output_types(1, wexpr->return_type);
61743
+ output_chunk.Initialize(input.GetAllocator(), output_types);
61744
+ idx_t row_idx = 0;
61745
+ for (auto &input_chunk : input.Chunks()) {
61746
+ output_chunk.Reset();
61747
+ executor.Evaluate(row_idx, *input_chunk, output_chunk.data[0], partition_mask, order_mask);
61748
+ output_chunk.SetCardinality(*input_chunk);
61749
+ output_chunk.Verify();
61750
+ output.Append(output_chunk);
61751
+ row_idx += input_chunk->size();
61752
+ }
61704
61753
  window_results.Fuse(output);
61705
61754
  }
61706
61755
  }
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "195b7b47b"
15
- #define DUCKDB_VERSION "v0.4.1-dev829"
14
+ #define DUCKDB_SOURCE_ID "6281f2b2e"
15
+ #define DUCKDB_VERSION "v0.4.1-dev833"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //