duckdb 0.4.1-dev1514.0 → 0.4.1-dev1527.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.4.1-dev1514.0",
4
+ "version": "0.4.1-dev1527.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -5657,6 +5657,7 @@ static DefaultMacro internal_macros[] = {
5657
5657
  {DEFAULT_SCHEMA, "list_entropy", {"l", nullptr}, "list_aggr(l, 'entropy')"},
5658
5658
  {DEFAULT_SCHEMA, "list_last", {"l", nullptr}, "list_aggr(l, 'last')"},
5659
5659
  {DEFAULT_SCHEMA, "list_first", {"l", nullptr}, "list_aggr(l, 'first')"},
5660
+ {DEFAULT_SCHEMA, "list_any_value", {"l", nullptr}, "list_aggr(l, 'any_value')"},
5660
5661
  {DEFAULT_SCHEMA, "list_kurtosis", {"l", nullptr}, "list_aggr(l, 'kurtosis')"},
5661
5662
  {DEFAULT_SCHEMA, "list_min", {"l", nullptr}, "list_aggr(l, 'min')"},
5662
5663
  {DEFAULT_SCHEMA, "list_max", {"l", nullptr}, "list_aggr(l, 'max')"},
@@ -63484,9 +63485,11 @@ static void PrepareInputExpression(Expression *expr, ExpressionExecutor &executo
63484
63485
  }
63485
63486
 
63486
63487
  struct WindowInputExpression {
63487
- WindowInputExpression(Expression *expr_p, Allocator &allocator) : expr(expr_p), scalar(true), executor(allocator) {
63488
+ WindowInputExpression(Expression *expr_p, Allocator &allocator)
63489
+ : expr(expr_p), ptype(PhysicalType::INVALID), scalar(true), executor(allocator) {
63488
63490
  if (expr) {
63489
63491
  PrepareInputExpression(expr, executor, chunk);
63492
+ ptype = expr->return_type.InternalType();
63490
63493
  scalar = expr->IsScalar();
63491
63494
  }
63492
63495
  }
@@ -63519,25 +63522,51 @@ struct WindowInputExpression {
63519
63522
  }
63520
63523
 
63521
63524
  Expression *expr;
63525
+ PhysicalType ptype;
63522
63526
  bool scalar;
63523
63527
  ExpressionExecutor executor;
63524
63528
  DataChunk chunk;
63525
63529
  };
63526
63530
 
63527
- struct WindowInputCollection {
63528
- WindowInputCollection(Expression *expr_p, Allocator &allocator)
63529
- : input_expr(expr_p, allocator), collection(allocator) {
63531
+ struct WindowInputColumn {
63532
+ WindowInputColumn(Expression *expr_p, Allocator &allocator, idx_t capacity_p)
63533
+ : input_expr(expr_p, allocator), count(0), capacity(capacity_p) {
63534
+ if (input_expr.expr) {
63535
+ target = make_unique<Vector>(input_expr.chunk.data[0].GetType(), capacity);
63536
+ }
63530
63537
  }
63531
63538
 
63532
63539
  void Append(DataChunk &input_chunk) {
63533
- if (input_expr.expr && (!input_expr.scalar || collection.Count() == 0)) {
63540
+ if (input_expr.expr && (!input_expr.scalar || !count)) {
63534
63541
  input_expr.Execute(input_chunk);
63535
- collection.Append(input_expr.chunk);
63542
+ auto &source = input_expr.chunk.data[0];
63543
+ const auto source_count = input_expr.chunk.size();
63544
+ D_ASSERT(count + source_count <= capacity);
63545
+ VectorOperations::Copy(source, *target, source_count, 0, count);
63546
+ count += source_count;
63536
63547
  }
63537
63548
  }
63538
63549
 
63550
+ inline bool CellIsNull(idx_t i) {
63551
+ D_ASSERT(target);
63552
+ D_ASSERT(i < count);
63553
+ return FlatVector::IsNull(*target, input_expr.scalar ? 0 : i);
63554
+ }
63555
+
63556
+ template <typename T>
63557
+ inline T GetCell(idx_t i) {
63558
+ D_ASSERT(target);
63559
+ D_ASSERT(i < count);
63560
+ const auto data = FlatVector::GetData<T>(*target);
63561
+ return data[input_expr.scalar ? 0 : i];
63562
+ }
63563
+
63539
63564
  WindowInputExpression input_expr;
63540
- ChunkCollection collection;
63565
+
63566
+ private:
63567
+ unique_ptr<Vector> target;
63568
+ idx_t count;
63569
+ idx_t capacity;
63541
63570
  };
63542
63571
 
63543
63572
  static void ScanRowCollection(RowDataCollection &rows, RowDataCollection &heap, ChunkCollection &cols,
@@ -63591,7 +63620,7 @@ struct WindowBoundariesState {
63591
63620
  needs_peer(BoundaryNeedsPeer(wexpr->end) || wexpr->type == ExpressionType::WINDOW_CUME_DIST) {
63592
63621
  }
63593
63622
 
63594
- void Update(const idx_t row_idx, ChunkCollection &range_collection, const idx_t source_offset,
63623
+ void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t source_offset,
63595
63624
  WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
63596
63625
  const ValidityMask &partition_mask, const ValidityMask &order_mask);
63597
63626
 
@@ -63643,20 +63672,19 @@ static bool CellIsNull(ChunkCollection &collection, idx_t column, idx_t index) {
63643
63672
  }
63644
63673
 
63645
63674
  template <typename T>
63646
- struct ChunkCollectionIterator {
63647
- using iterator = ChunkCollectionIterator<T>;
63675
+ struct WindowColumnIterator {
63676
+ using iterator = WindowColumnIterator<T>;
63648
63677
  using iterator_category = std::forward_iterator_tag;
63649
63678
  using difference_type = std::ptrdiff_t;
63650
63679
  using value_type = T;
63651
63680
  using reference = T;
63652
63681
  using pointer = idx_t;
63653
63682
 
63654
- ChunkCollectionIterator(ChunkCollection &coll_p, idx_t col_no_p, pointer pos_p = 0)
63655
- : coll(&coll_p), col_no(col_no_p), pos(pos_p) {
63683
+ explicit WindowColumnIterator(WindowInputColumn &coll_p, pointer pos_p = 0) : coll(&coll_p), pos(pos_p) {
63656
63684
  }
63657
63685
 
63658
63686
  inline reference operator*() const {
63659
- return GetCell<T>(*coll, col_no, pos);
63687
+ return coll->GetCell<T>(pos);
63660
63688
  }
63661
63689
  inline explicit operator pointer() const {
63662
63690
  return pos;
@@ -63680,8 +63708,7 @@ struct ChunkCollectionIterator {
63680
63708
  }
63681
63709
 
63682
63710
  private:
63683
- ChunkCollection *coll;
63684
- idx_t col_no;
63711
+ WindowInputColumn *coll;
63685
63712
  pointer pos;
63686
63713
  };
63687
63714
 
@@ -63693,14 +63720,14 @@ struct OperationCompare : public std::function<bool(T, T)> {
63693
63720
  };
63694
63721
 
63695
63722
  template <typename T, typename OP, bool FROM>
63696
- static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin,
63697
- const idx_t order_end, WindowInputExpression &boundary, const idx_t boundary_row) {
63723
+ static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
63724
+ WindowInputExpression &boundary, const idx_t boundary_row) {
63698
63725
  D_ASSERT(!boundary.CellIsNull(boundary_row));
63699
63726
  const auto val = boundary.GetCell<T>(boundary_row);
63700
63727
 
63701
63728
  OperationCompare<T, OP> comp;
63702
- ChunkCollectionIterator<T> begin(over, order_col, order_begin);
63703
- ChunkCollectionIterator<T> end(over, order_col, order_end);
63729
+ WindowColumnIterator<T> begin(over, order_begin);
63730
+ WindowColumnIterator<T> end(over, order_end);
63704
63731
  if (FROM) {
63705
63732
  return idx_t(std::lower_bound(begin, end, val, comp));
63706
63733
  } else {
@@ -63709,58 +63736,55 @@ static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, c
63709
63736
  }
63710
63737
 
63711
63738
  template <typename OP, bool FROM>
63712
- static idx_t FindRangeBound(ChunkCollection &over, const idx_t order_col, const idx_t order_begin,
63713
- const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
63714
- const auto &over_types = over.Types();
63715
- D_ASSERT(over_types.size() > order_col);
63739
+ static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
63740
+ WindowInputExpression &boundary, const idx_t expr_idx) {
63716
63741
  D_ASSERT(boundary.chunk.ColumnCount() == 1);
63717
- D_ASSERT(boundary.chunk.data[0].GetType() == over_types[order_col]);
63742
+ D_ASSERT(boundary.chunk.data[0].GetType().InternalType() == over.input_expr.ptype);
63718
63743
 
63719
- switch (over_types[order_col].InternalType()) {
63744
+ switch (over.input_expr.ptype) {
63720
63745
  case PhysicalType::INT8:
63721
- return FindTypedRangeBound<int8_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63746
+ return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63722
63747
  case PhysicalType::INT16:
63723
- return FindTypedRangeBound<int16_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63748
+ return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63724
63749
  case PhysicalType::INT32:
63725
- return FindTypedRangeBound<int32_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63750
+ return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63726
63751
  case PhysicalType::INT64:
63727
- return FindTypedRangeBound<int64_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63752
+ return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63728
63753
  case PhysicalType::UINT8:
63729
- return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63754
+ return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63730
63755
  case PhysicalType::UINT16:
63731
- return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63756
+ return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63732
63757
  case PhysicalType::UINT32:
63733
- return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63758
+ return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63734
63759
  case PhysicalType::UINT64:
63735
- return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63760
+ return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63736
63761
  case PhysicalType::INT128:
63737
- return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63762
+ return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63738
63763
  case PhysicalType::FLOAT:
63739
- return FindTypedRangeBound<float, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63764
+ return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63740
63765
  case PhysicalType::DOUBLE:
63741
- return FindTypedRangeBound<double, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63766
+ return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63742
63767
  case PhysicalType::INTERVAL:
63743
- return FindTypedRangeBound<interval_t, OP, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63768
+ return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
63744
63769
  default:
63745
63770
  throw InternalException("Unsupported column type for RANGE");
63746
63771
  }
63747
63772
  }
63748
63773
 
63749
63774
  template <bool FROM>
63750
- static idx_t FindOrderedRangeBound(ChunkCollection &over, const idx_t order_col, const OrderType range_sense,
63751
- const idx_t order_begin, const idx_t order_end, WindowInputExpression &boundary,
63752
- const idx_t expr_idx) {
63775
+ static idx_t FindOrderedRangeBound(WindowInputColumn &over, const OrderType range_sense, const idx_t order_begin,
63776
+ const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
63753
63777
  switch (range_sense) {
63754
63778
  case OrderType::ASCENDING:
63755
- return FindRangeBound<LessThan, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63779
+ return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
63756
63780
  case OrderType::DESCENDING:
63757
- return FindRangeBound<GreaterThan, FROM>(over, order_col, order_begin, order_end, boundary, expr_idx);
63781
+ return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
63758
63782
  default:
63759
63783
  throw InternalException("Unsupported ORDER BY sense for RANGE");
63760
63784
  }
63761
63785
  }
63762
63786
 
63763
- void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_collection, const idx_t expr_idx,
63787
+ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t expr_idx,
63764
63788
  WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
63765
63789
  const ValidityMask &partition_mask, const ValidityMask &order_mask) {
63766
63790
 
@@ -63790,7 +63814,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
63790
63814
 
63791
63815
  if ((bounds.valid_start < bounds.valid_end) && bounds.has_preceding_range) {
63792
63816
  // Exclude any leading NULLs
63793
- if (CellIsNull(range_collection, 0, bounds.valid_start)) {
63817
+ if (range_collection.CellIsNull(bounds.valid_start)) {
63794
63818
  idx_t n = 1;
63795
63819
  bounds.valid_start = FindNextStart(order_mask, bounds.valid_start + 1, bounds.valid_end, n);
63796
63820
  }
@@ -63798,7 +63822,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
63798
63822
 
63799
63823
  if ((bounds.valid_start < bounds.valid_end) && bounds.has_following_range) {
63800
63824
  // Exclude any trailing NULLs
63801
- if (CellIsNull(range_collection, 0, bounds.valid_end - 1)) {
63825
+ if (range_collection.CellIsNull(bounds.valid_end - 1)) {
63802
63826
  idx_t n = 1;
63803
63827
  bounds.valid_end = FindPrevStart(order_mask, bounds.valid_start, bounds.valid_end, n);
63804
63828
  }
@@ -63849,8 +63873,8 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
63849
63873
  if (boundary_start.CellIsNull(expr_idx)) {
63850
63874
  bounds.window_start = bounds.peer_start;
63851
63875
  } else {
63852
- bounds.window_start = FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense,
63853
- bounds.valid_start, row_idx, boundary_start, expr_idx);
63876
+ bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, bounds.valid_start,
63877
+ row_idx, boundary_start, expr_idx);
63854
63878
  }
63855
63879
  break;
63856
63880
  }
@@ -63858,7 +63882,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
63858
63882
  if (boundary_start.CellIsNull(expr_idx)) {
63859
63883
  bounds.window_start = bounds.peer_start;
63860
63884
  } else {
63861
- bounds.window_start = FindOrderedRangeBound<true>(range_collection, 0, bounds.range_sense, row_idx,
63885
+ bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, row_idx,
63862
63886
  bounds.valid_end, boundary_start, expr_idx);
63863
63887
  }
63864
63888
  break;
@@ -63887,8 +63911,8 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
63887
63911
  if (boundary_end.CellIsNull(expr_idx)) {
63888
63912
  bounds.window_end = bounds.peer_end;
63889
63913
  } else {
63890
- bounds.window_end = FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense,
63891
- bounds.valid_start, row_idx, boundary_end, expr_idx);
63914
+ bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, bounds.valid_start,
63915
+ row_idx, boundary_end, expr_idx);
63892
63916
  }
63893
63917
  break;
63894
63918
  }
@@ -63896,7 +63920,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
63896
63920
  if (boundary_end.CellIsNull(expr_idx)) {
63897
63921
  bounds.window_end = bounds.peer_end;
63898
63922
  } else {
63899
- bounds.window_end = FindOrderedRangeBound<false>(range_collection, 0, bounds.range_sense, row_idx,
63923
+ bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, row_idx,
63900
63924
  bounds.valid_end, boundary_end, expr_idx);
63901
63925
  }
63902
63926
  break;
@@ -63950,7 +63974,7 @@ struct WindowExecutor {
63950
63974
  WindowInputExpression boundary_end;
63951
63975
 
63952
63976
  // evaluate RANGE expressions, if needed
63953
- WindowInputCollection range;
63977
+ WindowInputColumn range;
63954
63978
 
63955
63979
  // IGNORE NULLS
63956
63980
  ValidityMask ignore_nulls;
@@ -63969,7 +63993,7 @@ WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, ChunkCollection &in
63969
63993
  boundary_start(wexpr->start_expr.get(), input.GetAllocator()),
63970
63994
  boundary_end(wexpr->end_expr.get(), input.GetAllocator()),
63971
63995
  range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr->orders[0].expression.get() : nullptr,
63972
- input.GetAllocator())
63996
+ input.GetAllocator(), input.Count())
63973
63997
 
63974
63998
  {
63975
63999
  auto &allocator = input.GetAllocator();
@@ -64082,8 +64106,7 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
64082
64106
  // this is the main loop, go through all sorted rows and compute window function result
64083
64107
  for (idx_t output_offset = 0; output_offset < input_chunk.size(); ++output_offset, ++row_idx) {
64084
64108
  // special case, OVER (), aggregate over everything
64085
- bounds.Update(row_idx, range.collection, output_offset, boundary_start, boundary_end, partition_mask,
64086
- order_mask);
64109
+ bounds.Update(row_idx, range, output_offset, boundary_start, boundary_end, partition_mask, order_mask);
64087
64110
  if (WindowNeedsRank(wexpr)) {
64088
64111
  if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
64089
64112
  dense_rank = 1;
@@ -64328,7 +64351,7 @@ public:
64328
64351
  CreateMergeTasks(pipeline, *this, gstate, hash_group);
64329
64352
  }
64330
64353
 
64331
- static bool CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
64354
+ static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
64332
64355
  WindowGlobalHashGroup &hash_group) {
64333
64356
 
64334
64357
  // Multiple blocks remaining in the group: Schedule the next round
@@ -64336,28 +64359,6 @@ public:
64336
64359
  hash_group.global_sort->InitializeMergeRound();
64337
64360
  auto new_event = make_shared<WindowMergeEvent>(state, pipeline, hash_group);
64338
64361
  event.InsertEvent(move(new_event));
64339
- return true;
64340
- }
64341
-
64342
- // Find the next group to sort
64343
- for (;;) {
64344
- auto group = state.GetNextSortGroup();
64345
- if (group >= state.hash_groups.size()) {
64346
- // Out of groups
64347
- return false;
64348
- }
64349
-
64350
- auto &hash_group = *state.hash_groups[group];
64351
- auto &global_sort = *hash_group.global_sort;
64352
-
64353
- // Prepare for merge sort phase
64354
- hash_group.PrepareMergePhase();
64355
- if (global_sort.sorted_blocks.size() > 1) {
64356
- global_sort.InitializeMergeRound();
64357
- auto new_event = make_shared<WindowMergeEvent>(state, pipeline, hash_group);
64358
- event.InsertEvent(move(new_event));
64359
- return true;
64360
- }
64361
64362
  }
64362
64363
  }
64363
64364
  };
@@ -64381,11 +64382,14 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
64381
64382
  return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
64382
64383
  }
64383
64384
 
64384
- auto &hash_group = *state.hash_groups[group];
64385
+ // Schedule all the sorts for maximum thread utilisation
64386
+ for (; group < state.hash_groups.size(); group = state.GetNextSortGroup()) {
64387
+ auto &hash_group = *state.hash_groups[group];
64385
64388
 
64386
- // Prepare for merge sort phase
64387
- hash_group.PrepareMergePhase();
64388
- WindowMergeEvent::CreateMergeTasks(pipeline, event, state, hash_group);
64389
+ // Prepare for merge sort phase
64390
+ hash_group.PrepareMergePhase();
64391
+ WindowMergeEvent::CreateMergeTasks(pipeline, event, state, hash_group);
64392
+ }
64389
64393
 
64390
64394
  return SinkFinalizeType::READY;
64391
64395
  }
@@ -64475,7 +64479,13 @@ void WindowLocalSourceState::MaterializeInput(const vector<LogicalType> &payload
64475
64479
 
64476
64480
  // scan the sorted row data
64477
64481
  D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
64478
- PayloadScanner scanner(*global_sort_state.sorted_blocks[0]->payload_data, global_sort_state);
64482
+ auto &sb = *global_sort_state.sorted_blocks[0];
64483
+
64484
+ // Free up some memory before allocating more
64485
+ sb.radix_sorting_data.clear();
64486
+ sb.blob_sorting_data = nullptr;
64487
+
64488
+ PayloadScanner scanner(*sb.payload_data, global_sort_state);
64479
64489
  DataChunk payload_chunk;
64480
64490
  payload_chunk.Initialize(allocator, payload_types);
64481
64491
  for (;;) {
@@ -85233,15 +85243,18 @@ struct FirstFunctionBase {
85233
85243
  }
85234
85244
  };
85235
85245
 
85236
- template <bool LAST>
85246
+ template <bool LAST, bool SKIP_NULLS>
85237
85247
  struct FirstFunction : public FirstFunctionBase {
85238
85248
  template <class INPUT_TYPE, class STATE, class OP>
85239
85249
  static void Operation(STATE *state, AggregateInputData &, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) {
85240
85250
  if (LAST || !state->is_set) {
85241
- state->is_set = true;
85242
85251
  if (!mask.RowIsValid(idx)) {
85252
+ if (!SKIP_NULLS) {
85253
+ state->is_set = true;
85254
+ }
85243
85255
  state->is_null = true;
85244
85256
  } else {
85257
+ state->is_set = true;
85245
85258
  state->is_null = false;
85246
85259
  state->value = input[idx];
85247
85260
  }
@@ -85271,14 +85284,17 @@ struct FirstFunction : public FirstFunctionBase {
85271
85284
  }
85272
85285
  };
85273
85286
 
85274
- template <bool LAST>
85287
+ template <bool LAST, bool SKIP_NULLS>
85275
85288
  struct FirstFunctionString : public FirstFunctionBase {
85276
85289
  template <class STATE>
85277
85290
  static void SetValue(STATE *state, string_t value, bool is_null) {
85278
- state->is_set = true;
85279
85291
  if (is_null) {
85280
- state->is_null = true;
85292
+ if (!SKIP_NULLS) {
85293
+ state->is_set = true;
85294
+ state->is_null = true;
85295
+ }
85281
85296
  } else {
85297
+ state->is_set = true;
85282
85298
  if (value.IsInlined()) {
85283
85299
  state->value = value;
85284
85300
  } else {
@@ -85333,7 +85349,7 @@ struct FirstStateVector {
85333
85349
  Vector *value;
85334
85350
  };
85335
85351
 
85336
- template <bool LAST>
85352
+ template <bool LAST, bool SKIP_NULLS>
85337
85353
  struct FirstVectorFunction {
85338
85354
  template <class STATE>
85339
85355
  static void Initialize(STATE *state) {
@@ -85347,7 +85363,7 @@ struct FirstVectorFunction {
85347
85363
  }
85348
85364
  }
85349
85365
  static bool IgnoreNull() {
85350
- return false;
85366
+ return SKIP_NULLS;
85351
85367
  }
85352
85368
 
85353
85369
  template <class STATE>
@@ -85363,11 +85379,18 @@ struct FirstVectorFunction {
85363
85379
 
85364
85380
  static void Update(Vector inputs[], AggregateInputData &, idx_t input_count, Vector &state_vector, idx_t count) {
85365
85381
  auto &input = inputs[0];
85382
+ UnifiedVectorFormat idata;
85383
+ input.ToUnifiedFormat(count, idata);
85384
+
85366
85385
  UnifiedVectorFormat sdata;
85367
85386
  state_vector.ToUnifiedFormat(count, sdata);
85368
85387
 
85369
85388
  auto states = (FirstStateVector **)sdata.data;
85370
85389
  for (idx_t i = 0; i < count; i++) {
85390
+ const auto idx = idata.sel->get_index(i);
85391
+ if (SKIP_NULLS && !idata.validity.RowIsValid(idx)) {
85392
+ continue;
85393
+ }
85371
85394
  auto state = states[sdata.sel->get_index(i)];
85372
85395
  if (LAST || !state->value) {
85373
85396
  SetValue(state, input, i);
@@ -85406,79 +85429,79 @@ struct FirstVectorFunction {
85406
85429
  }
85407
85430
  };
85408
85431
 
85409
- template <class T, bool LAST>
85432
+ template <class T, bool LAST, bool SKIP_NULLS>
85410
85433
  static AggregateFunction GetFirstAggregateTemplated(LogicalType type) {
85411
- auto agg = AggregateFunction::UnaryAggregate<FirstState<T>, T, T, FirstFunction<LAST>>(type, type);
85434
+ auto agg = AggregateFunction::UnaryAggregate<FirstState<T>, T, T, FirstFunction<LAST, SKIP_NULLS>>(type, type);
85412
85435
  return agg;
85413
85436
  }
85414
85437
 
85415
- template <bool LAST>
85438
+ template <bool LAST, bool SKIP_NULLS>
85416
85439
  static AggregateFunction GetFirstFunction(const LogicalType &type);
85417
85440
 
85418
- template <bool LAST>
85441
+ template <bool LAST, bool SKIP_NULLS>
85419
85442
  AggregateFunction GetDecimalFirstFunction(const LogicalType &type) {
85420
85443
  D_ASSERT(type.id() == LogicalTypeId::DECIMAL);
85421
85444
  switch (type.InternalType()) {
85422
85445
  case PhysicalType::INT16:
85423
- return GetFirstFunction<LAST>(LogicalType::SMALLINT);
85446
+ return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::SMALLINT);
85424
85447
  case PhysicalType::INT32:
85425
- return GetFirstFunction<LAST>(LogicalType::INTEGER);
85448
+ return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::INTEGER);
85426
85449
  case PhysicalType::INT64:
85427
- return GetFirstFunction<LAST>(LogicalType::BIGINT);
85450
+ return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::BIGINT);
85428
85451
  default:
85429
- return GetFirstFunction<LAST>(LogicalType::HUGEINT);
85452
+ return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::HUGEINT);
85430
85453
  }
85431
85454
  }
85432
85455
 
85433
- template <bool LAST>
85456
+ template <bool LAST, bool SKIP_NULLS>
85434
85457
  static AggregateFunction GetFirstFunction(const LogicalType &type) {
85435
85458
  switch (type.id()) {
85436
85459
  case LogicalTypeId::BOOLEAN:
85437
- return GetFirstAggregateTemplated<int8_t, LAST>(type);
85460
+ return GetFirstAggregateTemplated<int8_t, LAST, SKIP_NULLS>(type);
85438
85461
  case LogicalTypeId::TINYINT:
85439
- return GetFirstAggregateTemplated<int8_t, LAST>(type);
85462
+ return GetFirstAggregateTemplated<int8_t, LAST, SKIP_NULLS>(type);
85440
85463
  case LogicalTypeId::SMALLINT:
85441
- return GetFirstAggregateTemplated<int16_t, LAST>(type);
85464
+ return GetFirstAggregateTemplated<int16_t, LAST, SKIP_NULLS>(type);
85442
85465
  case LogicalTypeId::INTEGER:
85443
85466
  case LogicalTypeId::DATE:
85444
- return GetFirstAggregateTemplated<int32_t, LAST>(type);
85467
+ return GetFirstAggregateTemplated<int32_t, LAST, SKIP_NULLS>(type);
85445
85468
  case LogicalTypeId::BIGINT:
85446
85469
  case LogicalTypeId::TIME:
85447
85470
  case LogicalTypeId::TIMESTAMP:
85448
85471
  case LogicalTypeId::TIME_TZ:
85449
85472
  case LogicalTypeId::TIMESTAMP_TZ:
85450
- return GetFirstAggregateTemplated<int64_t, LAST>(type);
85473
+ return GetFirstAggregateTemplated<int64_t, LAST, SKIP_NULLS>(type);
85451
85474
  case LogicalTypeId::UTINYINT:
85452
- return GetFirstAggregateTemplated<uint8_t, LAST>(type);
85475
+ return GetFirstAggregateTemplated<uint8_t, LAST, SKIP_NULLS>(type);
85453
85476
  case LogicalTypeId::USMALLINT:
85454
- return GetFirstAggregateTemplated<uint16_t, LAST>(type);
85477
+ return GetFirstAggregateTemplated<uint16_t, LAST, SKIP_NULLS>(type);
85455
85478
  case LogicalTypeId::UINTEGER:
85456
- return GetFirstAggregateTemplated<uint32_t, LAST>(type);
85479
+ return GetFirstAggregateTemplated<uint32_t, LAST, SKIP_NULLS>(type);
85457
85480
  case LogicalTypeId::UBIGINT:
85458
- return GetFirstAggregateTemplated<uint64_t, LAST>(type);
85481
+ return GetFirstAggregateTemplated<uint64_t, LAST, SKIP_NULLS>(type);
85459
85482
  case LogicalTypeId::HUGEINT:
85460
- return GetFirstAggregateTemplated<hugeint_t, LAST>(type);
85483
+ return GetFirstAggregateTemplated<hugeint_t, LAST, SKIP_NULLS>(type);
85461
85484
  case LogicalTypeId::FLOAT:
85462
- return GetFirstAggregateTemplated<float, LAST>(type);
85485
+ return GetFirstAggregateTemplated<float, LAST, SKIP_NULLS>(type);
85463
85486
  case LogicalTypeId::DOUBLE:
85464
- return GetFirstAggregateTemplated<double, LAST>(type);
85487
+ return GetFirstAggregateTemplated<double, LAST, SKIP_NULLS>(type);
85465
85488
  case LogicalTypeId::INTERVAL:
85466
- return GetFirstAggregateTemplated<interval_t, LAST>(type);
85489
+ return GetFirstAggregateTemplated<interval_t, LAST, SKIP_NULLS>(type);
85467
85490
  case LogicalTypeId::VARCHAR:
85468
85491
  case LogicalTypeId::BLOB: {
85469
85492
  auto agg = AggregateFunction::UnaryAggregateDestructor<FirstState<string_t>, string_t, string_t,
85470
- FirstFunctionString<LAST>>(type, type);
85493
+ FirstFunctionString<LAST, SKIP_NULLS>>(type, type);
85471
85494
  return agg;
85472
85495
  }
85473
85496
  case LogicalTypeId::DECIMAL: {
85474
85497
  type.Verify();
85475
- AggregateFunction function = GetDecimalFirstFunction<LAST>(type);
85498
+ AggregateFunction function = GetDecimalFirstFunction<LAST, SKIP_NULLS>(type);
85476
85499
  function.arguments[0] = type;
85477
85500
  function.return_type = type;
85478
85501
  return function;
85479
85502
  }
85480
85503
  default: {
85481
- using OP = FirstVectorFunction<LAST>;
85504
+ using OP = FirstVectorFunction<LAST, SKIP_NULLS>;
85482
85505
  return AggregateFunction({type}, type, AggregateFunction::StateSize<FirstStateVector>,
85483
85506
  AggregateFunction::StateInitialize<FirstStateVector, OP>, OP::Update,
85484
85507
  AggregateFunction::StateCombine<FirstStateVector, OP>,
@@ -85489,16 +85512,16 @@ static AggregateFunction GetFirstFunction(const LogicalType &type) {
85489
85512
  }
85490
85513
 
85491
85514
  AggregateFunction FirstFun::GetFunction(const LogicalType &type) {
85492
- auto fun = GetFirstFunction<false>(type);
85515
+ auto fun = GetFirstFunction<false, false>(type);
85493
85516
  fun.name = "first";
85494
85517
  return fun;
85495
85518
  }
85496
85519
 
85497
- template <bool LAST>
85520
+ template <bool LAST, bool SKIP_NULLS>
85498
85521
  unique_ptr<FunctionData> BindDecimalFirst(ClientContext &context, AggregateFunction &function,
85499
85522
  vector<unique_ptr<Expression>> &arguments) {
85500
85523
  auto decimal_type = arguments[0]->return_type;
85501
- function = GetFirstFunction<LAST>(decimal_type);
85524
+ function = GetFirstFunction<LAST, SKIP_NULLS>(decimal_type);
85502
85525
  function.name = "first";
85503
85526
  function.return_type = decimal_type;
85504
85527
  return nullptr;
@@ -85507,15 +85530,19 @@ unique_ptr<FunctionData> BindDecimalFirst(ClientContext &context, AggregateFunct
85507
85530
  void FirstFun::RegisterFunction(BuiltinFunctions &set) {
85508
85531
  AggregateFunctionSet first("first");
85509
85532
  AggregateFunctionSet last("last");
85533
+ AggregateFunctionSet any_value("any_value");
85510
85534
  for (auto &type : LogicalType::AllTypes()) {
85511
85535
  if (type.id() == LogicalTypeId::DECIMAL) {
85512
85536
  first.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
85513
- BindDecimalFirst<false>, nullptr, nullptr, nullptr));
85537
+ BindDecimalFirst<false, false>, nullptr, nullptr, nullptr));
85514
85538
  last.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
85515
- BindDecimalFirst<true>, nullptr, nullptr, nullptr));
85539
+ BindDecimalFirst<true, false>, nullptr, nullptr, nullptr));
85540
+ any_value.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
85541
+ BindDecimalFirst<false, true>, nullptr, nullptr, nullptr));
85516
85542
  } else {
85517
- first.AddFunction(GetFirstFunction<false>(type));
85518
- last.AddFunction(GetFirstFunction<true>(type));
85543
+ first.AddFunction(GetFirstFunction<false, false>(type));
85544
+ last.AddFunction(GetFirstFunction<true, false>(type));
85545
+ any_value.AddFunction(GetFirstFunction<false, true>(type));
85519
85546
  }
85520
85547
  }
85521
85548
  set.AddFunction(first);
@@ -85523,6 +85550,8 @@ void FirstFun::RegisterFunction(BuiltinFunctions &set) {
85523
85550
  set.AddFunction(first);
85524
85551
 
85525
85552
  set.AddFunction(last);
85553
+
85554
+ set.AddFunction(any_value);
85526
85555
  }
85527
85556
 
85528
85557
  } // namespace duckdb
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "50951241d"
15
- #define DUCKDB_VERSION "v0.4.1-dev1514"
14
+ #define DUCKDB_SOURCE_ID "17ec2ab20"
15
+ #define DUCKDB_VERSION "v0.4.1-dev1527"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //