duckdb 0.4.1-dev1514.0 → 0.4.1-dev1527.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +151 -122
- package/src/duckdb.hpp +2 -2
- package/src/parquet-amalgamation.cpp +37379 -37379
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -5657,6 +5657,7 @@ static DefaultMacro internal_macros[] = {
|
|
|
5657
5657
|
{DEFAULT_SCHEMA, "list_entropy", {"l", nullptr}, "list_aggr(l, 'entropy')"},
|
|
5658
5658
|
{DEFAULT_SCHEMA, "list_last", {"l", nullptr}, "list_aggr(l, 'last')"},
|
|
5659
5659
|
{DEFAULT_SCHEMA, "list_first", {"l", nullptr}, "list_aggr(l, 'first')"},
|
|
5660
|
+
{DEFAULT_SCHEMA, "list_any_value", {"l", nullptr}, "list_aggr(l, 'any_value')"},
|
|
5660
5661
|
{DEFAULT_SCHEMA, "list_kurtosis", {"l", nullptr}, "list_aggr(l, 'kurtosis')"},
|
|
5661
5662
|
{DEFAULT_SCHEMA, "list_min", {"l", nullptr}, "list_aggr(l, 'min')"},
|
|
5662
5663
|
{DEFAULT_SCHEMA, "list_max", {"l", nullptr}, "list_aggr(l, 'max')"},
|
|
@@ -63484,9 +63485,11 @@ static void PrepareInputExpression(Expression *expr, ExpressionExecutor &executo
|
|
|
63484
63485
|
}
|
|
63485
63486
|
|
|
63486
63487
|
struct WindowInputExpression {
|
|
63487
|
-
WindowInputExpression(Expression *expr_p, Allocator &allocator)
|
|
63488
|
+
WindowInputExpression(Expression *expr_p, Allocator &allocator)
|
|
63489
|
+
: expr(expr_p), ptype(PhysicalType::INVALID), scalar(true), executor(allocator) {
|
|
63488
63490
|
if (expr) {
|
|
63489
63491
|
PrepareInputExpression(expr, executor, chunk);
|
|
63492
|
+
ptype = expr->return_type.InternalType();
|
|
63490
63493
|
scalar = expr->IsScalar();
|
|
63491
63494
|
}
|
|
63492
63495
|
}
|
|
@@ -63519,25 +63522,51 @@ struct WindowInputExpression {
|
|
|
63519
63522
|
}
|
|
63520
63523
|
|
|
63521
63524
|
Expression *expr;
|
|
63525
|
+
PhysicalType ptype;
|
|
63522
63526
|
bool scalar;
|
|
63523
63527
|
ExpressionExecutor executor;
|
|
63524
63528
|
DataChunk chunk;
|
|
63525
63529
|
};
|
|
63526
63530
|
|
|
63527
|
-
struct
|
|
63528
|
-
|
|
63529
|
-
: input_expr(expr_p, allocator),
|
|
63531
|
+
struct WindowInputColumn {
|
|
63532
|
+
WindowInputColumn(Expression *expr_p, Allocator &allocator, idx_t capacity_p)
|
|
63533
|
+
: input_expr(expr_p, allocator), count(0), capacity(capacity_p) {
|
|
63534
|
+
if (input_expr.expr) {
|
|
63535
|
+
target = make_unique<Vector>(input_expr.chunk.data[0].GetType(), capacity);
|
|
63536
|
+
}
|
|
63530
63537
|
}
|
|
63531
63538
|
|
|
63532
63539
|
void Append(DataChunk &input_chunk) {
|
|
63533
|
-
if (input_expr.expr && (!input_expr.scalar ||
|
|
63540
|
+
if (input_expr.expr && (!input_expr.scalar || !count)) {
|
|
63534
63541
|
input_expr.Execute(input_chunk);
|
|
63535
|
-
|
|
63542
|
+
auto &source = input_expr.chunk.data[0];
|
|
63543
|
+
const auto source_count = input_expr.chunk.size();
|
|
63544
|
+
D_ASSERT(count + source_count <= capacity);
|
|
63545
|
+
VectorOperations::Copy(source, *target, source_count, 0, count);
|
|
63546
|
+
count += source_count;
|
|
63536
63547
|
}
|
|
63537
63548
|
}
|
|
63538
63549
|
|
|
63550
|
+
inline bool CellIsNull(idx_t i) {
|
|
63551
|
+
D_ASSERT(target);
|
|
63552
|
+
D_ASSERT(i < count);
|
|
63553
|
+
return FlatVector::IsNull(*target, input_expr.scalar ? 0 : i);
|
|
63554
|
+
}
|
|
63555
|
+
|
|
63556
|
+
template <typename T>
|
|
63557
|
+
inline T GetCell(idx_t i) {
|
|
63558
|
+
D_ASSERT(target);
|
|
63559
|
+
D_ASSERT(i < count);
|
|
63560
|
+
const auto data = FlatVector::GetData<T>(*target);
|
|
63561
|
+
return data[input_expr.scalar ? 0 : i];
|
|
63562
|
+
}
|
|
63563
|
+
|
|
63539
63564
|
WindowInputExpression input_expr;
|
|
63540
|
-
|
|
63565
|
+
|
|
63566
|
+
private:
|
|
63567
|
+
unique_ptr<Vector> target;
|
|
63568
|
+
idx_t count;
|
|
63569
|
+
idx_t capacity;
|
|
63541
63570
|
};
|
|
63542
63571
|
|
|
63543
63572
|
static void ScanRowCollection(RowDataCollection &rows, RowDataCollection &heap, ChunkCollection &cols,
|
|
@@ -63591,7 +63620,7 @@ struct WindowBoundariesState {
|
|
|
63591
63620
|
needs_peer(BoundaryNeedsPeer(wexpr->end) || wexpr->type == ExpressionType::WINDOW_CUME_DIST) {
|
|
63592
63621
|
}
|
|
63593
63622
|
|
|
63594
|
-
void Update(const idx_t row_idx,
|
|
63623
|
+
void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t source_offset,
|
|
63595
63624
|
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
|
63596
63625
|
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
|
63597
63626
|
|
|
@@ -63643,20 +63672,19 @@ static bool CellIsNull(ChunkCollection &collection, idx_t column, idx_t index) {
|
|
|
63643
63672
|
}
|
|
63644
63673
|
|
|
63645
63674
|
template <typename T>
|
|
63646
|
-
struct
|
|
63647
|
-
using iterator =
|
|
63675
|
+
struct WindowColumnIterator {
|
|
63676
|
+
using iterator = WindowColumnIterator<T>;
|
|
63648
63677
|
using iterator_category = std::forward_iterator_tag;
|
|
63649
63678
|
using difference_type = std::ptrdiff_t;
|
|
63650
63679
|
using value_type = T;
|
|
63651
63680
|
using reference = T;
|
|
63652
63681
|
using pointer = idx_t;
|
|
63653
63682
|
|
|
63654
|
-
|
|
63655
|
-
: coll(&coll_p), col_no(col_no_p), pos(pos_p) {
|
|
63683
|
+
explicit WindowColumnIterator(WindowInputColumn &coll_p, pointer pos_p = 0) : coll(&coll_p), pos(pos_p) {
|
|
63656
63684
|
}
|
|
63657
63685
|
|
|
63658
63686
|
inline reference operator*() const {
|
|
63659
|
-
return GetCell<T>(
|
|
63687
|
+
return coll->GetCell<T>(pos);
|
|
63660
63688
|
}
|
|
63661
63689
|
inline explicit operator pointer() const {
|
|
63662
63690
|
return pos;
|
|
@@ -63680,8 +63708,7 @@ struct ChunkCollectionIterator {
|
|
|
63680
63708
|
}
|
|
63681
63709
|
|
|
63682
63710
|
private:
|
|
63683
|
-
|
|
63684
|
-
idx_t col_no;
|
|
63711
|
+
WindowInputColumn *coll;
|
|
63685
63712
|
pointer pos;
|
|
63686
63713
|
};
|
|
63687
63714
|
|
|
@@ -63693,14 +63720,14 @@ struct OperationCompare : public std::function<bool(T, T)> {
|
|
|
63693
63720
|
};
|
|
63694
63721
|
|
|
63695
63722
|
template <typename T, typename OP, bool FROM>
|
|
63696
|
-
static idx_t FindTypedRangeBound(
|
|
63697
|
-
|
|
63723
|
+
static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
|
|
63724
|
+
WindowInputExpression &boundary, const idx_t boundary_row) {
|
|
63698
63725
|
D_ASSERT(!boundary.CellIsNull(boundary_row));
|
|
63699
63726
|
const auto val = boundary.GetCell<T>(boundary_row);
|
|
63700
63727
|
|
|
63701
63728
|
OperationCompare<T, OP> comp;
|
|
63702
|
-
|
|
63703
|
-
|
|
63729
|
+
WindowColumnIterator<T> begin(over, order_begin);
|
|
63730
|
+
WindowColumnIterator<T> end(over, order_end);
|
|
63704
63731
|
if (FROM) {
|
|
63705
63732
|
return idx_t(std::lower_bound(begin, end, val, comp));
|
|
63706
63733
|
} else {
|
|
@@ -63709,58 +63736,55 @@ static idx_t FindTypedRangeBound(ChunkCollection &over, const idx_t order_col, c
|
|
|
63709
63736
|
}
|
|
63710
63737
|
|
|
63711
63738
|
template <typename OP, bool FROM>
|
|
63712
|
-
static idx_t FindRangeBound(
|
|
63713
|
-
|
|
63714
|
-
const auto &over_types = over.Types();
|
|
63715
|
-
D_ASSERT(over_types.size() > order_col);
|
|
63739
|
+
static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
|
|
63740
|
+
WindowInputExpression &boundary, const idx_t expr_idx) {
|
|
63716
63741
|
D_ASSERT(boundary.chunk.ColumnCount() == 1);
|
|
63717
|
-
D_ASSERT(boundary.chunk.data[0].GetType() ==
|
|
63742
|
+
D_ASSERT(boundary.chunk.data[0].GetType().InternalType() == over.input_expr.ptype);
|
|
63718
63743
|
|
|
63719
|
-
switch (
|
|
63744
|
+
switch (over.input_expr.ptype) {
|
|
63720
63745
|
case PhysicalType::INT8:
|
|
63721
|
-
return FindTypedRangeBound<int8_t, OP, FROM>(over,
|
|
63746
|
+
return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63722
63747
|
case PhysicalType::INT16:
|
|
63723
|
-
return FindTypedRangeBound<int16_t, OP, FROM>(over,
|
|
63748
|
+
return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63724
63749
|
case PhysicalType::INT32:
|
|
63725
|
-
return FindTypedRangeBound<int32_t, OP, FROM>(over,
|
|
63750
|
+
return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63726
63751
|
case PhysicalType::INT64:
|
|
63727
|
-
return FindTypedRangeBound<int64_t, OP, FROM>(over,
|
|
63752
|
+
return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63728
63753
|
case PhysicalType::UINT8:
|
|
63729
|
-
return FindTypedRangeBound<uint8_t, OP, FROM>(over,
|
|
63754
|
+
return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63730
63755
|
case PhysicalType::UINT16:
|
|
63731
|
-
return FindTypedRangeBound<uint16_t, OP, FROM>(over,
|
|
63756
|
+
return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63732
63757
|
case PhysicalType::UINT32:
|
|
63733
|
-
return FindTypedRangeBound<uint32_t, OP, FROM>(over,
|
|
63758
|
+
return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63734
63759
|
case PhysicalType::UINT64:
|
|
63735
|
-
return FindTypedRangeBound<uint64_t, OP, FROM>(over,
|
|
63760
|
+
return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63736
63761
|
case PhysicalType::INT128:
|
|
63737
|
-
return FindTypedRangeBound<hugeint_t, OP, FROM>(over,
|
|
63762
|
+
return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63738
63763
|
case PhysicalType::FLOAT:
|
|
63739
|
-
return FindTypedRangeBound<float, OP, FROM>(over,
|
|
63764
|
+
return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63740
63765
|
case PhysicalType::DOUBLE:
|
|
63741
|
-
return FindTypedRangeBound<double, OP, FROM>(over,
|
|
63766
|
+
return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63742
63767
|
case PhysicalType::INTERVAL:
|
|
63743
|
-
return FindTypedRangeBound<interval_t, OP, FROM>(over,
|
|
63768
|
+
return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63744
63769
|
default:
|
|
63745
63770
|
throw InternalException("Unsupported column type for RANGE");
|
|
63746
63771
|
}
|
|
63747
63772
|
}
|
|
63748
63773
|
|
|
63749
63774
|
template <bool FROM>
|
|
63750
|
-
static idx_t FindOrderedRangeBound(
|
|
63751
|
-
const idx_t
|
|
63752
|
-
const idx_t expr_idx) {
|
|
63775
|
+
static idx_t FindOrderedRangeBound(WindowInputColumn &over, const OrderType range_sense, const idx_t order_begin,
|
|
63776
|
+
const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
|
|
63753
63777
|
switch (range_sense) {
|
|
63754
63778
|
case OrderType::ASCENDING:
|
|
63755
|
-
return FindRangeBound<LessThan, FROM>(over,
|
|
63779
|
+
return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63756
63780
|
case OrderType::DESCENDING:
|
|
63757
|
-
return FindRangeBound<GreaterThan, FROM>(over,
|
|
63781
|
+
return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
|
|
63758
63782
|
default:
|
|
63759
63783
|
throw InternalException("Unsupported ORDER BY sense for RANGE");
|
|
63760
63784
|
}
|
|
63761
63785
|
}
|
|
63762
63786
|
|
|
63763
|
-
void WindowBoundariesState::Update(const idx_t row_idx,
|
|
63787
|
+
void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t expr_idx,
|
|
63764
63788
|
WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
|
|
63765
63789
|
const ValidityMask &partition_mask, const ValidityMask &order_mask) {
|
|
63766
63790
|
|
|
@@ -63790,7 +63814,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
|
|
|
63790
63814
|
|
|
63791
63815
|
if ((bounds.valid_start < bounds.valid_end) && bounds.has_preceding_range) {
|
|
63792
63816
|
// Exclude any leading NULLs
|
|
63793
|
-
if (CellIsNull(
|
|
63817
|
+
if (range_collection.CellIsNull(bounds.valid_start)) {
|
|
63794
63818
|
idx_t n = 1;
|
|
63795
63819
|
bounds.valid_start = FindNextStart(order_mask, bounds.valid_start + 1, bounds.valid_end, n);
|
|
63796
63820
|
}
|
|
@@ -63798,7 +63822,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
|
|
|
63798
63822
|
|
|
63799
63823
|
if ((bounds.valid_start < bounds.valid_end) && bounds.has_following_range) {
|
|
63800
63824
|
// Exclude any trailing NULLs
|
|
63801
|
-
if (CellIsNull(
|
|
63825
|
+
if (range_collection.CellIsNull(bounds.valid_end - 1)) {
|
|
63802
63826
|
idx_t n = 1;
|
|
63803
63827
|
bounds.valid_end = FindPrevStart(order_mask, bounds.valid_start, bounds.valid_end, n);
|
|
63804
63828
|
}
|
|
@@ -63849,8 +63873,8 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
|
|
|
63849
63873
|
if (boundary_start.CellIsNull(expr_idx)) {
|
|
63850
63874
|
bounds.window_start = bounds.peer_start;
|
|
63851
63875
|
} else {
|
|
63852
|
-
bounds.window_start = FindOrderedRangeBound<true>(range_collection,
|
|
63853
|
-
|
|
63876
|
+
bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, bounds.valid_start,
|
|
63877
|
+
row_idx, boundary_start, expr_idx);
|
|
63854
63878
|
}
|
|
63855
63879
|
break;
|
|
63856
63880
|
}
|
|
@@ -63858,7 +63882,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
|
|
|
63858
63882
|
if (boundary_start.CellIsNull(expr_idx)) {
|
|
63859
63883
|
bounds.window_start = bounds.peer_start;
|
|
63860
63884
|
} else {
|
|
63861
|
-
bounds.window_start = FindOrderedRangeBound<true>(range_collection,
|
|
63885
|
+
bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, row_idx,
|
|
63862
63886
|
bounds.valid_end, boundary_start, expr_idx);
|
|
63863
63887
|
}
|
|
63864
63888
|
break;
|
|
@@ -63887,8 +63911,8 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
|
|
|
63887
63911
|
if (boundary_end.CellIsNull(expr_idx)) {
|
|
63888
63912
|
bounds.window_end = bounds.peer_end;
|
|
63889
63913
|
} else {
|
|
63890
|
-
bounds.window_end = FindOrderedRangeBound<false>(range_collection,
|
|
63891
|
-
|
|
63914
|
+
bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, bounds.valid_start,
|
|
63915
|
+
row_idx, boundary_end, expr_idx);
|
|
63892
63916
|
}
|
|
63893
63917
|
break;
|
|
63894
63918
|
}
|
|
@@ -63896,7 +63920,7 @@ void WindowBoundariesState::Update(const idx_t row_idx, ChunkCollection &range_c
|
|
|
63896
63920
|
if (boundary_end.CellIsNull(expr_idx)) {
|
|
63897
63921
|
bounds.window_end = bounds.peer_end;
|
|
63898
63922
|
} else {
|
|
63899
|
-
bounds.window_end = FindOrderedRangeBound<false>(range_collection,
|
|
63923
|
+
bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, row_idx,
|
|
63900
63924
|
bounds.valid_end, boundary_end, expr_idx);
|
|
63901
63925
|
}
|
|
63902
63926
|
break;
|
|
@@ -63950,7 +63974,7 @@ struct WindowExecutor {
|
|
|
63950
63974
|
WindowInputExpression boundary_end;
|
|
63951
63975
|
|
|
63952
63976
|
// evaluate RANGE expressions, if needed
|
|
63953
|
-
|
|
63977
|
+
WindowInputColumn range;
|
|
63954
63978
|
|
|
63955
63979
|
// IGNORE NULLS
|
|
63956
63980
|
ValidityMask ignore_nulls;
|
|
@@ -63969,7 +63993,7 @@ WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, ChunkCollection &in
|
|
|
63969
63993
|
boundary_start(wexpr->start_expr.get(), input.GetAllocator()),
|
|
63970
63994
|
boundary_end(wexpr->end_expr.get(), input.GetAllocator()),
|
|
63971
63995
|
range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr->orders[0].expression.get() : nullptr,
|
|
63972
|
-
input.GetAllocator())
|
|
63996
|
+
input.GetAllocator(), input.Count())
|
|
63973
63997
|
|
|
63974
63998
|
{
|
|
63975
63999
|
auto &allocator = input.GetAllocator();
|
|
@@ -64082,8 +64106,7 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
|
|
|
64082
64106
|
// this is the main loop, go through all sorted rows and compute window function result
|
|
64083
64107
|
for (idx_t output_offset = 0; output_offset < input_chunk.size(); ++output_offset, ++row_idx) {
|
|
64084
64108
|
// special case, OVER (), aggregate over everything
|
|
64085
|
-
bounds.Update(row_idx, range
|
|
64086
|
-
order_mask);
|
|
64109
|
+
bounds.Update(row_idx, range, output_offset, boundary_start, boundary_end, partition_mask, order_mask);
|
|
64087
64110
|
if (WindowNeedsRank(wexpr)) {
|
|
64088
64111
|
if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
|
|
64089
64112
|
dense_rank = 1;
|
|
@@ -64328,7 +64351,7 @@ public:
|
|
|
64328
64351
|
CreateMergeTasks(pipeline, *this, gstate, hash_group);
|
|
64329
64352
|
}
|
|
64330
64353
|
|
|
64331
|
-
static
|
|
64354
|
+
static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
|
|
64332
64355
|
WindowGlobalHashGroup &hash_group) {
|
|
64333
64356
|
|
|
64334
64357
|
// Multiple blocks remaining in the group: Schedule the next round
|
|
@@ -64336,28 +64359,6 @@ public:
|
|
|
64336
64359
|
hash_group.global_sort->InitializeMergeRound();
|
|
64337
64360
|
auto new_event = make_shared<WindowMergeEvent>(state, pipeline, hash_group);
|
|
64338
64361
|
event.InsertEvent(move(new_event));
|
|
64339
|
-
return true;
|
|
64340
|
-
}
|
|
64341
|
-
|
|
64342
|
-
// Find the next group to sort
|
|
64343
|
-
for (;;) {
|
|
64344
|
-
auto group = state.GetNextSortGroup();
|
|
64345
|
-
if (group >= state.hash_groups.size()) {
|
|
64346
|
-
// Out of groups
|
|
64347
|
-
return false;
|
|
64348
|
-
}
|
|
64349
|
-
|
|
64350
|
-
auto &hash_group = *state.hash_groups[group];
|
|
64351
|
-
auto &global_sort = *hash_group.global_sort;
|
|
64352
|
-
|
|
64353
|
-
// Prepare for merge sort phase
|
|
64354
|
-
hash_group.PrepareMergePhase();
|
|
64355
|
-
if (global_sort.sorted_blocks.size() > 1) {
|
|
64356
|
-
global_sort.InitializeMergeRound();
|
|
64357
|
-
auto new_event = make_shared<WindowMergeEvent>(state, pipeline, hash_group);
|
|
64358
|
-
event.InsertEvent(move(new_event));
|
|
64359
|
-
return true;
|
|
64360
|
-
}
|
|
64361
64362
|
}
|
|
64362
64363
|
}
|
|
64363
64364
|
};
|
|
@@ -64381,11 +64382,14 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
|
|
|
64381
64382
|
return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
|
|
64382
64383
|
}
|
|
64383
64384
|
|
|
64384
|
-
|
|
64385
|
+
// Schedule all the sorts for maximum thread utilisation
|
|
64386
|
+
for (; group < state.hash_groups.size(); group = state.GetNextSortGroup()) {
|
|
64387
|
+
auto &hash_group = *state.hash_groups[group];
|
|
64385
64388
|
|
|
64386
|
-
|
|
64387
|
-
|
|
64388
|
-
|
|
64389
|
+
// Prepare for merge sort phase
|
|
64390
|
+
hash_group.PrepareMergePhase();
|
|
64391
|
+
WindowMergeEvent::CreateMergeTasks(pipeline, event, state, hash_group);
|
|
64392
|
+
}
|
|
64389
64393
|
|
|
64390
64394
|
return SinkFinalizeType::READY;
|
|
64391
64395
|
}
|
|
@@ -64475,7 +64479,13 @@ void WindowLocalSourceState::MaterializeInput(const vector<LogicalType> &payload
|
|
|
64475
64479
|
|
|
64476
64480
|
// scan the sorted row data
|
|
64477
64481
|
D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
|
|
64478
|
-
|
|
64482
|
+
auto &sb = *global_sort_state.sorted_blocks[0];
|
|
64483
|
+
|
|
64484
|
+
// Free up some memory before allocating more
|
|
64485
|
+
sb.radix_sorting_data.clear();
|
|
64486
|
+
sb.blob_sorting_data = nullptr;
|
|
64487
|
+
|
|
64488
|
+
PayloadScanner scanner(*sb.payload_data, global_sort_state);
|
|
64479
64489
|
DataChunk payload_chunk;
|
|
64480
64490
|
payload_chunk.Initialize(allocator, payload_types);
|
|
64481
64491
|
for (;;) {
|
|
@@ -85233,15 +85243,18 @@ struct FirstFunctionBase {
|
|
|
85233
85243
|
}
|
|
85234
85244
|
};
|
|
85235
85245
|
|
|
85236
|
-
template <bool LAST>
|
|
85246
|
+
template <bool LAST, bool SKIP_NULLS>
|
|
85237
85247
|
struct FirstFunction : public FirstFunctionBase {
|
|
85238
85248
|
template <class INPUT_TYPE, class STATE, class OP>
|
|
85239
85249
|
static void Operation(STATE *state, AggregateInputData &, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) {
|
|
85240
85250
|
if (LAST || !state->is_set) {
|
|
85241
|
-
state->is_set = true;
|
|
85242
85251
|
if (!mask.RowIsValid(idx)) {
|
|
85252
|
+
if (!SKIP_NULLS) {
|
|
85253
|
+
state->is_set = true;
|
|
85254
|
+
}
|
|
85243
85255
|
state->is_null = true;
|
|
85244
85256
|
} else {
|
|
85257
|
+
state->is_set = true;
|
|
85245
85258
|
state->is_null = false;
|
|
85246
85259
|
state->value = input[idx];
|
|
85247
85260
|
}
|
|
@@ -85271,14 +85284,17 @@ struct FirstFunction : public FirstFunctionBase {
|
|
|
85271
85284
|
}
|
|
85272
85285
|
};
|
|
85273
85286
|
|
|
85274
|
-
template <bool LAST>
|
|
85287
|
+
template <bool LAST, bool SKIP_NULLS>
|
|
85275
85288
|
struct FirstFunctionString : public FirstFunctionBase {
|
|
85276
85289
|
template <class STATE>
|
|
85277
85290
|
static void SetValue(STATE *state, string_t value, bool is_null) {
|
|
85278
|
-
state->is_set = true;
|
|
85279
85291
|
if (is_null) {
|
|
85280
|
-
|
|
85292
|
+
if (!SKIP_NULLS) {
|
|
85293
|
+
state->is_set = true;
|
|
85294
|
+
state->is_null = true;
|
|
85295
|
+
}
|
|
85281
85296
|
} else {
|
|
85297
|
+
state->is_set = true;
|
|
85282
85298
|
if (value.IsInlined()) {
|
|
85283
85299
|
state->value = value;
|
|
85284
85300
|
} else {
|
|
@@ -85333,7 +85349,7 @@ struct FirstStateVector {
|
|
|
85333
85349
|
Vector *value;
|
|
85334
85350
|
};
|
|
85335
85351
|
|
|
85336
|
-
template <bool LAST>
|
|
85352
|
+
template <bool LAST, bool SKIP_NULLS>
|
|
85337
85353
|
struct FirstVectorFunction {
|
|
85338
85354
|
template <class STATE>
|
|
85339
85355
|
static void Initialize(STATE *state) {
|
|
@@ -85347,7 +85363,7 @@ struct FirstVectorFunction {
|
|
|
85347
85363
|
}
|
|
85348
85364
|
}
|
|
85349
85365
|
static bool IgnoreNull() {
|
|
85350
|
-
return
|
|
85366
|
+
return SKIP_NULLS;
|
|
85351
85367
|
}
|
|
85352
85368
|
|
|
85353
85369
|
template <class STATE>
|
|
@@ -85363,11 +85379,18 @@ struct FirstVectorFunction {
|
|
|
85363
85379
|
|
|
85364
85380
|
static void Update(Vector inputs[], AggregateInputData &, idx_t input_count, Vector &state_vector, idx_t count) {
|
|
85365
85381
|
auto &input = inputs[0];
|
|
85382
|
+
UnifiedVectorFormat idata;
|
|
85383
|
+
input.ToUnifiedFormat(count, idata);
|
|
85384
|
+
|
|
85366
85385
|
UnifiedVectorFormat sdata;
|
|
85367
85386
|
state_vector.ToUnifiedFormat(count, sdata);
|
|
85368
85387
|
|
|
85369
85388
|
auto states = (FirstStateVector **)sdata.data;
|
|
85370
85389
|
for (idx_t i = 0; i < count; i++) {
|
|
85390
|
+
const auto idx = idata.sel->get_index(i);
|
|
85391
|
+
if (SKIP_NULLS && !idata.validity.RowIsValid(idx)) {
|
|
85392
|
+
continue;
|
|
85393
|
+
}
|
|
85371
85394
|
auto state = states[sdata.sel->get_index(i)];
|
|
85372
85395
|
if (LAST || !state->value) {
|
|
85373
85396
|
SetValue(state, input, i);
|
|
@@ -85406,79 +85429,79 @@ struct FirstVectorFunction {
|
|
|
85406
85429
|
}
|
|
85407
85430
|
};
|
|
85408
85431
|
|
|
85409
|
-
template <class T, bool LAST>
|
|
85432
|
+
template <class T, bool LAST, bool SKIP_NULLS>
|
|
85410
85433
|
static AggregateFunction GetFirstAggregateTemplated(LogicalType type) {
|
|
85411
|
-
auto agg = AggregateFunction::UnaryAggregate<FirstState<T>, T, T, FirstFunction<LAST>>(type, type);
|
|
85434
|
+
auto agg = AggregateFunction::UnaryAggregate<FirstState<T>, T, T, FirstFunction<LAST, SKIP_NULLS>>(type, type);
|
|
85412
85435
|
return agg;
|
|
85413
85436
|
}
|
|
85414
85437
|
|
|
85415
|
-
template <bool LAST>
|
|
85438
|
+
template <bool LAST, bool SKIP_NULLS>
|
|
85416
85439
|
static AggregateFunction GetFirstFunction(const LogicalType &type);
|
|
85417
85440
|
|
|
85418
|
-
template <bool LAST>
|
|
85441
|
+
template <bool LAST, bool SKIP_NULLS>
|
|
85419
85442
|
AggregateFunction GetDecimalFirstFunction(const LogicalType &type) {
|
|
85420
85443
|
D_ASSERT(type.id() == LogicalTypeId::DECIMAL);
|
|
85421
85444
|
switch (type.InternalType()) {
|
|
85422
85445
|
case PhysicalType::INT16:
|
|
85423
|
-
return GetFirstFunction<LAST>(LogicalType::SMALLINT);
|
|
85446
|
+
return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::SMALLINT);
|
|
85424
85447
|
case PhysicalType::INT32:
|
|
85425
|
-
return GetFirstFunction<LAST>(LogicalType::INTEGER);
|
|
85448
|
+
return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::INTEGER);
|
|
85426
85449
|
case PhysicalType::INT64:
|
|
85427
|
-
return GetFirstFunction<LAST>(LogicalType::BIGINT);
|
|
85450
|
+
return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::BIGINT);
|
|
85428
85451
|
default:
|
|
85429
|
-
return GetFirstFunction<LAST>(LogicalType::HUGEINT);
|
|
85452
|
+
return GetFirstFunction<LAST, SKIP_NULLS>(LogicalType::HUGEINT);
|
|
85430
85453
|
}
|
|
85431
85454
|
}
|
|
85432
85455
|
|
|
85433
|
-
template <bool LAST>
|
|
85456
|
+
template <bool LAST, bool SKIP_NULLS>
|
|
85434
85457
|
static AggregateFunction GetFirstFunction(const LogicalType &type) {
|
|
85435
85458
|
switch (type.id()) {
|
|
85436
85459
|
case LogicalTypeId::BOOLEAN:
|
|
85437
|
-
return GetFirstAggregateTemplated<int8_t, LAST>(type);
|
|
85460
|
+
return GetFirstAggregateTemplated<int8_t, LAST, SKIP_NULLS>(type);
|
|
85438
85461
|
case LogicalTypeId::TINYINT:
|
|
85439
|
-
return GetFirstAggregateTemplated<int8_t, LAST>(type);
|
|
85462
|
+
return GetFirstAggregateTemplated<int8_t, LAST, SKIP_NULLS>(type);
|
|
85440
85463
|
case LogicalTypeId::SMALLINT:
|
|
85441
|
-
return GetFirstAggregateTemplated<int16_t, LAST>(type);
|
|
85464
|
+
return GetFirstAggregateTemplated<int16_t, LAST, SKIP_NULLS>(type);
|
|
85442
85465
|
case LogicalTypeId::INTEGER:
|
|
85443
85466
|
case LogicalTypeId::DATE:
|
|
85444
|
-
return GetFirstAggregateTemplated<int32_t, LAST>(type);
|
|
85467
|
+
return GetFirstAggregateTemplated<int32_t, LAST, SKIP_NULLS>(type);
|
|
85445
85468
|
case LogicalTypeId::BIGINT:
|
|
85446
85469
|
case LogicalTypeId::TIME:
|
|
85447
85470
|
case LogicalTypeId::TIMESTAMP:
|
|
85448
85471
|
case LogicalTypeId::TIME_TZ:
|
|
85449
85472
|
case LogicalTypeId::TIMESTAMP_TZ:
|
|
85450
|
-
return GetFirstAggregateTemplated<int64_t, LAST>(type);
|
|
85473
|
+
return GetFirstAggregateTemplated<int64_t, LAST, SKIP_NULLS>(type);
|
|
85451
85474
|
case LogicalTypeId::UTINYINT:
|
|
85452
|
-
return GetFirstAggregateTemplated<uint8_t, LAST>(type);
|
|
85475
|
+
return GetFirstAggregateTemplated<uint8_t, LAST, SKIP_NULLS>(type);
|
|
85453
85476
|
case LogicalTypeId::USMALLINT:
|
|
85454
|
-
return GetFirstAggregateTemplated<uint16_t, LAST>(type);
|
|
85477
|
+
return GetFirstAggregateTemplated<uint16_t, LAST, SKIP_NULLS>(type);
|
|
85455
85478
|
case LogicalTypeId::UINTEGER:
|
|
85456
|
-
return GetFirstAggregateTemplated<uint32_t, LAST>(type);
|
|
85479
|
+
return GetFirstAggregateTemplated<uint32_t, LAST, SKIP_NULLS>(type);
|
|
85457
85480
|
case LogicalTypeId::UBIGINT:
|
|
85458
|
-
return GetFirstAggregateTemplated<uint64_t, LAST>(type);
|
|
85481
|
+
return GetFirstAggregateTemplated<uint64_t, LAST, SKIP_NULLS>(type);
|
|
85459
85482
|
case LogicalTypeId::HUGEINT:
|
|
85460
|
-
return GetFirstAggregateTemplated<hugeint_t, LAST>(type);
|
|
85483
|
+
return GetFirstAggregateTemplated<hugeint_t, LAST, SKIP_NULLS>(type);
|
|
85461
85484
|
case LogicalTypeId::FLOAT:
|
|
85462
|
-
return GetFirstAggregateTemplated<float, LAST>(type);
|
|
85485
|
+
return GetFirstAggregateTemplated<float, LAST, SKIP_NULLS>(type);
|
|
85463
85486
|
case LogicalTypeId::DOUBLE:
|
|
85464
|
-
return GetFirstAggregateTemplated<double, LAST>(type);
|
|
85487
|
+
return GetFirstAggregateTemplated<double, LAST, SKIP_NULLS>(type);
|
|
85465
85488
|
case LogicalTypeId::INTERVAL:
|
|
85466
|
-
return GetFirstAggregateTemplated<interval_t, LAST>(type);
|
|
85489
|
+
return GetFirstAggregateTemplated<interval_t, LAST, SKIP_NULLS>(type);
|
|
85467
85490
|
case LogicalTypeId::VARCHAR:
|
|
85468
85491
|
case LogicalTypeId::BLOB: {
|
|
85469
85492
|
auto agg = AggregateFunction::UnaryAggregateDestructor<FirstState<string_t>, string_t, string_t,
|
|
85470
|
-
FirstFunctionString<LAST>>(type, type);
|
|
85493
|
+
FirstFunctionString<LAST, SKIP_NULLS>>(type, type);
|
|
85471
85494
|
return agg;
|
|
85472
85495
|
}
|
|
85473
85496
|
case LogicalTypeId::DECIMAL: {
|
|
85474
85497
|
type.Verify();
|
|
85475
|
-
AggregateFunction function = GetDecimalFirstFunction<LAST>(type);
|
|
85498
|
+
AggregateFunction function = GetDecimalFirstFunction<LAST, SKIP_NULLS>(type);
|
|
85476
85499
|
function.arguments[0] = type;
|
|
85477
85500
|
function.return_type = type;
|
|
85478
85501
|
return function;
|
|
85479
85502
|
}
|
|
85480
85503
|
default: {
|
|
85481
|
-
using OP = FirstVectorFunction<LAST>;
|
|
85504
|
+
using OP = FirstVectorFunction<LAST, SKIP_NULLS>;
|
|
85482
85505
|
return AggregateFunction({type}, type, AggregateFunction::StateSize<FirstStateVector>,
|
|
85483
85506
|
AggregateFunction::StateInitialize<FirstStateVector, OP>, OP::Update,
|
|
85484
85507
|
AggregateFunction::StateCombine<FirstStateVector, OP>,
|
|
@@ -85489,16 +85512,16 @@ static AggregateFunction GetFirstFunction(const LogicalType &type) {
|
|
|
85489
85512
|
}
|
|
85490
85513
|
|
|
85491
85514
|
AggregateFunction FirstFun::GetFunction(const LogicalType &type) {
|
|
85492
|
-
auto fun = GetFirstFunction<false>(type);
|
|
85515
|
+
auto fun = GetFirstFunction<false, false>(type);
|
|
85493
85516
|
fun.name = "first";
|
|
85494
85517
|
return fun;
|
|
85495
85518
|
}
|
|
85496
85519
|
|
|
85497
|
-
template <bool LAST>
|
|
85520
|
+
template <bool LAST, bool SKIP_NULLS>
|
|
85498
85521
|
unique_ptr<FunctionData> BindDecimalFirst(ClientContext &context, AggregateFunction &function,
|
|
85499
85522
|
vector<unique_ptr<Expression>> &arguments) {
|
|
85500
85523
|
auto decimal_type = arguments[0]->return_type;
|
|
85501
|
-
function = GetFirstFunction<LAST>(decimal_type);
|
|
85524
|
+
function = GetFirstFunction<LAST, SKIP_NULLS>(decimal_type);
|
|
85502
85525
|
function.name = "first";
|
|
85503
85526
|
function.return_type = decimal_type;
|
|
85504
85527
|
return nullptr;
|
|
@@ -85507,15 +85530,19 @@ unique_ptr<FunctionData> BindDecimalFirst(ClientContext &context, AggregateFunct
|
|
|
85507
85530
|
void FirstFun::RegisterFunction(BuiltinFunctions &set) {
|
|
85508
85531
|
AggregateFunctionSet first("first");
|
|
85509
85532
|
AggregateFunctionSet last("last");
|
|
85533
|
+
AggregateFunctionSet any_value("any_value");
|
|
85510
85534
|
for (auto &type : LogicalType::AllTypes()) {
|
|
85511
85535
|
if (type.id() == LogicalTypeId::DECIMAL) {
|
|
85512
85536
|
first.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
|
85513
|
-
BindDecimalFirst<false>, nullptr, nullptr, nullptr));
|
|
85537
|
+
BindDecimalFirst<false, false>, nullptr, nullptr, nullptr));
|
|
85514
85538
|
last.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
|
85515
|
-
BindDecimalFirst<true>, nullptr, nullptr, nullptr));
|
|
85539
|
+
BindDecimalFirst<true, false>, nullptr, nullptr, nullptr));
|
|
85540
|
+
any_value.AddFunction(AggregateFunction({type}, type, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
|
85541
|
+
BindDecimalFirst<false, true>, nullptr, nullptr, nullptr));
|
|
85516
85542
|
} else {
|
|
85517
|
-
first.AddFunction(GetFirstFunction<false>(type));
|
|
85518
|
-
last.AddFunction(GetFirstFunction<true>(type));
|
|
85543
|
+
first.AddFunction(GetFirstFunction<false, false>(type));
|
|
85544
|
+
last.AddFunction(GetFirstFunction<true, false>(type));
|
|
85545
|
+
any_value.AddFunction(GetFirstFunction<false, true>(type));
|
|
85519
85546
|
}
|
|
85520
85547
|
}
|
|
85521
85548
|
set.AddFunction(first);
|
|
@@ -85523,6 +85550,8 @@ void FirstFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
85523
85550
|
set.AddFunction(first);
|
|
85524
85551
|
|
|
85525
85552
|
set.AddFunction(last);
|
|
85553
|
+
|
|
85554
|
+
set.AddFunction(any_value);
|
|
85526
85555
|
}
|
|
85527
85556
|
|
|
85528
85557
|
} // namespace duckdb
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "17ec2ab20"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev1527"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|