duckdb 0.3.5-dev653.0 → 0.3.5-dev658.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +153 -49
- package/src/duckdb.hpp +26 -16
- package/src/parquet-amalgamation.cpp +35617 -35617
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -59077,7 +59077,7 @@ public:
|
|
|
59077
59077
|
using FrameBounds = std::pair<idx_t, idx_t>;
|
|
59078
59078
|
|
|
59079
59079
|
WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, const LogicalType &result_type,
|
|
59080
|
-
ChunkCollection *input, WindowAggregationMode mode);
|
|
59080
|
+
ChunkCollection *input, const ValidityMask &filter_mask, WindowAggregationMode mode);
|
|
59081
59081
|
~WindowSegmentTree();
|
|
59082
59082
|
|
|
59083
59083
|
//! First row contains the result.
|
|
@@ -59110,6 +59110,8 @@ private:
|
|
|
59110
59110
|
vector<data_t> state;
|
|
59111
59111
|
//! Input data chunk, used for intermediate window segment aggregation
|
|
59112
59112
|
DataChunk inputs;
|
|
59113
|
+
//! The filtered rows in inputs.
|
|
59114
|
+
SelectionVector filter_sel;
|
|
59113
59115
|
//! A vector of pointers to "state", used for intermediate window segment aggregation
|
|
59114
59116
|
Vector statep;
|
|
59115
59117
|
//! The frame boundaries, used for the window functions
|
|
@@ -59130,6 +59132,9 @@ private:
|
|
|
59130
59132
|
//! The (sorted) input chunk collection on which the tree is built
|
|
59131
59133
|
ChunkCollection *input_ref;
|
|
59132
59134
|
|
|
59135
|
+
//! The filtered rows in input_ref.
|
|
59136
|
+
const ValidityMask &filter_mask;
|
|
59137
|
+
|
|
59133
59138
|
//! Use the window API, if available
|
|
59134
59139
|
WindowAggregationMode mode;
|
|
59135
59140
|
|
|
@@ -60032,6 +60037,25 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
60032
60037
|
}
|
|
60033
60038
|
}
|
|
60034
60039
|
|
|
60040
|
+
// evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
|
|
60041
|
+
ValidityMask filter_mask;
|
|
60042
|
+
vector<validity_t> filter_bits;
|
|
60043
|
+
if (wexpr->filter_expr) {
|
|
60044
|
+
// Start with all invalid and set the ones that pass
|
|
60045
|
+
filter_bits.resize(ValidityMask::ValidityMaskSize(input.Count()), 0);
|
|
60046
|
+
filter_mask.Initialize(filter_bits.data());
|
|
60047
|
+
ExpressionExecutor filter_execution(*wexpr->filter_expr);
|
|
60048
|
+
SelectionVector true_sel(STANDARD_VECTOR_SIZE);
|
|
60049
|
+
idx_t base_idx = 0;
|
|
60050
|
+
for (auto &chunk : input.Chunks()) {
|
|
60051
|
+
const auto filtered = filter_execution.SelectExpression(*chunk, true_sel);
|
|
60052
|
+
for (idx_t f = 0; f < filtered; ++f) {
|
|
60053
|
+
filter_mask.SetValid(base_idx + true_sel[f]);
|
|
60054
|
+
}
|
|
60055
|
+
base_idx += chunk->size();
|
|
60056
|
+
}
|
|
60057
|
+
}
|
|
60058
|
+
|
|
60035
60059
|
// evaluate boundaries if present. Parser has checked boundary types.
|
|
60036
60060
|
ChunkCollection boundary_start_collection;
|
|
60037
60061
|
if (wexpr->start_expr) {
|
|
@@ -60085,7 +60109,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
60085
60109
|
|
|
60086
60110
|
if (wexpr->aggregate) {
|
|
60087
60111
|
segment_tree = make_unique<WindowSegmentTree>(*(wexpr->aggregate), wexpr->bind_info.get(), wexpr->return_type,
|
|
60088
|
-
&payload_collection, mode);
|
|
60112
|
+
&payload_collection, filter_mask, mode);
|
|
60089
60113
|
}
|
|
60090
60114
|
|
|
60091
60115
|
WindowBoundariesState bounds(wexpr);
|
|
@@ -78692,10 +78716,11 @@ namespace duckdb {
|
|
|
78692
78716
|
|
|
78693
78717
|
WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info,
|
|
78694
78718
|
const LogicalType &result_type_p, ChunkCollection *input,
|
|
78695
|
-
WindowAggregationMode mode_p)
|
|
78719
|
+
const ValidityMask &filter_mask_p, WindowAggregationMode mode_p)
|
|
78696
78720
|
: aggregate(aggregate), bind_info(bind_info), result_type(result_type_p), state(aggregate.state_size()),
|
|
78697
78721
|
statep(Value::POINTER((idx_t)state.data())), frame(0, 0), active(0, 1),
|
|
78698
|
-
statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input),
|
|
78722
|
+
statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input), filter_mask(filter_mask_p),
|
|
78723
|
+
mode(mode_p) {
|
|
78699
78724
|
#if STANDARD_VECTOR_SIZE < 512
|
|
78700
78725
|
throw NotImplementedException("Window functions are not supported for vector sizes < 512");
|
|
78701
78726
|
#endif
|
|
@@ -78703,6 +78728,7 @@ WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData
|
|
|
78703
78728
|
statev.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
|
|
78704
78729
|
|
|
78705
78730
|
if (input_ref && input_ref->ColumnCount() > 0) {
|
|
78731
|
+
filter_sel.Initialize(STANDARD_VECTOR_SIZE);
|
|
78706
78732
|
inputs.Initialize(input_ref->Types());
|
|
78707
78733
|
// if we have a frame-by-frame method, share the single state
|
|
78708
78734
|
if (aggregate.window && UseWindowAPI()) {
|
|
@@ -78783,6 +78809,19 @@ void WindowSegmentTree::ExtractFrame(idx_t begin, idx_t end) {
|
|
|
78783
78809
|
VectorOperations::Copy(chunk_b.data[i], v, chunk_b_count, 0, chunk_a_count);
|
|
78784
78810
|
}
|
|
78785
78811
|
}
|
|
78812
|
+
|
|
78813
|
+
// Slice to any filtered rows
|
|
78814
|
+
if (!filter_mask.AllValid()) {
|
|
78815
|
+
idx_t filtered = 0;
|
|
78816
|
+
for (idx_t i = begin; i < end; ++i) {
|
|
78817
|
+
if (filter_mask.RowIsValid(i)) {
|
|
78818
|
+
filter_sel.set_index(filtered++, i - begin);
|
|
78819
|
+
}
|
|
78820
|
+
}
|
|
78821
|
+
if (filtered != inputs.size()) {
|
|
78822
|
+
inputs.Slice(filter_sel, filtered);
|
|
78823
|
+
}
|
|
78824
|
+
}
|
|
78786
78825
|
}
|
|
78787
78826
|
|
|
78788
78827
|
void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end) {
|
|
@@ -78863,7 +78902,16 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
|
|
|
78863
78902
|
if (inputs.ColumnCount() == 0) {
|
|
78864
78903
|
D_ASSERT(GetTypeIdSize(result_type.InternalType()) == sizeof(idx_t));
|
|
78865
78904
|
auto data = FlatVector::GetData<idx_t>(result);
|
|
78866
|
-
|
|
78905
|
+
// Slice to any filtered rows
|
|
78906
|
+
if (!filter_mask.AllValid()) {
|
|
78907
|
+
idx_t filtered = 0;
|
|
78908
|
+
for (idx_t i = begin; i < end; ++i) {
|
|
78909
|
+
filtered += filter_mask.RowIsValid(i);
|
|
78910
|
+
}
|
|
78911
|
+
data[rid] = filtered;
|
|
78912
|
+
} else {
|
|
78913
|
+
data[rid] = end - begin;
|
|
78914
|
+
}
|
|
78867
78915
|
return;
|
|
78868
78916
|
}
|
|
78869
78917
|
|
|
@@ -78904,8 +78952,8 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
|
|
|
78904
78952
|
active = FrameBounds(active_chunks.first * STANDARD_VECTOR_SIZE,
|
|
78905
78953
|
MinValue((active_chunks.second + 1) * STANDARD_VECTOR_SIZE, coll.Count()));
|
|
78906
78954
|
|
|
78907
|
-
aggregate.window(inputs.data.data(), bind_info, inputs.ColumnCount(), state.data(), frame, prev,
|
|
78908
|
-
active.first);
|
|
78955
|
+
aggregate.window(inputs.data.data(), filter_mask, bind_info, inputs.ColumnCount(), state.data(), frame, prev,
|
|
78956
|
+
result, rid, active.first);
|
|
78909
78957
|
return;
|
|
78910
78958
|
}
|
|
78911
78959
|
|
|
@@ -83436,6 +83484,19 @@ struct ModeState {
|
|
|
83436
83484
|
}
|
|
83437
83485
|
};
|
|
83438
83486
|
|
|
83487
|
+
struct ModeIncluded {
|
|
83488
|
+
inline explicit ModeIncluded(const ValidityMask &fmask_p, const ValidityMask &dmask_p, idx_t bias_p)
|
|
83489
|
+
: fmask(fmask_p), dmask(dmask_p), bias(bias_p) {
|
|
83490
|
+
}
|
|
83491
|
+
|
|
83492
|
+
inline bool operator()(const idx_t &idx) const {
|
|
83493
|
+
return fmask.RowIsValid(idx) && dmask.RowIsValid(idx - bias);
|
|
83494
|
+
}
|
|
83495
|
+
const ValidityMask &fmask;
|
|
83496
|
+
const ValidityMask &dmask;
|
|
83497
|
+
const idx_t bias;
|
|
83498
|
+
};
|
|
83499
|
+
|
|
83439
83500
|
template <typename KEY_TYPE>
|
|
83440
83501
|
struct ModeFunction {
|
|
83441
83502
|
template <class STATE>
|
|
@@ -83492,11 +83553,14 @@ struct ModeFunction {
|
|
|
83492
83553
|
}
|
|
83493
83554
|
|
|
83494
83555
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
|
|
83495
|
-
static void Window(const INPUT_TYPE *data, const ValidityMask &
|
|
83496
|
-
const FrameBounds &frame, const FrameBounds &prev,
|
|
83556
|
+
static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
|
|
83557
|
+
FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
|
|
83558
|
+
Vector &result, idx_t rid, idx_t bias) {
|
|
83497
83559
|
auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
|
|
83498
83560
|
auto &rmask = FlatVector::Validity(result);
|
|
83499
83561
|
|
|
83562
|
+
ModeIncluded included(fmask, dmask, bias);
|
|
83563
|
+
|
|
83500
83564
|
if (!state->frequency_map) {
|
|
83501
83565
|
state->frequency_map = new unordered_map<KEY_TYPE, size_t>();
|
|
83502
83566
|
}
|
|
@@ -83505,31 +83569,31 @@ struct ModeFunction {
|
|
|
83505
83569
|
state->Reset();
|
|
83506
83570
|
// for f ∈ F do
|
|
83507
83571
|
for (auto f = frame.first; f < frame.second; ++f) {
|
|
83508
|
-
if (
|
|
83572
|
+
if (included(f)) {
|
|
83509
83573
|
state->ModeAdd(KEY_TYPE(data[f]));
|
|
83510
83574
|
}
|
|
83511
83575
|
}
|
|
83512
83576
|
} else {
|
|
83513
83577
|
// for f ∈ P \ F do
|
|
83514
83578
|
for (auto p = prev.first; p < frame.first; ++p) {
|
|
83515
|
-
if (
|
|
83579
|
+
if (included(p)) {
|
|
83516
83580
|
state->ModeRm(KEY_TYPE(data[p]));
|
|
83517
83581
|
}
|
|
83518
83582
|
}
|
|
83519
83583
|
for (auto p = frame.second; p < prev.second; ++p) {
|
|
83520
|
-
if (
|
|
83584
|
+
if (included(p)) {
|
|
83521
83585
|
state->ModeRm(KEY_TYPE(data[p]));
|
|
83522
83586
|
}
|
|
83523
83587
|
}
|
|
83524
83588
|
|
|
83525
83589
|
// for f ∈ F \ P do
|
|
83526
83590
|
for (auto f = frame.first; f < prev.first; ++f) {
|
|
83527
|
-
if (
|
|
83591
|
+
if (included(f)) {
|
|
83528
83592
|
state->ModeAdd(KEY_TYPE(data[f]));
|
|
83529
83593
|
}
|
|
83530
83594
|
}
|
|
83531
83595
|
for (auto f = prev.second; f < frame.second; ++f) {
|
|
83532
|
-
if (
|
|
83596
|
+
if (included(f)) {
|
|
83533
83597
|
state->ModeAdd(KEY_TYPE(data[f]));
|
|
83534
83598
|
}
|
|
83535
83599
|
}
|
|
@@ -83744,14 +83808,21 @@ struct QuantileState {
|
|
|
83744
83808
|
}
|
|
83745
83809
|
};
|
|
83746
83810
|
|
|
83747
|
-
struct
|
|
83748
|
-
inline explicit
|
|
83811
|
+
struct QuantileIncluded {
|
|
83812
|
+
inline explicit QuantileIncluded(const ValidityMask &fmask_p, const ValidityMask &dmask_p, idx_t bias_p)
|
|
83813
|
+
: fmask(fmask_p), dmask(dmask_p), bias(bias_p) {
|
|
83749
83814
|
}
|
|
83750
83815
|
|
|
83751
83816
|
inline bool operator()(const idx_t &idx) const {
|
|
83752
|
-
return
|
|
83817
|
+
return fmask.RowIsValid(idx) && dmask.RowIsValid(idx - bias);
|
|
83818
|
+
}
|
|
83819
|
+
|
|
83820
|
+
inline bool AllValid() const {
|
|
83821
|
+
return fmask.AllValid() && dmask.AllValid();
|
|
83753
83822
|
}
|
|
83754
|
-
|
|
83823
|
+
|
|
83824
|
+
const ValidityMask &fmask;
|
|
83825
|
+
const ValidityMask &dmask;
|
|
83755
83826
|
const idx_t bias;
|
|
83756
83827
|
};
|
|
83757
83828
|
|
|
@@ -83811,7 +83882,7 @@ static idx_t ReplaceIndex(idx_t *index, const FrameBounds &frame, const FrameBou
|
|
|
83811
83882
|
|
|
83812
83883
|
template <class INPUT_TYPE>
|
|
83813
83884
|
static inline int CanReplace(const idx_t *index, const INPUT_TYPE *fdata, const idx_t j, const idx_t k0, const idx_t k1,
|
|
83814
|
-
const
|
|
83885
|
+
const QuantileIncluded &validity) {
|
|
83815
83886
|
D_ASSERT(index);
|
|
83816
83887
|
|
|
83817
83888
|
// NULLs sort to the end, so if we have inserted a NULL,
|
|
@@ -84142,12 +84213,13 @@ struct QuantileScalarOperation : public QuantileOperation {
|
|
|
84142
84213
|
}
|
|
84143
84214
|
|
|
84144
84215
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
|
|
84145
|
-
static void Window(const INPUT_TYPE *data, const ValidityMask &
|
|
84146
|
-
const FrameBounds &frame, const FrameBounds &prev,
|
|
84216
|
+
static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
|
|
84217
|
+
FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
|
|
84218
|
+
Vector &result, idx_t ridx, idx_t bias) {
|
|
84147
84219
|
auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
|
|
84148
84220
|
auto &rmask = FlatVector::Validity(result);
|
|
84149
84221
|
|
|
84150
|
-
|
|
84222
|
+
QuantileIncluded included(fmask, dmask, bias);
|
|
84151
84223
|
|
|
84152
84224
|
// Lazily initialise frame state
|
|
84153
84225
|
auto prev_pos = state->pos;
|
|
@@ -84167,9 +84239,9 @@ struct QuantileScalarOperation : public QuantileOperation {
|
|
|
84167
84239
|
// Fixed frame size
|
|
84168
84240
|
const auto j = ReplaceIndex(index, frame, prev);
|
|
84169
84241
|
// We can only replace if the number of NULLs has not changed
|
|
84170
|
-
if (
|
|
84242
|
+
if (included.AllValid() || included(prev.first) == included(prev.second)) {
|
|
84171
84243
|
Interpolator<DISCRETE> interp(q, prev_pos);
|
|
84172
|
-
replace = CanReplace(index, data, j, interp.FRN, interp.CRN,
|
|
84244
|
+
replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
|
|
84173
84245
|
if (replace) {
|
|
84174
84246
|
state->pos = prev_pos;
|
|
84175
84247
|
}
|
|
@@ -84178,9 +84250,9 @@ struct QuantileScalarOperation : public QuantileOperation {
|
|
|
84178
84250
|
ReuseIndexes(index, frame, prev);
|
|
84179
84251
|
}
|
|
84180
84252
|
|
|
84181
|
-
if (!replace && !
|
|
84253
|
+
if (!replace && !included.AllValid()) {
|
|
84182
84254
|
// Remove the NULLs
|
|
84183
|
-
state->pos = std::partition(index, index + state->pos,
|
|
84255
|
+
state->pos = std::partition(index, index + state->pos, included) - index;
|
|
84184
84256
|
}
|
|
84185
84257
|
if (state->pos) {
|
|
84186
84258
|
Interpolator<DISCRETE> interp(q, state->pos);
|
|
@@ -84293,12 +84365,13 @@ struct QuantileListOperation : public QuantileOperation {
|
|
|
84293
84365
|
}
|
|
84294
84366
|
|
|
84295
84367
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
|
|
84296
|
-
static void Window(const INPUT_TYPE *data, const ValidityMask &
|
|
84297
|
-
const FrameBounds &frame, const FrameBounds &prev,
|
|
84368
|
+
static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
|
|
84369
|
+
FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
|
|
84370
|
+
Vector &list, idx_t lidx, idx_t bias) {
|
|
84298
84371
|
D_ASSERT(bind_data_p);
|
|
84299
84372
|
auto bind_data = (QuantileBindData *)bind_data_p;
|
|
84300
84373
|
|
|
84301
|
-
|
|
84374
|
+
QuantileIncluded included(fmask, dmask, bias);
|
|
84302
84375
|
|
|
84303
84376
|
// Result is a constant LIST<RESULT_TYPE> with a fixed length
|
|
84304
84377
|
auto ldata = FlatVector::GetData<RESULT_TYPE>(list);
|
|
@@ -84329,11 +84402,11 @@ struct QuantileListOperation : public QuantileOperation {
|
|
|
84329
84402
|
// Fixed frame size
|
|
84330
84403
|
const auto j = ReplaceIndex(index, frame, prev);
|
|
84331
84404
|
// We can only replace if the number of NULLs has not changed
|
|
84332
|
-
if (
|
|
84405
|
+
if (included.AllValid() || included(prev.first) == included(prev.second)) {
|
|
84333
84406
|
for (const auto &q : bind_data->order) {
|
|
84334
84407
|
const auto &quantile = bind_data->quantiles[q];
|
|
84335
84408
|
Interpolator<DISCRETE> interp(quantile, prev_pos);
|
|
84336
|
-
const auto replace = CanReplace(index, data, j, interp.FRN, interp.CRN,
|
|
84409
|
+
const auto replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
|
|
84337
84410
|
if (replace < 0) {
|
|
84338
84411
|
// Replacement is before this quantile, so the rest will be replaceable too.
|
|
84339
84412
|
replaceable.first = MinValue(replaceable.first, interp.FRN);
|
|
@@ -84353,9 +84426,9 @@ struct QuantileListOperation : public QuantileOperation {
|
|
|
84353
84426
|
ReuseIndexes(index, frame, prev);
|
|
84354
84427
|
}
|
|
84355
84428
|
|
|
84356
|
-
if (replaceable.first >= replaceable.second && !
|
|
84429
|
+
if (replaceable.first >= replaceable.second && !included.AllValid()) {
|
|
84357
84430
|
// Remove the NULLs
|
|
84358
|
-
state->pos = std::partition(index, index + state->pos,
|
|
84431
|
+
state->pos = std::partition(index, index + state->pos, included) - index;
|
|
84359
84432
|
}
|
|
84360
84433
|
|
|
84361
84434
|
if (state->pos) {
|
|
@@ -84652,12 +84725,13 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
|
84652
84725
|
}
|
|
84653
84726
|
|
|
84654
84727
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
|
|
84655
|
-
static void Window(const INPUT_TYPE *data, const ValidityMask &
|
|
84656
|
-
const FrameBounds &frame, const FrameBounds &prev,
|
|
84728
|
+
static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
|
|
84729
|
+
FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
|
|
84730
|
+
Vector &result, idx_t ridx, idx_t bias) {
|
|
84657
84731
|
auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
|
|
84658
84732
|
auto &rmask = FlatVector::Validity(result);
|
|
84659
84733
|
|
|
84660
|
-
|
|
84734
|
+
QuantileIncluded included(fmask, dmask, bias);
|
|
84661
84735
|
|
|
84662
84736
|
// Lazily initialise frame state
|
|
84663
84737
|
auto prev_pos = state->pos;
|
|
@@ -84678,7 +84752,7 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
|
84678
84752
|
// the median has changed, the previous order is not correct.
|
|
84679
84753
|
// It is probably close, however, and so reuse is helpful.
|
|
84680
84754
|
ReuseIndexes(index2, frame, prev);
|
|
84681
|
-
std::partition(index2, index2 + state->pos,
|
|
84755
|
+
std::partition(index2, index2 + state->pos, included);
|
|
84682
84756
|
|
|
84683
84757
|
// Find the two positions needed for the median
|
|
84684
84758
|
const float q = 0.5;
|
|
@@ -84687,10 +84761,10 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
|
84687
84761
|
if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
|
|
84688
84762
|
// Fixed frame size
|
|
84689
84763
|
const auto j = ReplaceIndex(index, frame, prev);
|
|
84690
|
-
// We can only replace if the number of
|
|
84691
|
-
if (
|
|
84764
|
+
// We can only replace if the number of NULLs has not changed
|
|
84765
|
+
if (included.AllValid() || included(prev.first) == included(prev.second)) {
|
|
84692
84766
|
Interpolator<false> interp(q, prev_pos);
|
|
84693
|
-
replace = CanReplace(index, data, j, interp.FRN, interp.CRN,
|
|
84767
|
+
replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
|
|
84694
84768
|
if (replace) {
|
|
84695
84769
|
state->pos = prev_pos;
|
|
84696
84770
|
}
|
|
@@ -84699,9 +84773,9 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
|
84699
84773
|
ReuseIndexes(index, frame, prev);
|
|
84700
84774
|
}
|
|
84701
84775
|
|
|
84702
|
-
if (!replace && !
|
|
84776
|
+
if (!replace && !included.AllValid()) {
|
|
84703
84777
|
// Remove the NULLs
|
|
84704
|
-
state->pos = std::partition(index, index + state->pos,
|
|
84778
|
+
state->pos = std::partition(index, index + state->pos, included) - index;
|
|
84705
84779
|
}
|
|
84706
84780
|
|
|
84707
84781
|
if (state->pos) {
|
|
@@ -145925,6 +145999,11 @@ bool WindowExpression::Equals(const WindowExpression *a, const WindowExpression
|
|
|
145925
145999
|
return false;
|
|
145926
146000
|
}
|
|
145927
146001
|
}
|
|
146002
|
+
// check if the filter clauses are equivalent
|
|
146003
|
+
if (!BaseExpression::Equals(a->filter_expr.get(), b->filter_expr.get())) {
|
|
146004
|
+
return false;
|
|
146005
|
+
}
|
|
146006
|
+
|
|
145928
146007
|
return true;
|
|
145929
146008
|
}
|
|
145930
146009
|
|
|
@@ -145944,6 +146023,8 @@ unique_ptr<ParsedExpression> WindowExpression::Copy() const {
|
|
|
145944
146023
|
new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy());
|
|
145945
146024
|
}
|
|
145946
146025
|
|
|
146026
|
+
new_window->filter_expr = filter_expr ? filter_expr->Copy() : nullptr;
|
|
146027
|
+
|
|
145947
146028
|
new_window->start = start;
|
|
145948
146029
|
new_window->end = end;
|
|
145949
146030
|
new_window->start_expr = start_expr ? start_expr->Copy() : nullptr;
|
|
@@ -145976,6 +146057,7 @@ void WindowExpression::Serialize(FieldWriter &writer) const {
|
|
|
145976
146057
|
writer.WriteOptional(offset_expr);
|
|
145977
146058
|
writer.WriteOptional(default_expr);
|
|
145978
146059
|
writer.WriteField<bool>(ignore_nulls);
|
|
146060
|
+
writer.WriteOptional(filter_expr);
|
|
145979
146061
|
}
|
|
145980
146062
|
|
|
145981
146063
|
unique_ptr<ParsedExpression> WindowExpression::Deserialize(ExpressionType type, FieldReader &reader) {
|
|
@@ -145998,6 +146080,7 @@ unique_ptr<ParsedExpression> WindowExpression::Deserialize(ExpressionType type,
|
|
|
145998
146080
|
expr->offset_expr = reader.ReadOptional<ParsedExpression>(nullptr);
|
|
145999
146081
|
expr->default_expr = reader.ReadOptional<ParsedExpression>(nullptr);
|
|
146000
146082
|
expr->ignore_nulls = reader.ReadRequired<bool>();
|
|
146083
|
+
expr->filter_expr = reader.ReadOptional<ParsedExpression>(nullptr);
|
|
146001
146084
|
return move(expr);
|
|
146002
146085
|
}
|
|
146003
146086
|
|
|
@@ -146860,6 +146943,9 @@ void ParsedExpressionIterator::EnumerateChildren(
|
|
|
146860
146943
|
for (auto &child : window_expr.children) {
|
|
146861
146944
|
callback(child);
|
|
146862
146945
|
}
|
|
146946
|
+
if (window_expr.filter_expr) {
|
|
146947
|
+
callback(window_expr.filter_expr);
|
|
146948
|
+
}
|
|
146863
146949
|
if (window_expr.start_expr) {
|
|
146864
146950
|
callback(window_expr.start_expr);
|
|
146865
146951
|
}
|
|
@@ -155446,6 +155532,11 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
|
|
|
155446
155532
|
auto lowercase_name = StringUtil::Lower(function_name);
|
|
155447
155533
|
|
|
155448
155534
|
if (root->over) {
|
|
155535
|
+
const auto win_fun_type = WindowToExpressionType(lowercase_name);
|
|
155536
|
+
if (win_fun_type == ExpressionType::INVALID) {
|
|
155537
|
+
throw InternalException("Unknown/unsupported window function");
|
|
155538
|
+
}
|
|
155539
|
+
|
|
155449
155540
|
if (root->agg_distinct) {
|
|
155450
155541
|
throw ParserException("DISTINCT is not implemented for window functions!");
|
|
155451
155542
|
}
|
|
@@ -155454,18 +155545,13 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
|
|
|
155454
155545
|
throw ParserException("ORDER BY is not implemented for window functions!");
|
|
155455
155546
|
}
|
|
155456
155547
|
|
|
155457
|
-
if (root->agg_filter) {
|
|
155458
|
-
throw ParserException("FILTER is not implemented for window functions!");
|
|
155548
|
+
if (win_fun_type != ExpressionType::WINDOW_AGGREGATE && root->agg_filter) {
|
|
155549
|
+
throw ParserException("FILTER is not implemented for non-aggregate window functions!");
|
|
155459
155550
|
}
|
|
155460
155551
|
if (root->export_state) {
|
|
155461
155552
|
throw ParserException("EXPORT_STATE is not supported for window functions!");
|
|
155462
155553
|
}
|
|
155463
155554
|
|
|
155464
|
-
const auto win_fun_type = WindowToExpressionType(lowercase_name);
|
|
155465
|
-
if (win_fun_type == ExpressionType::INVALID) {
|
|
155466
|
-
throw InternalException("Unknown/unsupported window function");
|
|
155467
|
-
}
|
|
155468
|
-
|
|
155469
155555
|
if (win_fun_type == ExpressionType::WINDOW_AGGREGATE && root->agg_ignore_nulls) {
|
|
155470
155556
|
throw ParserException("IGNORE NULLS is not supported for windowed aggregates");
|
|
155471
155557
|
}
|
|
@@ -155473,6 +155559,11 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
|
|
|
155473
155559
|
auto expr = make_unique<WindowExpression>(win_fun_type, schema, lowercase_name);
|
|
155474
155560
|
expr->ignore_nulls = root->agg_ignore_nulls;
|
|
155475
155561
|
|
|
155562
|
+
if (root->agg_filter) {
|
|
155563
|
+
auto filter_expr = TransformExpression(root->agg_filter);
|
|
155564
|
+
expr->filter_expr = move(filter_expr);
|
|
155565
|
+
}
|
|
155566
|
+
|
|
155476
155567
|
if (root->args) {
|
|
155477
155568
|
vector<unique_ptr<ParsedExpression>> function_list;
|
|
155478
155569
|
TransformExpressionList(*root->args, function_list);
|
|
@@ -161727,6 +161818,7 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) {
|
|
|
161727
161818
|
for (auto &order : window.orders) {
|
|
161728
161819
|
BindChild(order.expression, depth, error);
|
|
161729
161820
|
}
|
|
161821
|
+
BindChild(window.filter_expr, depth, error);
|
|
161730
161822
|
BindChild(window.start_expr, depth, error);
|
|
161731
161823
|
BindChild(window.end_expr, depth, error);
|
|
161732
161824
|
BindChild(window.offset_expr, depth, error);
|
|
@@ -161860,6 +161952,8 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) {
|
|
|
161860
161952
|
result->orders.emplace_back(type, null_order, move(expression));
|
|
161861
161953
|
}
|
|
161862
161954
|
|
|
161955
|
+
result->filter_expr = CastWindowExpression(window.filter_expr, LogicalType::BOOLEAN);
|
|
161956
|
+
|
|
161863
161957
|
result->start_expr = CastWindowExpression(window.start_expr, start_type);
|
|
161864
161958
|
result->end_expr = CastWindowExpression(window.end_expr, end_type);
|
|
161865
161959
|
result->offset_expr = CastWindowExpression(window.offset_expr, LogicalType::BIGINT);
|
|
@@ -168164,6 +168258,11 @@ bool BoundWindowExpression::Equals(const BaseExpression *other_p) const {
|
|
|
168164
168258
|
return false;
|
|
168165
168259
|
}
|
|
168166
168260
|
}
|
|
168261
|
+
// check if the filter expressions are equivalent
|
|
168262
|
+
if (!Expression::Equals(filter_expr.get(), other->filter_expr.get())) {
|
|
168263
|
+
return false;
|
|
168264
|
+
}
|
|
168265
|
+
|
|
168167
168266
|
// check if the framing expressions are equivalent
|
|
168168
168267
|
if (!Expression::Equals(start_expr.get(), other->start_expr.get()) ||
|
|
168169
168268
|
!Expression::Equals(end_expr.get(), other->end_expr.get()) ||
|
|
@@ -168228,6 +168327,8 @@ unique_ptr<Expression> BoundWindowExpression::Copy() {
|
|
|
168228
168327
|
new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy());
|
|
168229
168328
|
}
|
|
168230
168329
|
|
|
168330
|
+
new_window->filter_expr = filter_expr ? filter_expr->Copy() : nullptr;
|
|
168331
|
+
|
|
168231
168332
|
new_window->start = start;
|
|
168232
168333
|
new_window->end = end;
|
|
168233
168334
|
new_window->start_expr = start_expr ? start_expr->Copy() : nullptr;
|
|
@@ -169468,6 +169569,9 @@ void ExpressionIterator::EnumerateChildren(Expression &expr,
|
|
|
169468
169569
|
for (auto &child : window_expr.children) {
|
|
169469
169570
|
callback(child);
|
|
169470
169571
|
}
|
|
169572
|
+
if (window_expr.filter_expr) {
|
|
169573
|
+
callback(window_expr.filter_expr);
|
|
169574
|
+
}
|
|
169471
169575
|
if (window_expr.start_expr) {
|
|
169472
169576
|
callback(window_expr.start_expr);
|
|
169473
169577
|
}
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "1d0e9975a"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev658"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -7791,13 +7791,13 @@ public:
|
|
|
7791
7791
|
}
|
|
7792
7792
|
|
|
7793
7793
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE, class OP>
|
|
7794
|
-
static void UnaryWindow(Vector &input, FunctionData *bind_data, data_ptr_t state,
|
|
7795
|
-
const FrameBounds &prev, Vector &result, idx_t rid, idx_t bias) {
|
|
7794
|
+
static void UnaryWindow(Vector &input, const ValidityMask &ifilter, FunctionData *bind_data, data_ptr_t state,
|
|
7795
|
+
const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t rid, idx_t bias) {
|
|
7796
7796
|
|
|
7797
7797
|
auto idata = FlatVector::GetData<const INPUT_TYPE>(input) - bias;
|
|
7798
7798
|
const auto &ivalid = FlatVector::Validity(input);
|
|
7799
|
-
OP::template Window<STATE, INPUT_TYPE, RESULT_TYPE>(idata, ivalid, bind_data, (STATE *)state, frame,
|
|
7800
|
-
result, rid, bias);
|
|
7799
|
+
OP::template Window<STATE, INPUT_TYPE, RESULT_TYPE>(idata, ifilter, ivalid, bind_data, (STATE *)state, frame,
|
|
7800
|
+
prev, result, rid, bias);
|
|
7801
7801
|
}
|
|
7802
7802
|
|
|
7803
7803
|
template <class STATE_TYPE, class OP>
|
|
@@ -9022,9 +9022,9 @@ typedef void (*aggregate_simple_update_t)(Vector inputs[], FunctionData *bind_da
|
|
|
9022
9022
|
|
|
9023
9023
|
//! The type used for updating complex windowed aggregate functions (optional)
|
|
9024
9024
|
typedef std::pair<idx_t, idx_t> FrameBounds;
|
|
9025
|
-
typedef void (*aggregate_window_t)(Vector inputs[],
|
|
9026
|
-
|
|
9027
|
-
idx_t bias);
|
|
9025
|
+
typedef void (*aggregate_window_t)(Vector inputs[], const ValidityMask &filter_mask, FunctionData *bind_data,
|
|
9026
|
+
idx_t input_count, data_ptr_t state, const FrameBounds &frame,
|
|
9027
|
+
const FrameBounds &prev, Vector &result, idx_t rid, idx_t bias);
|
|
9028
9028
|
|
|
9029
9029
|
class AggregateFunction : public BaseScalarFunction {
|
|
9030
9030
|
public:
|
|
@@ -9191,11 +9191,12 @@ public:
|
|
|
9191
9191
|
}
|
|
9192
9192
|
|
|
9193
9193
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE, class OP>
|
|
9194
|
-
static void UnaryWindow(Vector inputs[],
|
|
9195
|
-
const FrameBounds &frame, const FrameBounds &prev,
|
|
9194
|
+
static void UnaryWindow(Vector inputs[], const ValidityMask &filter_mask, FunctionData *bind_data,
|
|
9195
|
+
idx_t input_count, data_ptr_t state, const FrameBounds &frame, const FrameBounds &prev,
|
|
9196
|
+
Vector &result, idx_t rid, idx_t bias) {
|
|
9196
9197
|
D_ASSERT(input_count == 1);
|
|
9197
|
-
AggregateExecutor::UnaryWindow<STATE, INPUT_TYPE, RESULT_TYPE, OP>(inputs[0], bind_data, state,
|
|
9198
|
-
result, rid, bias);
|
|
9198
|
+
AggregateExecutor::UnaryWindow<STATE, INPUT_TYPE, RESULT_TYPE, OP>(inputs[0], filter_mask, bind_data, state,
|
|
9199
|
+
frame, prev, result, rid, bias);
|
|
9199
9200
|
}
|
|
9200
9201
|
|
|
9201
9202
|
template <class STATE, class A_TYPE, class B_TYPE, class OP>
|
|
@@ -14363,12 +14364,14 @@ public:
|
|
|
14363
14364
|
string schema;
|
|
14364
14365
|
//! Name of the aggregate function
|
|
14365
14366
|
string function_name;
|
|
14366
|
-
//! The child expression of the main window
|
|
14367
|
+
//! The child expression of the main window function
|
|
14367
14368
|
vector<unique_ptr<ParsedExpression>> children;
|
|
14368
14369
|
//! The set of expressions to partition by
|
|
14369
14370
|
vector<unique_ptr<ParsedExpression>> partitions;
|
|
14370
14371
|
//! The set of ordering clauses
|
|
14371
14372
|
vector<OrderByNode> orders;
|
|
14373
|
+
//! Expression representing a filter, only used for aggregates
|
|
14374
|
+
unique_ptr<ParsedExpression> filter_expr;
|
|
14372
14375
|
//! True to ignore NULL values
|
|
14373
14376
|
bool ignore_nulls;
|
|
14374
14377
|
//! The window boundaries
|
|
@@ -14417,8 +14420,13 @@ public:
|
|
|
14417
14420
|
if (entry.ignore_nulls) {
|
|
14418
14421
|
result += " IGNORE NULLS";
|
|
14419
14422
|
}
|
|
14423
|
+
// FILTER
|
|
14424
|
+
if (entry.filter_expr) {
|
|
14425
|
+
result += ") FILTER (WHERE " + entry.filter_expr->ToString();
|
|
14426
|
+
}
|
|
14427
|
+
|
|
14420
14428
|
// Over clause
|
|
14421
|
-
result += ") OVER(";
|
|
14429
|
+
result += ") OVER (";
|
|
14422
14430
|
string sep;
|
|
14423
14431
|
|
|
14424
14432
|
// Partitions
|
|
@@ -14537,7 +14545,7 @@ public:
|
|
|
14537
14545
|
unique_ptr<AggregateFunction> aggregate;
|
|
14538
14546
|
//! The bound function info
|
|
14539
14547
|
unique_ptr<FunctionData> bind_info;
|
|
14540
|
-
//! The child expressions of the main window
|
|
14548
|
+
//! The child expressions of the main window function
|
|
14541
14549
|
vector<unique_ptr<Expression>> children;
|
|
14542
14550
|
//! The set of expressions to partition by
|
|
14543
14551
|
vector<unique_ptr<Expression>> partitions;
|
|
@@ -14545,6 +14553,8 @@ public:
|
|
|
14545
14553
|
vector<unique_ptr<BaseStatistics>> partitions_stats;
|
|
14546
14554
|
//! The set of ordering clauses
|
|
14547
14555
|
vector<BoundOrderByNode> orders;
|
|
14556
|
+
//! Expression representing a filter, only used for aggregates
|
|
14557
|
+
unique_ptr<Expression> filter_expr;
|
|
14548
14558
|
//! True to ignore NULL values
|
|
14549
14559
|
bool ignore_nulls;
|
|
14550
14560
|
//! The window boundaries
|