duckdb 0.3.5-dev653.0 → 0.3.5-dev666.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +177 -68
- package/src/duckdb.hpp +26 -16
- package/src/parquet-amalgamation.cpp +27711 -27711
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -1261,7 +1261,7 @@ public:
|
|
|
1261
1261
|
}
|
|
1262
1262
|
|
|
1263
1263
|
template <class T, class RETURN_TYPE = unique_ptr<T>, typename... ARGS>
|
|
1264
|
-
RETURN_TYPE ReadSerializable(RETURN_TYPE default_value, ARGS &&...args) {
|
|
1264
|
+
RETURN_TYPE ReadSerializable(RETURN_TYPE default_value, ARGS &&... args) {
|
|
1265
1265
|
if (field_count >= max_field_count) {
|
|
1266
1266
|
// field is not there, read the default value
|
|
1267
1267
|
return default_value;
|
|
@@ -1283,7 +1283,7 @@ public:
|
|
|
1283
1283
|
}
|
|
1284
1284
|
|
|
1285
1285
|
template <class T, class RETURN_TYPE = unique_ptr<T>, typename... ARGS>
|
|
1286
|
-
RETURN_TYPE ReadRequiredSerializable(ARGS &&...args) {
|
|
1286
|
+
RETURN_TYPE ReadRequiredSerializable(ARGS &&... args) {
|
|
1287
1287
|
if (field_count >= max_field_count) {
|
|
1288
1288
|
// field is not there, read the default value
|
|
1289
1289
|
throw SerializationException("Attempting to read mandatory field, but field is missing");
|
|
@@ -52758,28 +52758,23 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashe
|
|
|
52758
52758
|
// value have not been seen yet
|
|
52759
52759
|
idx_t new_group_count =
|
|
52760
52760
|
distinct_hashes[aggr_idx]->FindOrCreateGroups(probe_chunk, dummy_addresses, new_groups);
|
|
52761
|
-
|
|
52762
|
-
// now fix up the payload and addresses accordingly by creating
|
|
52763
|
-
// a selection vector
|
|
52764
52761
|
if (new_group_count > 0) {
|
|
52762
|
+
// now fix up the payload and addresses accordingly by creating
|
|
52763
|
+
// a selection vector
|
|
52764
|
+
DataChunk distinct_payload;
|
|
52765
|
+
distinct_payload.Initialize(payload.GetTypes());
|
|
52766
|
+
distinct_payload.Slice(payload, new_groups, new_group_count);
|
|
52767
|
+
distinct_payload.Verify();
|
|
52768
|
+
|
|
52769
|
+
Vector distinct_addresses(addresses, new_groups, new_group_count);
|
|
52770
|
+
distinct_addresses.Verify(new_group_count);
|
|
52771
|
+
|
|
52765
52772
|
if (aggr.filter) {
|
|
52766
|
-
Vector distinct_addresses(addresses, new_groups, new_group_count);
|
|
52767
|
-
DataChunk distinct_payload;
|
|
52768
|
-
auto pay_types = payload.GetTypes();
|
|
52769
|
-
distinct_payload.Initialize(pay_types);
|
|
52770
|
-
distinct_payload.Slice(payload, new_groups, new_group_count);
|
|
52771
|
-
distinct_addresses.Verify(new_group_count);
|
|
52772
52773
|
distinct_addresses.Normalify(new_group_count);
|
|
52773
52774
|
RowOperations::UpdateFilteredStates(aggr, distinct_addresses, distinct_payload, payload_idx);
|
|
52774
52775
|
} else {
|
|
52775
|
-
|
|
52776
|
-
|
|
52777
|
-
payload.data[payload_idx + i].Slice(new_groups, new_group_count);
|
|
52778
|
-
payload.data[payload_idx + i].Verify(new_group_count);
|
|
52779
|
-
}
|
|
52780
|
-
distinct_addresses.Verify(new_group_count);
|
|
52781
|
-
|
|
52782
|
-
RowOperations::UpdateStates(aggr, distinct_addresses, payload, payload_idx, new_group_count);
|
|
52776
|
+
RowOperations::UpdateStates(aggr, distinct_addresses, distinct_payload, payload_idx,
|
|
52777
|
+
new_group_count);
|
|
52783
52778
|
}
|
|
52784
52779
|
}
|
|
52785
52780
|
} else if (aggr.filter) {
|
|
@@ -58033,6 +58028,7 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, GlobalSink
|
|
|
58033
58028
|
for (auto &child_expr : aggr.children) {
|
|
58034
58029
|
D_ASSERT(child_expr->type == ExpressionType::BOUND_REF);
|
|
58035
58030
|
auto &bound_ref_expr = (BoundReferenceExpression &)*child_expr;
|
|
58031
|
+
D_ASSERT(bound_ref_expr.index < input.data.size());
|
|
58036
58032
|
aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[bound_ref_expr.index]);
|
|
58037
58033
|
}
|
|
58038
58034
|
}
|
|
@@ -58041,6 +58037,7 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, GlobalSink
|
|
|
58041
58037
|
if (aggr.filter) {
|
|
58042
58038
|
auto it = filter_indexes.find(aggr.filter.get());
|
|
58043
58039
|
D_ASSERT(it != filter_indexes.end());
|
|
58040
|
+
D_ASSERT(it->second < input.data.size());
|
|
58044
58041
|
aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[it->second]);
|
|
58045
58042
|
}
|
|
58046
58043
|
}
|
|
@@ -59077,7 +59074,7 @@ public:
|
|
|
59077
59074
|
using FrameBounds = std::pair<idx_t, idx_t>;
|
|
59078
59075
|
|
|
59079
59076
|
WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, const LogicalType &result_type,
|
|
59080
|
-
ChunkCollection *input, WindowAggregationMode mode);
|
|
59077
|
+
ChunkCollection *input, const ValidityMask &filter_mask, WindowAggregationMode mode);
|
|
59081
59078
|
~WindowSegmentTree();
|
|
59082
59079
|
|
|
59083
59080
|
//! First row contains the result.
|
|
@@ -59110,6 +59107,8 @@ private:
|
|
|
59110
59107
|
vector<data_t> state;
|
|
59111
59108
|
//! Input data chunk, used for intermediate window segment aggregation
|
|
59112
59109
|
DataChunk inputs;
|
|
59110
|
+
//! The filtered rows in inputs.
|
|
59111
|
+
SelectionVector filter_sel;
|
|
59113
59112
|
//! A vector of pointers to "state", used for intermediate window segment aggregation
|
|
59114
59113
|
Vector statep;
|
|
59115
59114
|
//! The frame boundaries, used for the window functions
|
|
@@ -59130,6 +59129,9 @@ private:
|
|
|
59130
59129
|
//! The (sorted) input chunk collection on which the tree is built
|
|
59131
59130
|
ChunkCollection *input_ref;
|
|
59132
59131
|
|
|
59132
|
+
//! The filtered rows in input_ref.
|
|
59133
|
+
const ValidityMask &filter_mask;
|
|
59134
|
+
|
|
59133
59135
|
//! Use the window API, if available
|
|
59134
59136
|
WindowAggregationMode mode;
|
|
59135
59137
|
|
|
@@ -60032,6 +60034,25 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
60032
60034
|
}
|
|
60033
60035
|
}
|
|
60034
60036
|
|
|
60037
|
+
// evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
|
|
60038
|
+
ValidityMask filter_mask;
|
|
60039
|
+
vector<validity_t> filter_bits;
|
|
60040
|
+
if (wexpr->filter_expr) {
|
|
60041
|
+
// Start with all invalid and set the ones that pass
|
|
60042
|
+
filter_bits.resize(ValidityMask::ValidityMaskSize(input.Count()), 0);
|
|
60043
|
+
filter_mask.Initialize(filter_bits.data());
|
|
60044
|
+
ExpressionExecutor filter_execution(*wexpr->filter_expr);
|
|
60045
|
+
SelectionVector true_sel(STANDARD_VECTOR_SIZE);
|
|
60046
|
+
idx_t base_idx = 0;
|
|
60047
|
+
for (auto &chunk : input.Chunks()) {
|
|
60048
|
+
const auto filtered = filter_execution.SelectExpression(*chunk, true_sel);
|
|
60049
|
+
for (idx_t f = 0; f < filtered; ++f) {
|
|
60050
|
+
filter_mask.SetValid(base_idx + true_sel[f]);
|
|
60051
|
+
}
|
|
60052
|
+
base_idx += chunk->size();
|
|
60053
|
+
}
|
|
60054
|
+
}
|
|
60055
|
+
|
|
60035
60056
|
// evaluate boundaries if present. Parser has checked boundary types.
|
|
60036
60057
|
ChunkCollection boundary_start_collection;
|
|
60037
60058
|
if (wexpr->start_expr) {
|
|
@@ -60085,7 +60106,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
|
|
|
60085
60106
|
|
|
60086
60107
|
if (wexpr->aggregate) {
|
|
60087
60108
|
segment_tree = make_unique<WindowSegmentTree>(*(wexpr->aggregate), wexpr->bind_info.get(), wexpr->return_type,
|
|
60088
|
-
&payload_collection, mode);
|
|
60109
|
+
&payload_collection, filter_mask, mode);
|
|
60089
60110
|
}
|
|
60090
60111
|
|
|
60091
60112
|
WindowBoundariesState bounds(wexpr);
|
|
@@ -78692,10 +78713,11 @@ namespace duckdb {
|
|
|
78692
78713
|
|
|
78693
78714
|
WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info,
|
|
78694
78715
|
const LogicalType &result_type_p, ChunkCollection *input,
|
|
78695
|
-
WindowAggregationMode mode_p)
|
|
78716
|
+
const ValidityMask &filter_mask_p, WindowAggregationMode mode_p)
|
|
78696
78717
|
: aggregate(aggregate), bind_info(bind_info), result_type(result_type_p), state(aggregate.state_size()),
|
|
78697
78718
|
statep(Value::POINTER((idx_t)state.data())), frame(0, 0), active(0, 1),
|
|
78698
|
-
statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input),
|
|
78719
|
+
statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input), filter_mask(filter_mask_p),
|
|
78720
|
+
mode(mode_p) {
|
|
78699
78721
|
#if STANDARD_VECTOR_SIZE < 512
|
|
78700
78722
|
throw NotImplementedException("Window functions are not supported for vector sizes < 512");
|
|
78701
78723
|
#endif
|
|
@@ -78703,6 +78725,7 @@ WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData
|
|
|
78703
78725
|
statev.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
|
|
78704
78726
|
|
|
78705
78727
|
if (input_ref && input_ref->ColumnCount() > 0) {
|
|
78728
|
+
filter_sel.Initialize(STANDARD_VECTOR_SIZE);
|
|
78706
78729
|
inputs.Initialize(input_ref->Types());
|
|
78707
78730
|
// if we have a frame-by-frame method, share the single state
|
|
78708
78731
|
if (aggregate.window && UseWindowAPI()) {
|
|
@@ -78783,6 +78806,19 @@ void WindowSegmentTree::ExtractFrame(idx_t begin, idx_t end) {
|
|
|
78783
78806
|
VectorOperations::Copy(chunk_b.data[i], v, chunk_b_count, 0, chunk_a_count);
|
|
78784
78807
|
}
|
|
78785
78808
|
}
|
|
78809
|
+
|
|
78810
|
+
// Slice to any filtered rows
|
|
78811
|
+
if (!filter_mask.AllValid()) {
|
|
78812
|
+
idx_t filtered = 0;
|
|
78813
|
+
for (idx_t i = begin; i < end; ++i) {
|
|
78814
|
+
if (filter_mask.RowIsValid(i)) {
|
|
78815
|
+
filter_sel.set_index(filtered++, i - begin);
|
|
78816
|
+
}
|
|
78817
|
+
}
|
|
78818
|
+
if (filtered != inputs.size()) {
|
|
78819
|
+
inputs.Slice(filter_sel, filtered);
|
|
78820
|
+
}
|
|
78821
|
+
}
|
|
78786
78822
|
}
|
|
78787
78823
|
|
|
78788
78824
|
void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end) {
|
|
@@ -78863,7 +78899,16 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
|
|
|
78863
78899
|
if (inputs.ColumnCount() == 0) {
|
|
78864
78900
|
D_ASSERT(GetTypeIdSize(result_type.InternalType()) == sizeof(idx_t));
|
|
78865
78901
|
auto data = FlatVector::GetData<idx_t>(result);
|
|
78866
|
-
|
|
78902
|
+
// Slice to any filtered rows
|
|
78903
|
+
if (!filter_mask.AllValid()) {
|
|
78904
|
+
idx_t filtered = 0;
|
|
78905
|
+
for (idx_t i = begin; i < end; ++i) {
|
|
78906
|
+
filtered += filter_mask.RowIsValid(i);
|
|
78907
|
+
}
|
|
78908
|
+
data[rid] = filtered;
|
|
78909
|
+
} else {
|
|
78910
|
+
data[rid] = end - begin;
|
|
78911
|
+
}
|
|
78867
78912
|
return;
|
|
78868
78913
|
}
|
|
78869
78914
|
|
|
@@ -78904,8 +78949,8 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
|
|
|
78904
78949
|
active = FrameBounds(active_chunks.first * STANDARD_VECTOR_SIZE,
|
|
78905
78950
|
MinValue((active_chunks.second + 1) * STANDARD_VECTOR_SIZE, coll.Count()));
|
|
78906
78951
|
|
|
78907
|
-
aggregate.window(inputs.data.data(), bind_info, inputs.ColumnCount(), state.data(), frame, prev,
|
|
78908
|
-
active.first);
|
|
78952
|
+
aggregate.window(inputs.data.data(), filter_mask, bind_info, inputs.ColumnCount(), state.data(), frame, prev,
|
|
78953
|
+
result, rid, active.first);
|
|
78909
78954
|
return;
|
|
78910
78955
|
}
|
|
78911
78956
|
|
|
@@ -83436,6 +83481,19 @@ struct ModeState {
|
|
|
83436
83481
|
}
|
|
83437
83482
|
};
|
|
83438
83483
|
|
|
83484
|
+
struct ModeIncluded {
|
|
83485
|
+
inline explicit ModeIncluded(const ValidityMask &fmask_p, const ValidityMask &dmask_p, idx_t bias_p)
|
|
83486
|
+
: fmask(fmask_p), dmask(dmask_p), bias(bias_p) {
|
|
83487
|
+
}
|
|
83488
|
+
|
|
83489
|
+
inline bool operator()(const idx_t &idx) const {
|
|
83490
|
+
return fmask.RowIsValid(idx) && dmask.RowIsValid(idx - bias);
|
|
83491
|
+
}
|
|
83492
|
+
const ValidityMask &fmask;
|
|
83493
|
+
const ValidityMask &dmask;
|
|
83494
|
+
const idx_t bias;
|
|
83495
|
+
};
|
|
83496
|
+
|
|
83439
83497
|
template <typename KEY_TYPE>
|
|
83440
83498
|
struct ModeFunction {
|
|
83441
83499
|
template <class STATE>
|
|
@@ -83492,11 +83550,14 @@ struct ModeFunction {
|
|
|
83492
83550
|
}
|
|
83493
83551
|
|
|
83494
83552
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
|
|
83495
|
-
static void Window(const INPUT_TYPE *data, const ValidityMask &
|
|
83496
|
-
const FrameBounds &frame, const FrameBounds &prev,
|
|
83553
|
+
static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
|
|
83554
|
+
FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
|
|
83555
|
+
Vector &result, idx_t rid, idx_t bias) {
|
|
83497
83556
|
auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
|
|
83498
83557
|
auto &rmask = FlatVector::Validity(result);
|
|
83499
83558
|
|
|
83559
|
+
ModeIncluded included(fmask, dmask, bias);
|
|
83560
|
+
|
|
83500
83561
|
if (!state->frequency_map) {
|
|
83501
83562
|
state->frequency_map = new unordered_map<KEY_TYPE, size_t>();
|
|
83502
83563
|
}
|
|
@@ -83505,31 +83566,31 @@ struct ModeFunction {
|
|
|
83505
83566
|
state->Reset();
|
|
83506
83567
|
// for f ∈ F do
|
|
83507
83568
|
for (auto f = frame.first; f < frame.second; ++f) {
|
|
83508
|
-
if (
|
|
83569
|
+
if (included(f)) {
|
|
83509
83570
|
state->ModeAdd(KEY_TYPE(data[f]));
|
|
83510
83571
|
}
|
|
83511
83572
|
}
|
|
83512
83573
|
} else {
|
|
83513
83574
|
// for f ∈ P \ F do
|
|
83514
83575
|
for (auto p = prev.first; p < frame.first; ++p) {
|
|
83515
|
-
if (
|
|
83576
|
+
if (included(p)) {
|
|
83516
83577
|
state->ModeRm(KEY_TYPE(data[p]));
|
|
83517
83578
|
}
|
|
83518
83579
|
}
|
|
83519
83580
|
for (auto p = frame.second; p < prev.second; ++p) {
|
|
83520
|
-
if (
|
|
83581
|
+
if (included(p)) {
|
|
83521
83582
|
state->ModeRm(KEY_TYPE(data[p]));
|
|
83522
83583
|
}
|
|
83523
83584
|
}
|
|
83524
83585
|
|
|
83525
83586
|
// for f ∈ F \ P do
|
|
83526
83587
|
for (auto f = frame.first; f < prev.first; ++f) {
|
|
83527
|
-
if (
|
|
83588
|
+
if (included(f)) {
|
|
83528
83589
|
state->ModeAdd(KEY_TYPE(data[f]));
|
|
83529
83590
|
}
|
|
83530
83591
|
}
|
|
83531
83592
|
for (auto f = prev.second; f < frame.second; ++f) {
|
|
83532
|
-
if (
|
|
83593
|
+
if (included(f)) {
|
|
83533
83594
|
state->ModeAdd(KEY_TYPE(data[f]));
|
|
83534
83595
|
}
|
|
83535
83596
|
}
|
|
@@ -83744,14 +83805,21 @@ struct QuantileState {
|
|
|
83744
83805
|
}
|
|
83745
83806
|
};
|
|
83746
83807
|
|
|
83747
|
-
struct
|
|
83748
|
-
inline explicit
|
|
83808
|
+
struct QuantileIncluded {
|
|
83809
|
+
inline explicit QuantileIncluded(const ValidityMask &fmask_p, const ValidityMask &dmask_p, idx_t bias_p)
|
|
83810
|
+
: fmask(fmask_p), dmask(dmask_p), bias(bias_p) {
|
|
83749
83811
|
}
|
|
83750
83812
|
|
|
83751
83813
|
inline bool operator()(const idx_t &idx) const {
|
|
83752
|
-
return
|
|
83814
|
+
return fmask.RowIsValid(idx) && dmask.RowIsValid(idx - bias);
|
|
83753
83815
|
}
|
|
83754
|
-
|
|
83816
|
+
|
|
83817
|
+
inline bool AllValid() const {
|
|
83818
|
+
return fmask.AllValid() && dmask.AllValid();
|
|
83819
|
+
}
|
|
83820
|
+
|
|
83821
|
+
const ValidityMask &fmask;
|
|
83822
|
+
const ValidityMask &dmask;
|
|
83755
83823
|
const idx_t bias;
|
|
83756
83824
|
};
|
|
83757
83825
|
|
|
@@ -83811,7 +83879,7 @@ static idx_t ReplaceIndex(idx_t *index, const FrameBounds &frame, const FrameBou
|
|
|
83811
83879
|
|
|
83812
83880
|
template <class INPUT_TYPE>
|
|
83813
83881
|
static inline int CanReplace(const idx_t *index, const INPUT_TYPE *fdata, const idx_t j, const idx_t k0, const idx_t k1,
|
|
83814
|
-
const
|
|
83882
|
+
const QuantileIncluded &validity) {
|
|
83815
83883
|
D_ASSERT(index);
|
|
83816
83884
|
|
|
83817
83885
|
// NULLs sort to the end, so if we have inserted a NULL,
|
|
@@ -84142,12 +84210,13 @@ struct QuantileScalarOperation : public QuantileOperation {
|
|
|
84142
84210
|
}
|
|
84143
84211
|
|
|
84144
84212
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
|
|
84145
|
-
static void Window(const INPUT_TYPE *data, const ValidityMask &
|
|
84146
|
-
const FrameBounds &frame, const FrameBounds &prev,
|
|
84213
|
+
static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
|
|
84214
|
+
FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
|
|
84215
|
+
Vector &result, idx_t ridx, idx_t bias) {
|
|
84147
84216
|
auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
|
|
84148
84217
|
auto &rmask = FlatVector::Validity(result);
|
|
84149
84218
|
|
|
84150
|
-
|
|
84219
|
+
QuantileIncluded included(fmask, dmask, bias);
|
|
84151
84220
|
|
|
84152
84221
|
// Lazily initialise frame state
|
|
84153
84222
|
auto prev_pos = state->pos;
|
|
@@ -84167,9 +84236,9 @@ struct QuantileScalarOperation : public QuantileOperation {
|
|
|
84167
84236
|
// Fixed frame size
|
|
84168
84237
|
const auto j = ReplaceIndex(index, frame, prev);
|
|
84169
84238
|
// We can only replace if the number of NULLs has not changed
|
|
84170
|
-
if (
|
|
84239
|
+
if (included.AllValid() || included(prev.first) == included(prev.second)) {
|
|
84171
84240
|
Interpolator<DISCRETE> interp(q, prev_pos);
|
|
84172
|
-
replace = CanReplace(index, data, j, interp.FRN, interp.CRN,
|
|
84241
|
+
replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
|
|
84173
84242
|
if (replace) {
|
|
84174
84243
|
state->pos = prev_pos;
|
|
84175
84244
|
}
|
|
@@ -84178,9 +84247,9 @@ struct QuantileScalarOperation : public QuantileOperation {
|
|
|
84178
84247
|
ReuseIndexes(index, frame, prev);
|
|
84179
84248
|
}
|
|
84180
84249
|
|
|
84181
|
-
if (!replace && !
|
|
84250
|
+
if (!replace && !included.AllValid()) {
|
|
84182
84251
|
// Remove the NULLs
|
|
84183
|
-
state->pos = std::partition(index, index + state->pos,
|
|
84252
|
+
state->pos = std::partition(index, index + state->pos, included) - index;
|
|
84184
84253
|
}
|
|
84185
84254
|
if (state->pos) {
|
|
84186
84255
|
Interpolator<DISCRETE> interp(q, state->pos);
|
|
@@ -84293,12 +84362,13 @@ struct QuantileListOperation : public QuantileOperation {
|
|
|
84293
84362
|
}
|
|
84294
84363
|
|
|
84295
84364
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
|
|
84296
|
-
static void Window(const INPUT_TYPE *data, const ValidityMask &
|
|
84297
|
-
const FrameBounds &frame, const FrameBounds &prev,
|
|
84365
|
+
static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
|
|
84366
|
+
FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
|
|
84367
|
+
Vector &list, idx_t lidx, idx_t bias) {
|
|
84298
84368
|
D_ASSERT(bind_data_p);
|
|
84299
84369
|
auto bind_data = (QuantileBindData *)bind_data_p;
|
|
84300
84370
|
|
|
84301
|
-
|
|
84371
|
+
QuantileIncluded included(fmask, dmask, bias);
|
|
84302
84372
|
|
|
84303
84373
|
// Result is a constant LIST<RESULT_TYPE> with a fixed length
|
|
84304
84374
|
auto ldata = FlatVector::GetData<RESULT_TYPE>(list);
|
|
@@ -84329,11 +84399,11 @@ struct QuantileListOperation : public QuantileOperation {
|
|
|
84329
84399
|
// Fixed frame size
|
|
84330
84400
|
const auto j = ReplaceIndex(index, frame, prev);
|
|
84331
84401
|
// We can only replace if the number of NULLs has not changed
|
|
84332
|
-
if (
|
|
84402
|
+
if (included.AllValid() || included(prev.first) == included(prev.second)) {
|
|
84333
84403
|
for (const auto &q : bind_data->order) {
|
|
84334
84404
|
const auto &quantile = bind_data->quantiles[q];
|
|
84335
84405
|
Interpolator<DISCRETE> interp(quantile, prev_pos);
|
|
84336
|
-
const auto replace = CanReplace(index, data, j, interp.FRN, interp.CRN,
|
|
84406
|
+
const auto replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
|
|
84337
84407
|
if (replace < 0) {
|
|
84338
84408
|
// Replacement is before this quantile, so the rest will be replaceable too.
|
|
84339
84409
|
replaceable.first = MinValue(replaceable.first, interp.FRN);
|
|
@@ -84353,9 +84423,9 @@ struct QuantileListOperation : public QuantileOperation {
|
|
|
84353
84423
|
ReuseIndexes(index, frame, prev);
|
|
84354
84424
|
}
|
|
84355
84425
|
|
|
84356
|
-
if (replaceable.first >= replaceable.second && !
|
|
84426
|
+
if (replaceable.first >= replaceable.second && !included.AllValid()) {
|
|
84357
84427
|
// Remove the NULLs
|
|
84358
|
-
state->pos = std::partition(index, index + state->pos,
|
|
84428
|
+
state->pos = std::partition(index, index + state->pos, included) - index;
|
|
84359
84429
|
}
|
|
84360
84430
|
|
|
84361
84431
|
if (state->pos) {
|
|
@@ -84652,12 +84722,13 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
|
84652
84722
|
}
|
|
84653
84723
|
|
|
84654
84724
|
template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
|
|
84655
|
-
static void Window(const INPUT_TYPE *data, const ValidityMask &
|
|
84656
|
-
const FrameBounds &frame, const FrameBounds &prev,
|
|
84725
|
+
static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
|
|
84726
|
+
FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
|
|
84727
|
+
Vector &result, idx_t ridx, idx_t bias) {
|
|
84657
84728
|
auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
|
|
84658
84729
|
auto &rmask = FlatVector::Validity(result);
|
|
84659
84730
|
|
|
84660
|
-
|
|
84731
|
+
QuantileIncluded included(fmask, dmask, bias);
|
|
84661
84732
|
|
|
84662
84733
|
// Lazily initialise frame state
|
|
84663
84734
|
auto prev_pos = state->pos;
|
|
@@ -84678,7 +84749,7 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
|
84678
84749
|
// the median has changed, the previous order is not correct.
|
|
84679
84750
|
// It is probably close, however, and so reuse is helpful.
|
|
84680
84751
|
ReuseIndexes(index2, frame, prev);
|
|
84681
|
-
std::partition(index2, index2 + state->pos,
|
|
84752
|
+
std::partition(index2, index2 + state->pos, included);
|
|
84682
84753
|
|
|
84683
84754
|
// Find the two positions needed for the median
|
|
84684
84755
|
const float q = 0.5;
|
|
@@ -84687,10 +84758,10 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
|
84687
84758
|
if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
|
|
84688
84759
|
// Fixed frame size
|
|
84689
84760
|
const auto j = ReplaceIndex(index, frame, prev);
|
|
84690
|
-
// We can only replace if the number of
|
|
84691
|
-
if (
|
|
84761
|
+
// We can only replace if the number of NULLs has not changed
|
|
84762
|
+
if (included.AllValid() || included(prev.first) == included(prev.second)) {
|
|
84692
84763
|
Interpolator<false> interp(q, prev_pos);
|
|
84693
|
-
replace = CanReplace(index, data, j, interp.FRN, interp.CRN,
|
|
84764
|
+
replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
|
|
84694
84765
|
if (replace) {
|
|
84695
84766
|
state->pos = prev_pos;
|
|
84696
84767
|
}
|
|
@@ -84699,9 +84770,9 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
|
84699
84770
|
ReuseIndexes(index, frame, prev);
|
|
84700
84771
|
}
|
|
84701
84772
|
|
|
84702
|
-
if (!replace && !
|
|
84773
|
+
if (!replace && !included.AllValid()) {
|
|
84703
84774
|
// Remove the NULLs
|
|
84704
|
-
state->pos = std::partition(index, index + state->pos,
|
|
84775
|
+
state->pos = std::partition(index, index + state->pos, included) - index;
|
|
84705
84776
|
}
|
|
84706
84777
|
|
|
84707
84778
|
if (state->pos) {
|
|
@@ -104983,6 +105054,14 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanState &scan
|
|
|
104983
105054
|
auto &struct_validity_mask = FlatVector::Validity(vector);
|
|
104984
105055
|
for (idx_t type_idx = 0; type_idx < (idx_t)array.n_children; type_idx++) {
|
|
104985
105056
|
SetValidityMask(*child_entries[type_idx], *array.children[type_idx], scan_state, size, nested_offset);
|
|
105057
|
+
if (!struct_validity_mask.AllValid()) {
|
|
105058
|
+
auto &child_validity_mark = FlatVector::Validity(*child_entries[type_idx]);
|
|
105059
|
+
for (idx_t i = 0; i < size; i++) {
|
|
105060
|
+
if (!struct_validity_mask.RowIsValid(i)) {
|
|
105061
|
+
child_validity_mark.SetInvalid(i);
|
|
105062
|
+
}
|
|
105063
|
+
}
|
|
105064
|
+
}
|
|
104986
105065
|
ColumnArrowToDuckDB(*child_entries[type_idx], *array.children[type_idx], scan_state, size,
|
|
104987
105066
|
arrow_convert_data, col_idx, arrow_convert_idx, nested_offset, &struct_validity_mask);
|
|
104988
105067
|
}
|
|
@@ -145925,6 +146004,11 @@ bool WindowExpression::Equals(const WindowExpression *a, const WindowExpression
|
|
|
145925
146004
|
return false;
|
|
145926
146005
|
}
|
|
145927
146006
|
}
|
|
146007
|
+
// check if the filter clauses are equivalent
|
|
146008
|
+
if (!BaseExpression::Equals(a->filter_expr.get(), b->filter_expr.get())) {
|
|
146009
|
+
return false;
|
|
146010
|
+
}
|
|
146011
|
+
|
|
145928
146012
|
return true;
|
|
145929
146013
|
}
|
|
145930
146014
|
|
|
@@ -145944,6 +146028,8 @@ unique_ptr<ParsedExpression> WindowExpression::Copy() const {
|
|
|
145944
146028
|
new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy());
|
|
145945
146029
|
}
|
|
145946
146030
|
|
|
146031
|
+
new_window->filter_expr = filter_expr ? filter_expr->Copy() : nullptr;
|
|
146032
|
+
|
|
145947
146033
|
new_window->start = start;
|
|
145948
146034
|
new_window->end = end;
|
|
145949
146035
|
new_window->start_expr = start_expr ? start_expr->Copy() : nullptr;
|
|
@@ -145976,6 +146062,7 @@ void WindowExpression::Serialize(FieldWriter &writer) const {
|
|
|
145976
146062
|
writer.WriteOptional(offset_expr);
|
|
145977
146063
|
writer.WriteOptional(default_expr);
|
|
145978
146064
|
writer.WriteField<bool>(ignore_nulls);
|
|
146065
|
+
writer.WriteOptional(filter_expr);
|
|
145979
146066
|
}
|
|
145980
146067
|
|
|
145981
146068
|
unique_ptr<ParsedExpression> WindowExpression::Deserialize(ExpressionType type, FieldReader &reader) {
|
|
@@ -145998,6 +146085,7 @@ unique_ptr<ParsedExpression> WindowExpression::Deserialize(ExpressionType type,
|
|
|
145998
146085
|
expr->offset_expr = reader.ReadOptional<ParsedExpression>(nullptr);
|
|
145999
146086
|
expr->default_expr = reader.ReadOptional<ParsedExpression>(nullptr);
|
|
146000
146087
|
expr->ignore_nulls = reader.ReadRequired<bool>();
|
|
146088
|
+
expr->filter_expr = reader.ReadOptional<ParsedExpression>(nullptr);
|
|
146001
146089
|
return move(expr);
|
|
146002
146090
|
}
|
|
146003
146091
|
|
|
@@ -146860,6 +146948,9 @@ void ParsedExpressionIterator::EnumerateChildren(
|
|
|
146860
146948
|
for (auto &child : window_expr.children) {
|
|
146861
146949
|
callback(child);
|
|
146862
146950
|
}
|
|
146951
|
+
if (window_expr.filter_expr) {
|
|
146952
|
+
callback(window_expr.filter_expr);
|
|
146953
|
+
}
|
|
146863
146954
|
if (window_expr.start_expr) {
|
|
146864
146955
|
callback(window_expr.start_expr);
|
|
146865
146956
|
}
|
|
@@ -155446,6 +155537,11 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
|
|
|
155446
155537
|
auto lowercase_name = StringUtil::Lower(function_name);
|
|
155447
155538
|
|
|
155448
155539
|
if (root->over) {
|
|
155540
|
+
const auto win_fun_type = WindowToExpressionType(lowercase_name);
|
|
155541
|
+
if (win_fun_type == ExpressionType::INVALID) {
|
|
155542
|
+
throw InternalException("Unknown/unsupported window function");
|
|
155543
|
+
}
|
|
155544
|
+
|
|
155449
155545
|
if (root->agg_distinct) {
|
|
155450
155546
|
throw ParserException("DISTINCT is not implemented for window functions!");
|
|
155451
155547
|
}
|
|
@@ -155454,18 +155550,13 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
|
|
|
155454
155550
|
throw ParserException("ORDER BY is not implemented for window functions!");
|
|
155455
155551
|
}
|
|
155456
155552
|
|
|
155457
|
-
if (root->agg_filter) {
|
|
155458
|
-
throw ParserException("FILTER is not implemented for window functions!");
|
|
155553
|
+
if (win_fun_type != ExpressionType::WINDOW_AGGREGATE && root->agg_filter) {
|
|
155554
|
+
throw ParserException("FILTER is not implemented for non-aggregate window functions!");
|
|
155459
155555
|
}
|
|
155460
155556
|
if (root->export_state) {
|
|
155461
155557
|
throw ParserException("EXPORT_STATE is not supported for window functions!");
|
|
155462
155558
|
}
|
|
155463
155559
|
|
|
155464
|
-
const auto win_fun_type = WindowToExpressionType(lowercase_name);
|
|
155465
|
-
if (win_fun_type == ExpressionType::INVALID) {
|
|
155466
|
-
throw InternalException("Unknown/unsupported window function");
|
|
155467
|
-
}
|
|
155468
|
-
|
|
155469
155560
|
if (win_fun_type == ExpressionType::WINDOW_AGGREGATE && root->agg_ignore_nulls) {
|
|
155470
155561
|
throw ParserException("IGNORE NULLS is not supported for windowed aggregates");
|
|
155471
155562
|
}
|
|
@@ -155473,6 +155564,11 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
|
|
|
155473
155564
|
auto expr = make_unique<WindowExpression>(win_fun_type, schema, lowercase_name);
|
|
155474
155565
|
expr->ignore_nulls = root->agg_ignore_nulls;
|
|
155475
155566
|
|
|
155567
|
+
if (root->agg_filter) {
|
|
155568
|
+
auto filter_expr = TransformExpression(root->agg_filter);
|
|
155569
|
+
expr->filter_expr = move(filter_expr);
|
|
155570
|
+
}
|
|
155571
|
+
|
|
155476
155572
|
if (root->args) {
|
|
155477
155573
|
vector<unique_ptr<ParsedExpression>> function_list;
|
|
155478
155574
|
TransformExpressionList(*root->args, function_list);
|
|
@@ -161727,6 +161823,7 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) {
|
|
|
161727
161823
|
for (auto &order : window.orders) {
|
|
161728
161824
|
BindChild(order.expression, depth, error);
|
|
161729
161825
|
}
|
|
161826
|
+
BindChild(window.filter_expr, depth, error);
|
|
161730
161827
|
BindChild(window.start_expr, depth, error);
|
|
161731
161828
|
BindChild(window.end_expr, depth, error);
|
|
161732
161829
|
BindChild(window.offset_expr, depth, error);
|
|
@@ -161860,6 +161957,8 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) {
|
|
|
161860
161957
|
result->orders.emplace_back(type, null_order, move(expression));
|
|
161861
161958
|
}
|
|
161862
161959
|
|
|
161960
|
+
result->filter_expr = CastWindowExpression(window.filter_expr, LogicalType::BOOLEAN);
|
|
161961
|
+
|
|
161863
161962
|
result->start_expr = CastWindowExpression(window.start_expr, start_type);
|
|
161864
161963
|
result->end_expr = CastWindowExpression(window.end_expr, end_type);
|
|
161865
161964
|
result->offset_expr = CastWindowExpression(window.offset_expr, LogicalType::BIGINT);
|
|
@@ -168164,6 +168263,11 @@ bool BoundWindowExpression::Equals(const BaseExpression *other_p) const {
|
|
|
168164
168263
|
return false;
|
|
168165
168264
|
}
|
|
168166
168265
|
}
|
|
168266
|
+
// check if the filter expressions are equivalent
|
|
168267
|
+
if (!Expression::Equals(filter_expr.get(), other->filter_expr.get())) {
|
|
168268
|
+
return false;
|
|
168269
|
+
}
|
|
168270
|
+
|
|
168167
168271
|
// check if the framing expressions are equivalent
|
|
168168
168272
|
if (!Expression::Equals(start_expr.get(), other->start_expr.get()) ||
|
|
168169
168273
|
!Expression::Equals(end_expr.get(), other->end_expr.get()) ||
|
|
@@ -168228,6 +168332,8 @@ unique_ptr<Expression> BoundWindowExpression::Copy() {
|
|
|
168228
168332
|
new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy());
|
|
168229
168333
|
}
|
|
168230
168334
|
|
|
168335
|
+
new_window->filter_expr = filter_expr ? filter_expr->Copy() : nullptr;
|
|
168336
|
+
|
|
168231
168337
|
new_window->start = start;
|
|
168232
168338
|
new_window->end = end;
|
|
168233
168339
|
new_window->start_expr = start_expr ? start_expr->Copy() : nullptr;
|
|
@@ -169468,6 +169574,9 @@ void ExpressionIterator::EnumerateChildren(Expression &expr,
|
|
|
169468
169574
|
for (auto &child : window_expr.children) {
|
|
169469
169575
|
callback(child);
|
|
169470
169576
|
}
|
|
169577
|
+
if (window_expr.filter_expr) {
|
|
169578
|
+
callback(window_expr.filter_expr);
|
|
169579
|
+
}
|
|
169471
169580
|
if (window_expr.start_expr) {
|
|
169472
169581
|
callback(window_expr.start_expr);
|
|
169473
169582
|
}
|