duckdb 0.3.5-dev653.0 → 0.3.5-dev666.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.3.5-dev653.0",
4
+ "version": "0.3.5-dev666.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -1261,7 +1261,7 @@ public:
1261
1261
  }
1262
1262
 
1263
1263
  template <class T, class RETURN_TYPE = unique_ptr<T>, typename... ARGS>
1264
- RETURN_TYPE ReadSerializable(RETURN_TYPE default_value, ARGS &&...args) {
1264
+ RETURN_TYPE ReadSerializable(RETURN_TYPE default_value, ARGS &&... args) {
1265
1265
  if (field_count >= max_field_count) {
1266
1266
  // field is not there, read the default value
1267
1267
  return default_value;
@@ -1283,7 +1283,7 @@ public:
1283
1283
  }
1284
1284
 
1285
1285
  template <class T, class RETURN_TYPE = unique_ptr<T>, typename... ARGS>
1286
- RETURN_TYPE ReadRequiredSerializable(ARGS &&...args) {
1286
+ RETURN_TYPE ReadRequiredSerializable(ARGS &&... args) {
1287
1287
  if (field_count >= max_field_count) {
1288
1288
  // field is not there, but it is mandatory, so throw instead of falling back to a default
1289
1289
  throw SerializationException("Attempting to read mandatory field, but field is missing");
@@ -52758,28 +52758,23 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashe
52758
52758
  // value have not been seen yet
52759
52759
  idx_t new_group_count =
52760
52760
  distinct_hashes[aggr_idx]->FindOrCreateGroups(probe_chunk, dummy_addresses, new_groups);
52761
-
52762
- // now fix up the payload and addresses accordingly by creating
52763
- // a selection vector
52764
52761
  if (new_group_count > 0) {
52762
+ // now fix up the payload and addresses accordingly by creating
52763
+ // a selection vector
52764
+ DataChunk distinct_payload;
52765
+ distinct_payload.Initialize(payload.GetTypes());
52766
+ distinct_payload.Slice(payload, new_groups, new_group_count);
52767
+ distinct_payload.Verify();
52768
+
52769
+ Vector distinct_addresses(addresses, new_groups, new_group_count);
52770
+ distinct_addresses.Verify(new_group_count);
52771
+
52765
52772
  if (aggr.filter) {
52766
- Vector distinct_addresses(addresses, new_groups, new_group_count);
52767
- DataChunk distinct_payload;
52768
- auto pay_types = payload.GetTypes();
52769
- distinct_payload.Initialize(pay_types);
52770
- distinct_payload.Slice(payload, new_groups, new_group_count);
52771
- distinct_addresses.Verify(new_group_count);
52772
52773
  distinct_addresses.Normalify(new_group_count);
52773
52774
  RowOperations::UpdateFilteredStates(aggr, distinct_addresses, distinct_payload, payload_idx);
52774
52775
  } else {
52775
- Vector distinct_addresses(addresses, new_groups, new_group_count);
52776
- for (idx_t i = 0; i < aggr.child_count; i++) {
52777
- payload.data[payload_idx + i].Slice(new_groups, new_group_count);
52778
- payload.data[payload_idx + i].Verify(new_group_count);
52779
- }
52780
- distinct_addresses.Verify(new_group_count);
52781
-
52782
- RowOperations::UpdateStates(aggr, distinct_addresses, payload, payload_idx, new_group_count);
52776
+ RowOperations::UpdateStates(aggr, distinct_addresses, distinct_payload, payload_idx,
52777
+ new_group_count);
52783
52778
  }
52784
52779
  }
52785
52780
  } else if (aggr.filter) {
@@ -58033,6 +58028,7 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, GlobalSink
58033
58028
  for (auto &child_expr : aggr.children) {
58034
58029
  D_ASSERT(child_expr->type == ExpressionType::BOUND_REF);
58035
58030
  auto &bound_ref_expr = (BoundReferenceExpression &)*child_expr;
58031
+ D_ASSERT(bound_ref_expr.index < input.data.size());
58036
58032
  aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[bound_ref_expr.index]);
58037
58033
  }
58038
58034
  }
@@ -58041,6 +58037,7 @@ SinkResultType PhysicalHashAggregate::Sink(ExecutionContext &context, GlobalSink
58041
58037
  if (aggr.filter) {
58042
58038
  auto it = filter_indexes.find(aggr.filter.get());
58043
58039
  D_ASSERT(it != filter_indexes.end());
58040
+ D_ASSERT(it->second < input.data.size());
58044
58041
  aggregate_input_chunk.data[aggregate_input_idx++].Reference(input.data[it->second]);
58045
58042
  }
58046
58043
  }
@@ -59077,7 +59074,7 @@ public:
59077
59074
  using FrameBounds = std::pair<idx_t, idx_t>;
59078
59075
 
59079
59076
  WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, const LogicalType &result_type,
59080
- ChunkCollection *input, WindowAggregationMode mode);
59077
+ ChunkCollection *input, const ValidityMask &filter_mask, WindowAggregationMode mode);
59081
59078
  ~WindowSegmentTree();
59082
59079
 
59083
59080
  //! First row contains the result.
@@ -59110,6 +59107,8 @@ private:
59110
59107
  vector<data_t> state;
59111
59108
  //! Input data chunk, used for intermediate window segment aggregation
59112
59109
  DataChunk inputs;
59110
+ //! The filtered rows in inputs.
59111
+ SelectionVector filter_sel;
59113
59112
  //! A vector of pointers to "state", used for intermediate window segment aggregation
59114
59113
  Vector statep;
59115
59114
  //! The frame boundaries, used for the window functions
@@ -59130,6 +59129,9 @@ private:
59130
59129
  //! The (sorted) input chunk collection on which the tree is built
59131
59130
  ChunkCollection *input_ref;
59132
59131
 
59132
+ //! The filtered rows in input_ref.
59133
+ const ValidityMask &filter_mask;
59134
+
59133
59135
  //! Use the window API, if available
59134
59136
  WindowAggregationMode mode;
59135
59137
 
@@ -60032,6 +60034,25 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
60032
60034
  }
60033
60035
  }
60034
60036
 
60037
+ // evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
60038
+ ValidityMask filter_mask;
60039
+ vector<validity_t> filter_bits;
60040
+ if (wexpr->filter_expr) {
60041
+ // Start with all invalid and set the ones that pass
60042
+ filter_bits.resize(ValidityMask::ValidityMaskSize(input.Count()), 0);
60043
+ filter_mask.Initialize(filter_bits.data());
60044
+ ExpressionExecutor filter_execution(*wexpr->filter_expr);
60045
+ SelectionVector true_sel(STANDARD_VECTOR_SIZE);
60046
+ idx_t base_idx = 0;
60047
+ for (auto &chunk : input.Chunks()) {
60048
+ const auto filtered = filter_execution.SelectExpression(*chunk, true_sel);
60049
+ for (idx_t f = 0; f < filtered; ++f) {
60050
+ filter_mask.SetValid(base_idx + true_sel[f]);
60051
+ }
60052
+ base_idx += chunk->size();
60053
+ }
60054
+ }
60055
+
60035
60056
  // evaluate boundaries if present. Parser has checked boundary types.
60036
60057
  ChunkCollection boundary_start_collection;
60037
60058
  if (wexpr->start_expr) {
@@ -60085,7 +60106,7 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
60085
60106
 
60086
60107
  if (wexpr->aggregate) {
60087
60108
  segment_tree = make_unique<WindowSegmentTree>(*(wexpr->aggregate), wexpr->bind_info.get(), wexpr->return_type,
60088
- &payload_collection, mode);
60109
+ &payload_collection, filter_mask, mode);
60089
60110
  }
60090
60111
 
60091
60112
  WindowBoundariesState bounds(wexpr);
@@ -78692,10 +78713,11 @@ namespace duckdb {
78692
78713
 
78693
78714
  WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info,
78694
78715
  const LogicalType &result_type_p, ChunkCollection *input,
78695
- WindowAggregationMode mode_p)
78716
+ const ValidityMask &filter_mask_p, WindowAggregationMode mode_p)
78696
78717
  : aggregate(aggregate), bind_info(bind_info), result_type(result_type_p), state(aggregate.state_size()),
78697
78718
  statep(Value::POINTER((idx_t)state.data())), frame(0, 0), active(0, 1),
78698
- statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input), mode(mode_p) {
78719
+ statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input), filter_mask(filter_mask_p),
78720
+ mode(mode_p) {
78699
78721
  #if STANDARD_VECTOR_SIZE < 512
78700
78722
  throw NotImplementedException("Window functions are not supported for vector sizes < 512");
78701
78723
  #endif
@@ -78703,6 +78725,7 @@ WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData
78703
78725
  statev.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
78704
78726
 
78705
78727
  if (input_ref && input_ref->ColumnCount() > 0) {
78728
+ filter_sel.Initialize(STANDARD_VECTOR_SIZE);
78706
78729
  inputs.Initialize(input_ref->Types());
78707
78730
  // if we have a frame-by-frame method, share the single state
78708
78731
  if (aggregate.window && UseWindowAPI()) {
@@ -78783,6 +78806,19 @@ void WindowSegmentTree::ExtractFrame(idx_t begin, idx_t end) {
78783
78806
  VectorOperations::Copy(chunk_b.data[i], v, chunk_b_count, 0, chunk_a_count);
78784
78807
  }
78785
78808
  }
78809
+
78810
+ // Slice to any filtered rows
78811
+ if (!filter_mask.AllValid()) {
78812
+ idx_t filtered = 0;
78813
+ for (idx_t i = begin; i < end; ++i) {
78814
+ if (filter_mask.RowIsValid(i)) {
78815
+ filter_sel.set_index(filtered++, i - begin);
78816
+ }
78817
+ }
78818
+ if (filtered != inputs.size()) {
78819
+ inputs.Slice(filter_sel, filtered);
78820
+ }
78821
+ }
78786
78822
  }
78787
78823
 
78788
78824
  void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end) {
@@ -78863,7 +78899,16 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
78863
78899
  if (inputs.ColumnCount() == 0) {
78864
78900
  D_ASSERT(GetTypeIdSize(result_type.InternalType()) == sizeof(idx_t));
78865
78901
  auto data = FlatVector::GetData<idx_t>(result);
78866
- data[rid] = end - begin;
78902
+ // Count only the rows in the frame that pass the filter
78903
+ if (!filter_mask.AllValid()) {
78904
+ idx_t filtered = 0;
78905
+ for (idx_t i = begin; i < end; ++i) {
78906
+ filtered += filter_mask.RowIsValid(i);
78907
+ }
78908
+ data[rid] = filtered;
78909
+ } else {
78910
+ data[rid] = end - begin;
78911
+ }
78867
78912
  return;
78868
78913
  }
78869
78914
 
@@ -78904,8 +78949,8 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
78904
78949
  active = FrameBounds(active_chunks.first * STANDARD_VECTOR_SIZE,
78905
78950
  MinValue((active_chunks.second + 1) * STANDARD_VECTOR_SIZE, coll.Count()));
78906
78951
 
78907
- aggregate.window(inputs.data.data(), bind_info, inputs.ColumnCount(), state.data(), frame, prev, result, rid,
78908
- active.first);
78952
+ aggregate.window(inputs.data.data(), filter_mask, bind_info, inputs.ColumnCount(), state.data(), frame, prev,
78953
+ result, rid, active.first);
78909
78954
  return;
78910
78955
  }
78911
78956
 
@@ -83436,6 +83481,19 @@ struct ModeState {
83436
83481
  }
83437
83482
  };
83438
83483
 
83484
+ struct ModeIncluded {
83485
+ inline explicit ModeIncluded(const ValidityMask &fmask_p, const ValidityMask &dmask_p, idx_t bias_p)
83486
+ : fmask(fmask_p), dmask(dmask_p), bias(bias_p) {
83487
+ }
83488
+
83489
+ inline bool operator()(const idx_t &idx) const {
83490
+ return fmask.RowIsValid(idx) && dmask.RowIsValid(idx - bias);
83491
+ }
83492
+ const ValidityMask &fmask;
83493
+ const ValidityMask &dmask;
83494
+ const idx_t bias;
83495
+ };
83496
+
83439
83497
  template <typename KEY_TYPE>
83440
83498
  struct ModeFunction {
83441
83499
  template <class STATE>
@@ -83492,11 +83550,14 @@ struct ModeFunction {
83492
83550
  }
83493
83551
 
83494
83552
  template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
83495
- static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state,
83496
- const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t rid, idx_t bias) {
83553
+ static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
83554
+ FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
83555
+ Vector &result, idx_t rid, idx_t bias) {
83497
83556
  auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
83498
83557
  auto &rmask = FlatVector::Validity(result);
83499
83558
 
83559
+ ModeIncluded included(fmask, dmask, bias);
83560
+
83500
83561
  if (!state->frequency_map) {
83501
83562
  state->frequency_map = new unordered_map<KEY_TYPE, size_t>();
83502
83563
  }
@@ -83505,31 +83566,31 @@ struct ModeFunction {
83505
83566
  state->Reset();
83506
83567
  // for f ∈ F do
83507
83568
  for (auto f = frame.first; f < frame.second; ++f) {
83508
- if (dmask.RowIsValid(f - bias)) {
83569
+ if (included(f)) {
83509
83570
  state->ModeAdd(KEY_TYPE(data[f]));
83510
83571
  }
83511
83572
  }
83512
83573
  } else {
83513
83574
  // for f ∈ P \ F do
83514
83575
  for (auto p = prev.first; p < frame.first; ++p) {
83515
- if (dmask.RowIsValid(p - bias)) {
83576
+ if (included(p)) {
83516
83577
  state->ModeRm(KEY_TYPE(data[p]));
83517
83578
  }
83518
83579
  }
83519
83580
  for (auto p = frame.second; p < prev.second; ++p) {
83520
- if (dmask.RowIsValid(p - bias)) {
83581
+ if (included(p)) {
83521
83582
  state->ModeRm(KEY_TYPE(data[p]));
83522
83583
  }
83523
83584
  }
83524
83585
 
83525
83586
  // for f ∈ F \ P do
83526
83587
  for (auto f = frame.first; f < prev.first; ++f) {
83527
- if (dmask.RowIsValid(f - bias)) {
83588
+ if (included(f)) {
83528
83589
  state->ModeAdd(KEY_TYPE(data[f]));
83529
83590
  }
83530
83591
  }
83531
83592
  for (auto f = prev.second; f < frame.second; ++f) {
83532
- if (dmask.RowIsValid(f - bias)) {
83593
+ if (included(f)) {
83533
83594
  state->ModeAdd(KEY_TYPE(data[f]));
83534
83595
  }
83535
83596
  }
@@ -83744,14 +83805,21 @@ struct QuantileState {
83744
83805
  }
83745
83806
  };
83746
83807
 
83747
- struct QuantileNotNull {
83748
- inline explicit QuantileNotNull(const ValidityMask &mask_p, idx_t bias_p) : mask(mask_p), bias(bias_p) {
83808
+ struct QuantileIncluded {
83809
+ inline explicit QuantileIncluded(const ValidityMask &fmask_p, const ValidityMask &dmask_p, idx_t bias_p)
83810
+ : fmask(fmask_p), dmask(dmask_p), bias(bias_p) {
83749
83811
  }
83750
83812
 
83751
83813
  inline bool operator()(const idx_t &idx) const {
83752
- return mask.RowIsValid(idx - bias);
83814
+ return fmask.RowIsValid(idx) && dmask.RowIsValid(idx - bias);
83753
83815
  }
83754
- const ValidityMask &mask;
83816
+
83817
+ inline bool AllValid() const {
83818
+ return fmask.AllValid() && dmask.AllValid();
83819
+ }
83820
+
83821
+ const ValidityMask &fmask;
83822
+ const ValidityMask &dmask;
83755
83823
  const idx_t bias;
83756
83824
  };
83757
83825
 
@@ -83811,7 +83879,7 @@ static idx_t ReplaceIndex(idx_t *index, const FrameBounds &frame, const FrameBou
83811
83879
 
83812
83880
  template <class INPUT_TYPE>
83813
83881
  static inline int CanReplace(const idx_t *index, const INPUT_TYPE *fdata, const idx_t j, const idx_t k0, const idx_t k1,
83814
- const QuantileNotNull &validity) {
83882
+ const QuantileIncluded &validity) {
83815
83883
  D_ASSERT(index);
83816
83884
 
83817
83885
  // NULLs sort to the end, so if we have inserted a NULL,
@@ -84142,12 +84210,13 @@ struct QuantileScalarOperation : public QuantileOperation {
84142
84210
  }
84143
84211
 
84144
84212
  template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
84145
- static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state,
84146
- const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t ridx, idx_t bias) {
84213
+ static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
84214
+ FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
84215
+ Vector &result, idx_t ridx, idx_t bias) {
84147
84216
  auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
84148
84217
  auto &rmask = FlatVector::Validity(result);
84149
84218
 
84150
- QuantileNotNull not_null(dmask, bias);
84219
+ QuantileIncluded included(fmask, dmask, bias);
84151
84220
 
84152
84221
  // Lazily initialise frame state
84153
84222
  auto prev_pos = state->pos;
@@ -84167,9 +84236,9 @@ struct QuantileScalarOperation : public QuantileOperation {
84167
84236
  // Fixed frame size
84168
84237
  const auto j = ReplaceIndex(index, frame, prev);
84169
84238
  // We can only replace if the number of NULLs has not changed
84170
- if (dmask.AllValid() || not_null(prev.first) == not_null(prev.second)) {
84239
+ if (included.AllValid() || included(prev.first) == included(prev.second)) {
84171
84240
  Interpolator<DISCRETE> interp(q, prev_pos);
84172
- replace = CanReplace(index, data, j, interp.FRN, interp.CRN, not_null);
84241
+ replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
84173
84242
  if (replace) {
84174
84243
  state->pos = prev_pos;
84175
84244
  }
@@ -84178,9 +84247,9 @@ struct QuantileScalarOperation : public QuantileOperation {
84178
84247
  ReuseIndexes(index, frame, prev);
84179
84248
  }
84180
84249
 
84181
- if (!replace && !dmask.AllValid()) {
84250
+ if (!replace && !included.AllValid()) {
84182
84251
  // Remove the NULLs
84183
- state->pos = std::partition(index, index + state->pos, not_null) - index;
84252
+ state->pos = std::partition(index, index + state->pos, included) - index;
84184
84253
  }
84185
84254
  if (state->pos) {
84186
84255
  Interpolator<DISCRETE> interp(q, state->pos);
@@ -84293,12 +84362,13 @@ struct QuantileListOperation : public QuantileOperation {
84293
84362
  }
84294
84363
 
84295
84364
  template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
84296
- static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state,
84297
- const FrameBounds &frame, const FrameBounds &prev, Vector &list, idx_t lidx, idx_t bias) {
84365
+ static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
84366
+ FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
84367
+ Vector &list, idx_t lidx, idx_t bias) {
84298
84368
  D_ASSERT(bind_data_p);
84299
84369
  auto bind_data = (QuantileBindData *)bind_data_p;
84300
84370
 
84301
- QuantileNotNull not_null(dmask, bias);
84371
+ QuantileIncluded included(fmask, dmask, bias);
84302
84372
 
84303
84373
  // Result is a constant LIST<RESULT_TYPE> with a fixed length
84304
84374
  auto ldata = FlatVector::GetData<RESULT_TYPE>(list);
@@ -84329,11 +84399,11 @@ struct QuantileListOperation : public QuantileOperation {
84329
84399
  // Fixed frame size
84330
84400
  const auto j = ReplaceIndex(index, frame, prev);
84331
84401
  // We can only replace if the number of NULLs has not changed
84332
- if (dmask.AllValid() || not_null(prev.first) == not_null(prev.second)) {
84402
+ if (included.AllValid() || included(prev.first) == included(prev.second)) {
84333
84403
  for (const auto &q : bind_data->order) {
84334
84404
  const auto &quantile = bind_data->quantiles[q];
84335
84405
  Interpolator<DISCRETE> interp(quantile, prev_pos);
84336
- const auto replace = CanReplace(index, data, j, interp.FRN, interp.CRN, not_null);
84406
+ const auto replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
84337
84407
  if (replace < 0) {
84338
84408
  // Replacement is before this quantile, so the rest will be replaceable too.
84339
84409
  replaceable.first = MinValue(replaceable.first, interp.FRN);
@@ -84353,9 +84423,9 @@ struct QuantileListOperation : public QuantileOperation {
84353
84423
  ReuseIndexes(index, frame, prev);
84354
84424
  }
84355
84425
 
84356
- if (replaceable.first >= replaceable.second && !dmask.AllValid()) {
84426
+ if (replaceable.first >= replaceable.second && !included.AllValid()) {
84357
84427
  // Remove the NULLs
84358
- state->pos = std::partition(index, index + state->pos, not_null) - index;
84428
+ state->pos = std::partition(index, index + state->pos, included) - index;
84359
84429
  }
84360
84430
 
84361
84431
  if (state->pos) {
@@ -84652,12 +84722,13 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
84652
84722
  }
84653
84723
 
84654
84724
  template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
84655
- static void Window(const INPUT_TYPE *data, const ValidityMask &dmask, FunctionData *bind_data_p, STATE *state,
84656
- const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t ridx, idx_t bias) {
84725
+ static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
84726
+ FunctionData *bind_data_p, STATE *state, const FrameBounds &frame, const FrameBounds &prev,
84727
+ Vector &result, idx_t ridx, idx_t bias) {
84657
84728
  auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
84658
84729
  auto &rmask = FlatVector::Validity(result);
84659
84730
 
84660
- QuantileNotNull not_null(dmask, bias);
84731
+ QuantileIncluded included(fmask, dmask, bias);
84661
84732
 
84662
84733
  // Lazily initialise frame state
84663
84734
  auto prev_pos = state->pos;
@@ -84678,7 +84749,7 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
84678
84749
  // the median has changed, the previous order is not correct.
84679
84750
  // It is probably close, however, and so reuse is helpful.
84680
84751
  ReuseIndexes(index2, frame, prev);
84681
- std::partition(index2, index2 + state->pos, not_null);
84752
+ std::partition(index2, index2 + state->pos, included);
84682
84753
 
84683
84754
  // Find the two positions needed for the median
84684
84755
  const float q = 0.5;
@@ -84687,10 +84758,10 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
84687
84758
  if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
84688
84759
  // Fixed frame size
84689
84760
  const auto j = ReplaceIndex(index, frame, prev);
84690
- // We can only replace if the number of NULls has not changed
84691
- if (dmask.AllValid() || not_null(prev.first) == not_null(prev.second)) {
84761
+ // We can only replace if the number of NULLs has not changed
84762
+ if (included.AllValid() || included(prev.first) == included(prev.second)) {
84692
84763
  Interpolator<false> interp(q, prev_pos);
84693
- replace = CanReplace(index, data, j, interp.FRN, interp.CRN, not_null);
84764
+ replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
84694
84765
  if (replace) {
84695
84766
  state->pos = prev_pos;
84696
84767
  }
@@ -84699,9 +84770,9 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
84699
84770
  ReuseIndexes(index, frame, prev);
84700
84771
  }
84701
84772
 
84702
- if (!replace && !dmask.AllValid()) {
84773
+ if (!replace && !included.AllValid()) {
84703
84774
  // Remove the NULLs
84704
- state->pos = std::partition(index, index + state->pos, not_null) - index;
84775
+ state->pos = std::partition(index, index + state->pos, included) - index;
84705
84776
  }
84706
84777
 
84707
84778
  if (state->pos) {
@@ -104983,6 +105054,14 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanState &scan
104983
105054
  auto &struct_validity_mask = FlatVector::Validity(vector);
104984
105055
  for (idx_t type_idx = 0; type_idx < (idx_t)array.n_children; type_idx++) {
104985
105056
  SetValidityMask(*child_entries[type_idx], *array.children[type_idx], scan_state, size, nested_offset);
105057
+ if (!struct_validity_mask.AllValid()) {
105058
+ auto &child_validity_mark = FlatVector::Validity(*child_entries[type_idx]);
105059
+ for (idx_t i = 0; i < size; i++) {
105060
+ if (!struct_validity_mask.RowIsValid(i)) {
105061
+ child_validity_mark.SetInvalid(i);
105062
+ }
105063
+ }
105064
+ }
104986
105065
  ColumnArrowToDuckDB(*child_entries[type_idx], *array.children[type_idx], scan_state, size,
104987
105066
  arrow_convert_data, col_idx, arrow_convert_idx, nested_offset, &struct_validity_mask);
104988
105067
  }
@@ -145925,6 +146004,11 @@ bool WindowExpression::Equals(const WindowExpression *a, const WindowExpression
145925
146004
  return false;
145926
146005
  }
145927
146006
  }
146007
+ // check if the filter clauses are equivalent
146008
+ if (!BaseExpression::Equals(a->filter_expr.get(), b->filter_expr.get())) {
146009
+ return false;
146010
+ }
146011
+
145928
146012
  return true;
145929
146013
  }
145930
146014
 
@@ -145944,6 +146028,8 @@ unique_ptr<ParsedExpression> WindowExpression::Copy() const {
145944
146028
  new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy());
145945
146029
  }
145946
146030
 
146031
+ new_window->filter_expr = filter_expr ? filter_expr->Copy() : nullptr;
146032
+
145947
146033
  new_window->start = start;
145948
146034
  new_window->end = end;
145949
146035
  new_window->start_expr = start_expr ? start_expr->Copy() : nullptr;
@@ -145976,6 +146062,7 @@ void WindowExpression::Serialize(FieldWriter &writer) const {
145976
146062
  writer.WriteOptional(offset_expr);
145977
146063
  writer.WriteOptional(default_expr);
145978
146064
  writer.WriteField<bool>(ignore_nulls);
146065
+ writer.WriteOptional(filter_expr);
145979
146066
  }
145980
146067
 
145981
146068
  unique_ptr<ParsedExpression> WindowExpression::Deserialize(ExpressionType type, FieldReader &reader) {
@@ -145998,6 +146085,7 @@ unique_ptr<ParsedExpression> WindowExpression::Deserialize(ExpressionType type,
145998
146085
  expr->offset_expr = reader.ReadOptional<ParsedExpression>(nullptr);
145999
146086
  expr->default_expr = reader.ReadOptional<ParsedExpression>(nullptr);
146000
146087
  expr->ignore_nulls = reader.ReadRequired<bool>();
146088
+ expr->filter_expr = reader.ReadOptional<ParsedExpression>(nullptr);
146001
146089
  return move(expr);
146002
146090
  }
146003
146091
 
@@ -146860,6 +146948,9 @@ void ParsedExpressionIterator::EnumerateChildren(
146860
146948
  for (auto &child : window_expr.children) {
146861
146949
  callback(child);
146862
146950
  }
146951
+ if (window_expr.filter_expr) {
146952
+ callback(window_expr.filter_expr);
146953
+ }
146863
146954
  if (window_expr.start_expr) {
146864
146955
  callback(window_expr.start_expr);
146865
146956
  }
@@ -155446,6 +155537,11 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
155446
155537
  auto lowercase_name = StringUtil::Lower(function_name);
155447
155538
 
155448
155539
  if (root->over) {
155540
+ const auto win_fun_type = WindowToExpressionType(lowercase_name);
155541
+ if (win_fun_type == ExpressionType::INVALID) {
155542
+ throw InternalException("Unknown/unsupported window function");
155543
+ }
155544
+
155449
155545
  if (root->agg_distinct) {
155450
155546
  throw ParserException("DISTINCT is not implemented for window functions!");
155451
155547
  }
@@ -155454,18 +155550,13 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
155454
155550
  throw ParserException("ORDER BY is not implemented for window functions!");
155455
155551
  }
155456
155552
 
155457
- if (root->agg_filter) {
155458
- throw ParserException("FILTER is not implemented for window functions!");
155553
+ if (win_fun_type != ExpressionType::WINDOW_AGGREGATE && root->agg_filter) {
155554
+ throw ParserException("FILTER is not implemented for non-aggregate window functions!");
155459
155555
  }
155460
155556
  if (root->export_state) {
155461
155557
  throw ParserException("EXPORT_STATE is not supported for window functions!");
155462
155558
  }
155463
155559
 
155464
- const auto win_fun_type = WindowToExpressionType(lowercase_name);
155465
- if (win_fun_type == ExpressionType::INVALID) {
155466
- throw InternalException("Unknown/unsupported window function");
155467
- }
155468
-
155469
155560
  if (win_fun_type == ExpressionType::WINDOW_AGGREGATE && root->agg_ignore_nulls) {
155470
155561
  throw ParserException("IGNORE NULLS is not supported for windowed aggregates");
155471
155562
  }
@@ -155473,6 +155564,11 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
155473
155564
  auto expr = make_unique<WindowExpression>(win_fun_type, schema, lowercase_name);
155474
155565
  expr->ignore_nulls = root->agg_ignore_nulls;
155475
155566
 
155567
+ if (root->agg_filter) {
155568
+ auto filter_expr = TransformExpression(root->agg_filter);
155569
+ expr->filter_expr = move(filter_expr);
155570
+ }
155571
+
155476
155572
  if (root->args) {
155477
155573
  vector<unique_ptr<ParsedExpression>> function_list;
155478
155574
  TransformExpressionList(*root->args, function_list);
@@ -161727,6 +161823,7 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) {
161727
161823
  for (auto &order : window.orders) {
161728
161824
  BindChild(order.expression, depth, error);
161729
161825
  }
161826
+ BindChild(window.filter_expr, depth, error);
161730
161827
  BindChild(window.start_expr, depth, error);
161731
161828
  BindChild(window.end_expr, depth, error);
161732
161829
  BindChild(window.offset_expr, depth, error);
@@ -161860,6 +161957,8 @@ BindResult SelectBinder::BindWindow(WindowExpression &window, idx_t depth) {
161860
161957
  result->orders.emplace_back(type, null_order, move(expression));
161861
161958
  }
161862
161959
 
161960
+ result->filter_expr = CastWindowExpression(window.filter_expr, LogicalType::BOOLEAN);
161961
+
161863
161962
  result->start_expr = CastWindowExpression(window.start_expr, start_type);
161864
161963
  result->end_expr = CastWindowExpression(window.end_expr, end_type);
161865
161964
  result->offset_expr = CastWindowExpression(window.offset_expr, LogicalType::BIGINT);
@@ -168164,6 +168263,11 @@ bool BoundWindowExpression::Equals(const BaseExpression *other_p) const {
168164
168263
  return false;
168165
168264
  }
168166
168265
  }
168266
+ // check if the filter expressions are equivalent
168267
+ if (!Expression::Equals(filter_expr.get(), other->filter_expr.get())) {
168268
+ return false;
168269
+ }
168270
+
168167
168271
  // check if the framing expressions are equivalent
168168
168272
  if (!Expression::Equals(start_expr.get(), other->start_expr.get()) ||
168169
168273
  !Expression::Equals(end_expr.get(), other->end_expr.get()) ||
@@ -168228,6 +168332,8 @@ unique_ptr<Expression> BoundWindowExpression::Copy() {
168228
168332
  new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy());
168229
168333
  }
168230
168334
 
168335
+ new_window->filter_expr = filter_expr ? filter_expr->Copy() : nullptr;
168336
+
168231
168337
  new_window->start = start;
168232
168338
  new_window->end = end;
168233
168339
  new_window->start_expr = start_expr ? start_expr->Copy() : nullptr;
@@ -169468,6 +169574,9 @@ void ExpressionIterator::EnumerateChildren(Expression &expr,
169468
169574
  for (auto &child : window_expr.children) {
169469
169575
  callback(child);
169470
169576
  }
169577
+ if (window_expr.filter_expr) {
169578
+ callback(window_expr.filter_expr);
169579
+ }
169471
169580
  if (window_expr.start_expr) {
169472
169581
  callback(window_expr.start_expr);
169473
169582
  }