duckdb 0.8.2-dev2356.0 → 0.8.2-dev2509.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. package/binding.gyp +7 -7
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -0
  4. package/src/duckdb/extension/icu/icu-datepart.cpp +7 -5
  5. package/src/duckdb/extension/icu/icu-strptime.cpp +1 -20
  6. package/src/duckdb/src/common/http_state.cpp +78 -0
  7. package/src/duckdb/src/common/types/list_segment.cpp +42 -134
  8. package/src/duckdb/src/common/types/vector.cpp +21 -0
  9. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
  10. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +17 -19
  11. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +80 -61
  12. package/src/duckdb/src/core_functions/function_list.cpp +2 -2
  13. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +308 -82
  14. package/src/duckdb/src/execution/aggregate_hashtable.cpp +6 -0
  15. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +11 -5
  16. package/src/duckdb/src/execution/window_executor.cpp +18 -20
  17. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -2
  18. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  19. package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
  20. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +9 -11
  21. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +7 -0
  22. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
  23. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +4 -4
  24. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
  25. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -2
  26. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +0 -1
  27. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +20 -3
  28. package/src/duckdb/src/main/extension/extension_helper.cpp +2 -1
  29. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
  30. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
  31. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +2 -2
  32. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +2 -2
  33. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +2 -2
  34. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +2 -4
  35. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
  36. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11077 -10674
  37. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  38. package/src/duckdb/ub_src_common.cpp +2 -0
@@ -37,8 +37,6 @@ inline interval_t operator-(const interval_t &lhs, const interval_t &rhs) {
37
37
  return Interval::FromMicro(Interval::GetMicro(lhs) - Interval::GetMicro(rhs));
38
38
  }
39
39
 
40
- using FrameBounds = std::pair<idx_t, idx_t>;
41
-
42
40
  template <typename SAVE_TYPE>
43
41
  struct QuantileState {
44
42
  using SaveType = SAVE_TYPE;
@@ -89,7 +87,7 @@ void ReuseIndexes(idx_t *index, const FrameBounds &frame, const FrameBounds &pre
89
87
  idx_t j = 0;
90
88
 
91
89
  // Copy overlapping indices
92
- for (idx_t p = 0; p < (prev.second - prev.first); ++p) {
90
+ for (idx_t p = 0; p < (prev.end - prev.start); ++p) {
93
91
  auto idx = index[p];
94
92
 
95
93
  // Shift down into any hole
@@ -98,7 +96,7 @@ void ReuseIndexes(idx_t *index, const FrameBounds &frame, const FrameBounds &pre
98
96
  }
99
97
 
100
98
  // Skip overlapping values
101
- if (frame.first <= idx && idx < frame.second) {
99
+ if (frame.start <= idx && idx < frame.end) {
102
100
  ++j;
103
101
  }
104
102
  }
@@ -106,15 +104,15 @@ void ReuseIndexes(idx_t *index, const FrameBounds &frame, const FrameBounds &pre
106
104
  // Insert new indices
107
105
  if (j > 0) {
108
106
  // Overlap: append the new ends
109
- for (auto f = frame.first; f < prev.first; ++f, ++j) {
107
+ for (auto f = frame.start; f < prev.start; ++f, ++j) {
110
108
  index[j] = f;
111
109
  }
112
- for (auto f = prev.second; f < frame.second; ++f, ++j) {
110
+ for (auto f = prev.end; f < frame.end; ++f, ++j) {
113
111
  index[j] = f;
114
112
  }
115
113
  } else {
116
114
  // No overlap: overwrite with new values
117
- for (auto f = frame.first; f < frame.second; ++f, ++j) {
115
+ for (auto f = frame.start; f < frame.end; ++f, ++j) {
118
116
  index[j] = f;
119
117
  }
120
118
  }
@@ -124,17 +122,17 @@ static idx_t ReplaceIndex(idx_t *index, const FrameBounds &frame, const FrameBou
124
122
  D_ASSERT(index);
125
123
 
126
124
  idx_t j = 0;
127
- for (idx_t p = 0; p < (prev.second - prev.first); ++p) {
125
+ for (idx_t p = 0; p < (prev.end - prev.start); ++p) {
128
126
  auto idx = index[p];
129
127
  if (j != p) {
130
128
  break;
131
129
  }
132
130
 
133
- if (frame.first <= idx && idx < frame.second) {
131
+ if (frame.start <= idx && idx < frame.end) {
134
132
  ++j;
135
133
  }
136
134
  }
137
- index[j] = frame.second - 1;
135
+ index[j] = frame.end - 1;
138
136
 
139
137
  return j;
140
138
  }
@@ -560,7 +558,7 @@ struct QuantileScalarOperation : public QuantileOperation {
560
558
 
561
559
  // Lazily initialise frame state
562
560
  auto prev_pos = state.pos;
563
- state.SetPos(frame.second - frame.first);
561
+ state.SetPos(frame.end - frame.start);
564
562
 
565
563
  auto index = state.w.data();
566
564
  D_ASSERT(index);
@@ -572,11 +570,11 @@ struct QuantileScalarOperation : public QuantileOperation {
572
570
  const auto q = bind_data.quantiles[0];
573
571
 
574
572
  bool replace = false;
575
- if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
573
+ if (frame.start == prev.start + 1 && frame.end == prev.end + 1) {
576
574
  // Fixed frame size
577
575
  const auto j = ReplaceIndex(index, frame, prev);
578
576
  // We can only replace if the number of NULLs has not changed
579
- if (included.AllValid() || included(prev.first) == included(prev.second)) {
577
+ if (included.AllValid() || included(prev.start) == included(prev.end)) {
580
578
  Interpolator<DISCRETE> interp(q, prev_pos, false);
581
579
  replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
582
580
  if (replace) {
@@ -720,7 +718,7 @@ struct QuantileListOperation : public QuantileOperation {
720
718
 
721
719
  // Lazily initialise frame state
722
720
  auto prev_pos = state.pos;
723
- state.SetPos(frame.second - frame.first);
721
+ state.SetPos(frame.end - frame.start);
724
722
 
725
723
  auto index = state.w.data();
726
724
 
@@ -731,11 +729,11 @@ struct QuantileListOperation : public QuantileOperation {
731
729
  // then Q25 must be recomputed, but Q50 and Q75 are unaffected.
732
730
  // For a single element list, this reduces to the scalar case.
733
731
  std::pair<idx_t, idx_t> replaceable {state.pos, 0};
734
- if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
732
+ if (frame.start == prev.start + 1 && frame.end == prev.end + 1) {
735
733
  // Fixed frame size
736
734
  const auto j = ReplaceIndex(index, frame, prev);
737
735
  // We can only replace if the number of NULLs has not changed
738
- if (included.AllValid() || included(prev.first) == included(prev.second)) {
736
+ if (included.AllValid() || included(prev.start) == included(prev.end)) {
739
737
  for (const auto &q : bind_data.order) {
740
738
  const auto &quantile = bind_data.quantiles[q];
741
739
  Interpolator<DISCRETE> interp(quantile, prev_pos, false);
@@ -1062,7 +1060,7 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
1062
1060
 
1063
1061
  // Lazily initialise frame state
1064
1062
  auto prev_pos = state.pos;
1065
- state.SetPos(frame.second - frame.first);
1063
+ state.SetPos(frame.end - frame.start);
1066
1064
 
1067
1065
  auto index = state.w.data();
1068
1066
  D_ASSERT(index);
@@ -1085,11 +1083,11 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
1085
1083
  const float q = 0.5;
1086
1084
 
1087
1085
  bool replace = false;
1088
- if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
1086
+ if (frame.start == prev.start + 1 && frame.end == prev.end + 1) {
1089
1087
  // Fixed frame size
1090
1088
  const auto j = ReplaceIndex(index, frame, prev);
1091
1089
  // We can only replace if the number of NULLs has not changed
1092
- if (included.AllValid() || included(prev.first) == included(prev.second)) {
1090
+ if (included.AllValid() || included(prev.start) == included(prev.end)) {
1093
1091
  Interpolator<false> interp(q, prev_pos, false);
1094
1092
  replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
1095
1093
  if (replace) {
@@ -5,24 +5,6 @@
5
5
 
6
6
  namespace duckdb {
7
7
 
8
- static void RecursiveFlatten(Vector &vector, idx_t &count) {
9
- if (vector.GetVectorType() != VectorType::FLAT_VECTOR) {
10
- vector.Flatten(count);
11
- }
12
-
13
- auto internal_type = vector.GetType().InternalType();
14
- if (internal_type == PhysicalType::LIST) {
15
- auto &child_vector = ListVector::GetEntry(vector);
16
- auto child_vector_count = ListVector::GetListSize(vector);
17
- RecursiveFlatten(child_vector, child_vector_count);
18
- } else if (internal_type == PhysicalType::STRUCT) {
19
- auto &children = StructVector::GetEntries(vector);
20
- for (auto &child : children) {
21
- RecursiveFlatten(*child, count);
22
- }
23
- }
24
- }
25
-
26
8
  struct ListBindData : public FunctionData {
27
9
  explicit ListBindData(const LogicalType &stype_p);
28
10
  ~ListBindData() override;
@@ -60,12 +42,6 @@ struct ListFunction {
60
42
  state.linked_list.first_segment = nullptr;
61
43
  state.linked_list.last_segment = nullptr;
62
44
  }
63
-
64
- template <class STATE>
65
- static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
66
- // nop
67
- }
68
-
69
45
  static bool IgnoreNull() {
70
46
  return false;
71
47
  }
@@ -73,58 +49,54 @@ struct ListFunction {
73
49
 
74
50
  static void ListUpdateFunction(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count,
75
51
  Vector &state_vector, idx_t count) {
76
- D_ASSERT(input_count == 1);
77
52
 
53
+ D_ASSERT(input_count == 1);
78
54
  auto &input = inputs[0];
79
- UnifiedVectorFormat sdata;
80
- state_vector.ToUnifiedFormat(count, sdata);
55
+ RecursiveUnifiedVectorFormat input_data;
56
+ Vector::RecursiveToUnifiedFormat(input, count, input_data);
81
57
 
82
- auto states = UnifiedVectorFormat::GetData<ListAggState *>(sdata);
83
- RecursiveFlatten(input, count);
58
+ UnifiedVectorFormat states_data;
59
+ state_vector.ToUnifiedFormat(count, states_data);
60
+ auto states = UnifiedVectorFormat::GetData<ListAggState *>(states_data);
84
61
 
85
62
  auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
86
63
 
87
64
  for (idx_t i = 0; i < count; i++) {
88
- auto &state = *states[sdata.sel->get_index(i)];
89
- list_bind_data.functions.AppendRow(aggr_input_data.allocator, state.linked_list, input, i, count);
65
+ auto &state = *states[states_data.sel->get_index(i)];
66
+ list_bind_data.functions.AppendRow(aggr_input_data.allocator, state.linked_list, input_data, i);
90
67
  }
91
68
  }
92
69
 
93
- static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &aggr_input_data, idx_t count) {
94
- UnifiedVectorFormat sdata;
95
- state.ToUnifiedFormat(count, sdata);
96
- auto states_ptr = UnifiedVectorFormat::GetData<ListAggState *>(sdata);
70
+ static void ListCombineFunction(Vector &states_vector, Vector &combined, AggregateInputData &, idx_t count) {
97
71
 
98
- auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
72
+ UnifiedVectorFormat states_data;
73
+ states_vector.ToUnifiedFormat(count, states_data);
74
+ auto states_ptr = UnifiedVectorFormat::GetData<ListAggState *>(states_data);
99
75
 
100
76
  auto combined_ptr = FlatVector::GetData<ListAggState *>(combined);
101
77
  for (idx_t i = 0; i < count; i++) {
102
- auto &state = *states_ptr[sdata.sel->get_index(i)];
103
- if (state.linked_list.total_capacity == 0) {
104
- // NULL, no need to append.
105
- continue;
106
- }
107
78
 
108
- // copy the linked list of the state
109
- auto copied_linked_list = LinkedList(state.linked_list.total_capacity, nullptr, nullptr);
110
- list_bind_data.functions.CopyLinkedList(state.linked_list, copied_linked_list, aggr_input_data.allocator);
79
+ auto &state = *states_ptr[states_data.sel->get_index(i)];
80
+ D_ASSERT(state.linked_list.total_capacity != 0);
111
81
 
112
- // append the copied linked list to the combined state
113
- if (combined_ptr[i]->linked_list.last_segment) {
114
- combined_ptr[i]->linked_list.last_segment->next = copied_linked_list.first_segment;
115
- } else {
116
- combined_ptr[i]->linked_list.first_segment = copied_linked_list.first_segment;
82
+ if (combined_ptr[i]->linked_list.total_capacity == 0) {
83
+ combined_ptr[i]->linked_list = state.linked_list;
84
+ continue;
117
85
  }
118
- combined_ptr[i]->linked_list.last_segment = copied_linked_list.last_segment;
119
- combined_ptr[i]->linked_list.total_capacity += copied_linked_list.total_capacity;
86
+
87
+ // append the linked list
88
+ combined_ptr[i]->linked_list.last_segment->next = state.linked_list.first_segment;
89
+ combined_ptr[i]->linked_list.last_segment = state.linked_list.last_segment;
90
+ combined_ptr[i]->linked_list.total_capacity += state.linked_list.total_capacity;
120
91
  }
121
92
  }
122
93
 
123
- static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
94
+ static void ListFinalize(Vector &states_vector, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
124
95
  idx_t offset) {
125
- UnifiedVectorFormat sdata;
126
- state_vector.ToUnifiedFormat(count, sdata);
127
- auto states = UnifiedVectorFormat::GetData<ListAggState *>(sdata);
96
+
97
+ UnifiedVectorFormat states_data;
98
+ states_vector.ToUnifiedFormat(count, states_data);
99
+ auto states = UnifiedVectorFormat::GetData<ListAggState *>(states_data);
128
100
 
129
101
  D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
130
102
 
@@ -133,9 +105,11 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
133
105
  size_t total_len = ListVector::GetListSize(result);
134
106
 
135
107
  auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
136
- // first iterate over all of the entries and set up the list entries, plus get the newly required total length
108
+
109
+ // first iterate over all entries and set up the list entries, and get the newly required total length
137
110
  for (idx_t i = 0; i < count; i++) {
138
- auto &state = *states[sdata.sel->get_index(i)];
111
+
112
+ auto &state = *states[states_data.sel->get_index(i)];
139
113
  const auto rid = i + offset;
140
114
  result_data[rid].offset = total_len;
141
115
  if (state.linked_list.total_capacity == 0) {
@@ -143,16 +117,19 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
143
117
  result_data[rid].length = 0;
144
118
  continue;
145
119
  }
120
+
146
121
  // set the length and offset of this list in the result vector
147
122
  auto total_capacity = state.linked_list.total_capacity;
148
123
  result_data[rid].length = total_capacity;
149
124
  total_len += total_capacity;
150
125
  }
151
- // reserve capacity, then iterate over all of the entries again and copy over the data tot he child vector
126
+
127
+ // reserve capacity, then iterate over all entries again and copy over the data to the child vector
152
128
  ListVector::Reserve(result, total_len);
153
129
  auto &result_child = ListVector::GetEntry(result);
154
130
  for (idx_t i = 0; i < count; i++) {
155
- auto &state = *states[sdata.sel->get_index(i)];
131
+
132
+ auto &state = *states[states_data.sel->get_index(i)];
156
133
  const auto rid = i + offset;
157
134
  if (state.linked_list.total_capacity == 0) {
158
135
  continue;
@@ -161,6 +138,48 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
161
138
  idx_t current_offset = result_data[rid].offset;
162
139
  list_bind_data.functions.BuildListVector(state.linked_list, result_child, current_offset);
163
140
  }
141
+
142
+ ListVector::SetListSize(result, total_len);
143
+ }
144
+
145
+ static void ListWindow(Vector inputs[], const ValidityMask &filter_mask, AggregateInputData &aggr_input_data,
146
+ idx_t input_count, data_ptr_t state, const FrameBounds &frame, const FrameBounds &prev,
147
+ Vector &result, idx_t rid, idx_t bias) {
148
+
149
+ auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
150
+ LinkedList linked_list;
151
+
152
+ // UPDATE step
153
+
154
+ D_ASSERT(input_count == 1);
155
+ auto &input = inputs[0];
156
+
157
+ // FIXME: we unify more values than necessary (count is frame.end)
158
+ RecursiveUnifiedVectorFormat input_data;
159
+ Vector::RecursiveToUnifiedFormat(input, frame.end, input_data);
160
+
161
+ for (idx_t i = frame.start; i < frame.end; i++) {
162
+ list_bind_data.functions.AppendRow(aggr_input_data.allocator, linked_list, input_data, i);
163
+ }
164
+
165
+ // FINALIZE step
166
+
167
+ D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
168
+ auto result_data = FlatVector::GetData<list_entry_t>(result);
169
+ size_t total_len = ListVector::GetListSize(result);
170
+
171
+ // set the length and offset of this list in the result vector
172
+ result_data[rid].offset = total_len;
173
+ result_data[rid].length = linked_list.total_capacity;
174
+ D_ASSERT(linked_list.total_capacity != 0);
175
+ total_len += linked_list.total_capacity;
176
+
177
+ // reserve capacity, then copy over the data to the child vector
178
+ ListVector::Reserve(result, total_len);
179
+ auto &result_child = ListVector::GetEntry(result);
180
+ idx_t offset = result_data[rid].offset;
181
+ list_bind_data.functions.BuildListVector(linked_list, result_child, offset);
182
+
164
183
  ListVector::SetListSize(result, total_len);
165
184
  }
166
185
 
@@ -182,8 +201,8 @@ unique_ptr<FunctionData> ListBindFunction(ClientContext &context, AggregateFunct
182
201
  AggregateFunction ListFun::GetFunction() {
183
202
  return AggregateFunction({LogicalType::ANY}, LogicalTypeId::LIST, AggregateFunction::StateSize<ListAggState>,
184
203
  AggregateFunction::StateInitialize<ListAggState, ListFunction>, ListUpdateFunction,
185
- ListCombineFunction, ListFinalize, nullptr, ListBindFunction,
186
- AggregateFunction::StateDestroy<ListAggState, ListFunction>, nullptr, nullptr);
204
+ ListCombineFunction, ListFinalize, nullptr, ListBindFunction, nullptr, nullptr,
205
+ ListWindow);
187
206
  }
188
207
 
189
208
  } // namespace duckdb
@@ -73,7 +73,7 @@ static StaticFunctionDefinition internal_functions[] = {
73
73
  DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayDistinctFun),
74
74
  DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayFilterFun),
75
75
  DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArrayReverseSortFun),
76
- DUCKDB_SCALAR_FUNCTION_ALIAS(ArraySliceFun),
76
+ DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArraySliceFun),
77
77
  DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArraySortFun),
78
78
  DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayTransformFun),
79
79
  DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayUniqueFun),
@@ -200,7 +200,7 @@ static StaticFunctionDefinition internal_functions[] = {
200
200
  DUCKDB_SCALAR_FUNCTION(ListFilterFun),
201
201
  DUCKDB_SCALAR_FUNCTION_ALIAS(ListPackFun),
202
202
  DUCKDB_SCALAR_FUNCTION_SET(ListReverseSortFun),
203
- DUCKDB_SCALAR_FUNCTION(ListSliceFun),
203
+ DUCKDB_SCALAR_FUNCTION_SET(ListSliceFun),
204
204
  DUCKDB_SCALAR_FUNCTION_SET(ListSortFun),
205
205
  DUCKDB_SCALAR_FUNCTION(ListTransformFun),
206
206
  DUCKDB_SCALAR_FUNCTION(ListUniqueFun),