duckdb 0.8.2-dev2320.0 → 0.8.2-dev2399.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/binding.gyp +7 -7
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -0
  4. package/src/duckdb/extension/icu/icu-datepart.cpp +7 -5
  5. package/src/duckdb/extension/icu/icu-strptime.cpp +1 -20
  6. package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -0
  7. package/src/duckdb/src/common/adbc/adbc.cpp +8 -3
  8. package/src/duckdb/src/common/arrow/arrow_appender.cpp +4 -4
  9. package/src/duckdb/src/common/arrow/arrow_converter.cpp +27 -26
  10. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +5 -5
  11. package/src/duckdb/src/common/types/list_segment.cpp +42 -134
  12. package/src/duckdb/src/common/types/vector.cpp +21 -0
  13. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
  14. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +17 -19
  15. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +80 -61
  16. package/src/duckdb/src/execution/aggregate_hashtable.cpp +6 -0
  17. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +11 -5
  18. package/src/duckdb/src/execution/window_executor.cpp +18 -20
  19. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -2
  20. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  21. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +3 -3
  22. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +1 -1
  23. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +4 -3
  24. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +3 -3
  25. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +3 -3
  26. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +9 -11
  27. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +7 -0
  28. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
  29. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
  30. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -2
  31. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +0 -1
  32. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +1 -2
  33. package/src/duckdb/src/include/duckdb/main/client_config.hpp +0 -2
  34. package/src/duckdb/src/include/duckdb/main/client_context.hpp +1 -0
  35. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +25 -0
  36. package/src/duckdb/src/include/duckdb/main/config.hpp +1 -1
  37. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -13
  38. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +1 -1
  39. package/src/duckdb/src/main/capi/arrow-c.cpp +1 -7
  40. package/src/duckdb/src/main/client_context.cpp +15 -2
  41. package/src/duckdb/src/main/database.cpp +0 -9
  42. package/src/duckdb/src/main/query_result.cpp +0 -8
  43. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
  44. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +2 -2
  45. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +2 -2
  46. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +2 -2
  47. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +2 -4
  48. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  49. package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +0 -25
@@ -5,24 +5,6 @@
5
5
 
6
6
  namespace duckdb {
7
7
 
8
- static void RecursiveFlatten(Vector &vector, idx_t &count) {
9
- if (vector.GetVectorType() != VectorType::FLAT_VECTOR) {
10
- vector.Flatten(count);
11
- }
12
-
13
- auto internal_type = vector.GetType().InternalType();
14
- if (internal_type == PhysicalType::LIST) {
15
- auto &child_vector = ListVector::GetEntry(vector);
16
- auto child_vector_count = ListVector::GetListSize(vector);
17
- RecursiveFlatten(child_vector, child_vector_count);
18
- } else if (internal_type == PhysicalType::STRUCT) {
19
- auto &children = StructVector::GetEntries(vector);
20
- for (auto &child : children) {
21
- RecursiveFlatten(*child, count);
22
- }
23
- }
24
- }
25
-
26
8
  struct ListBindData : public FunctionData {
27
9
  explicit ListBindData(const LogicalType &stype_p);
28
10
  ~ListBindData() override;
@@ -60,12 +42,6 @@ struct ListFunction {
60
42
  state.linked_list.first_segment = nullptr;
61
43
  state.linked_list.last_segment = nullptr;
62
44
  }
63
-
64
- template <class STATE>
65
- static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
66
- // nop
67
- }
68
-
69
45
  static bool IgnoreNull() {
70
46
  return false;
71
47
  }
@@ -73,58 +49,54 @@ struct ListFunction {
73
49
 
74
50
  static void ListUpdateFunction(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count,
75
51
  Vector &state_vector, idx_t count) {
76
- D_ASSERT(input_count == 1);
77
52
 
53
+ D_ASSERT(input_count == 1);
78
54
  auto &input = inputs[0];
79
- UnifiedVectorFormat sdata;
80
- state_vector.ToUnifiedFormat(count, sdata);
55
+ RecursiveUnifiedVectorFormat input_data;
56
+ Vector::RecursiveToUnifiedFormat(input, count, input_data);
81
57
 
82
- auto states = UnifiedVectorFormat::GetData<ListAggState *>(sdata);
83
- RecursiveFlatten(input, count);
58
+ UnifiedVectorFormat states_data;
59
+ state_vector.ToUnifiedFormat(count, states_data);
60
+ auto states = UnifiedVectorFormat::GetData<ListAggState *>(states_data);
84
61
 
85
62
  auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
86
63
 
87
64
  for (idx_t i = 0; i < count; i++) {
88
- auto &state = *states[sdata.sel->get_index(i)];
89
- list_bind_data.functions.AppendRow(aggr_input_data.allocator, state.linked_list, input, i, count);
65
+ auto &state = *states[states_data.sel->get_index(i)];
66
+ list_bind_data.functions.AppendRow(aggr_input_data.allocator, state.linked_list, input_data, i);
90
67
  }
91
68
  }
92
69
 
93
- static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &aggr_input_data, idx_t count) {
94
- UnifiedVectorFormat sdata;
95
- state.ToUnifiedFormat(count, sdata);
96
- auto states_ptr = UnifiedVectorFormat::GetData<ListAggState *>(sdata);
70
+ static void ListCombineFunction(Vector &states_vector, Vector &combined, AggregateInputData &, idx_t count) {
97
71
 
98
- auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
72
+ UnifiedVectorFormat states_data;
73
+ states_vector.ToUnifiedFormat(count, states_data);
74
+ auto states_ptr = UnifiedVectorFormat::GetData<ListAggState *>(states_data);
99
75
 
100
76
  auto combined_ptr = FlatVector::GetData<ListAggState *>(combined);
101
77
  for (idx_t i = 0; i < count; i++) {
102
- auto &state = *states_ptr[sdata.sel->get_index(i)];
103
- if (state.linked_list.total_capacity == 0) {
104
- // NULL, no need to append.
105
- continue;
106
- }
107
78
 
108
- // copy the linked list of the state
109
- auto copied_linked_list = LinkedList(state.linked_list.total_capacity, nullptr, nullptr);
110
- list_bind_data.functions.CopyLinkedList(state.linked_list, copied_linked_list, aggr_input_data.allocator);
79
+ auto &state = *states_ptr[states_data.sel->get_index(i)];
80
+ D_ASSERT(state.linked_list.total_capacity != 0);
111
81
 
112
- // append the copied linked list to the combined state
113
- if (combined_ptr[i]->linked_list.last_segment) {
114
- combined_ptr[i]->linked_list.last_segment->next = copied_linked_list.first_segment;
115
- } else {
116
- combined_ptr[i]->linked_list.first_segment = copied_linked_list.first_segment;
82
+ if (combined_ptr[i]->linked_list.total_capacity == 0) {
83
+ combined_ptr[i]->linked_list = state.linked_list;
84
+ continue;
117
85
  }
118
- combined_ptr[i]->linked_list.last_segment = copied_linked_list.last_segment;
119
- combined_ptr[i]->linked_list.total_capacity += copied_linked_list.total_capacity;
86
+
87
+ // append the linked list
88
+ combined_ptr[i]->linked_list.last_segment->next = state.linked_list.first_segment;
89
+ combined_ptr[i]->linked_list.last_segment = state.linked_list.last_segment;
90
+ combined_ptr[i]->linked_list.total_capacity += state.linked_list.total_capacity;
120
91
  }
121
92
  }
122
93
 
123
- static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
94
+ static void ListFinalize(Vector &states_vector, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
124
95
  idx_t offset) {
125
- UnifiedVectorFormat sdata;
126
- state_vector.ToUnifiedFormat(count, sdata);
127
- auto states = UnifiedVectorFormat::GetData<ListAggState *>(sdata);
96
+
97
+ UnifiedVectorFormat states_data;
98
+ states_vector.ToUnifiedFormat(count, states_data);
99
+ auto states = UnifiedVectorFormat::GetData<ListAggState *>(states_data);
128
100
 
129
101
  D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
130
102
 
@@ -133,9 +105,11 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
133
105
  size_t total_len = ListVector::GetListSize(result);
134
106
 
135
107
  auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
136
- // first iterate over all of the entries and set up the list entries, plus get the newly required total length
108
+
109
+ // first iterate over all entries and set up the list entries, and get the newly required total length
137
110
  for (idx_t i = 0; i < count; i++) {
138
- auto &state = *states[sdata.sel->get_index(i)];
111
+
112
+ auto &state = *states[states_data.sel->get_index(i)];
139
113
  const auto rid = i + offset;
140
114
  result_data[rid].offset = total_len;
141
115
  if (state.linked_list.total_capacity == 0) {
@@ -143,16 +117,19 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
143
117
  result_data[rid].length = 0;
144
118
  continue;
145
119
  }
120
+
146
121
  // set the length and offset of this list in the result vector
147
122
  auto total_capacity = state.linked_list.total_capacity;
148
123
  result_data[rid].length = total_capacity;
149
124
  total_len += total_capacity;
150
125
  }
151
- // reserve capacity, then iterate over all of the entries again and copy over the data tot he child vector
126
+
127
+ // reserve capacity, then iterate over all entries again and copy over the data to the child vector
152
128
  ListVector::Reserve(result, total_len);
153
129
  auto &result_child = ListVector::GetEntry(result);
154
130
  for (idx_t i = 0; i < count; i++) {
155
- auto &state = *states[sdata.sel->get_index(i)];
131
+
132
+ auto &state = *states[states_data.sel->get_index(i)];
156
133
  const auto rid = i + offset;
157
134
  if (state.linked_list.total_capacity == 0) {
158
135
  continue;
@@ -161,6 +138,48 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
161
138
  idx_t current_offset = result_data[rid].offset;
162
139
  list_bind_data.functions.BuildListVector(state.linked_list, result_child, current_offset);
163
140
  }
141
+
142
+ ListVector::SetListSize(result, total_len);
143
+ }
144
+
145
+ static void ListWindow(Vector inputs[], const ValidityMask &filter_mask, AggregateInputData &aggr_input_data,
146
+ idx_t input_count, data_ptr_t state, const FrameBounds &frame, const FrameBounds &prev,
147
+ Vector &result, idx_t rid, idx_t bias) {
148
+
149
+ auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
150
+ LinkedList linked_list;
151
+
152
+ // UPDATE step
153
+
154
+ D_ASSERT(input_count == 1);
155
+ auto &input = inputs[0];
156
+
157
+ // FIXME: we unify more values than necessary (count is frame.end)
158
+ RecursiveUnifiedVectorFormat input_data;
159
+ Vector::RecursiveToUnifiedFormat(input, frame.end, input_data);
160
+
161
+ for (idx_t i = frame.start; i < frame.end; i++) {
162
+ list_bind_data.functions.AppendRow(aggr_input_data.allocator, linked_list, input_data, i);
163
+ }
164
+
165
+ // FINALIZE step
166
+
167
+ D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
168
+ auto result_data = FlatVector::GetData<list_entry_t>(result);
169
+ size_t total_len = ListVector::GetListSize(result);
170
+
171
+ // set the length and offset of this list in the result vector
172
+ result_data[rid].offset = total_len;
173
+ result_data[rid].length = linked_list.total_capacity;
174
+ D_ASSERT(linked_list.total_capacity != 0);
175
+ total_len += linked_list.total_capacity;
176
+
177
+ // reserve capacity, then copy over the data to the child vector
178
+ ListVector::Reserve(result, total_len);
179
+ auto &result_child = ListVector::GetEntry(result);
180
+ idx_t offset = result_data[rid].offset;
181
+ list_bind_data.functions.BuildListVector(linked_list, result_child, offset);
182
+
164
183
  ListVector::SetListSize(result, total_len);
165
184
  }
166
185
 
@@ -182,8 +201,8 @@ unique_ptr<FunctionData> ListBindFunction(ClientContext &context, AggregateFunct
182
201
  AggregateFunction ListFun::GetFunction() {
183
202
  return AggregateFunction({LogicalType::ANY}, LogicalTypeId::LIST, AggregateFunction::StateSize<ListAggState>,
184
203
  AggregateFunction::StateInitialize<ListAggState, ListFunction>, ListUpdateFunction,
185
- ListCombineFunction, ListFinalize, nullptr, ListBindFunction,
186
- AggregateFunction::StateDestroy<ListAggState, ListFunction>, nullptr, nullptr);
204
+ ListCombineFunction, ListFinalize, nullptr, ListBindFunction, nullptr, nullptr,
205
+ ListWindow);
187
206
  }
188
207
 
189
208
  } // namespace duckdb
@@ -584,6 +584,12 @@ void GroupedAggregateHashTable::Combine(GroupedAggregateHashTable &other) {
584
584
  }
585
585
 
586
586
  Verify();
587
+
588
+ // if we combine states, then we also need to combine the arena allocators
589
+ for (auto &stored_allocator : other.stored_allocators) {
590
+ stored_allocators.push_back(stored_allocator);
591
+ }
592
+ stored_allocators.push_back(other.aggregate_allocator);
587
593
  }
588
594
 
589
595
  void GroupedAggregateHashTable::Append(GroupedAggregateHashTable &other) {
@@ -12,7 +12,8 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(ClientContext &context, All
12
12
  vector<Value> group_minima_p, vector<idx_t> required_bits_p)
13
13
  : BaseAggregateHashTable(context, allocator, aggregate_objects_p, std::move(payload_types_p)),
14
14
  addresses(LogicalType::POINTER), required_bits(std::move(required_bits_p)), total_required_bits(0),
15
- group_minima(std::move(group_minima_p)), sel(STANDARD_VECTOR_SIZE), aggregate_allocator(allocator) {
15
+ group_minima(std::move(group_minima_p)), sel(STANDARD_VECTOR_SIZE),
16
+ aggregate_allocator(make_uniq<ArenaAllocator>(allocator)) {
16
17
  for (auto &group_bits : required_bits) {
17
18
  total_required_bits += group_bits;
18
19
  }
@@ -136,7 +137,7 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload)
136
137
  // after finding the group location we update the aggregates
137
138
  idx_t payload_idx = 0;
138
139
  auto &aggregates = layout.GetAggregates();
139
- RowOperationsState row_state(aggregate_allocator);
140
+ RowOperationsState row_state(*aggregate_allocator);
140
141
  for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) {
141
142
  auto &aggregate = aggregates[aggr_idx];
142
143
  auto input_count = (idx_t)aggregate.child_count;
@@ -165,7 +166,7 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
165
166
  data_ptr_t source_ptr = other.data;
166
167
  data_ptr_t target_ptr = data;
167
168
  idx_t combine_count = 0;
168
- RowOperationsState row_state(aggregate_allocator);
169
+ RowOperationsState row_state(*aggregate_allocator);
169
170
  for (idx_t i = 0; i < total_groups; i++) {
170
171
  auto has_entry_source = other.group_is_set[i];
171
172
  // we only have any work to do if the source has an entry for this group
@@ -183,6 +184,11 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
183
184
  target_ptr += tuple_size;
184
185
  }
185
186
  RowOperations::CombineStates(row_state, layout, source_addresses, target_addresses, combine_count);
187
+
188
+ // FIXME: after moving the arena allocator, we currently have to ensure that the pointer is not nullptr, because the
189
+ // FIXME: Destroy()-function of the hash table expects an allocator in some cases (e.g., for sorted aggregates)
190
+ stored_allocators.push_back(std::move(other.aggregate_allocator));
191
+ other.aggregate_allocator = make_uniq<ArenaAllocator>(allocator);
186
192
  }
187
193
 
188
194
  template <class T>
@@ -268,7 +274,7 @@ void PerfectAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) {
268
274
  }
269
275
  // then construct the payloads
270
276
  result.SetCardinality(entry_count);
271
- RowOperationsState row_state(aggregate_allocator);
277
+ RowOperationsState row_state(*aggregate_allocator);
272
278
  RowOperations::FinalizeStates(row_state, layout, addresses, result, grouping_columns);
273
279
  }
274
280
 
@@ -289,7 +295,7 @@ void PerfectAggregateHashTable::Destroy() {
289
295
  idx_t count = 0;
290
296
 
291
297
  // iterate over all initialised slots of the hash table
292
- RowOperationsState row_state(aggregate_allocator);
298
+ RowOperationsState row_state(*aggregate_allocator);
293
299
  data_ptr_t payload_ptr = data;
294
300
  for (idx_t i = 0; i < total_groups; i++) {
295
301
  if (group_is_set[i]) {
@@ -204,19 +204,19 @@ static idx_t FindTypedRangeBound(const WindowInputColumn &over, const idx_t orde
204
204
  WindowColumnIterator<T> begin(over, order_begin);
205
205
  WindowColumnIterator<T> end(over, order_end);
206
206
 
207
- if (order_begin < prev.first && prev.first < order_end) {
208
- const auto first = over.GetCell<T>(prev.first);
207
+ if (order_begin < prev.start && prev.start < order_end) {
208
+ const auto first = over.GetCell<T>(prev.start);
209
209
  if (!comp(val, first)) {
210
210
  // prev.first <= val, so we can start further forward
211
- begin += (prev.first - order_begin);
211
+ begin += (prev.start - order_begin);
212
212
  }
213
213
  }
214
- if (order_begin <= prev.second && prev.second < order_end) {
215
- const auto second = over.GetCell<T>(prev.second);
214
+ if (order_begin <= prev.end && prev.end < order_end) {
215
+ const auto second = over.GetCell<T>(prev.end);
216
216
  if (!comp(second, val)) {
217
217
  // val <= prev.second, so we can end further back
218
218
  // (prev.second is the largest peer)
219
- end -= (order_end - prev.second - 1);
219
+ end -= (order_end - prev.end - 1);
220
220
  }
221
221
  }
222
222
 
@@ -278,8 +278,6 @@ static idx_t FindOrderedRangeBound(const WindowInputColumn &over, const OrderTyp
278
278
  }
279
279
 
280
280
  struct WindowBoundariesState {
281
- using FrameBounds = std::pair<idx_t, idx_t>;
282
-
283
281
  static inline bool IsScalar(const unique_ptr<Expression> &expr) {
284
282
  return expr ? expr->IsScalar() : true;
285
283
  }
@@ -375,8 +373,8 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
375
373
  }
376
374
 
377
375
  // Reset range hints
378
- prev.first = valid_start;
379
- prev.second = valid_end;
376
+ prev.start = valid_start;
377
+ prev.end = valid_end;
380
378
  }
381
379
  } else if (!is_peer) {
382
380
  peer_start = row_idx;
@@ -427,9 +425,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
427
425
  if (boundary_start.CellIsNull(chunk_idx)) {
428
426
  window_start = peer_start;
429
427
  } else {
430
- prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, valid_start, row_idx,
428
+ prev.start = FindOrderedRangeBound<true>(range_collection, range_sense, valid_start, row_idx,
431
429
  boundary_start, chunk_idx, prev);
432
- window_start = prev.first;
430
+ window_start = prev.start;
433
431
  }
434
432
  break;
435
433
  }
@@ -437,9 +435,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
437
435
  if (boundary_start.CellIsNull(chunk_idx)) {
438
436
  window_start = peer_start;
439
437
  } else {
440
- prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, row_idx, valid_end, boundary_start,
438
+ prev.start = FindOrderedRangeBound<true>(range_collection, range_sense, row_idx, valid_end, boundary_start,
441
439
  chunk_idx, prev);
442
- window_start = prev.first;
440
+ window_start = prev.start;
443
441
  }
444
442
  break;
445
443
  }
@@ -472,9 +470,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
472
470
  if (boundary_end.CellIsNull(chunk_idx)) {
473
471
  window_end = peer_end;
474
472
  } else {
475
- prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, valid_start, row_idx,
476
- boundary_end, chunk_idx, prev);
477
- window_end = prev.second;
473
+ prev.end = FindOrderedRangeBound<false>(range_collection, range_sense, valid_start, row_idx, boundary_end,
474
+ chunk_idx, prev);
475
+ window_end = prev.end;
478
476
  }
479
477
  break;
480
478
  }
@@ -482,9 +480,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
482
480
  if (boundary_end.CellIsNull(chunk_idx)) {
483
481
  window_end = peer_end;
484
482
  } else {
485
- prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, row_idx, valid_end, boundary_end,
486
- chunk_idx, prev);
487
- window_end = prev.second;
483
+ prev.end = FindOrderedRangeBound<false>(range_collection, range_sense, row_idx, valid_end, boundary_end,
484
+ chunk_idx, prev);
485
+ window_end = prev.end;
488
486
  }
489
487
  break;
490
488
  }
@@ -39,8 +39,8 @@ struct CountStarFunction : public BaseCountFunction {
39
39
  Vector &result, idx_t rid, idx_t bias) {
40
40
  D_ASSERT(input_count == 0);
41
41
  auto data = FlatVector::GetData<RESULT_TYPE>(result);
42
- const auto begin = frame.first;
43
- const auto end = frame.second;
42
+ const auto begin = frame.start;
43
+ const auto end = frame.end;
44
44
  // Slice to any filtered rows
45
45
  if (!filter_mask.AllValid()) {
46
46
  RESULT_TYPE filtered = 0;
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev2320"
2
+ #define DUCKDB_VERSION "0.8.2-dev2399"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "02412e1015"
5
+ #define DUCKDB_SOURCE_ID "20ad35b3fa"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -4,7 +4,7 @@
4
4
  #include "duckdb/common/types/vector.hpp"
5
5
  #include "duckdb/common/arrow/arrow.hpp"
6
6
  #include "duckdb/common/arrow/arrow_buffer.hpp"
7
- #include "duckdb/common/arrow/arrow_options.hpp"
7
+ #include "duckdb/main/client_properties.hpp"
8
8
  #include "duckdb/common/array.hpp"
9
9
 
10
10
  namespace duckdb {
@@ -26,7 +26,7 @@ typedef void (*finalize_t)(ArrowAppendData &append_data, const LogicalType &type
26
26
  // FIXME: we should separate the append state variables from the variables required by the ArrowArray into
27
27
  // ArrowAppendState
28
28
  struct ArrowAppendData {
29
- explicit ArrowAppendData(ArrowOptions &options_p) : options(options_p) {
29
+ explicit ArrowAppendData(ClientProperties &options_p) : options(options_p) {
30
30
  }
31
31
  // the buffers of the arrow vector
32
32
  ArrowBuffer validity;
@@ -49,7 +49,7 @@ struct ArrowAppendData {
49
49
  duckdb::array<const void *, 3> buffers = {{nullptr, nullptr, nullptr}};
50
50
  vector<ArrowArray *> child_pointers;
51
51
 
52
- ArrowOptions options;
52
+ ClientProperties options;
53
53
  };
54
54
 
55
55
  //===--------------------------------------------------------------------===//
@@ -62,7 +62,7 @@ struct ArrowVarcharData {
62
62
  auto last_offset = offset_data[append_data.row_count];
63
63
  idx_t max_offset = append_data.row_count + to - from;
64
64
  if (max_offset > NumericLimits<uint32_t>::Maximum() &&
65
- append_data.options.offset_size == ArrowOffsetSize::REGULAR) {
65
+ append_data.options.arrow_offset_size == ArrowOffsetSize::REGULAR) {
66
66
  throw InvalidInputException("Arrow Appender: The maximum total string size for regular string buffers is "
67
67
  "%u but the offset of %lu exceeds this.",
68
68
  NumericLimits<uint32_t>::Maximum(), max_offset);
@@ -19,7 +19,7 @@ struct ArrowAppendData;
19
19
  //! The ArrowAppender class can be used to incrementally construct an arrow array by appending data chunks into it
20
20
  class ArrowAppender {
21
21
  public:
22
- DUCKDB_API ArrowAppender(vector<LogicalType> types, idx_t initial_capacity, ArrowOptions options);
22
+ DUCKDB_API ArrowAppender(vector<LogicalType> types, idx_t initial_capacity, ClientProperties options);
23
23
  DUCKDB_API ~ArrowAppender();
24
24
 
25
25
  //! Append a data chunk to the underlying arrow array
@@ -30,7 +30,8 @@ public:
30
30
  public:
31
31
  static void ReleaseArray(ArrowArray *array);
32
32
  static ArrowArray *FinalizeChild(const LogicalType &type, ArrowAppendData &append_data);
33
- static unique_ptr<ArrowAppendData> InitializeChild(const LogicalType &type, idx_t capacity, ArrowOptions &options);
33
+ static unique_ptr<ArrowAppendData> InitializeChild(const LogicalType &type, idx_t capacity,
34
+ ClientProperties &options);
34
35
 
35
36
  private:
36
37
  //! The types of the chunks that will be appended in
@@ -40,7 +41,7 @@ private:
40
41
  //! The total row count that has been appended
41
42
  idx_t row_count = 0;
42
43
 
43
- ArrowOptions options;
44
+ ClientProperties options;
44
45
  };
45
46
 
46
47
  } // namespace duckdb
@@ -10,7 +10,7 @@
10
10
 
11
11
  #include "duckdb/common/types/data_chunk.hpp"
12
12
  #include "duckdb/common/arrow/arrow.hpp"
13
- #include "duckdb/common/arrow/arrow_options.hpp"
13
+ #include "duckdb/main/client_properties.hpp"
14
14
 
15
15
  struct ArrowSchema;
16
16
 
@@ -18,8 +18,8 @@ namespace duckdb {
18
18
 
19
19
  struct ArrowConverter {
20
20
  DUCKDB_API static void ToArrowSchema(ArrowSchema *out_schema, const vector<LogicalType> &types,
21
- const vector<string> &names, const ArrowOptions &options);
22
- DUCKDB_API static void ToArrowArray(DataChunk &input, ArrowArray *out_array, ArrowOptions options);
21
+ const vector<string> &names, const ClientProperties &options);
22
+ DUCKDB_API static void ToArrowArray(DataChunk &input, ArrowArray *out_array, ClientProperties options);
23
23
  };
24
24
 
25
25
  } // namespace duckdb
@@ -11,7 +11,7 @@
11
11
  #include "duckdb/common/helper.hpp"
12
12
  #include "duckdb/common/preserved_error.hpp"
13
13
  #include "duckdb/main/chunk_scan_state.hpp"
14
- #include "duckdb/common/arrow/arrow_options.hpp"
14
+ #include "duckdb/main/client_properties.hpp"
15
15
 
16
16
  //! Here we have the internal duckdb classes that interact with Arrow's Internal Header (i.e., duckdb/commons/arrow.hpp)
17
17
  namespace duckdb {
@@ -58,9 +58,9 @@ public:
58
58
 
59
59
  class ArrowUtil {
60
60
  public:
61
- static bool TryFetchChunk(ChunkScanState &scan_state, ArrowOptions options, idx_t chunk_size, ArrowArray *out,
61
+ static bool TryFetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out,
62
62
  idx_t &result_count, PreservedError &error);
63
- static idx_t FetchChunk(ChunkScanState &scan_state, ArrowOptions options, idx_t chunk_size, ArrowArray *out);
63
+ static idx_t FetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out);
64
64
 
65
65
  private:
66
66
  static bool TryFetchNext(QueryResult &result, unique_ptr<DataChunk> &out, PreservedError &error);
@@ -22,14 +22,14 @@ struct ListSegment {
22
22
  ListSegment *next;
23
23
  };
24
24
  struct LinkedList {
25
- LinkedList() {};
25
+ LinkedList() : total_capacity(0), first_segment(nullptr), last_segment(nullptr) {};
26
26
  LinkedList(idx_t total_capacity_p, ListSegment *first_segment_p, ListSegment *last_segment_p)
27
27
  : total_capacity(total_capacity_p), first_segment(first_segment_p), last_segment(last_segment_p) {
28
28
  }
29
29
 
30
- idx_t total_capacity = 0;
31
- ListSegment *first_segment = nullptr;
32
- ListSegment *last_segment = nullptr;
30
+ idx_t total_capacity;
31
+ ListSegment *first_segment;
32
+ ListSegment *last_segment;
33
33
  };
34
34
 
35
35
  // forward declarations
@@ -37,23 +37,21 @@ struct ListSegmentFunctions;
37
37
  typedef ListSegment *(*create_segment_t)(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
38
38
  uint16_t capacity);
39
39
  typedef void (*write_data_to_segment_t)(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
40
- ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count);
40
+ ListSegment *segment, RecursiveUnifiedVectorFormat &input_data,
41
+ idx_t &entry_idx);
41
42
  typedef void (*read_data_from_segment_t)(const ListSegmentFunctions &functions, const ListSegment *segment,
42
43
  Vector &result, idx_t &total_count);
43
- typedef ListSegment *(*copy_data_from_segment_t)(const ListSegmentFunctions &functions, const ListSegment *source,
44
- ArenaAllocator &allocator);
45
44
 
46
45
  struct ListSegmentFunctions {
47
46
  create_segment_t create_segment;
48
47
  write_data_to_segment_t write_data;
49
48
  read_data_from_segment_t read_data;
50
- copy_data_from_segment_t copy_data;
49
+
51
50
  vector<ListSegmentFunctions> child_functions;
52
51
 
53
- void AppendRow(ArenaAllocator &allocator, LinkedList &linked_list, Vector &input, idx_t &entry_idx,
54
- idx_t &count) const;
52
+ void AppendRow(ArenaAllocator &allocator, LinkedList &linked_list, RecursiveUnifiedVectorFormat &input_data,
53
+ idx_t &entry_idx) const;
55
54
  void BuildListVector(const LinkedList &linked_list, Vector &result, idx_t &initial_total_count) const;
56
- void CopyLinkedList(const LinkedList &source_list, LinkedList &target_list, ArenaAllocator &allocator) const;
57
55
  };
58
56
 
59
57
  void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type);
@@ -35,6 +35,11 @@ struct UnifiedVectorFormat {
35
35
  }
36
36
  };
37
37
 
38
+ struct RecursiveUnifiedVectorFormat {
39
+ UnifiedVectorFormat unified;
40
+ vector<RecursiveUnifiedVectorFormat> children;
41
+ };
42
+
38
43
  class VectorCache;
39
44
  class VectorStructBuffer;
40
45
  class VectorListBuffer;
@@ -140,6 +145,8 @@ public:
140
145
  //! The most common vector types (flat, constant & dictionary) can be converted to the canonical format "for free"
141
146
  //! ToUnifiedFormat was originally called Orrify, as a tribute to Orri Erling who came up with it
142
147
  DUCKDB_API void ToUnifiedFormat(idx_t count, UnifiedVectorFormat &data);
148
+ //! Recursively calls UnifiedVectorFormat on a vector and its child vectors (for nested types)
149
+ static void RecursiveToUnifiedFormat(Vector &input, idx_t count, RecursiveUnifiedVectorFormat &data);
143
150
 
144
151
  //! Turn the vector into a sequence vector
145
152
  DUCKDB_API void Sequence(int64_t start, int64_t increment, idx_t count);
@@ -15,9 +15,14 @@
15
15
 
16
16
  namespace duckdb {
17
17
 
18
+ // structs
18
19
  struct AggregateInputData;
19
-
20
- typedef std::pair<idx_t, idx_t> FrameBounds;
20
+ struct FrameBounds {
21
+ FrameBounds() : start(0), end(0) {};
22
+ FrameBounds(idx_t start, idx_t end) : start(start), end(end) {};
23
+ idx_t start = 0;
24
+ idx_t end = 0;
25
+ };
21
26
 
22
27
  class AggregateExecutor {
23
28
  private:
@@ -56,8 +56,10 @@ protected:
56
56
  //! Reused selection vector
57
57
  SelectionVector sel;
58
58
 
59
- //! The arena allocator used by the aggregates for their internal state
60
- ArenaAllocator aggregate_allocator;
59
+ //! The active arena allocator used by the aggregates for their internal state
60
+ unique_ptr<ArenaAllocator> aggregate_allocator;
61
+ //! Owning arena allocators that this HT has data from
62
+ vector<unique_ptr<ArenaAllocator>> stored_allocators;
61
63
 
62
64
  private:
63
65
  //! Destroy the perfect aggregate HT (called automatically by the destructor)
@@ -117,8 +117,6 @@ public:
117
117
 
118
118
  class WindowSegmentTree : public WindowAggregator {
119
119
  public:
120
- using FrameBounds = std::pair<idx_t, idx_t>;
121
-
122
120
  WindowSegmentTree(AggregateObject aggr, const LogicalType &result_type, idx_t count, WindowAggregationMode mode_p);
123
121
  ~WindowSegmentTree() override;
124
122
 
@@ -41,7 +41,6 @@ typedef void (*aggregate_simple_update_t)(Vector inputs[], AggregateInputData &a
41
41
  data_ptr_t state, idx_t count);
42
42
 
43
43
  //! The type used for updating complex windowed aggregate functions (optional)
44
- typedef std::pair<idx_t, idx_t> FrameBounds;
45
44
  typedef void (*aggregate_window_t)(Vector inputs[], const ValidityMask &filter_mask,
46
45
  AggregateInputData &aggr_input_data, idx_t input_count, data_ptr_t state,
47
46
  const FrameBounds &frame, const FrameBounds &prev, Vector &result, idx_t rid,
@@ -13,7 +13,6 @@
13
13
  #include "duckdb/common/types.hpp"
14
14
  #include "duckdb/common/types/data_chunk.hpp"
15
15
  #include "duckdb/main/appender.hpp"
16
- #include "duckdb/common/arrow/arrow_options.hpp"
17
16
 
18
17
  #include <cstring>
19
18
  #include <cassert>
@@ -48,7 +47,7 @@ struct PendingStatementWrapper {
48
47
  struct ArrowResultWrapper {
49
48
  unique_ptr<MaterializedQueryResult> result;
50
49
  unique_ptr<DataChunk> current_chunk;
51
- ArrowOptions options;
50
+ ClientProperties options;
52
51
  };
53
52
 
54
53
  struct AppenderWrapper {