duckdb 0.7.2-dev1034.0 → 0.7.2-dev1138.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  3. package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
  4. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
  5. package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
  6. package/src/duckdb/src/common/sort/comparators.cpp +14 -5
  7. package/src/duckdb/src/common/types/interval.cpp +0 -41
  8. package/src/duckdb/src/common/types/list_segment.cpp +658 -0
  9. package/src/duckdb/src/common/types/string_heap.cpp +1 -1
  10. package/src/duckdb/src/common/types/string_type.cpp +1 -1
  11. package/src/duckdb/src/common/types/vector.cpp +1 -1
  12. package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
  13. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
  14. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
  15. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  16. package/src/duckdb/src/execution/index/art/art.cpp +13 -0
  17. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
  18. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
  19. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
  20. package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
  21. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
  22. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
  23. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
  24. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
  25. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
  26. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
  27. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
  28. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
  29. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
  30. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
  31. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -0
  32. package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
  33. package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
  34. package/src/duckdb/src/function/table/read_csv.cpp +5 -0
  35. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
  39. package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
  40. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
  41. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
  42. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
  43. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
  44. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
  45. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  46. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  50. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
  53. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
  54. package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
  55. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
  56. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
  57. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
  58. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
  59. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
  60. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
  61. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
  62. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
  63. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
  64. package/src/duckdb/src/include/duckdb.h +21 -0
  65. package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
  66. package/src/duckdb/src/main/settings/settings.cpp +20 -8
  67. package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
  68. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
  69. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
  70. package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
  71. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
  72. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
  73. package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
  74. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
  75. package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
  76. package/src/duckdb/src/storage/compression/rle.cpp +1 -0
  77. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
  78. package/src/duckdb/src/storage/data_table.cpp +3 -3
  79. package/src/duckdb/src/storage/local_storage.cpp +7 -0
  80. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  81. package/src/duckdb/src/storage/table/column_data.cpp +75 -18
  82. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
  83. package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
  84. package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
  85. package/src/duckdb/src/storage/table/row_group.cpp +200 -136
  86. package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
  87. package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
  88. package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
  89. package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
  90. package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
  91. package/src/duckdb/ub_src_common_types.cpp +2 -0
@@ -1,569 +1,11 @@
1
1
  #include "duckdb/common/pair.hpp"
2
2
  #include "duckdb/common/types/chunk_collection.hpp"
3
+ #include "duckdb/common/types/list_segment.hpp"
3
4
  #include "duckdb/function/aggregate/nested_functions.hpp"
4
5
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
5
6
 
6
7
  namespace duckdb {
7
8
 
8
- struct ListSegment {
9
- uint16_t count;
10
- uint16_t capacity;
11
- ListSegment *next;
12
- };
13
- struct LinkedList {
14
- LinkedList() {};
15
- LinkedList(idx_t total_capacity_p, ListSegment *first_segment_p, ListSegment *last_segment_p)
16
- : total_capacity(total_capacity_p), first_segment(first_segment_p), last_segment(last_segment_p) {
17
- }
18
-
19
- idx_t total_capacity = 0;
20
- ListSegment *first_segment = nullptr;
21
- ListSegment *last_segment = nullptr;
22
- };
23
-
24
- // forward declarations
25
- struct WriteDataToSegment;
26
- struct ReadDataFromSegment;
27
- struct CopyDataFromSegment;
28
- typedef ListSegment *(*create_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
29
- vector<AllocatedData> &owning_vector, const uint16_t &capacity);
30
- typedef void (*write_data_to_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
31
- vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
32
- idx_t &entry_idx, idx_t &count);
33
- typedef void (*read_data_from_segment_t)(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
34
- Vector &result, idx_t &total_count);
35
- typedef ListSegment *(*copy_data_from_segment_t)(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
36
- Allocator &allocator, vector<AllocatedData> &owning_vector);
37
-
38
- struct WriteDataToSegment {
39
- create_segment_t create_segment;
40
- write_data_to_segment_t segment_function;
41
- vector<WriteDataToSegment> child_functions;
42
- };
43
- struct ReadDataFromSegment {
44
- read_data_from_segment_t segment_function;
45
- vector<ReadDataFromSegment> child_functions;
46
- };
47
- struct CopyDataFromSegment {
48
- copy_data_from_segment_t segment_function;
49
- vector<CopyDataFromSegment> child_functions;
50
- };
51
-
52
- // forward declarations
53
- static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
54
- vector<AllocatedData> &owning_vector, LinkedList *linked_list, Vector &input, idx_t &entry_idx,
55
- idx_t &count);
56
- static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedList *linked_list, Vector &result,
57
- idx_t &initial_total_count);
58
- static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list,
59
- LinkedList &target_list, Allocator &allocator, vector<AllocatedData> &owning_vector);
60
-
61
- template <class T>
62
- static data_ptr_t AllocatePrimitiveData(Allocator &allocator, vector<AllocatedData> &owning_vector,
63
- const uint16_t &capacity) {
64
-
65
- owning_vector.emplace_back(allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(T))));
66
- return owning_vector.back().get();
67
- }
68
-
69
- static data_ptr_t AllocateListData(Allocator &allocator, vector<AllocatedData> &owning_vector,
70
- const uint16_t &capacity) {
71
-
72
- owning_vector.emplace_back(
73
- allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList)));
74
- return owning_vector.back().get();
75
- }
76
-
77
- static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData> &owning_vector,
78
- const uint16_t &capacity, const idx_t &child_count) {
79
-
80
- owning_vector.emplace_back(
81
- allocator.Allocate(sizeof(ListSegment) + capacity * sizeof(bool) + child_count * sizeof(ListSegment *)));
82
- return owning_vector.back().get();
83
- }
84
-
85
- template <class T>
86
- static T *GetPrimitiveData(const ListSegment *segment) {
87
- return (T *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
88
- }
89
-
90
- static uint64_t *GetListLengthData(const ListSegment *segment) {
91
- return (uint64_t *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
92
- }
93
-
94
- static LinkedList *GetListChildData(const ListSegment *segment) {
95
- return (LinkedList *)(((char *)segment) + sizeof(ListSegment) +
96
- segment->capacity * (sizeof(bool) + sizeof(uint64_t)));
97
- }
98
-
99
- static ListSegment **GetStructData(const ListSegment *segment) {
100
- return (ListSegment **)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
101
- }
102
-
103
- static bool *GetNullMask(const ListSegment *segment) {
104
- return (bool *)(((char *)segment) + sizeof(ListSegment));
105
- }
106
-
107
- static uint16_t GetCapacityForNewSegment(const LinkedList *linked_list) {
108
-
109
- // consecutive segments grow by the power of two
110
- uint16_t capacity = 4;
111
- if (linked_list->last_segment) {
112
- auto next_power_of_two = linked_list->last_segment->capacity * 2;
113
- capacity = next_power_of_two < 65536 ? next_power_of_two : linked_list->last_segment->capacity;
114
- }
115
- return capacity;
116
- }
117
-
118
- template <class T>
119
- static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allocator,
120
- vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
121
-
122
- // allocate data and set the header
123
- auto segment = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, capacity);
124
- segment->capacity = capacity;
125
- segment->count = 0;
126
- segment->next = nullptr;
127
- return segment;
128
- }
129
-
130
- static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator, vector<AllocatedData> &owning_vector,
131
- const uint16_t &capacity) {
132
-
133
- // allocate data and set the header
134
- auto segment = (ListSegment *)AllocateListData(allocator, owning_vector, capacity);
135
- segment->capacity = capacity;
136
- segment->count = 0;
137
- segment->next = nullptr;
138
-
139
- // create an empty linked list for the child vector
140
- auto linked_child_list = GetListChildData(segment);
141
- LinkedList linked_list(0, nullptr, nullptr);
142
- Store<LinkedList>(linked_list, (data_ptr_t)linked_child_list);
143
-
144
- return segment;
145
- }
146
-
147
- static ListSegment *CreateStructSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
148
- vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
149
-
150
- // allocate data and set header
151
- auto segment = (ListSegment *)AllocateStructData(allocator, owning_vector, capacity,
152
- write_data_to_segment.child_functions.size());
153
- segment->capacity = capacity;
154
- segment->count = 0;
155
- segment->next = nullptr;
156
-
157
- // create a child ListSegment with exactly the same capacity for each child vector
158
- auto child_segments = GetStructData(segment);
159
- for (idx_t i = 0; i < write_data_to_segment.child_functions.size(); i++) {
160
- auto child_function = write_data_to_segment.child_functions[i];
161
- auto child_segment = child_function.create_segment(child_function, allocator, owning_vector, capacity);
162
- Store<ListSegment *>(child_segment, (data_ptr_t)(child_segments + i));
163
- }
164
-
165
- return segment;
166
- }
167
-
168
- static ListSegment *GetSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
169
- vector<AllocatedData> &owning_vector, LinkedList *linked_list) {
170
-
171
- ListSegment *segment = nullptr;
172
-
173
- // determine segment
174
- if (!linked_list->last_segment) {
175
- // empty linked list, create the first (and last) segment
176
- auto capacity = GetCapacityForNewSegment(linked_list);
177
- segment = write_data_to_segment.create_segment(write_data_to_segment, allocator, owning_vector, capacity);
178
- linked_list->first_segment = segment;
179
- linked_list->last_segment = segment;
180
-
181
- } else if (linked_list->last_segment->capacity == linked_list->last_segment->count) {
182
- // the last segment of the linked list is full, create a new one and append it
183
- auto capacity = GetCapacityForNewSegment(linked_list);
184
- segment = write_data_to_segment.create_segment(write_data_to_segment, allocator, owning_vector, capacity);
185
- linked_list->last_segment->next = segment;
186
- linked_list->last_segment = segment;
187
-
188
- } else {
189
- // the last segment of the linked list is not full, append the data to it
190
- segment = linked_list->last_segment;
191
- }
192
-
193
- D_ASSERT(segment);
194
- return segment;
195
- }
196
-
197
- template <class T>
198
- static void WriteDataToPrimitiveSegment(WriteDataToSegment &, Allocator &allocator,
199
- vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
200
- idx_t &entry_idx, idx_t &count) {
201
-
202
- // get the vector data and the source index of the entry that we want to write
203
- auto input_data = FlatVector::GetData(input);
204
-
205
- // write null validity
206
- auto null_mask = GetNullMask(segment);
207
- auto is_null = FlatVector::IsNull(input, entry_idx);
208
- null_mask[segment->count] = is_null;
209
-
210
- // write value
211
- if (!is_null) {
212
- auto data = GetPrimitiveData<T>(segment);
213
- Store<T>(((T *)input_data)[entry_idx], (data_ptr_t)(data + segment->count));
214
- }
215
- }
216
-
217
- static void WriteDataToVarcharSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
218
- vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
219
- idx_t &entry_idx, idx_t &count) {
220
-
221
- // get the vector data and the source index of the entry that we want to write
222
- auto input_data = FlatVector::GetData(input);
223
-
224
- // write null validity
225
- auto null_mask = GetNullMask(segment);
226
- auto is_null = FlatVector::IsNull(input, entry_idx);
227
- null_mask[segment->count] = is_null;
228
-
229
- // set the length of this string
230
- auto str_length_data = GetListLengthData(segment);
231
- uint64_t str_length = 0;
232
-
233
- // get the string
234
- string_t str_t;
235
- if (!is_null) {
236
- str_t = ((string_t *)input_data)[entry_idx];
237
- str_length = str_t.GetSize();
238
- }
239
-
240
- // we can reconstruct the offset from the length
241
- Store<uint64_t>(str_length, (data_ptr_t)(str_length_data + segment->count));
242
-
243
- if (is_null) {
244
- return;
245
- }
246
-
247
- // write the characters to the linked list of child segments
248
- auto child_segments = Load<LinkedList>((data_ptr_t)GetListChildData(segment));
249
- for (char &c : str_t.GetString()) {
250
- auto child_segment =
251
- GetSegment(write_data_to_segment.child_functions.back(), allocator, owning_vector, &child_segments);
252
- auto data = GetPrimitiveData<char>(child_segment);
253
- data[child_segment->count] = c;
254
- child_segment->count++;
255
- child_segments.total_capacity++;
256
- }
257
-
258
- // store the updated linked list
259
- Store<LinkedList>(child_segments, (data_ptr_t)GetListChildData(segment));
260
- }
261
-
262
- static void WriteDataToListSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
263
- vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
264
- idx_t &entry_idx, idx_t &count) {
265
-
266
- // get the vector data and the source index of the entry that we want to write
267
- auto input_data = FlatVector::GetData(input);
268
-
269
- // write null validity
270
- auto null_mask = GetNullMask(segment);
271
- auto is_null = FlatVector::IsNull(input, entry_idx);
272
- null_mask[segment->count] = is_null;
273
-
274
- // set the length of this list
275
- auto list_length_data = GetListLengthData(segment);
276
- uint64_t list_length = 0;
277
-
278
- if (!is_null) {
279
- // get list entry information
280
- auto list_entries = (list_entry_t *)input_data;
281
- const auto &list_entry = list_entries[entry_idx];
282
- list_length = list_entry.length;
283
-
284
- // get the child vector and its data
285
- auto lists_size = ListVector::GetListSize(input);
286
- auto &child_vector = ListVector::GetEntry(input);
287
-
288
- // loop over the child vector entries and recurse on them
289
- auto child_segments = Load<LinkedList>((data_ptr_t)GetListChildData(segment));
290
- D_ASSERT(write_data_to_segment.child_functions.size() == 1);
291
- for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
292
- auto source_idx_child = list_entry.offset + child_idx;
293
- AppendRow(write_data_to_segment.child_functions[0], allocator, owning_vector, &child_segments, child_vector,
294
- source_idx_child, lists_size);
295
- }
296
- // store the updated linked list
297
- Store<LinkedList>(child_segments, (data_ptr_t)GetListChildData(segment));
298
- }
299
-
300
- Store<uint64_t>(list_length, (data_ptr_t)(list_length_data + segment->count));
301
- }
302
-
303
- static void WriteDataToStructSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
304
- vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
305
- idx_t &entry_idx, idx_t &count) {
306
-
307
- // write null validity
308
- auto null_mask = GetNullMask(segment);
309
- auto is_null = FlatVector::IsNull(input, entry_idx);
310
- null_mask[segment->count] = is_null;
311
-
312
- // write value
313
- auto &children = StructVector::GetEntries(input);
314
- D_ASSERT(children.size() == write_data_to_segment.child_functions.size());
315
- auto child_list = GetStructData(segment);
316
-
317
- // write the data of each of the children of the struct
318
- for (idx_t child_count = 0; child_count < children.size(); child_count++) {
319
- auto child_list_segment = Load<ListSegment *>((data_ptr_t)(child_list + child_count));
320
- auto &child_function = write_data_to_segment.child_functions[child_count];
321
- child_function.segment_function(child_function, allocator, owning_vector, child_list_segment,
322
- *children[child_count], entry_idx, count);
323
- child_list_segment->count++;
324
- }
325
- }
326
-
327
- static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
328
- vector<AllocatedData> &owning_vector, LinkedList *linked_list, Vector &input, idx_t &entry_idx,
329
- idx_t &count) {
330
-
331
- D_ASSERT(input.GetVectorType() == VectorType::FLAT_VECTOR);
332
-
333
- auto segment = GetSegment(write_data_to_segment, allocator, owning_vector, linked_list);
334
- write_data_to_segment.segment_function(write_data_to_segment, allocator, owning_vector, segment, input, entry_idx,
335
- count);
336
-
337
- linked_list->total_capacity++;
338
- segment->count++;
339
- }
340
-
341
- template <class T>
342
- static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
343
- idx_t &total_count) {
344
-
345
- auto &aggr_vector_validity = FlatVector::Validity(result);
346
-
347
- // set NULLs
348
- auto null_mask = GetNullMask(segment);
349
- for (idx_t i = 0; i < segment->count; i++) {
350
- if (null_mask[i]) {
351
- aggr_vector_validity.SetInvalid(total_count + i);
352
- }
353
- }
354
-
355
- auto aggr_vector_data = FlatVector::GetData(result);
356
-
357
- // load values
358
- for (idx_t i = 0; i < segment->count; i++) {
359
- if (aggr_vector_validity.RowIsValid(total_count + i)) {
360
- auto data = GetPrimitiveData<T>(segment);
361
- ((T *)aggr_vector_data)[total_count + i] = Load<T>((data_ptr_t)(data + i));
362
- }
363
- }
364
- }
365
-
366
- static void ReadDataFromVarcharSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
367
- idx_t &total_count) {
368
-
369
- auto &aggr_vector_validity = FlatVector::Validity(result);
370
-
371
- // set NULLs
372
- auto null_mask = GetNullMask(segment);
373
- for (idx_t i = 0; i < segment->count; i++) {
374
- if (null_mask[i]) {
375
- aggr_vector_validity.SetInvalid(total_count + i);
376
- }
377
- }
378
-
379
- // append all the child chars to one string
380
- string str = "";
381
- auto linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(segment));
382
- while (linked_child_list.first_segment) {
383
- auto child_segment = linked_child_list.first_segment;
384
- auto data = GetPrimitiveData<char>(child_segment);
385
- str.append(data, child_segment->count);
386
- linked_child_list.first_segment = child_segment->next;
387
- }
388
- linked_child_list.last_segment = nullptr;
389
-
390
- // use length and (reconstructed) offset to get the correct substrings
391
- auto aggr_vector_data = FlatVector::GetData(result);
392
- auto str_length_data = GetListLengthData(segment);
393
-
394
- // get the substrings and write them to the result vector
395
- idx_t offset = 0;
396
- for (idx_t i = 0; i < segment->count; i++) {
397
- if (!null_mask[i]) {
398
- auto str_length = Load<uint64_t>((data_ptr_t)(str_length_data + i));
399
- auto substr = str.substr(offset, str_length);
400
- auto str_t = StringVector::AddStringOrBlob(result, substr);
401
- ((string_t *)aggr_vector_data)[total_count + i] = str_t;
402
- offset += str_length;
403
- }
404
- }
405
- }
406
-
407
- static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
408
- Vector &result, idx_t &total_count) {
409
-
410
- auto &aggr_vector_validity = FlatVector::Validity(result);
411
-
412
- // set NULLs
413
- auto null_mask = GetNullMask(segment);
414
- for (idx_t i = 0; i < segment->count; i++) {
415
- if (null_mask[i]) {
416
- aggr_vector_validity.SetInvalid(total_count + i);
417
- }
418
- }
419
-
420
- auto list_vector_data = FlatVector::GetData<list_entry_t>(result);
421
-
422
- // get the starting offset
423
- idx_t offset = 0;
424
- if (total_count != 0) {
425
- offset = list_vector_data[total_count - 1].offset + list_vector_data[total_count - 1].length;
426
- }
427
- idx_t starting_offset = offset;
428
-
429
- // set length and offsets
430
- auto list_length_data = GetListLengthData(segment);
431
- for (idx_t i = 0; i < segment->count; i++) {
432
- auto list_length = Load<uint64_t>((data_ptr_t)(list_length_data + i));
433
- list_vector_data[total_count + i].length = list_length;
434
- list_vector_data[total_count + i].offset = offset;
435
- offset += list_length;
436
- }
437
-
438
- auto &child_vector = ListVector::GetEntry(result);
439
- auto linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(segment));
440
- ListVector::Reserve(result, offset);
441
-
442
- // recurse into the linked list of child values
443
- D_ASSERT(read_data_from_segment.child_functions.size() == 1);
444
- BuildListVector(read_data_from_segment.child_functions[0], &linked_child_list, child_vector, starting_offset);
445
- }
446
-
447
- static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
448
- Vector &result, idx_t &total_count) {
449
-
450
- auto &aggr_vector_validity = FlatVector::Validity(result);
451
-
452
- // set NULLs
453
- auto null_mask = GetNullMask(segment);
454
- for (idx_t i = 0; i < segment->count; i++) {
455
- if (null_mask[i]) {
456
- aggr_vector_validity.SetInvalid(total_count + i);
457
- }
458
- }
459
-
460
- auto &children = StructVector::GetEntries(result);
461
-
462
- // recurse into the child segments of each child of the struct
463
- D_ASSERT(children.size() == read_data_from_segment.child_functions.size());
464
- auto struct_children = GetStructData(segment);
465
- for (idx_t child_count = 0; child_count < children.size(); child_count++) {
466
- auto struct_children_segment = Load<ListSegment *>((data_ptr_t)(struct_children + child_count));
467
- auto &child_function = read_data_from_segment.child_functions[child_count];
468
- child_function.segment_function(child_function, struct_children_segment, *children[child_count], total_count);
469
- }
470
- }
471
-
472
- static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedList *linked_list, Vector &result,
473
- idx_t &initial_total_count) {
474
-
475
- idx_t total_count = initial_total_count;
476
- while (linked_list->first_segment) {
477
- auto segment = linked_list->first_segment;
478
- read_data_from_segment.segment_function(read_data_from_segment, segment, result, total_count);
479
-
480
- total_count += segment->count;
481
- linked_list->first_segment = segment->next;
482
- }
483
-
484
- linked_list->last_segment = nullptr;
485
- }
486
-
487
- template <class T>
488
- static ListSegment *CopyDataFromPrimitiveSegment(CopyDataFromSegment &, const ListSegment *source, Allocator &allocator,
489
- vector<AllocatedData> &owning_vector) {
490
-
491
- auto target = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, source->capacity);
492
- memcpy(target, source, sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(T)));
493
- target->next = nullptr;
494
- return target;
495
- }
496
-
497
- static ListSegment *CopyDataFromListSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
498
- Allocator &allocator, vector<AllocatedData> &owning_vector) {
499
-
500
- // create an empty linked list for the child vector of target
501
- auto source_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(source));
502
-
503
- // create the segment
504
- auto target = (ListSegment *)AllocateListData(allocator, owning_vector, source->capacity);
505
- memcpy(target, source,
506
- sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList));
507
- target->next = nullptr;
508
-
509
- auto target_linked_list = GetListChildData(target);
510
- LinkedList linked_list(source_linked_child_list.total_capacity, nullptr, nullptr);
511
- Store<LinkedList>(linked_list, (data_ptr_t)target_linked_list);
512
-
513
- // recurse to copy the linked child list
514
- auto target_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(target));
515
- D_ASSERT(copy_data_from_segment.child_functions.size() == 1);
516
- CopyLinkedList(copy_data_from_segment.child_functions[0], &source_linked_child_list, target_linked_child_list,
517
- allocator, owning_vector);
518
-
519
- // store the updated linked list
520
- Store<LinkedList>(target_linked_child_list, (data_ptr_t)GetListChildData(target));
521
- return target;
522
- }
523
-
524
- static ListSegment *CopyDataFromStructSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
525
- Allocator &allocator, vector<AllocatedData> &owning_vector) {
526
-
527
- auto source_child_count = copy_data_from_segment.child_functions.size();
528
- auto target = (ListSegment *)AllocateStructData(allocator, owning_vector, source->capacity, source_child_count);
529
- memcpy(target, source,
530
- sizeof(ListSegment) + source->capacity * sizeof(bool) + source_child_count * sizeof(ListSegment *));
531
- target->next = nullptr;
532
-
533
- // recurse and copy the children
534
- auto source_child_segments = GetStructData(source);
535
- auto target_child_segments = GetStructData(target);
536
-
537
- for (idx_t i = 0; i < copy_data_from_segment.child_functions.size(); i++) {
538
- auto child_function = copy_data_from_segment.child_functions[i];
539
- auto source_child_segment = Load<ListSegment *>((data_ptr_t)(source_child_segments + i));
540
- auto target_child_segment =
541
- child_function.segment_function(child_function, source_child_segment, allocator, owning_vector);
542
- Store<ListSegment *>(target_child_segment, (data_ptr_t)(target_child_segments + i));
543
- }
544
- return target;
545
- }
546
-
547
- static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list,
548
- LinkedList &target_list, Allocator &allocator, vector<AllocatedData> &owning_vector) {
549
-
550
- auto source_segment = source_list->first_segment;
551
-
552
- while (source_segment) {
553
- auto target_segment =
554
- copy_data_from_segment.segment_function(copy_data_from_segment, source_segment, allocator, owning_vector);
555
- source_segment = source_segment->next;
556
-
557
- if (!target_list.first_segment) {
558
- target_list.first_segment = target_segment;
559
- }
560
- if (target_list.last_segment) {
561
- target_list.last_segment->next = target_segment;
562
- }
563
- target_list.last_segment = target_segment;
564
- }
565
- }
566
-
567
9
  static void InitializeValidities(Vector &vector, idx_t &capacity) {
568
10
 
569
11
  auto &validity_mask = FlatVector::Validity(vector);
@@ -619,154 +61,6 @@ struct ListBindData : public FunctionData {
619
61
  }
620
62
  };
621
63
 
622
- static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
623
- ReadDataFromSegment &read_data_from_segment,
624
- CopyDataFromSegment &copy_data_from_segment, const LogicalType &type) {
625
-
626
- auto physical_type = type.InternalType();
627
- switch (physical_type) {
628
- case PhysicalType::BIT:
629
- case PhysicalType::BOOL: {
630
- write_data_to_segment.create_segment = CreatePrimitiveSegment<bool>;
631
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<bool>;
632
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<bool>;
633
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<bool>;
634
- break;
635
- }
636
- case PhysicalType::INT8: {
637
- write_data_to_segment.create_segment = CreatePrimitiveSegment<int8_t>;
638
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int8_t>;
639
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int8_t>;
640
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int8_t>;
641
- break;
642
- }
643
- case PhysicalType::INT16: {
644
- write_data_to_segment.create_segment = CreatePrimitiveSegment<int16_t>;
645
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int16_t>;
646
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int16_t>;
647
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int16_t>;
648
- break;
649
- }
650
- case PhysicalType::INT32: {
651
- write_data_to_segment.create_segment = CreatePrimitiveSegment<int32_t>;
652
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int32_t>;
653
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int32_t>;
654
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int32_t>;
655
- break;
656
- }
657
- case PhysicalType::INT64: {
658
- write_data_to_segment.create_segment = CreatePrimitiveSegment<int64_t>;
659
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int64_t>;
660
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int64_t>;
661
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int64_t>;
662
- break;
663
- }
664
- case PhysicalType::UINT8: {
665
- write_data_to_segment.create_segment = CreatePrimitiveSegment<uint8_t>;
666
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint8_t>;
667
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint8_t>;
668
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint8_t>;
669
- break;
670
- }
671
- case PhysicalType::UINT16: {
672
- write_data_to_segment.create_segment = CreatePrimitiveSegment<uint16_t>;
673
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint16_t>;
674
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint16_t>;
675
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint16_t>;
676
- break;
677
- }
678
- case PhysicalType::UINT32: {
679
- write_data_to_segment.create_segment = CreatePrimitiveSegment<uint32_t>;
680
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint32_t>;
681
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint32_t>;
682
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint32_t>;
683
- break;
684
- }
685
- case PhysicalType::UINT64: {
686
- write_data_to_segment.create_segment = CreatePrimitiveSegment<uint64_t>;
687
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint64_t>;
688
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint64_t>;
689
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint64_t>;
690
- break;
691
- }
692
- case PhysicalType::FLOAT: {
693
- write_data_to_segment.create_segment = CreatePrimitiveSegment<float>;
694
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<float>;
695
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<float>;
696
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<float>;
697
- break;
698
- }
699
- case PhysicalType::DOUBLE: {
700
- write_data_to_segment.create_segment = CreatePrimitiveSegment<double>;
701
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<double>;
702
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<double>;
703
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<double>;
704
- break;
705
- }
706
- case PhysicalType::INT128: {
707
- write_data_to_segment.create_segment = CreatePrimitiveSegment<hugeint_t>;
708
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<hugeint_t>;
709
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<hugeint_t>;
710
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<hugeint_t>;
711
- break;
712
- }
713
- case PhysicalType::INTERVAL: {
714
- write_data_to_segment.create_segment = CreatePrimitiveSegment<interval_t>;
715
- write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<interval_t>;
716
- read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<interval_t>;
717
- copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<interval_t>;
718
- break;
719
- }
720
- case PhysicalType::VARCHAR: {
721
- write_data_to_segment.create_segment = CreateListSegment;
722
- write_data_to_segment.segment_function = WriteDataToVarcharSegment;
723
- read_data_from_segment.segment_function = ReadDataFromVarcharSegment;
724
- copy_data_from_segment.segment_function = CopyDataFromListSegment;
725
-
726
- write_data_to_segment.child_functions.emplace_back();
727
- write_data_to_segment.child_functions.back().create_segment = CreatePrimitiveSegment<char>;
728
- copy_data_from_segment.child_functions.emplace_back();
729
- copy_data_from_segment.child_functions.back().segment_function = CopyDataFromPrimitiveSegment<char>;
730
- break;
731
- }
732
- case PhysicalType::LIST: {
733
- write_data_to_segment.create_segment = CreateListSegment;
734
- write_data_to_segment.segment_function = WriteDataToListSegment;
735
- read_data_from_segment.segment_function = ReadDataFromListSegment;
736
- copy_data_from_segment.segment_function = CopyDataFromListSegment;
737
-
738
- // recurse
739
- write_data_to_segment.child_functions.emplace_back();
740
- read_data_from_segment.child_functions.emplace_back();
741
- copy_data_from_segment.child_functions.emplace_back();
742
- GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
743
- read_data_from_segment.child_functions.back(),
744
- copy_data_from_segment.child_functions.back(), ListType::GetChildType(type));
745
- break;
746
- }
747
- case PhysicalType::STRUCT: {
748
- write_data_to_segment.create_segment = CreateStructSegment;
749
- write_data_to_segment.segment_function = WriteDataToStructSegment;
750
- read_data_from_segment.segment_function = ReadDataFromStructSegment;
751
- copy_data_from_segment.segment_function = CopyDataFromStructSegment;
752
-
753
- // recurse
754
- auto child_types = StructType::GetChildTypes(type);
755
- for (idx_t i = 0; i < child_types.size(); i++) {
756
- write_data_to_segment.child_functions.emplace_back();
757
- read_data_from_segment.child_functions.emplace_back();
758
- copy_data_from_segment.child_functions.emplace_back();
759
- GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
760
- read_data_from_segment.child_functions.back(),
761
- copy_data_from_segment.child_functions.back(), child_types[i].second);
762
- }
763
- break;
764
- }
765
- default:
766
- throw InternalException("LIST aggregate not yet implemented for " + type.ToString());
767
- }
768
- }
769
-
770
64
  ListBindData::ListBindData(const LogicalType &stype_p) : stype(stype_p) {
771
65
 
772
66
  // always unnest once because the result vector is of type LIST
@@ -834,8 +128,8 @@ static void ListUpdateFunction(Vector inputs[], AggregateInputData &aggr_input_d
834
128
  state->owning_vector = new vector<AllocatedData>;
835
129
  }
836
130
  D_ASSERT(state->type);
837
- AppendRow(list_bind_data.write_data_to_segment, aggr_input_data.allocator, *state->owning_vector,
838
- state->linked_list, input, i, count);
131
+ list_bind_data.write_data_to_segment.AppendRow(aggr_input_data.allocator, *state->owning_vector,
132
+ state->linked_list, input, i, count);
839
133
  }
840
134
  }
841
135
 
@@ -865,8 +159,8 @@ static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputD
865
159
 
866
160
  // copy the linked list of the state
867
161
  auto copied_linked_list = LinkedList(state->linked_list->total_capacity, nullptr, nullptr);
868
- CopyLinkedList(list_bind_data.copy_data_from_segment, state->linked_list, copied_linked_list,
869
- aggr_input_data.allocator, *owning_vector);
162
+ list_bind_data.copy_data_from_segment.CopyLinkedList(state->linked_list, copied_linked_list,
163
+ aggr_input_data.allocator, *owning_vector);
870
164
 
871
165
  // append the copied linked list to the combined state
872
166
  if (combined_ptr[i]->linked_list->last_segment) {
@@ -919,7 +213,7 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
919
213
  InitializeValidities(aggr_vector, total_capacity);
920
214
 
921
215
  idx_t total_count = 0;
922
- BuildListVector(list_bind_data.read_data_from_segment, state->linked_list, aggr_vector, total_count);
216
+ list_bind_data.read_data_from_segment.BuildListVector(state->linked_list, aggr_vector, total_count);
923
217
  ListVector::Append(result, aggr_vector, total_capacity);
924
218
 
925
219
  // now destroy the state (for parallel destruction)