duckdb 0.7.2-dev1034.0 → 0.7.2-dev1138.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  3. package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
  4. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
  5. package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
  6. package/src/duckdb/src/common/sort/comparators.cpp +14 -5
  7. package/src/duckdb/src/common/types/interval.cpp +0 -41
  8. package/src/duckdb/src/common/types/list_segment.cpp +658 -0
  9. package/src/duckdb/src/common/types/string_heap.cpp +1 -1
  10. package/src/duckdb/src/common/types/string_type.cpp +1 -1
  11. package/src/duckdb/src/common/types/vector.cpp +1 -1
  12. package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
  13. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
  14. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
  15. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  16. package/src/duckdb/src/execution/index/art/art.cpp +13 -0
  17. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
  18. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
  19. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
  20. package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
  21. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
  22. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
  23. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
  24. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
  25. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
  26. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
  27. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
  28. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
  29. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
  30. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
  31. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -0
  32. package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
  33. package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
  34. package/src/duckdb/src/function/table/read_csv.cpp +5 -0
  35. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
  39. package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
  40. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
  41. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
  42. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
  43. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
  44. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
  45. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  46. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  50. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
  53. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
  54. package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
  55. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
  56. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
  57. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
  58. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
  59. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
  60. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
  61. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
  62. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
  63. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
  64. package/src/duckdb/src/include/duckdb.h +21 -0
  65. package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
  66. package/src/duckdb/src/main/settings/settings.cpp +20 -8
  67. package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
  68. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
  69. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
  70. package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
  71. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
  72. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
  73. package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
  74. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
  75. package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
  76. package/src/duckdb/src/storage/compression/rle.cpp +1 -0
  77. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
  78. package/src/duckdb/src/storage/data_table.cpp +3 -3
  79. package/src/duckdb/src/storage/local_storage.cpp +7 -0
  80. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  81. package/src/duckdb/src/storage/table/column_data.cpp +75 -18
  82. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
  83. package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
  84. package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
  85. package/src/duckdb/src/storage/table/row_group.cpp +200 -136
  86. package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
  87. package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
  88. package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
  89. package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
  90. package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
  91. package/src/duckdb/ub_src_common_types.cpp +2 -0
@@ -0,0 +1,658 @@
1
+ #include "duckdb/common/types/list_segment.hpp"
2
+
3
+ namespace duckdb {
4
+
5
+ // helper functions for allocating segments and locating the data stored inside them
6
+
7
+ template <class T>
8
+ static data_ptr_t AllocatePrimitiveData(Allocator &allocator, vector<AllocatedData> &owning_vector,
9
+ const uint16_t &capacity) {
10
+
11
+ owning_vector.emplace_back(allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(T))));
12
+ return owning_vector.back().get();
13
+ }
14
+
15
+ static data_ptr_t AllocateListData(Allocator &allocator, vector<AllocatedData> &owning_vector,
16
+ const uint16_t &capacity) {
17
+
18
+ owning_vector.emplace_back(
19
+ allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList)));
20
+ return owning_vector.back().get();
21
+ }
22
+
23
+ static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData> &owning_vector,
24
+ const uint16_t &capacity, const idx_t &child_count) {
25
+
26
+ owning_vector.emplace_back(
27
+ allocator.Allocate(sizeof(ListSegment) + capacity * sizeof(bool) + child_count * sizeof(ListSegment *)));
28
+ return owning_vector.back().get();
29
+ }
30
+
31
+ template <class T>
32
+ static T *GetPrimitiveData(const ListSegment *segment) {
33
+ return (T *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
34
+ }
35
+
36
+ static uint64_t *GetListLengthData(const ListSegment *segment) {
37
+ return (uint64_t *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
38
+ }
39
+
40
+ static LinkedList *GetListChildData(const ListSegment *segment) {
41
+ return (LinkedList *)(((char *)segment) + sizeof(ListSegment) +
42
+ segment->capacity * (sizeof(bool) + sizeof(uint64_t)));
43
+ }
44
+
45
+ static ListSegment **GetStructData(const ListSegment *segment) {
46
+ return (ListSegment **)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
47
+ }
48
+
49
+ static bool *GetNullMask(const ListSegment *segment) {
50
+ return (bool *)(((char *)segment) + sizeof(ListSegment));
51
+ }
52
+
53
+ static uint16_t GetCapacityForNewSegment(const LinkedList *linked_list) {
54
+
55
+ // consecutive segments grow by the power of two
56
+ uint16_t capacity = 4;
57
+ if (linked_list->last_segment) {
58
+ auto next_power_of_two = linked_list->last_segment->capacity * 2;
59
+ capacity = next_power_of_two < 65536 ? next_power_of_two : linked_list->last_segment->capacity;
60
+ }
61
+ return capacity;
62
+ }
63
+
64
+ template <class T>
65
+ static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allocator,
66
+ vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
67
+
68
+ // allocate data and set the header
69
+ auto segment = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, capacity);
70
+ segment->capacity = capacity;
71
+ segment->count = 0;
72
+ segment->next = nullptr;
73
+ return segment;
74
+ }
75
+
76
+ static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator, vector<AllocatedData> &owning_vector,
77
+ const uint16_t &capacity) {
78
+
79
+ // allocate data and set the header
80
+ auto segment = (ListSegment *)AllocateListData(allocator, owning_vector, capacity);
81
+ segment->capacity = capacity;
82
+ segment->count = 0;
83
+ segment->next = nullptr;
84
+
85
+ // create an empty linked list for the child vector
86
+ auto linked_child_list = GetListChildData(segment);
87
+ LinkedList linked_list(0, nullptr, nullptr);
88
+ Store<LinkedList>(linked_list, (data_ptr_t)linked_child_list);
89
+
90
+ return segment;
91
+ }
92
+
93
+ static ListSegment *CreateStructSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
94
+ vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
95
+
96
+ // allocate data and set header
97
+ auto segment = (ListSegment *)AllocateStructData(allocator, owning_vector, capacity,
98
+ write_data_to_segment.child_functions.size());
99
+ segment->capacity = capacity;
100
+ segment->count = 0;
101
+ segment->next = nullptr;
102
+
103
+ // create a child ListSegment with exactly the same capacity for each child vector
104
+ auto child_segments = GetStructData(segment);
105
+ for (idx_t i = 0; i < write_data_to_segment.child_functions.size(); i++) {
106
+ auto child_function = write_data_to_segment.child_functions[i];
107
+ auto child_segment = child_function.create_segment(child_function, allocator, owning_vector, capacity);
108
+ Store<ListSegment *>(child_segment, (data_ptr_t)(child_segments + i));
109
+ }
110
+
111
+ return segment;
112
+ }
113
+
114
+ static ListSegment *GetSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
115
+ vector<AllocatedData> &owning_vector, LinkedList *linked_list) {
116
+
117
+ ListSegment *segment = nullptr;
118
+
119
+ // determine segment
120
+ if (!linked_list->last_segment) {
121
+ // empty linked list, create the first (and last) segment
122
+ auto capacity = GetCapacityForNewSegment(linked_list);
123
+ segment = write_data_to_segment.create_segment(write_data_to_segment, allocator, owning_vector, capacity);
124
+ linked_list->first_segment = segment;
125
+ linked_list->last_segment = segment;
126
+
127
+ } else if (linked_list->last_segment->capacity == linked_list->last_segment->count) {
128
+ // the last segment of the linked list is full, create a new one and append it
129
+ auto capacity = GetCapacityForNewSegment(linked_list);
130
+ segment = write_data_to_segment.create_segment(write_data_to_segment, allocator, owning_vector, capacity);
131
+ linked_list->last_segment->next = segment;
132
+ linked_list->last_segment = segment;
133
+
134
+ } else {
135
+ // the last segment of the linked list is not full, append the data to it
136
+ segment = linked_list->last_segment;
137
+ }
138
+
139
+ D_ASSERT(segment);
140
+ return segment;
141
+ }
142
+
143
+ template <class T>
144
+ static void WriteDataToPrimitiveSegment(WriteDataToSegment &, Allocator &allocator,
145
+ vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
146
+ idx_t &entry_idx, idx_t &count) {
147
+
148
+ // get the vector data and the source index of the entry that we want to write
149
+ auto input_data = FlatVector::GetData(input);
150
+
151
+ // write null validity
152
+ auto null_mask = GetNullMask(segment);
153
+ auto is_null = FlatVector::IsNull(input, entry_idx);
154
+ null_mask[segment->count] = is_null;
155
+
156
+ // write value
157
+ if (!is_null) {
158
+ auto data = GetPrimitiveData<T>(segment);
159
+ Store<T>(((T *)input_data)[entry_idx], (data_ptr_t)(data + segment->count));
160
+ }
161
+ }
162
+
163
+ static void WriteDataToVarcharSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
164
+ vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
165
+ idx_t &entry_idx, idx_t &count) {
166
+
167
+ // get the vector data and the source index of the entry that we want to write
168
+ auto input_data = FlatVector::GetData(input);
169
+
170
+ // write null validity
171
+ auto null_mask = GetNullMask(segment);
172
+ auto is_null = FlatVector::IsNull(input, entry_idx);
173
+ null_mask[segment->count] = is_null;
174
+
175
+ // set the length of this string
176
+ auto str_length_data = GetListLengthData(segment);
177
+ uint64_t str_length = 0;
178
+
179
+ // get the string
180
+ string_t str_t;
181
+ if (!is_null) {
182
+ str_t = ((string_t *)input_data)[entry_idx];
183
+ str_length = str_t.GetSize();
184
+ }
185
+
186
+ // we can reconstruct the offset from the length
187
+ Store<uint64_t>(str_length, (data_ptr_t)(str_length_data + segment->count));
188
+
189
+ if (is_null) {
190
+ return;
191
+ }
192
+
193
+ // write the characters to the linked list of child segments
194
+ auto child_segments = Load<LinkedList>((data_ptr_t)GetListChildData(segment));
195
+ for (char &c : str_t.GetString()) {
196
+ auto child_segment =
197
+ GetSegment(write_data_to_segment.child_functions.back(), allocator, owning_vector, &child_segments);
198
+ auto data = GetPrimitiveData<char>(child_segment);
199
+ data[child_segment->count] = c;
200
+ child_segment->count++;
201
+ child_segments.total_capacity++;
202
+ }
203
+
204
+ // store the updated linked list
205
+ Store<LinkedList>(child_segments, (data_ptr_t)GetListChildData(segment));
206
+ }
207
+
208
+ static void WriteDataToListSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
209
+ vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
210
+ idx_t &entry_idx, idx_t &count) {
211
+
212
+ // get the vector data and the source index of the entry that we want to write
213
+ auto input_data = FlatVector::GetData(input);
214
+
215
+ // write null validity
216
+ auto null_mask = GetNullMask(segment);
217
+ auto is_null = FlatVector::IsNull(input, entry_idx);
218
+ null_mask[segment->count] = is_null;
219
+
220
+ // set the length of this list
221
+ auto list_length_data = GetListLengthData(segment);
222
+ uint64_t list_length = 0;
223
+
224
+ if (!is_null) {
225
+ // get list entry information
226
+ auto list_entries = (list_entry_t *)input_data;
227
+ const auto &list_entry = list_entries[entry_idx];
228
+ list_length = list_entry.length;
229
+
230
+ // get the child vector and its data
231
+ auto lists_size = ListVector::GetListSize(input);
232
+ auto &child_vector = ListVector::GetEntry(input);
233
+
234
+ // loop over the child vector entries and recurse on them
235
+ auto child_segments = Load<LinkedList>((data_ptr_t)GetListChildData(segment));
236
+ D_ASSERT(write_data_to_segment.child_functions.size() == 1);
237
+ for (idx_t child_idx = 0; child_idx < list_entry.length; child_idx++) {
238
+ auto source_idx_child = list_entry.offset + child_idx;
239
+ write_data_to_segment.child_functions[0].AppendRow(allocator, owning_vector, &child_segments, child_vector,
240
+ source_idx_child, lists_size);
241
+ }
242
+ // store the updated linked list
243
+ Store<LinkedList>(child_segments, (data_ptr_t)GetListChildData(segment));
244
+ }
245
+
246
+ Store<uint64_t>(list_length, (data_ptr_t)(list_length_data + segment->count));
247
+ }
248
+
249
+ static void WriteDataToStructSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
250
+ vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
251
+ idx_t &entry_idx, idx_t &count) {
252
+
253
+ // write null validity
254
+ auto null_mask = GetNullMask(segment);
255
+ auto is_null = FlatVector::IsNull(input, entry_idx);
256
+ null_mask[segment->count] = is_null;
257
+
258
+ // write value
259
+ auto &children = StructVector::GetEntries(input);
260
+ D_ASSERT(children.size() == write_data_to_segment.child_functions.size());
261
+ auto child_list = GetStructData(segment);
262
+
263
+ // write the data of each of the children of the struct
264
+ for (idx_t child_count = 0; child_count < children.size(); child_count++) {
265
+ auto child_list_segment = Load<ListSegment *>((data_ptr_t)(child_list + child_count));
266
+ auto &child_function = write_data_to_segment.child_functions[child_count];
267
+ child_function.segment_function(child_function, allocator, owning_vector, child_list_segment,
268
+ *children[child_count], entry_idx, count);
269
+ child_list_segment->count++;
270
+ }
271
+ }
272
+
273
+ void WriteDataToSegment::AppendRow(Allocator &allocator, vector<AllocatedData> &owning_vector, LinkedList *linked_list,
274
+ Vector &input, idx_t &entry_idx, idx_t &count) {
275
+
276
+ D_ASSERT(input.GetVectorType() == VectorType::FLAT_VECTOR);
277
+ auto &write_data_to_segment = *this;
278
+ auto segment = GetSegment(write_data_to_segment, allocator, owning_vector, linked_list);
279
+ write_data_to_segment.segment_function(write_data_to_segment, allocator, owning_vector, segment, input, entry_idx,
280
+ count);
281
+
282
+ linked_list->total_capacity++;
283
+ segment->count++;
284
+ }
285
+
286
+ template <class T>
287
+ static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
288
+ idx_t &total_count) {
289
+
290
+ auto &aggr_vector_validity = FlatVector::Validity(result);
291
+
292
+ // set NULLs
293
+ auto null_mask = GetNullMask(segment);
294
+ for (idx_t i = 0; i < segment->count; i++) {
295
+ if (null_mask[i]) {
296
+ aggr_vector_validity.SetInvalid(total_count + i);
297
+ }
298
+ }
299
+
300
+ auto aggr_vector_data = FlatVector::GetData(result);
301
+
302
+ // load values
303
+ for (idx_t i = 0; i < segment->count; i++) {
304
+ if (aggr_vector_validity.RowIsValid(total_count + i)) {
305
+ auto data = GetPrimitiveData<T>(segment);
306
+ ((T *)aggr_vector_data)[total_count + i] = Load<T>((data_ptr_t)(data + i));
307
+ }
308
+ }
309
+ }
310
+
311
+ static void ReadDataFromVarcharSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
312
+ idx_t &total_count) {
313
+
314
+ auto &aggr_vector_validity = FlatVector::Validity(result);
315
+
316
+ // set NULLs
317
+ auto null_mask = GetNullMask(segment);
318
+ for (idx_t i = 0; i < segment->count; i++) {
319
+ if (null_mask[i]) {
320
+ aggr_vector_validity.SetInvalid(total_count + i);
321
+ }
322
+ }
323
+
324
+ // append all the child chars to one string
325
+ string str = "";
326
+ auto linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(segment));
327
+ while (linked_child_list.first_segment) {
328
+ auto child_segment = linked_child_list.first_segment;
329
+ auto data = GetPrimitiveData<char>(child_segment);
330
+ str.append(data, child_segment->count);
331
+ linked_child_list.first_segment = child_segment->next;
332
+ }
333
+ linked_child_list.last_segment = nullptr;
334
+
335
+ // use length and (reconstructed) offset to get the correct substrings
336
+ auto aggr_vector_data = FlatVector::GetData(result);
337
+ auto str_length_data = GetListLengthData(segment);
338
+
339
+ // get the substrings and write them to the result vector
340
+ idx_t offset = 0;
341
+ for (idx_t i = 0; i < segment->count; i++) {
342
+ if (!null_mask[i]) {
343
+ auto str_length = Load<uint64_t>((data_ptr_t)(str_length_data + i));
344
+ auto substr = str.substr(offset, str_length);
345
+ auto str_t = StringVector::AddStringOrBlob(result, substr);
346
+ ((string_t *)aggr_vector_data)[total_count + i] = str_t;
347
+ offset += str_length;
348
+ }
349
+ }
350
+ }
351
+
352
+ static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
353
+ Vector &result, idx_t &total_count) {
354
+
355
+ auto &aggr_vector_validity = FlatVector::Validity(result);
356
+
357
+ // set NULLs
358
+ auto null_mask = GetNullMask(segment);
359
+ for (idx_t i = 0; i < segment->count; i++) {
360
+ if (null_mask[i]) {
361
+ aggr_vector_validity.SetInvalid(total_count + i);
362
+ }
363
+ }
364
+
365
+ auto list_vector_data = FlatVector::GetData<list_entry_t>(result);
366
+
367
+ // get the starting offset
368
+ idx_t offset = 0;
369
+ if (total_count != 0) {
370
+ offset = list_vector_data[total_count - 1].offset + list_vector_data[total_count - 1].length;
371
+ }
372
+ idx_t starting_offset = offset;
373
+
374
+ // set length and offsets
375
+ auto list_length_data = GetListLengthData(segment);
376
+ for (idx_t i = 0; i < segment->count; i++) {
377
+ auto list_length = Load<uint64_t>((data_ptr_t)(list_length_data + i));
378
+ list_vector_data[total_count + i].length = list_length;
379
+ list_vector_data[total_count + i].offset = offset;
380
+ offset += list_length;
381
+ }
382
+
383
+ auto &child_vector = ListVector::GetEntry(result);
384
+ auto linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(segment));
385
+ ListVector::Reserve(result, offset);
386
+
387
+ // recurse into the linked list of child values
388
+ D_ASSERT(read_data_from_segment.child_functions.size() == 1);
389
+ read_data_from_segment.child_functions[0].BuildListVector(&linked_child_list, child_vector, starting_offset);
390
+ }
391
+
392
+ static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
393
+ Vector &result, idx_t &total_count) {
394
+
395
+ auto &aggr_vector_validity = FlatVector::Validity(result);
396
+
397
+ // set NULLs
398
+ auto null_mask = GetNullMask(segment);
399
+ for (idx_t i = 0; i < segment->count; i++) {
400
+ if (null_mask[i]) {
401
+ aggr_vector_validity.SetInvalid(total_count + i);
402
+ }
403
+ }
404
+
405
+ auto &children = StructVector::GetEntries(result);
406
+
407
+ // recurse into the child segments of each child of the struct
408
+ D_ASSERT(children.size() == read_data_from_segment.child_functions.size());
409
+ auto struct_children = GetStructData(segment);
410
+ for (idx_t child_count = 0; child_count < children.size(); child_count++) {
411
+ auto struct_children_segment = Load<ListSegment *>((data_ptr_t)(struct_children + child_count));
412
+ auto &child_function = read_data_from_segment.child_functions[child_count];
413
+ child_function.segment_function(child_function, struct_children_segment, *children[child_count], total_count);
414
+ }
415
+ }
416
+
417
+ void ReadDataFromSegment::BuildListVector(LinkedList *linked_list, Vector &result, idx_t &initial_total_count) {
418
+ auto &read_data_from_segment = *this;
419
+ idx_t total_count = initial_total_count;
420
+ while (linked_list->first_segment) {
421
+ auto segment = linked_list->first_segment;
422
+ read_data_from_segment.segment_function(read_data_from_segment, segment, result, total_count);
423
+
424
+ total_count += segment->count;
425
+ linked_list->first_segment = segment->next;
426
+ }
427
+
428
+ linked_list->last_segment = nullptr;
429
+ }
430
+
431
+ template <class T>
432
+ static ListSegment *CopyDataFromPrimitiveSegment(CopyDataFromSegment &, const ListSegment *source, Allocator &allocator,
433
+ vector<AllocatedData> &owning_vector) {
434
+
435
+ auto target = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, source->capacity);
436
+ memcpy(target, source, sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(T)));
437
+ target->next = nullptr;
438
+ return target;
439
+ }
440
+
441
+ static ListSegment *CopyDataFromListSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
442
+ Allocator &allocator, vector<AllocatedData> &owning_vector) {
443
+
444
+ // create an empty linked list for the child vector of target
445
+ auto source_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(source));
446
+
447
+ // create the segment
448
+ auto target = (ListSegment *)AllocateListData(allocator, owning_vector, source->capacity);
449
+ memcpy(target, source,
450
+ sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList));
451
+ target->next = nullptr;
452
+
453
+ auto target_linked_list = GetListChildData(target);
454
+ LinkedList linked_list(source_linked_child_list.total_capacity, nullptr, nullptr);
455
+ Store<LinkedList>(linked_list, (data_ptr_t)target_linked_list);
456
+
457
+ // recurse to copy the linked child list
458
+ auto target_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(target));
459
+ D_ASSERT(copy_data_from_segment.child_functions.size() == 1);
460
+ copy_data_from_segment.child_functions[0].CopyLinkedList(&source_linked_child_list, target_linked_child_list,
461
+ allocator, owning_vector);
462
+
463
+ // store the updated linked list
464
+ Store<LinkedList>(target_linked_child_list, (data_ptr_t)GetListChildData(target));
465
+ return target;
466
+ }
467
+
468
+ static ListSegment *CopyDataFromStructSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
469
+ Allocator &allocator, vector<AllocatedData> &owning_vector) {
470
+
471
+ auto source_child_count = copy_data_from_segment.child_functions.size();
472
+ auto target = (ListSegment *)AllocateStructData(allocator, owning_vector, source->capacity, source_child_count);
473
+ memcpy(target, source,
474
+ sizeof(ListSegment) + source->capacity * sizeof(bool) + source_child_count * sizeof(ListSegment *));
475
+ target->next = nullptr;
476
+
477
+ // recurse and copy the children
478
+ auto source_child_segments = GetStructData(source);
479
+ auto target_child_segments = GetStructData(target);
480
+
481
+ for (idx_t i = 0; i < copy_data_from_segment.child_functions.size(); i++) {
482
+ auto child_function = copy_data_from_segment.child_functions[i];
483
+ auto source_child_segment = Load<ListSegment *>((data_ptr_t)(source_child_segments + i));
484
+ auto target_child_segment =
485
+ child_function.segment_function(child_function, source_child_segment, allocator, owning_vector);
486
+ Store<ListSegment *>(target_child_segment, (data_ptr_t)(target_child_segments + i));
487
+ }
488
+ return target;
489
+ }
490
+
491
+ void CopyDataFromSegment::CopyLinkedList(const LinkedList *source_list, LinkedList &target_list, Allocator &allocator,
492
+ vector<AllocatedData> &owning_vector) {
493
+ auto &copy_data_from_segment = *this;
494
+ auto source_segment = source_list->first_segment;
495
+
496
+ while (source_segment) {
497
+ auto target_segment =
498
+ copy_data_from_segment.segment_function(copy_data_from_segment, source_segment, allocator, owning_vector);
499
+ source_segment = source_segment->next;
500
+
501
+ if (!target_list.first_segment) {
502
+ target_list.first_segment = target_segment;
503
+ }
504
+ if (target_list.last_segment) {
505
+ target_list.last_segment->next = target_segment;
506
+ }
507
+ target_list.last_segment = target_segment;
508
+ }
509
+ }
510
+
511
+ void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment, ReadDataFromSegment &read_data_from_segment,
512
+ CopyDataFromSegment &copy_data_from_segment, const LogicalType &type) {
513
+
514
+ auto physical_type = type.InternalType();
515
+ switch (physical_type) {
516
+ case PhysicalType::BIT:
517
+ case PhysicalType::BOOL: {
518
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<bool>;
519
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<bool>;
520
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<bool>;
521
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<bool>;
522
+ break;
523
+ }
524
+ case PhysicalType::INT8: {
525
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<int8_t>;
526
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int8_t>;
527
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int8_t>;
528
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int8_t>;
529
+ break;
530
+ }
531
+ case PhysicalType::INT16: {
532
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<int16_t>;
533
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int16_t>;
534
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int16_t>;
535
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int16_t>;
536
+ break;
537
+ }
538
+ case PhysicalType::INT32: {
539
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<int32_t>;
540
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int32_t>;
541
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int32_t>;
542
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int32_t>;
543
+ break;
544
+ }
545
+ case PhysicalType::INT64: {
546
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<int64_t>;
547
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int64_t>;
548
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int64_t>;
549
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int64_t>;
550
+ break;
551
+ }
552
+ case PhysicalType::UINT8: {
553
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<uint8_t>;
554
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint8_t>;
555
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint8_t>;
556
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint8_t>;
557
+ break;
558
+ }
559
+ case PhysicalType::UINT16: {
560
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<uint16_t>;
561
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint16_t>;
562
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint16_t>;
563
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint16_t>;
564
+ break;
565
+ }
566
+ case PhysicalType::UINT32: {
567
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<uint32_t>;
568
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint32_t>;
569
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint32_t>;
570
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint32_t>;
571
+ break;
572
+ }
573
+ case PhysicalType::UINT64: {
574
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<uint64_t>;
575
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint64_t>;
576
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint64_t>;
577
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint64_t>;
578
+ break;
579
+ }
580
+ case PhysicalType::FLOAT: {
581
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<float>;
582
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<float>;
583
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<float>;
584
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<float>;
585
+ break;
586
+ }
587
+ case PhysicalType::DOUBLE: {
588
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<double>;
589
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<double>;
590
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<double>;
591
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<double>;
592
+ break;
593
+ }
594
+ case PhysicalType::INT128: {
595
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<hugeint_t>;
596
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<hugeint_t>;
597
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<hugeint_t>;
598
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<hugeint_t>;
599
+ break;
600
+ }
601
+ case PhysicalType::INTERVAL: {
602
+ write_data_to_segment.create_segment = CreatePrimitiveSegment<interval_t>;
603
+ write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<interval_t>;
604
+ read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<interval_t>;
605
+ copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<interval_t>;
606
+ break;
607
+ }
608
+ case PhysicalType::VARCHAR: {
609
+ write_data_to_segment.create_segment = CreateListSegment;
610
+ write_data_to_segment.segment_function = WriteDataToVarcharSegment;
611
+ read_data_from_segment.segment_function = ReadDataFromVarcharSegment;
612
+ copy_data_from_segment.segment_function = CopyDataFromListSegment;
613
+
614
+ write_data_to_segment.child_functions.emplace_back();
615
+ write_data_to_segment.child_functions.back().create_segment = CreatePrimitiveSegment<char>;
616
+ copy_data_from_segment.child_functions.emplace_back();
617
+ copy_data_from_segment.child_functions.back().segment_function = CopyDataFromPrimitiveSegment<char>;
618
+ break;
619
+ }
620
+ case PhysicalType::LIST: {
621
+ write_data_to_segment.create_segment = CreateListSegment;
622
+ write_data_to_segment.segment_function = WriteDataToListSegment;
623
+ read_data_from_segment.segment_function = ReadDataFromListSegment;
624
+ copy_data_from_segment.segment_function = CopyDataFromListSegment;
625
+
626
+ // recurse
627
+ write_data_to_segment.child_functions.emplace_back();
628
+ read_data_from_segment.child_functions.emplace_back();
629
+ copy_data_from_segment.child_functions.emplace_back();
630
+ GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
631
+ read_data_from_segment.child_functions.back(),
632
+ copy_data_from_segment.child_functions.back(), ListType::GetChildType(type));
633
+ break;
634
+ }
635
+ case PhysicalType::STRUCT: {
636
+ write_data_to_segment.create_segment = CreateStructSegment;
637
+ write_data_to_segment.segment_function = WriteDataToStructSegment;
638
+ read_data_from_segment.segment_function = ReadDataFromStructSegment;
639
+ copy_data_from_segment.segment_function = CopyDataFromStructSegment;
640
+
641
+ // recurse
642
+ auto child_types = StructType::GetChildTypes(type);
643
+ for (idx_t i = 0; i < child_types.size(); i++) {
644
+ write_data_to_segment.child_functions.emplace_back();
645
+ read_data_from_segment.child_functions.emplace_back();
646
+ copy_data_from_segment.child_functions.emplace_back();
647
+ GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
648
+ read_data_from_segment.child_functions.back(),
649
+ copy_data_from_segment.child_functions.back(), child_types[i].second);
650
+ }
651
+ break;
652
+ }
653
+ default:
654
+ throw InternalException("LIST aggregate not yet implemented for " + type.ToString());
655
+ }
656
+ }
657
+
658
+ } // namespace duckdb
@@ -50,7 +50,7 @@ string_t StringHeap::AddBlob(const string_t &data) {
50
50
  }
51
51
 
52
52
  string_t StringHeap::EmptyString(idx_t len) {
53
- D_ASSERT(len >= string_t::INLINE_LENGTH);
53
+ D_ASSERT(len > string_t::INLINE_LENGTH);
54
54
  auto insert_pos = (const char *)allocator.Allocate(len);
55
55
  return string_t(insert_pos, len);
56
56
  }
@@ -20,7 +20,7 @@ void string_t::Verify() const {
20
20
  for (idx_t i = 0; i < MinValue<uint32_t>(PREFIX_LENGTH, GetSize()); i++) {
21
21
  D_ASSERT(GetPrefix()[i] == dataptr[i]);
22
22
  }
23
- // verify that for strings with length < INLINE_LENGTH, the rest of the string is zero
23
+ // verify that for strings with length <= INLINE_LENGTH, the rest of the string is zero
24
24
  for (idx_t i = GetSize(); i < INLINE_LENGTH; i++) {
25
25
  D_ASSERT(GetDataUnsafe()[i] == '\0');
26
26
  }
@@ -1549,7 +1549,7 @@ string_t StringVector::AddStringOrBlob(Vector &vector, string_t data) {
1549
1549
 
1550
1550
  string_t StringVector::EmptyString(Vector &vector, idx_t len) {
1551
1551
  D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR);
1552
- if (len < string_t::INLINE_LENGTH) {
1552
+ if (len <= string_t::INLINE_LENGTH) {
1553
1553
  return string_t(len);
1554
1554
  }
1555
1555
  if (!vector.auxiliary) {