duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
  55. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
  56. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
  57. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
  58. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
  59. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  60. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  61. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  62. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  63. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  64. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  66. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  67. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  68. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  69. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  70. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  71. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  72. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  73. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  74. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  75. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  76. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  77. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  78. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  79. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  80. package/src/duckdb/src/function/table/read_csv.cpp +124 -58
  81. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  82. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  85. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  86. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  87. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  88. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  89. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  90. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  91. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  92. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  93. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  94. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  95. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  98. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  99. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  100. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  101. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
  102. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  104. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  105. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  106. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  107. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  108. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  109. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  110. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  111. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  112. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  113. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  114. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  115. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  116. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  117. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  119. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  120. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  122. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  124. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  128. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  130. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  131. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
  134. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
  135. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
  136. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
  137. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  138. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  140. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  141. package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
  142. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
  143. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
  144. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
  145. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  147. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  148. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  149. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  150. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  151. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  152. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  153. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  154. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  155. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  156. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  157. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  158. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  159. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  160. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  162. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  163. package/src/duckdb/src/main/appender.cpp +6 -6
  164. package/src/duckdb/src/main/client_context.cpp +1 -1
  165. package/src/duckdb/src/main/connection.cpp +2 -2
  166. package/src/duckdb/src/main/query_result.cpp +13 -0
  167. package/src/duckdb/src/main/settings/settings.cpp +3 -4
  168. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  169. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  170. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  171. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  172. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  173. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  174. package/src/duckdb/src/parallel/executor.cpp +1 -1
  175. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  176. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  177. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  178. package/src/duckdb/src/parser/transformer.cpp +50 -9
  179. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
  180. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  181. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  182. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  183. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  184. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  185. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  186. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
  187. package/src/duckdb/src/planner/binder.cpp +16 -19
  188. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  189. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  190. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  191. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  192. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  193. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  194. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  195. package/src/duckdb/ub_src_common.cpp +2 -0
  196. package/src/duckdb/ub_src_common_types.cpp +0 -16
  197. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  198. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -4,10 +4,10 @@
4
4
  //===--------------------------------------------------------------------===//
5
5
 
6
6
  #include "duckdb/common/exception.hpp"
7
- #include "duckdb/common/operator/constant_operators.hpp"
8
7
  #include "duckdb/common/operator/comparison_operators.hpp"
8
+ #include "duckdb/common/operator/constant_operators.hpp"
9
9
  #include "duckdb/common/row_operations/row_operations.hpp"
10
- #include "duckdb/common/types/row_layout.hpp"
10
+ #include "duckdb/common/types/row/tuple_data_collection.hpp"
11
11
 
12
12
  namespace duckdb {
13
13
 
@@ -118,12 +118,104 @@ static void TemplatedMatchType(UnifiedVectorFormat &col, Vector &rows, Selection
118
118
  count = match_count;
119
119
  }
120
120
 
121
+ //! Forward declaration for recursion
122
+ template <class OP, bool NO_MATCH_SEL>
123
+ static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
124
+ SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match,
125
+ idx_t &no_match_count, const idx_t original_count);
126
+
121
127
  template <class OP, bool NO_MATCH_SEL>
122
- static void TemplatedMatchNested(Vector &col, Vector &rows, SelectionVector &sel, idx_t &count, const RowLayout &layout,
123
- const idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
128
+ static void TemplatedMatchStruct(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
129
+ SelectionVector &sel, idx_t &count, const idx_t col_no, SelectionVector *no_match,
130
+ idx_t &no_match_count, const idx_t original_count) {
131
+ // Precompute row_mask indexes
132
+ idx_t entry_idx;
133
+ idx_t idx_in_entry;
134
+ ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry);
135
+
136
+ // Work our way through the validity of the whole struct
137
+ auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
138
+ idx_t match_count = 0;
139
+ if (!col.validity.AllValid()) {
140
+ for (idx_t i = 0; i < count; i++) {
141
+ auto idx = sel.get_index(i);
142
+
143
+ auto row = ptrs[idx];
144
+ ValidityBytes row_mask(row);
145
+ auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
146
+
147
+ auto col_idx = col.sel->get_index(idx);
148
+ if (!col.validity.RowIsValid(col_idx)) {
149
+ if (isnull) {
150
+ // match: move to next value to compare
151
+ sel.set_index(match_count++, idx);
152
+ } else {
153
+ if (NO_MATCH_SEL) {
154
+ no_match->set_index(no_match_count++, idx);
155
+ }
156
+ }
157
+ } else {
158
+ if (!isnull) {
159
+ sel.set_index(match_count++, idx);
160
+ } else {
161
+ if (NO_MATCH_SEL) {
162
+ no_match->set_index(no_match_count++, idx);
163
+ }
164
+ }
165
+ }
166
+ }
167
+ } else {
168
+ for (idx_t i = 0; i < count; i++) {
169
+ auto idx = sel.get_index(i);
170
+
171
+ auto row = ptrs[idx];
172
+ ValidityBytes row_mask(row);
173
+ auto isnull = !row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
174
+
175
+ if (!isnull) {
176
+ sel.set_index(match_count++, idx);
177
+ } else {
178
+ if (NO_MATCH_SEL) {
179
+ no_match->set_index(no_match_count++, idx);
180
+ }
181
+ }
182
+ }
183
+ }
184
+ count = match_count;
185
+
186
+ // Now we construct row pointers to the structs
187
+ Vector struct_rows(LogicalTypeId::POINTER);
188
+ auto struct_ptrs = FlatVector::GetData<data_ptr_t>(struct_rows);
189
+
190
+ const auto col_offset = layout.GetOffsets()[col_no];
191
+ for (idx_t i = 0; i < count; i++) {
192
+ auto idx = sel.get_index(i);
193
+ auto row = ptrs[idx];
194
+ struct_ptrs[idx] = row + col_offset;
195
+ }
196
+
197
+ // Get the struct layout, child columns, then recurse
198
+ const auto &struct_layout = layout.GetStructLayout(col_no);
199
+ auto &struct_entries = StructVector::GetEntries(vec);
200
+ D_ASSERT(struct_layout.ColumnCount() == struct_entries.size());
201
+ for (idx_t struct_col_no = 0; struct_col_no < struct_layout.ColumnCount(); struct_col_no++) {
202
+ auto &struct_vec = *struct_entries[struct_col_no];
203
+ UnifiedVectorFormat struct_col;
204
+ struct_vec.ToUnifiedFormat(original_count, struct_col);
205
+ TemplatedMatchOp<OP, NO_MATCH_SEL>(struct_vec, struct_col, struct_layout, struct_rows, sel, count,
206
+ struct_col_no, no_match, no_match_count, original_count);
207
+ }
208
+ }
209
+
210
+ template <class OP, bool NO_MATCH_SEL>
211
+ static void TemplatedMatchList(Vector &col, Vector &rows, SelectionVector &sel, idx_t &count,
212
+ const TupleDataLayout &layout, const idx_t col_no, SelectionVector *no_match,
213
+ idx_t &no_match_count) {
124
214
  // Gather a dense Vector containing the column values being matched
125
215
  Vector key(col.GetType());
126
- RowOperations::Gather(rows, sel, key, *FlatVector::IncrementalSelectionVector(), count, layout, col_no);
216
+ const auto gather_function = TupleDataCollection::GetGatherFunction(col.GetType());
217
+ gather_function.function(layout, rows, col_no, sel, count, key, *FlatVector::IncrementalSelectionVector(), key,
218
+ gather_function.child_functions);
127
219
 
128
220
  // Densify the input column
129
221
  Vector sliced(col, sel, count);
@@ -139,9 +231,9 @@ static void TemplatedMatchNested(Vector &col, Vector &rows, SelectionVector &sel
139
231
  }
140
232
 
141
233
  template <class OP, bool NO_MATCH_SEL>
142
- static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const RowLayout &layout, Vector &rows,
234
+ static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
143
235
  SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match,
144
- idx_t &no_match_count) {
236
+ idx_t &no_match_count, const idx_t original_count) {
145
237
  if (count == 0) {
146
238
  return;
147
239
  }
@@ -200,9 +292,12 @@ static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const RowLay
200
292
  TemplatedMatchType<string_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
201
293
  no_match_count);
202
294
  break;
203
- case PhysicalType::LIST:
204
295
  case PhysicalType::STRUCT:
205
- TemplatedMatchNested<OP, NO_MATCH_SEL>(vec, rows, sel, count, layout, col_no, no_match, no_match_count);
296
+ TemplatedMatchStruct<OP, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count,
297
+ original_count);
298
+ break;
299
+ case PhysicalType::LIST:
300
+ TemplatedMatchList<OP, NO_MATCH_SEL>(vec, rows, sel, count, layout, col_no, no_match, no_match_count);
206
301
  break;
207
302
  default:
208
303
  throw InternalException("Unsupported column type for RowOperations::Match");
@@ -210,9 +305,9 @@ static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const RowLay
210
305
  }
211
306
 
212
307
  template <bool NO_MATCH_SEL>
213
- static void TemplatedMatch(DataChunk &columns, UnifiedVectorFormat col_data[], const RowLayout &layout, Vector &rows,
214
- const Predicates &predicates, SelectionVector &sel, idx_t &count, SelectionVector *no_match,
215
- idx_t &no_match_count) {
308
+ static void TemplatedMatch(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout,
309
+ Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t &count,
310
+ SelectionVector *no_match, idx_t &no_match_count) {
216
311
  for (idx_t col_no = 0; col_no < predicates.size(); ++col_no) {
217
312
  auto &vec = columns.data[col_no];
218
313
  auto &col = col_data[col_no];
@@ -220,28 +315,28 @@ static void TemplatedMatch(DataChunk &columns, UnifiedVectorFormat col_data[], c
220
315
  case ExpressionType::COMPARE_EQUAL:
221
316
  case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
222
317
  case ExpressionType::COMPARE_DISTINCT_FROM:
223
- TemplatedMatchOp<Equals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
224
- no_match_count);
318
+ TemplatedMatchOp<Equals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count,
319
+ count);
225
320
  break;
226
321
  case ExpressionType::COMPARE_NOTEQUAL:
227
322
  TemplatedMatchOp<NotEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
228
- no_match_count);
323
+ no_match_count, count);
229
324
  break;
230
325
  case ExpressionType::COMPARE_GREATERTHAN:
231
326
  TemplatedMatchOp<GreaterThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
232
- no_match_count);
327
+ no_match_count, count);
233
328
  break;
234
329
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
235
330
  TemplatedMatchOp<GreaterThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
236
- no_match_count);
331
+ no_match_count, count);
237
332
  break;
238
333
  case ExpressionType::COMPARE_LESSTHAN:
239
334
  TemplatedMatchOp<LessThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
240
- no_match_count);
335
+ no_match_count, count);
241
336
  break;
242
337
  case ExpressionType::COMPARE_LESSTHANOREQUALTO:
243
338
  TemplatedMatchOp<LessThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
244
- no_match_count);
339
+ no_match_count, count);
245
340
  break;
246
341
  default:
247
342
  throw InternalException("Unsupported comparison type for RowOperations::Match");
@@ -249,9 +344,9 @@ static void TemplatedMatch(DataChunk &columns, UnifiedVectorFormat col_data[], c
249
344
  }
250
345
  }
251
346
 
252
- idx_t RowOperations::Match(DataChunk &columns, UnifiedVectorFormat col_data[], const RowLayout &layout, Vector &rows,
253
- const Predicates &predicates, SelectionVector &sel, idx_t count, SelectionVector *no_match,
254
- idx_t &no_match_count) {
347
+ idx_t RowOperations::Match(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout,
348
+ Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t count,
349
+ SelectionVector *no_match, idx_t &no_match_count) {
255
350
  if (no_match) {
256
351
  TemplatedMatch<true>(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count);
257
352
  } else {
@@ -8,8 +8,8 @@
8
8
  #include "duckdb/common/helper.hpp"
9
9
  #include "duckdb/common/row_operations/row_operations.hpp"
10
10
  #include "duckdb/common/types/null_value.hpp"
11
- #include "duckdb/common/types/row_data_collection.hpp"
12
- #include "duckdb/common/types/row_layout.hpp"
11
+ #include "duckdb/common/types/row/row_data_collection.hpp"
12
+ #include "duckdb/common/types/row/row_layout.hpp"
13
13
  #include "duckdb/common/types/selection_vector.hpp"
14
14
  #include "duckdb/common/types/vector.hpp"
15
15
 
@@ -1,6 +1,6 @@
1
1
  #include "duckdb/common/sort/partition_state.hpp"
2
2
 
3
- #include "duckdb/common/types/column_data_consumer.hpp"
3
+ #include "duckdb/common/types/column/column_data_consumer.hpp"
4
4
  #include "duckdb/common/row_operations/row_operations.hpp"
5
5
  #include "duckdb/main/config.hpp"
6
6
  #include "duckdb/parallel/event.hpp"
@@ -303,7 +303,8 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
303
303
  ordered_data_ptr += row_width;
304
304
  sorting_ptr += sorting_entry_size;
305
305
  }
306
- ordered_data_block->block->SetSwizzling(sd.swizzled ? "LocalSortState::ReOrder.ordered_data" : nullptr);
306
+ ordered_data_block->block->SetSwizzling(
307
+ sd.layout.AllConstant() || !sd.swizzled ? nullptr : "LocalSortState::ReOrder.ordered_data");
307
308
  // Replace the unordered data block with the re-ordered data block
308
309
  sd.data_blocks.clear();
309
310
  sd.data_blocks.push_back(std::move(ordered_data_block));
@@ -3,7 +3,7 @@
3
3
  #include "duckdb/common/constants.hpp"
4
4
  #include "duckdb/common/row_operations/row_operations.hpp"
5
5
  #include "duckdb/common/sort/sort.hpp"
6
- #include "duckdb/common/types/row_data_collection.hpp"
6
+ #include "duckdb/common/types/row/row_data_collection.hpp"
7
7
 
8
8
  #include <numeric>
9
9
 
@@ -1,6 +1,6 @@
1
- #include "duckdb/common/types/column_data_allocator.hpp"
1
+ #include "duckdb/common/types/column/column_data_allocator.hpp"
2
2
 
3
- #include "duckdb/common/types/column_data_collection_segment.hpp"
3
+ #include "duckdb/common/types/column/column_data_collection_segment.hpp"
4
4
  #include "duckdb/storage/buffer_manager.hpp"
5
5
  #include "duckdb/storage/buffer/block_handle.hpp"
6
6
 
@@ -1,11 +1,11 @@
1
- #include "duckdb/common/types/column_data_collection.hpp"
1
+ #include "duckdb/common/types/column/column_data_collection.hpp"
2
2
 
3
3
  #include "duckdb/common/printer.hpp"
4
4
  #include "duckdb/common/string_util.hpp"
5
- #include "duckdb/common/types/column_data_collection_segment.hpp"
5
+ #include "duckdb/common/types/column/column_data_collection_segment.hpp"
6
+ #include "duckdb/common/types/value_map.hpp"
6
7
  #include "duckdb/common/vector_operations/vector_operations.hpp"
7
8
  #include "duckdb/storage/buffer_manager.hpp"
8
- #include "duckdb/common/types/value_map.hpp"
9
9
 
10
10
  namespace duckdb {
11
11
 
@@ -887,6 +887,7 @@ void ColumnDataCollection::Combine(ColumnDataCollection &other) {
887
887
  for (auto &other_seg : other.segments) {
888
888
  segments.push_back(std::move(other_seg));
889
889
  }
890
+ other.Reset();
890
891
  Verify();
891
892
  }
892
893
 
@@ -930,7 +931,24 @@ void ColumnDataCollection::Verify() {
930
931
  }
931
932
 
932
933
  string ColumnDataCollection::ToString() const {
933
- return "Column Data Collection";
934
+ DataChunk chunk;
935
+ InitializeScanChunk(chunk);
936
+
937
+ ColumnDataScanState scan_state;
938
+ InitializeScan(scan_state);
939
+
940
+ string result = StringUtil::Format("ColumnDataCollection - [%llu Chunks, %llu Rows]\n", ChunkCount(), Count());
941
+ idx_t chunk_idx = 0;
942
+ idx_t row_count = 0;
943
+ while (Scan(scan_state, chunk)) {
944
+ result +=
945
+ StringUtil::Format("Chunk %llu - [Rows %llu - %llu]\n", chunk_idx, row_count, row_count + chunk.size()) +
946
+ chunk.ToString();
947
+ chunk_idx++;
948
+ row_count += chunk.size();
949
+ }
950
+
951
+ return result;
934
952
  }
935
953
 
936
954
  void ColumnDataCollection::Print() const {
@@ -952,7 +970,7 @@ struct ValueResultEquals {
952
970
  };
953
971
 
954
972
  bool ColumnDataCollection::ResultEquals(const ColumnDataCollection &left, const ColumnDataCollection &right,
955
- string &error_message) {
973
+ string &error_message, bool ordered) {
956
974
  if (left.ColumnCount() != right.ColumnCount()) {
957
975
  error_message = "Column count mismatch";
958
976
  return false;
@@ -967,6 +985,7 @@ bool ColumnDataCollection::ResultEquals(const ColumnDataCollection &left, const
967
985
  for (idx_t c = 0; c < left.ColumnCount(); c++) {
968
986
  auto lvalue = left_rows.GetValue(c, r);
969
987
  auto rvalue = right_rows.GetValue(c, r);
988
+
970
989
  if (!Value::DefaultValuesAreEqual(lvalue, rvalue)) {
971
990
  error_message =
972
991
  StringUtil::Format("%s <> %s (row: %lld, col: %lld)\n", lvalue.ToString(), rvalue.ToString(), r, c);
@@ -974,7 +993,11 @@ bool ColumnDataCollection::ResultEquals(const ColumnDataCollection &left, const
974
993
  }
975
994
  }
976
995
  if (!error_message.empty()) {
977
- break;
996
+ if (ordered) {
997
+ return false;
998
+ } else {
999
+ break;
1000
+ }
978
1001
  }
979
1002
  }
980
1003
  if (!error_message.empty()) {
@@ -1,4 +1,5 @@
1
- #include "duckdb/common/types/column_data_collection_segment.hpp"
1
+ #include "duckdb/common/types/column/column_data_collection_segment.hpp"
2
+
2
3
  #include "duckdb/common/vector_operations/vector_operations.hpp"
3
4
 
4
5
  namespace duckdb {
@@ -1,4 +1,4 @@
1
- #include "duckdb/common/types/column_data_consumer.hpp"
1
+ #include "duckdb/common/types/column/column_data_consumer.hpp"
2
2
 
3
3
  #include <algorithm>
4
4
 
@@ -1,14 +1,15 @@
1
- #include "duckdb/common/types/partitioned_column_data.hpp"
1
+ #include "duckdb/common/types/column/partitioned_column_data.hpp"
2
2
 
3
- #include "duckdb/common/radix_partitioning.hpp"
4
3
  #include "duckdb/common/hive_partitioning.hpp"
4
+ #include "duckdb/common/radix_partitioning.hpp"
5
5
  #include "duckdb/storage/buffer_manager.hpp"
6
6
 
7
7
  namespace duckdb {
8
8
 
9
9
  PartitionedColumnData::PartitionedColumnData(PartitionedColumnDataType type_p, ClientContext &context_p,
10
10
  vector<LogicalType> types_p)
11
- : type(type_p), context(context_p), types(std::move(types_p)), allocators(make_shared<PartitionAllocators>()) {
11
+ : type(type_p), context(context_p), types(std::move(types_p)),
12
+ allocators(make_shared<PartitionColumnDataAllocators>()) {
12
13
  }
13
14
 
14
15
  PartitionedColumnData::PartitionedColumnData(const PartitionedColumnData &other)
@@ -47,8 +48,9 @@ void PartitionedColumnData::Append(PartitionedColumnDataAppendState &state, Data
47
48
 
48
49
  // Compute the counts per partition
49
50
  const auto count = input.size();
50
- unordered_map<idx_t, list_entry_t> partition_entries;
51
51
  const auto partition_indices = FlatVector::GetData<idx_t>(state.partition_indices);
52
+ auto &partition_entries = state.partition_entries;
53
+ partition_entries.clear();
52
54
  switch (state.partition_indices.GetVectorType()) {
53
55
  case VectorType::FLAT_VECTOR:
54
56
  for (idx_t i = 0; i < count; i++) {
@@ -72,8 +74,8 @@ void PartitionedColumnData::Append(PartitionedColumnDataAppendState &state, Data
72
74
  if (partition_entries.size() == 1) {
73
75
  const auto &partition_index = partition_entries.begin()->first;
74
76
  auto &partition = *partitions[partition_index];
75
- auto &partition_append_state = state.partition_append_states[partition_index];
76
- partition.Append(*partition_append_state, input);
77
+ auto &partition_append_state = *state.partition_append_states[partition_index];
78
+ partition.Append(partition_append_state, input);
77
79
  return;
78
80
  }
79
81
 
@@ -101,7 +103,7 @@ void PartitionedColumnData::Append(PartitionedColumnDataAppendState &state, Data
101
103
  // Partition, buffer, and append state for this partition index
102
104
  auto &partition = *partitions[partition_index];
103
105
  auto &partition_buffer = *state.partition_buffers[partition_index];
104
- auto &partition_append_state = state.partition_append_states[partition_index];
106
+ auto &partition_append_state = *state.partition_append_states[partition_index];
105
107
 
106
108
  // Length and offset into the selection vector for this chunk, for this partition
107
109
  const auto &partition_entry = pc.second;
@@ -117,14 +119,14 @@ void PartitionedColumnData::Append(PartitionedColumnDataAppendState &state, Data
117
119
  state.slice_chunk.Slice(input, partition_sel, partition_length);
118
120
 
119
121
  // Append it to the partition directly
120
- partition.Append(*partition_append_state, state.slice_chunk);
122
+ partition.Append(partition_append_state, state.slice_chunk);
121
123
  } else {
122
124
  // Append the input chunk to the partition buffer using the selection vector
123
125
  partition_buffer.Append(input, false, &partition_sel, partition_length);
124
126
 
125
127
  if (partition_buffer.size() >= HalfBufferSize()) {
126
128
  // Next batch won't fit in the buffer, flush it to the partition
127
- partition.Append(*partition_append_state, partition_buffer);
129
+ partition.Append(partition_append_state, partition_buffer);
128
130
  partition_buffer.Reset();
129
131
  partition_buffer.SetCapacity(BufferSize());
130
132
  }