duckdb 0.7.2-dev1898.0 → 0.7.2-dev2144.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/data_chunk.cpp +13 -1
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +22 -4
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  55. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  56. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  57. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  58. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  59. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  60. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  61. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  62. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  63. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  64. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  65. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  66. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  67. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  68. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  69. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  70. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  71. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  72. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  73. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  74. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  75. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  78. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  79. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  80. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  81. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  82. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  83. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  84. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  85. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  86. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  87. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  88. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  89. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  91. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  92. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  93. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  94. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +2 -2
  95. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  96. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  97. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  98. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  99. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  100. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  101. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  102. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  104. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  105. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  106. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  107. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  108. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  109. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  110. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  111. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  112. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  113. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  114. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  115. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  116. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  117. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  118. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  119. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  122. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  124. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  125. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  126. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  128. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  130. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  131. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  132. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  133. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  134. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  135. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  136. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  137. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  138. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  140. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  141. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  142. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  143. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  144. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  145. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  147. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  148. package/src/duckdb/src/main/appender.cpp +6 -6
  149. package/src/duckdb/src/main/client_context.cpp +1 -1
  150. package/src/duckdb/src/main/connection.cpp +2 -2
  151. package/src/duckdb/src/main/query_result.cpp +13 -0
  152. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  153. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  154. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  155. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  156. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  157. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  158. package/src/duckdb/src/parallel/executor.cpp +1 -1
  159. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  160. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  161. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  162. package/src/duckdb/src/parser/transformer.cpp +50 -9
  163. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  164. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  165. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  166. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  167. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  168. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  169. package/src/duckdb/src/planner/binder.cpp +16 -19
  170. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  171. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  172. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  173. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  174. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  175. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  176. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  177. package/src/duckdb/ub_src_common.cpp +2 -0
  178. package/src/duckdb/ub_src_common_types.cpp +0 -16
  179. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  180. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
  181. package/test/udf.test.ts +9 -0
package/binding.gyp CHANGED
@@ -22,6 +22,8 @@
22
22
  "src/duckdb/ub_src_common_serializer.cpp",
23
23
  "src/duckdb/ub_src_common_sort.cpp",
24
24
  "src/duckdb/ub_src_common_types.cpp",
25
+ "src/duckdb/ub_src_common_types_column.cpp",
26
+ "src/duckdb/ub_src_common_types_row.cpp",
25
27
  "src/duckdb/ub_src_common_value_operations.cpp",
26
28
  "src/duckdb/src/common/vector_operations/boolean_operators.cpp",
27
29
  "src/duckdb/src/common/vector_operations/comparison_operators.cpp",
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev1898.0",
5
+ "version": "0.7.2-dev2144.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -145,7 +145,19 @@ Napi::Array EncodeDataChunk(Napi::Env env, duckdb::DataChunk &chunk, bool with_t
145
145
  }
146
146
  break;
147
147
  }
148
- case duckdb::LogicalTypeId::BLOB:
148
+ case duckdb::LogicalTypeId::BLOB: {
149
+ if (with_data) {
150
+ auto array = Napi::Array::New(env, chunk.size());
151
+ auto data = duckdb::FlatVector::GetData<duckdb::string_t>(*vec);
152
+
153
+ for (size_t i = 0; i < chunk.size(); ++i) {
154
+ auto buf = Napi::Buffer<char>::Copy(env, data[i].GetDataUnsafe(), data[i].GetSize());
155
+ array.Set(i, buf);
156
+ }
157
+ desc.Set("data", array);
158
+ }
159
+ break;
160
+ }
149
161
  case duckdb::LogicalTypeId::VARCHAR: {
150
162
  if (with_data) {
151
163
  auto array = Napi::Array::New(env, chunk.size());
@@ -14,7 +14,7 @@
14
14
  #include "duckdb/common/exception.hpp"
15
15
  #include "duckdb/common/mutex.hpp"
16
16
  #include "duckdb/common/serializer/buffered_file_writer.hpp"
17
- #include "duckdb/common/types/column_data_collection.hpp"
17
+ #include "duckdb/common/types/column/column_data_collection.hpp"
18
18
  #endif
19
19
 
20
20
  #include "parquet_types.h"
@@ -1,12 +1,14 @@
1
1
  #include "parquet_metadata.hpp"
2
+
2
3
  #include "parquet_statistics.hpp"
4
+
3
5
  #include <sstream>
4
6
 
5
7
  #ifndef DUCKDB_AMALGAMATION
8
+ #include "duckdb/common/multi_file_reader.hpp"
6
9
  #include "duckdb/common/types/blob.hpp"
10
+ #include "duckdb/common/types/column/column_data_collection.hpp"
7
11
  #include "duckdb/main/config.hpp"
8
- #include "duckdb/common/types/column_data_collection.hpp"
9
- #include "duckdb/common/multi_file_reader.hpp"
10
12
  #endif
11
13
 
12
14
  namespace duckdb {
@@ -13,7 +13,7 @@ DuckIndexEntry::~DuckIndexEntry() {
13
13
  if (!info || !index) {
14
14
  return;
15
15
  }
16
- info->indexes.RemoveIndex(index);
16
+ info->indexes.RemoveIndex(*index);
17
17
  }
18
18
 
19
19
  string DuckIndexEntry::GetSchemaName() const {
@@ -12,7 +12,7 @@ namespace duckdb {
12
12
  // Arrow append data
13
13
  //===--------------------------------------------------------------------===//
14
14
  typedef void (*initialize_t)(ArrowAppendData &result, const LogicalType &type, idx_t capacity);
15
- typedef void (*append_vector_t)(ArrowAppendData &append_data, Vector &input, idx_t size);
15
+ typedef void (*append_vector_t)(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size);
16
16
  typedef void (*finalize_t)(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result);
17
17
 
18
18
  struct ArrowAppendData {
@@ -84,8 +84,9 @@ static void SetNull(ArrowAppendData &append_data, uint8_t *validity_data, idx_t
84
84
  append_data.null_count++;
85
85
  }
86
86
 
87
- static void AppendValidity(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t size) {
87
+ static void AppendValidity(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t from, idx_t to) {
88
88
  // resize the buffer, filling the validity buffer with all valid values
89
+ idx_t size = to - from;
89
90
  ResizeValidity(append_data.validity, append_data.row_count + size);
90
91
  if (format.validity.AllValid()) {
91
92
  // if all values are valid we don't need to do anything else
@@ -97,7 +98,7 @@ static void AppendValidity(ArrowAppendData &append_data, UnifiedVectorFormat &fo
97
98
  uint8_t current_bit;
98
99
  idx_t current_byte;
99
100
  GetBitPosition(append_data.row_count, current_byte, current_bit);
100
- for (idx_t i = 0; i < size; i++) {
101
+ for (idx_t i = from; i < to; i++) {
101
102
  auto source_idx = format.sel->get_index(i);
102
103
  // append the validity mask
103
104
  if (!format.validity.RowIsValid(source_idx)) {
@@ -146,21 +147,22 @@ struct ArrowIntervalConverter {
146
147
 
147
148
  template <class TGT, class SRC = TGT, class OP = ArrowScalarConverter>
148
149
  struct ArrowScalarBaseData {
149
- static void Append(ArrowAppendData &append_data, Vector &input, idx_t size) {
150
+ static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
151
+ idx_t size = to - from;
150
152
  UnifiedVectorFormat format;
151
- input.ToUnifiedFormat(size, format);
153
+ input.ToUnifiedFormat(input_size, format);
152
154
 
153
155
  // append the validity mask
154
- AppendValidity(append_data, format, size);
156
+ AppendValidity(append_data, format, from, to);
155
157
 
156
158
  // append the main data
157
159
  append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(TGT) * size);
158
160
  auto data = (SRC *)format.data;
159
161
  auto result_data = (TGT *)append_data.main_buffer.data();
160
162
 
161
- for (idx_t i = 0; i < size; i++) {
163
+ for (idx_t i = from; i < to; i++) {
162
164
  auto source_idx = format.sel->get_index(i);
163
- auto result_idx = append_data.row_count + i;
165
+ auto result_idx = append_data.row_count + i - from;
164
166
 
165
167
  if (OP::SkipNulls() && !format.validity.RowIsValid(source_idx)) {
166
168
  OP::template SetNull<TGT>(result_data[result_idx]);
@@ -254,9 +256,10 @@ struct ArrowBoolData {
254
256
  result.main_buffer.reserve(byte_count);
255
257
  }
256
258
 
257
- static void Append(ArrowAppendData &append_data, Vector &input, idx_t size) {
259
+ static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
260
+ idx_t size = to - from;
258
261
  UnifiedVectorFormat format;
259
- input.ToUnifiedFormat(size, format);
262
+ input.ToUnifiedFormat(input_size, format);
260
263
 
261
264
  // we initialize both the validity and the bit set to 1's
262
265
  ResizeValidity(append_data.validity, append_data.row_count + size);
@@ -268,7 +271,7 @@ struct ArrowBoolData {
268
271
  uint8_t current_bit;
269
272
  idx_t current_byte;
270
273
  GetBitPosition(append_data.row_count, current_byte, current_bit);
271
- for (idx_t i = 0; i < size; i++) {
274
+ for (idx_t i = from; i < to; i++) {
272
275
  auto source_idx = format.sel->get_index(i);
273
276
  // append the validity mask
274
277
  if (!format.validity.RowIsValid(source_idx)) {
@@ -321,9 +324,10 @@ struct ArrowVarcharData {
321
324
  result.aux_buffer.reserve(capacity);
322
325
  }
323
326
 
324
- static void Append(ArrowAppendData &append_data, Vector &input, idx_t size) {
327
+ static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
328
+ idx_t size = to - from;
325
329
  UnifiedVectorFormat format;
326
- input.ToUnifiedFormat(size, format);
330
+ input.ToUnifiedFormat(input_size, format);
327
331
 
328
332
  // resize the validity mask and set up the validity buffer for iteration
329
333
  ResizeValidity(append_data.validity, append_data.row_count + size);
@@ -340,14 +344,14 @@ struct ArrowVarcharData {
340
344
  // now append the string data to the auxiliary buffer
341
345
  // the auxiliary buffer's length depends on the string lengths, so we resize as required
342
346
  auto last_offset = offset_data[append_data.row_count];
343
- for (idx_t i = 0; i < size; i++) {
347
+ for (idx_t i = from; i < to; i++) {
344
348
  auto source_idx = format.sel->get_index(i);
345
- auto offset_idx = append_data.row_count + i + 1;
349
+ auto offset_idx = append_data.row_count + i + 1 - from;
346
350
 
347
351
  if (!format.validity.RowIsValid(source_idx)) {
348
352
  uint8_t current_bit;
349
353
  idx_t current_byte;
350
- GetBitPosition(append_data.row_count + i, current_byte, current_bit);
354
+ GetBitPosition(append_data.row_count + i - from, current_byte, current_bit);
351
355
  SetNull(append_data, validity_data, current_byte, current_bit);
352
356
  offset_data[offset_idx] = last_offset;
353
357
  continue;
@@ -387,17 +391,17 @@ struct ArrowStructData {
387
391
  }
388
392
  }
389
393
 
390
- static void Append(ArrowAppendData &append_data, Vector &input, idx_t size) {
394
+ static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
391
395
  UnifiedVectorFormat format;
392
- input.ToUnifiedFormat(size, format);
393
-
394
- AppendValidity(append_data, format, size);
396
+ input.ToUnifiedFormat(input_size, format);
397
+ idx_t size = to - from;
398
+ AppendValidity(append_data, format, from, to);
395
399
  // append the children of the struct
396
400
  auto &children = StructVector::GetEntries(input);
397
401
  for (idx_t child_idx = 0; child_idx < children.size(); child_idx++) {
398
402
  auto &child = children[child_idx];
399
403
  auto &child_data = *append_data.child_data[child_idx];
400
- child_data.append_vector(child_data, *child, size);
404
+ child_data.append_vector(child_data, *child, from, to, size);
401
405
  }
402
406
  append_data.row_count += size;
403
407
  }
@@ -419,9 +423,10 @@ struct ArrowStructData {
419
423
  //===--------------------------------------------------------------------===//
420
424
  // Lists
421
425
  //===--------------------------------------------------------------------===//
422
- void AppendListOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t size,
426
+ void AppendListOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t from, idx_t to,
423
427
  vector<sel_t> &child_sel) {
424
428
  // resize the offset buffer - the offset buffer holds the offsets into the child array
429
+ idx_t size = to - from;
425
430
  append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(uint32_t) * (size + 1));
426
431
  auto data = (list_entry_t *)format.data;
427
432
  auto offset_data = (uint32_t *)append_data.main_buffer.data();
@@ -431,9 +436,9 @@ void AppendListOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format
431
436
  }
432
437
  // set up the offsets using the list entries
433
438
  auto last_offset = offset_data[append_data.row_count];
434
- for (idx_t i = 0; i < size; i++) {
439
+ for (idx_t i = from; i < to; i++) {
435
440
  auto source_idx = format.sel->get_index(i);
436
- auto offset_idx = append_data.row_count + i + 1;
441
+ auto offset_idx = append_data.row_count + i + 1 - from;
437
442
 
438
443
  if (!format.validity.RowIsValid(source_idx)) {
439
444
  offset_data[offset_idx] = last_offset;
@@ -459,21 +464,28 @@ struct ArrowListData {
459
464
  result.child_data.push_back(std::move(child_buffer));
460
465
  }
461
466
 
462
- static void Append(ArrowAppendData &append_data, Vector &input, idx_t size) {
467
+ static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
463
468
  UnifiedVectorFormat format;
464
- input.ToUnifiedFormat(size, format);
465
-
469
+ input.ToUnifiedFormat(input_size, format);
470
+ idx_t size = to - from;
466
471
  vector<sel_t> child_indices;
467
- AppendValidity(append_data, format, size);
468
- AppendListOffsets(append_data, format, size, child_indices);
472
+ AppendValidity(append_data, format, from, to);
473
+ AppendListOffsets(append_data, format, from, to, child_indices);
469
474
 
470
475
  // append the child vector of the list
471
476
  SelectionVector child_sel(child_indices.data());
472
477
  auto &child = ListVector::GetEntry(input);
473
478
  auto child_size = child_indices.size();
474
- child.Slice(child_sel, child_size);
475
-
476
- append_data.child_data[0]->append_vector(*append_data.child_data[0], child, child_size);
479
+ if (size != input_size) {
480
+ // Let's avoid doing this
481
+ Vector child_copy(child.GetType());
482
+ child_copy.Slice(child, child_sel, child_size);
483
+ append_data.child_data[0]->append_vector(*append_data.child_data[0], child_copy, 0, child_size, child_size);
484
+ } else {
485
+ // We don't care about the vector, slice it
486
+ child.Slice(child_sel, child_size);
487
+ append_data.child_data[0]->append_vector(*append_data.child_data[0], child, 0, child_size, child_size);
488
+ }
477
489
  append_data.row_count += size;
478
490
  }
479
491
 
@@ -508,26 +520,39 @@ struct ArrowMapData {
508
520
  result.child_data.push_back(std::move(internal_struct));
509
521
  }
510
522
 
511
- static void Append(ArrowAppendData &append_data, Vector &input, idx_t size) {
523
+ static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
512
524
  UnifiedVectorFormat format;
513
- input.ToUnifiedFormat(size, format);
514
-
515
- AppendValidity(append_data, format, size);
525
+ input.ToUnifiedFormat(input_size, format);
526
+ idx_t size = to - from;
527
+ AppendValidity(append_data, format, from, to);
516
528
  vector<sel_t> child_indices;
517
- AppendListOffsets(append_data, format, size, child_indices);
529
+ AppendListOffsets(append_data, format, from, to, child_indices);
518
530
 
519
531
  SelectionVector child_sel(child_indices.data());
520
532
  auto &key_vector = MapVector::GetKeys(input);
521
533
  auto &value_vector = MapVector::GetValues(input);
522
534
  auto list_size = child_indices.size();
523
- key_vector.Slice(child_sel, list_size);
524
- value_vector.Slice(child_sel, list_size);
525
535
 
526
536
  auto &struct_data = *append_data.child_data[0];
527
537
  auto &key_data = *struct_data.child_data[0];
528
538
  auto &value_data = *struct_data.child_data[1];
529
- key_data.append_vector(key_data, key_vector, list_size);
530
- value_data.append_vector(value_data, value_vector, list_size);
539
+
540
+ if (size != input_size) {
541
+ // Let's avoid doing this
542
+ Vector key_vector_copy(key_vector.GetType());
543
+ key_vector_copy.Slice(key_vector, child_sel, list_size);
544
+ Vector value_vector_copy(value_vector.GetType());
545
+ value_vector_copy.Slice(value_vector, child_sel, list_size);
546
+ key_data.append_vector(key_data, key_vector_copy, 0, list_size, list_size);
547
+ value_data.append_vector(value_data, value_vector_copy, 0, list_size, list_size);
548
+ } else {
549
+ // We don't care about the vector, slice it
550
+ key_vector.Slice(child_sel, list_size);
551
+ value_vector.Slice(child_sel, list_size);
552
+ key_data.append_vector(key_data, key_vector, 0, list_size, list_size);
553
+ value_data.append_vector(value_data, value_vector, 0, list_size, list_size);
554
+ }
555
+
531
556
  append_data.row_count += size;
532
557
  struct_data.row_count += size;
533
558
  }
@@ -567,12 +592,12 @@ struct ArrowMapData {
567
592
  };
568
593
 
569
594
  //! Append a data chunk to the underlying arrow array
570
- void ArrowAppender::Append(DataChunk &input) {
595
+ void ArrowAppender::Append(DataChunk &input, idx_t from, idx_t to, idx_t input_size) {
571
596
  D_ASSERT(types == input.GetTypes());
572
597
  for (idx_t i = 0; i < input.ColumnCount(); i++) {
573
- root_data[i]->append_vector(*root_data[i], input.data[i], input.size());
598
+ root_data[i]->append_vector(*root_data[i], input.data[i], from, to, input_size);
574
599
  }
575
- row_count += input.size();
600
+ row_count += to - from;
576
601
  }
577
602
  //===--------------------------------------------------------------------===//
578
603
  // Initialize Arrow Child
@@ -17,7 +17,7 @@ namespace duckdb {
17
17
 
18
18
  void ArrowConverter::ToArrowArray(DataChunk &input, ArrowArray *out_array) {
19
19
  ArrowAppender appender(input.GetTypes(), input.size());
20
- appender.Append(input);
20
+ appender.Append(input, 0, input.size(), input.size());
21
21
  *out_array = appender.Finalize();
22
22
  }
23
23
 
@@ -187,6 +187,15 @@ bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray
187
187
  PreservedError &error) {
188
188
  count = 0;
189
189
  ArrowAppender appender(result->types, chunk_size);
190
+ auto &current_chunk = result->current_chunk;
191
+ if (current_chunk.Valid()) {
192
+ // We start by scanning the non-finished current chunk
193
+ idx_t cur_consumption = current_chunk.RemainingSize() > chunk_size ? chunk_size : current_chunk.RemainingSize();
194
+ count += cur_consumption;
195
+ appender.Append(*current_chunk.data_chunk, current_chunk.position, current_chunk.position + cur_consumption,
196
+ current_chunk.data_chunk->size());
197
+ current_chunk.position += cur_consumption;
198
+ }
190
199
  while (count < chunk_size) {
191
200
  unique_ptr<DataChunk> data_chunk;
192
201
  if (!TryFetchNext(*result, data_chunk, error)) {
@@ -198,8 +207,17 @@ bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray
198
207
  if (!data_chunk || data_chunk->size() == 0) {
199
208
  break;
200
209
  }
201
- count += data_chunk->size();
202
- appender.Append(*data_chunk);
210
+ if (count + data_chunk->size() > chunk_size) {
211
+ // We have to split the chunk between this and the next batch
212
+ idx_t available_space = chunk_size - count;
213
+ appender.Append(*data_chunk, 0, available_space, data_chunk->size());
214
+ count += available_space;
215
+ current_chunk.data_chunk = std::move(data_chunk);
216
+ current_chunk.position = available_space;
217
+ } else {
218
+ count += data_chunk->size();
219
+ appender.Append(*data_chunk, 0, data_chunk->size(), data_chunk->size());
220
+ }
203
221
  }
204
222
  if (count > 0) {
205
223
  *out = appender.Finalize();
@@ -1,8 +1,10 @@
1
1
  #include "duckdb/common/box_renderer.hpp"
2
- #include "duckdb/common/types/column_data_collection.hpp"
2
+
3
3
  #include "duckdb/common/printer.hpp"
4
- #include "utf8proc_wrapper.hpp"
4
+ #include "duckdb/common/types/column/column_data_collection.hpp"
5
5
  #include "duckdb/common/vector_operations/vector_operations.hpp"
6
+ #include "utf8proc_wrapper.hpp"
7
+
6
8
  #include <sstream>
7
9
 
8
10
  namespace duckdb {
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/common/constants.hpp"
2
- #include "duckdb/common/vector_size.hpp"
2
+
3
3
  #include "duckdb/common/limits.hpp"
4
+ #include "duckdb/common/vector_size.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
@@ -15,6 +16,10 @@ const transaction_t MAX_TRANSACTION_ID = NumericLimits<transaction_t>::Maximum()
15
16
  const transaction_t NOT_DELETED_ID = NumericLimits<transaction_t>::Maximum() - 1; // 2^64 - 1
16
17
  const transaction_t MAXIMUM_QUERY_ID = NumericLimits<transaction_t>::Maximum(); // 2^64
17
18
 
19
+ bool IsPowerOfTwo(uint64_t v) {
20
+ return (v & (v - 1)) == 0;
21
+ }
22
+
18
23
  uint64_t NextPowerOfTwo(uint64_t v) {
19
24
  v--;
20
25
  v |= v >> 1;
@@ -27,6 +32,10 @@ uint64_t NextPowerOfTwo(uint64_t v) {
27
32
  return v;
28
33
  }
29
34
 
35
+ uint64_t PreviousPowerOfTwo(uint64_t v) {
36
+ return NextPowerOfTwo((v / 2) + 1);
37
+ }
38
+
30
39
  bool IsInvalidSchema(const string &str) {
31
40
  return str.empty();
32
41
  }
@@ -0,0 +1,41 @@
1
+ #include "duckdb/common/filename_pattern.hpp"
2
+ #include "duckdb/common/string_util.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ void FilenamePattern::SetFilenamePattern(const string &pattern) {
7
+ const string id_format {"{i}"};
8
+ const string uuid_format {"{uuid}"};
9
+
10
+ _base = pattern;
11
+
12
+ _pos = _base.find(id_format);
13
+ if (_pos != string::npos) {
14
+ _base = StringUtil::Replace(_base, id_format, "");
15
+ _uuid = false;
16
+ }
17
+
18
+ _pos = _base.find(uuid_format);
19
+ if (_pos != string::npos) {
20
+ _base = StringUtil::Replace(_base, uuid_format, "");
21
+ _uuid = true;
22
+ }
23
+
24
+ _pos = std::min(_pos, (idx_t)_base.length());
25
+ }
26
+
27
+ string FilenamePattern::CreateFilename(const FileSystem &fs, const string &path, const string &extension,
28
+ idx_t offset) const {
29
+ string result(_base);
30
+ string replacement;
31
+
32
+ if (_uuid) {
33
+ replacement = UUID::ToString(UUID::GenerateRandomUUID());
34
+ } else {
35
+ replacement = std::to_string(offset);
36
+ }
37
+ result.insert(_pos, replacement);
38
+ return fs.JoinPath(path, result + "." + extension);
39
+ }
40
+
41
+ } // namespace duckdb
@@ -1,11 +1,12 @@
1
1
  #include "duckdb/common/hive_partitioning.hpp"
2
- #include "duckdb/planner/table_filter.hpp"
2
+
3
3
  #include "duckdb/execution/expression_executor.hpp"
4
4
  #include "duckdb/optimizer/filter_combiner.hpp"
5
- #include "duckdb/planner/expression_iterator.hpp"
6
- #include "duckdb/planner/expression/bound_constant_expression.hpp"
7
5
  #include "duckdb/planner/expression/bound_columnref_expression.hpp"
6
+ #include "duckdb/planner/expression/bound_constant_expression.hpp"
8
7
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
8
+ #include "duckdb/planner/expression_iterator.hpp"
9
+ #include "duckdb/planner/table_filter.hpp"
9
10
  #include "re2/re2.h"
10
11
 
11
12
  namespace duckdb {
@@ -140,29 +141,157 @@ void HivePartitioning::ApplyFiltersToFileList(ClientContext &context, vector<str
140
141
  }
141
142
 
142
143
  HivePartitionedColumnData::HivePartitionedColumnData(const HivePartitionedColumnData &other)
143
- : PartitionedColumnData(other) {
144
+ : PartitionedColumnData(other), hashes_v(LogicalType::HASH) {
144
145
  // Synchronize to ensure consistency of shared partition map
145
146
  if (other.global_state) {
146
147
  global_state = other.global_state;
147
148
  unique_lock<mutex> lck(global_state->lock);
148
149
  SynchronizeLocalMap();
149
150
  }
151
+ InitializeKeys();
150
152
  }
151
153
 
152
- void HivePartitionedColumnData::ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) {
153
- Vector hashes(LogicalType::HASH, input.size());
154
- input.Hash(group_by_columns, hashes);
155
- hashes.Flatten(input.size());
156
-
157
- for (idx_t i = 0; i < input.size(); i++) {
158
- HivePartitionKey key;
159
- key.hash = FlatVector::GetData<hash_t>(hashes)[i];
160
- for (auto &col : group_by_columns) {
161
- key.values.emplace_back(input.GetValue(col, i));
154
+ void HivePartitionedColumnData::InitializeKeys() {
155
+ keys.resize(STANDARD_VECTOR_SIZE);
156
+ for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) {
157
+ keys[i].values.resize(group_by_columns.size());
158
+ }
159
+ }
160
+
161
+ template <class T>
162
+ static inline Value GetHiveKeyValue(const T &val) {
163
+ return Value::CreateValue<T>(val);
164
+ }
165
+
166
+ template <class T>
167
+ static inline Value GetHiveKeyValue(const T &val, const LogicalType &type) {
168
+ auto result = GetHiveKeyValue(val);
169
+ result.Reinterpret(type);
170
+ return result;
171
+ }
172
+
173
+ static inline Value GetHiveKeyNullValue(const LogicalType &type) {
174
+ Value result;
175
+ result.Reinterpret(type);
176
+ return result;
177
+ }
178
+
179
+ template <class T>
180
+ static void TemplatedGetHivePartitionValues(Vector &input, vector<HivePartitionKey> &keys, const idx_t col_idx,
181
+ const idx_t count) {
182
+ UnifiedVectorFormat format;
183
+ input.ToUnifiedFormat(count, format);
184
+
185
+ const auto &sel = *format.sel;
186
+ const auto data = (T *)format.data;
187
+ const auto &validity = format.validity;
188
+
189
+ const auto &type = input.GetType();
190
+
191
+ const auto reinterpret = Value::CreateValue<T>(data[0]).GetTypeMutable() != type;
192
+ if (reinterpret) {
193
+ for (idx_t i = 0; i < count; i++) {
194
+ auto &key = keys[i];
195
+ const auto idx = sel.get_index(i);
196
+ if (validity.RowIsValid(idx)) {
197
+ key.values[col_idx] = GetHiveKeyValue(data[idx], type);
198
+ } else {
199
+ key.values[col_idx] = GetHiveKeyNullValue(type);
200
+ }
201
+ }
202
+ } else {
203
+ for (idx_t i = 0; i < count; i++) {
204
+ auto &key = keys[i];
205
+ const auto idx = sel.get_index(i);
206
+ if (validity.RowIsValid(idx)) {
207
+ key.values[col_idx] = GetHiveKeyValue(data[idx]);
208
+ } else {
209
+ key.values[col_idx] = GetHiveKeyNullValue(type);
210
+ }
162
211
  }
212
+ }
213
+ }
214
+
215
+ static void GetNestedHivePartitionValues(Vector &input, vector<HivePartitionKey> &keys, const idx_t col_idx,
216
+ const idx_t count) {
217
+ for (idx_t i = 0; i < count; i++) {
218
+ auto &key = keys[i];
219
+ key.values[col_idx] = input.GetValue(i);
220
+ }
221
+ }
222
+
223
+ static void GetHivePartitionValuesTypeSwitch(Vector &input, vector<HivePartitionKey> &keys, const idx_t col_idx,
224
+ const idx_t count) {
225
+ const auto &type = input.GetType();
226
+ switch (type.InternalType()) {
227
+ case PhysicalType::BOOL:
228
+ TemplatedGetHivePartitionValues<bool>(input, keys, col_idx, count);
229
+ break;
230
+ case PhysicalType::INT8:
231
+ TemplatedGetHivePartitionValues<int8_t>(input, keys, col_idx, count);
232
+ break;
233
+ case PhysicalType::INT16:
234
+ TemplatedGetHivePartitionValues<int16_t>(input, keys, col_idx, count);
235
+ break;
236
+ case PhysicalType::INT32:
237
+ TemplatedGetHivePartitionValues<int32_t>(input, keys, col_idx, count);
238
+ break;
239
+ case PhysicalType::INT64:
240
+ TemplatedGetHivePartitionValues<int64_t>(input, keys, col_idx, count);
241
+ break;
242
+ case PhysicalType::INT128:
243
+ TemplatedGetHivePartitionValues<hugeint_t>(input, keys, col_idx, count);
244
+ break;
245
+ case PhysicalType::UINT8:
246
+ TemplatedGetHivePartitionValues<uint8_t>(input, keys, col_idx, count);
247
+ break;
248
+ case PhysicalType::UINT16:
249
+ TemplatedGetHivePartitionValues<uint16_t>(input, keys, col_idx, count);
250
+ break;
251
+ case PhysicalType::UINT32:
252
+ TemplatedGetHivePartitionValues<uint32_t>(input, keys, col_idx, count);
253
+ break;
254
+ case PhysicalType::UINT64:
255
+ TemplatedGetHivePartitionValues<uint64_t>(input, keys, col_idx, count);
256
+ break;
257
+ case PhysicalType::FLOAT:
258
+ TemplatedGetHivePartitionValues<float>(input, keys, col_idx, count);
259
+ break;
260
+ case PhysicalType::DOUBLE:
261
+ TemplatedGetHivePartitionValues<double>(input, keys, col_idx, count);
262
+ break;
263
+ case PhysicalType::INTERVAL:
264
+ TemplatedGetHivePartitionValues<interval_t>(input, keys, col_idx, count);
265
+ break;
266
+ case PhysicalType::VARCHAR:
267
+ TemplatedGetHivePartitionValues<string_t>(input, keys, col_idx, count);
268
+ break;
269
+ case PhysicalType::STRUCT:
270
+ case PhysicalType::LIST:
271
+ GetNestedHivePartitionValues(input, keys, col_idx, count);
272
+ break;
273
+ default:
274
+ throw InternalException("Unsupported type for HivePartitionedColumnData::ComputePartitionIndices");
275
+ }
276
+ }
277
+
278
+ void HivePartitionedColumnData::ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) {
279
+ const auto count = input.size();
280
+
281
+ input.Hash(group_by_columns, hashes_v);
282
+ hashes_v.Flatten(count);
283
+
284
+ for (idx_t col_idx = 0; col_idx < group_by_columns.size(); col_idx++) {
285
+ auto &group_by_col = input.data[group_by_columns[col_idx]];
286
+ GetHivePartitionValuesTypeSwitch(group_by_col, keys, col_idx, count);
287
+ }
163
288
 
289
+ const auto hashes = FlatVector::GetData<hash_t>(hashes_v);
290
+ const auto partition_indices = FlatVector::GetData<idx_t>(state.partition_indices);
291
+ for (idx_t i = 0; i < count; i++) {
292
+ auto &key = keys[i];
293
+ key.hash = hashes[i];
164
294
  auto lookup = local_partition_map.find(key);
165
- const auto partition_indices = FlatVector::GetData<idx_t>(state.partition_indices);
166
295
  if (lookup == local_partition_map.end()) {
167
296
  idx_t new_partition_id = RegisterNewPartition(key, state);
168
297
  partition_indices[i] = new_partition_id;