duckdb 0.7.2-dev1898.0 → 0.7.2-dev2144.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/data_chunk.cpp +13 -1
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +22 -4
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  55. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  56. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  57. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  58. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  59. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  60. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  61. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  62. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  63. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  64. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  65. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  66. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  67. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  68. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  69. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  70. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  71. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  72. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  73. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  74. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  75. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  78. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  79. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  80. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  81. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  82. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  83. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  84. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  85. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  86. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  87. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  88. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  89. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  91. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  92. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  93. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  94. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +2 -2
  95. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  96. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  97. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  98. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  99. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  100. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  101. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  102. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  104. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  105. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  106. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  107. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  108. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  109. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  110. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  111. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  112. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  113. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  114. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  115. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  116. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  117. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  118. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  119. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  122. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  124. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  125. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  126. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  128. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  130. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  131. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  132. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  133. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  134. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  135. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  136. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  137. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  138. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  140. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  141. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  142. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  143. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  144. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  145. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  147. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  148. package/src/duckdb/src/main/appender.cpp +6 -6
  149. package/src/duckdb/src/main/client_context.cpp +1 -1
  150. package/src/duckdb/src/main/connection.cpp +2 -2
  151. package/src/duckdb/src/main/query_result.cpp +13 -0
  152. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  153. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  154. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  155. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  156. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  157. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  158. package/src/duckdb/src/parallel/executor.cpp +1 -1
  159. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  160. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  161. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  162. package/src/duckdb/src/parser/transformer.cpp +50 -9
  163. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  164. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  165. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  166. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  167. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  168. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  169. package/src/duckdb/src/planner/binder.cpp +16 -19
  170. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  171. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  172. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  173. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  174. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  175. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  176. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  177. package/src/duckdb/ub_src_common.cpp +2 -0
  178. package/src/duckdb/ub_src_common_types.cpp +0 -16
  179. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  180. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
  181. package/test/udf.test.ts +9 -0
@@ -9,52 +9,63 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/fast_mem.hpp"
12
- #include "duckdb/common/types/partitioned_column_data.hpp"
12
+ #include "duckdb/common/types/column/partitioned_column_data.hpp"
13
+ #include "duckdb/common/types/row/partitioned_tuple_data.hpp"
13
14
 
14
15
  namespace duckdb {
15
16
 
16
17
  class BufferManager;
17
- class RowLayout;
18
- class RowDataCollection;
19
18
  class Vector;
20
19
  struct UnifiedVectorFormat;
21
20
  struct SelectionVector;
22
21
 
23
- //! Templated radix partitioning constants, can be templated to the number of radix bits
24
- template <idx_t radix_bits>
25
- struct RadixPartitioningConstants {
22
+ //! Generic radix partitioning functions
23
+ struct RadixPartitioning {
26
24
  public:
27
- static constexpr const idx_t NUM_RADIX_BITS = radix_bits;
28
- static constexpr const idx_t NUM_PARTITIONS = (idx_t)1 << NUM_RADIX_BITS;
29
- static constexpr const idx_t TMP_BUF_SIZE = 8;
25
+ //! The number of partitions for a given number of radix bits
26
+ static inline constexpr idx_t NumberOfPartitions(idx_t radix_bits) {
27
+ return idx_t(1) << radix_bits;
28
+ }
30
29
 
31
- public:
32
- //! Apply bitmask on the highest bits, and right shift to get a number between 0 and NUM_PARTITIONS
33
- static inline hash_t ApplyMask(hash_t hash) {
34
- return (hash & MASK) >> (sizeof(hash_t) * 8 - NUM_RADIX_BITS);
30
+ //! Inverse of NumberOfPartitions, given a number of partitions, get the number of radix bits
31
+ static inline idx_t RadixBits(idx_t n_partitions) {
32
+ D_ASSERT(IsPowerOfTwo(n_partitions));
33
+ for (idx_t r = 0; r < sizeof(idx_t) * 8; r++) {
34
+ if (n_partitions == NumberOfPartitions(r)) {
35
+ return r;
36
+ }
37
+ }
38
+ throw InternalException("RadixPartitioning::RadixBits unable to find partition count!");
35
39
  }
36
40
 
37
- private:
38
- //! Bitmask of the highest bits
39
- static constexpr const hash_t MASK = hash_t(-1) ^ ((hash_t(1) << (sizeof(hash_t) * 8 - NUM_RADIX_BITS)) - 1);
40
- };
41
+ static inline constexpr idx_t Shift(idx_t radix_bits) {
42
+ return 48 - radix_bits;
43
+ }
41
44
 
42
- //! Generic radix partitioning functions
43
- struct RadixPartitioning {
44
- public:
45
- static idx_t NumberOfPartitions(idx_t radix_bits) {
46
- return (idx_t)1 << radix_bits;
45
+ static inline constexpr hash_t Mask(idx_t radix_bits) {
46
+ return (hash_t(1 << radix_bits) - 1) << Shift(radix_bits);
47
47
  }
48
48
 
49
49
  //! Select using a cutoff on the radix bits of the hash
50
50
  static idx_t Select(Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, idx_t cutoff,
51
51
  SelectionVector *true_sel, SelectionVector *false_sel);
52
+ };
53
+
54
+ //! Templated radix partitioning constants, can be templated to the number of radix bits
55
+ template <idx_t radix_bits>
56
+ struct RadixPartitioningConstants {
57
+ public:
58
+ //! Bitmask of the upper bits of the 5th byte
59
+ static constexpr const idx_t NUM_PARTITIONS = RadixPartitioning::NumberOfPartitions(radix_bits);
60
+ static constexpr const idx_t SHIFT = RadixPartitioning::Shift(radix_bits);
61
+ static constexpr const hash_t MASK = RadixPartitioning::Mask(radix_bits);
52
62
 
53
- //! Partition the data in block_collection/string_heap to multiple partitions
54
- static void PartitionRowData(BufferManager &buffer_manager, const RowLayout &layout, const idx_t hash_offset,
55
- RowDataCollection &block_collection, RowDataCollection &string_heap,
56
- vector<unique_ptr<RowDataCollection>> &partition_block_collections,
57
- vector<unique_ptr<RowDataCollection>> &partition_string_heaps, idx_t radix_bits);
63
+ public:
64
+ //! Apply bitmask and right shift to get a number between 0 and NUM_PARTITIONS
65
+ static inline hash_t ApplyMask(hash_t hash) {
66
+ D_ASSERT((hash & MASK) >> SHIFT < NUM_PARTITIONS);
67
+ return (hash & MASK) >> SHIFT;
68
+ }
58
69
  };
59
70
 
60
71
  //! RadixPartitionedColumnData is a PartitionedColumnData that partitions input based on the radix of a hash
@@ -87,6 +98,7 @@ protected:
87
98
  return GetBufferSize(1 << 4);
88
99
  }
89
100
  }
101
+
90
102
  void InitializeAppendStateInternal(PartitionedColumnDataAppendState &state) const override;
91
103
  void ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) override;
92
104
 
@@ -101,4 +113,45 @@ private:
101
113
  const idx_t hash_col_idx;
102
114
  };
103
115
 
116
+ //! RadixPartitionedTupleData is a PartitionedTupleData that partitions input based on the radix of a hash
117
+ class RadixPartitionedTupleData : public PartitionedTupleData {
118
+ public:
119
+ RadixPartitionedTupleData(BufferManager &buffer_manager, const TupleDataLayout &layout, idx_t radix_bits_p,
120
+ idx_t hash_col_idx_p);
121
+ RadixPartitionedTupleData(const RadixPartitionedTupleData &other);
122
+ ~RadixPartitionedTupleData() override;
123
+
124
+ idx_t GetRadixBits() const {
125
+ return radix_bits;
126
+ }
127
+
128
+ private:
129
+ void Initialize();
130
+
131
+ protected:
132
+ //===--------------------------------------------------------------------===//
133
+ // Radix Partitioning interface implementation
134
+ //===--------------------------------------------------------------------===//
135
+ void InitializeAppendStateInternal(PartitionedTupleDataAppendState &state,
136
+ TupleDataPinProperties properties) const override;
137
+ void ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) override;
138
+ void ComputePartitionIndices(Vector &row_locations, idx_t count, Vector &partition_indices) const override;
139
+ idx_t MaxPartitionIndex() const override {
140
+ return RadixPartitioning::NumberOfPartitions(radix_bits) - 1;
141
+ }
142
+
143
+ bool RepartitionReverseOrder() const override {
144
+ return true;
145
+ }
146
+ void RepartitionFinalizeStates(PartitionedTupleData &old_partitioned_data,
147
+ PartitionedTupleData &new_partitioned_data, PartitionedTupleDataAppendState &state,
148
+ idx_t finished_partition_idx) const override;
149
+
150
+ private:
151
+ //! The number of radix bits
152
+ const idx_t radix_bits;
153
+ //! The index of the column holding the hashes
154
+ const idx_t hash_col_idx;
155
+ };
156
+
104
157
  } // namespace duckdb
@@ -0,0 +1,38 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/reference_map.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/unordered_map.hpp"
13
+ #include "duckdb/common/unordered_set.hpp"
14
+
15
+ namespace duckdb {
16
+ class Expression;
17
+
18
+ template <class T>
19
+ struct ReferenceHashFunction {
20
+ uint64_t operator()(const reference<T> &ref) const {
21
+ return std::hash<void *>()((void *)&ref.get());
22
+ }
23
+ };
24
+
25
+ template <class T>
26
+ struct ReferenceEquality {
27
+ bool operator()(const reference<T> &a, const reference<T> &b) const {
28
+ return &a.get() == &b.get();
29
+ }
30
+ };
31
+
32
+ template <typename T, typename TGT>
33
+ using reference_map_t = unordered_map<reference<T>, TGT, ReferenceHashFunction<T>, ReferenceEquality<T>>;
34
+
35
+ template <typename T>
36
+ using reference_set_t = unordered_set<reference<T>, ReferenceHashFunction<T>, ReferenceEquality<T>>;
37
+
38
+ } // namespace duckdb
@@ -18,6 +18,7 @@ struct AggregateObject;
18
18
  struct AggregateFilterData;
19
19
  class DataChunk;
20
20
  class RowLayout;
21
+ class TupleDataLayout;
21
22
  class RowDataCollection;
22
23
  struct SelectionVector;
23
24
  class StringHeap;
@@ -37,9 +38,9 @@ struct RowOperations {
37
38
  // Aggregation Operators
38
39
  //===--------------------------------------------------------------------===//
39
40
  //! initialize - unaligned addresses
40
- static void InitializeStates(RowLayout &layout, Vector &addresses, const SelectionVector &sel, idx_t count);
41
+ static void InitializeStates(TupleDataLayout &layout, Vector &addresses, const SelectionVector &sel, idx_t count);
41
42
  //! destructor - unaligned addresses, updated
42
- static void DestroyStates(RowOperationsState &state, RowLayout &layout, Vector &addresses, idx_t count);
43
+ static void DestroyStates(RowOperationsState &state, TupleDataLayout &layout, Vector &addresses, idx_t count);
43
44
  //! update - aligned addresses
44
45
  static void UpdateStates(RowOperationsState &state, AggregateObject &aggr, Vector &addresses, DataChunk &payload,
45
46
  idx_t arg_idx, idx_t count);
@@ -47,10 +48,10 @@ struct RowOperations {
47
48
  static void UpdateFilteredStates(RowOperationsState &state, AggregateFilterData &filter_data, AggregateObject &aggr,
48
49
  Vector &addresses, DataChunk &payload, idx_t arg_idx);
49
50
  //! combine - unaligned addresses, updated
50
- static void CombineStates(RowOperationsState &state, RowLayout &layout, Vector &sources, Vector &targets,
51
+ static void CombineStates(RowOperationsState &state, TupleDataLayout &layout, Vector &sources, Vector &targets,
51
52
  idx_t count);
52
53
  //! finalize - unaligned addresses, updated
53
- static void FinalizeStates(RowOperationsState &state, RowLayout &layout, Vector &addresses, DataChunk &result,
54
+ static void FinalizeStates(RowOperationsState &state, TupleDataLayout &layout, Vector &addresses, DataChunk &result,
54
55
  idx_t aggr_idx);
55
56
 
56
57
  //===--------------------------------------------------------------------===//
@@ -66,7 +67,7 @@ struct RowOperations {
66
67
  const idx_t count, const RowLayout &layout, const idx_t col_no, const idx_t build_size = 0,
67
68
  data_ptr_t heap_ptr = nullptr);
68
69
  //! Full Scan an entire columns
69
- static void FullScanColumn(const RowLayout &layout, Vector &rows, Vector &col, idx_t count, idx_t col_idx);
70
+ static void FullScanColumn(const TupleDataLayout &layout, Vector &rows, Vector &col, idx_t count, idx_t col_idx);
70
71
 
71
72
  //===--------------------------------------------------------------------===//
72
73
  // Comparison Operators
@@ -76,7 +77,7 @@ struct RowOperations {
76
77
  //! Returns the number of matches remaining in the selection.
77
78
  using Predicates = vector<ExpressionType>;
78
79
 
79
- static idx_t Match(DataChunk &columns, UnifiedVectorFormat col_data[], const RowLayout &layout, Vector &rows,
80
+ static idx_t Match(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout, Vector &rows,
80
81
  const Predicates &predicates, SelectionVector &sel, idx_t count, SelectionVector *no_match,
81
82
  idx_t &no_match_count);
82
83
 
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/types.hpp"
12
- #include "duckdb/common/types/row_layout.hpp"
12
+ #include "duckdb/common/types/row/row_layout.hpp"
13
13
 
14
14
  namespace duckdb {
15
15
 
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/sort/sort.hpp"
12
- #include "duckdb/common/types/partitioned_column_data.hpp"
12
+ #include "duckdb/common/types/column/partitioned_column_data.hpp"
13
13
  #include "duckdb/common/radix_partitioning.hpp"
14
14
  #include "duckdb/parallel/base_pipeline_event.hpp"
15
15
 
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/sort/sorted_block.hpp"
12
- #include "duckdb/common/types/row_data_collection.hpp"
12
+ #include "duckdb/common/types/row/row_data_collection.hpp"
13
13
  #include "duckdb/planner/bound_query_node.hpp"
14
14
 
15
15
  namespace duckdb {
@@ -9,8 +9,8 @@
9
9
 
10
10
  #include "duckdb/common/fast_mem.hpp"
11
11
  #include "duckdb/common/sort/comparators.hpp"
12
- #include "duckdb/common/types/row_data_collection_scanner.hpp"
13
- #include "duckdb/common/types/row_layout.hpp"
12
+ #include "duckdb/common/types/row/row_data_collection_scanner.hpp"
13
+ #include "duckdb/common/types/row/row_layout.hpp"
14
14
  #include "duckdb/storage/buffer/buffer_handle.hpp"
15
15
 
16
16
  namespace duckdb {
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/map.hpp"
12
- #include "duckdb/common/types/column_data_collection.hpp"
12
+ #include "duckdb/common/types/column/column_data_collection.hpp"
13
13
 
14
14
  namespace duckdb {
15
15
  class BufferManager;
@@ -1,14 +1,14 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/types/column_data_allocator.hpp
4
+ // duckdb/common/types/column/column_data_allocator.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/common/types/column_data_collection.hpp"
11
+ #include "duckdb/common/types/column/column_data_collection.hpp"
12
12
 
13
13
  namespace duckdb {
14
14
 
@@ -28,8 +28,8 @@ struct BlockMetaData {
28
28
 
29
29
  class ColumnDataAllocator {
30
30
  public:
31
- ColumnDataAllocator(Allocator &allocator);
32
- ColumnDataAllocator(BufferManager &buffer_manager);
31
+ explicit ColumnDataAllocator(Allocator &allocator);
32
+ explicit ColumnDataAllocator(BufferManager &buffer_manager);
33
33
  ColumnDataAllocator(ClientContext &context, ColumnDataAllocatorType allocator_type);
34
34
  ColumnDataAllocator(ColumnDataAllocator &allocator);
35
35
 
@@ -1,7 +1,7 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/types/column_data_collection.hpp
4
+ // duckdb/common/types/column/column_data_collection.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/pair.hpp"
12
- #include "duckdb/common/types/column_data_collection_iterators.hpp"
12
+ #include "duckdb/common/types/column/column_data_collection_iterators.hpp"
13
13
 
14
14
  namespace duckdb {
15
15
  class BufferManager;
@@ -1,14 +1,14 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/types/column_data_collection_iterators.hpp
4
+ // duckdb/common/types/column/column_data_collection_iterators.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/common/types/column_data_scan_states.hpp"
11
+ #include "duckdb/common/types/column/column_data_scan_states.hpp"
12
12
 
13
13
  namespace duckdb {
14
14
  class ColumnDataCollection;
@@ -1,15 +1,15 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/types/column_data_collection_segment.hpp
4
+ // duckdb/common/types/column/column_data_collection_segment.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/common/types/column_data_allocator.hpp"
12
- #include "duckdb/common/types/column_data_collection.hpp"
11
+ #include "duckdb/common/types/column/column_data_allocator.hpp"
12
+ #include "duckdb/common/types/column/column_data_collection.hpp"
13
13
 
14
14
  namespace duckdb {
15
15
 
@@ -1,16 +1,16 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/types/column_data_consumer.hpp
4
+ // duckdb/common/types/column/column_data_consumer.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/common/types/column_data_collection.hpp"
12
- #include "duckdb/common/types/column_data_collection_segment.hpp"
13
- #include "duckdb/common/types/column_data_scan_states.hpp"
11
+ #include "duckdb/common/types/column/column_data_collection.hpp"
12
+ #include "duckdb/common/types/column/column_data_collection_segment.hpp"
13
+ #include "duckdb/common/types/column/column_data_scan_states.hpp"
14
14
 
15
15
  namespace duckdb {
16
16
 
@@ -44,6 +44,10 @@ public:
44
44
  public:
45
45
  ColumnDataConsumer(ColumnDataCollection &collection, vector<column_t> column_ids);
46
46
 
47
+ idx_t Count() const {
48
+ return collection.Count();
49
+ }
50
+
47
51
  idx_t ChunkCount() const {
48
52
  return chunk_count;
49
53
  }
@@ -1,7 +1,7 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/types/column_data_scan_states.hpp
4
+ // duckdb/common/types/column/column_data_scan_states.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
@@ -1,15 +1,16 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/types/partitioned_column_data.hpp
4
+ // duckdb/common/types/column/partitioned_column_data.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/common/types/column_data_allocator.hpp"
12
- #include "duckdb/common/types/column_data_collection.hpp"
11
+ #include "duckdb/common/perfect_map_set.hpp"
12
+ #include "duckdb/common/types/column/column_data_allocator.hpp"
13
+ #include "duckdb/common/types/column/column_data_collection.hpp"
13
14
 
14
15
  namespace duckdb {
15
16
 
@@ -22,16 +23,23 @@ public:
22
23
  public:
23
24
  Vector partition_indices;
24
25
  SelectionVector partition_sel;
26
+ perfect_map_t<list_entry_t> partition_entries;
25
27
  DataChunk slice_chunk;
26
28
 
27
29
  vector<unique_ptr<DataChunk>> partition_buffers;
28
30
  vector<unique_ptr<ColumnDataAppendState>> partition_append_states;
29
31
  };
30
32
 
31
- enum class PartitionedColumnDataType : uint8_t { RADIX, HIVE, INVALID };
33
+ enum class PartitionedColumnDataType : uint8_t {
34
+ INVALID,
35
+ //! Radix partitioning on a hash column
36
+ RADIX,
37
+ //! Hive-style multi-field partitioning
38
+ HIVE
39
+ };
32
40
 
33
41
  //! Shared allocators for parallel partitioning
34
- struct PartitionAllocators {
42
+ struct PartitionColumnDataAllocators {
35
43
  mutex lock;
36
44
  vector<shared_ptr<ColumnDataAllocator>> allocators;
37
45
  };
@@ -80,7 +88,7 @@ protected:
80
88
 
81
89
  //! If the buffer is half full, we append to the partition
82
90
  inline idx_t HalfBufferSize() const {
83
- D_ASSERT((BufferSize() & (BufferSize() - 1)) == 0); // BufferSize should be a power of two
91
+ D_ASSERT(IsPowerOfTwo(BufferSize()));
84
92
  return BufferSize() / 2;
85
93
  }
86
94
  //! Create a new shared allocator
@@ -98,7 +106,7 @@ protected:
98
106
  vector<LogicalType> types;
99
107
 
100
108
  mutex lock;
101
- shared_ptr<PartitionAllocators> allocators;
109
+ shared_ptr<PartitionColumnDataAllocators> allocators;
102
110
  vector<unique_ptr<ColumnDataCollection>> partitions;
103
111
  };
104
112
 
@@ -0,0 +1,140 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/types/row/partitioned_tuple_data.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/perfect_map_set.hpp"
12
+ #include "duckdb/common/types/row/tuple_data_allocator.hpp"
13
+ #include "duckdb/common/types/row/tuple_data_collection.hpp"
14
+
15
+ namespace duckdb {
16
+
17
+ //! Local state for parallel partitioning
18
+ struct PartitionedTupleDataAppendState {
19
+ public:
20
+ PartitionedTupleDataAppendState() : partition_indices(LogicalType::UBIGINT) {
21
+ }
22
+
23
+ public:
24
+ Vector partition_indices;
25
+ SelectionVector partition_sel;
26
+
27
+ static constexpr idx_t MAP_THRESHOLD = 32;
28
+ perfect_map_t<list_entry_t> partition_entries;
29
+ list_entry_t partition_entries_arr[MAP_THRESHOLD];
30
+
31
+ vector<unique_ptr<TupleDataPinState>> partition_pin_states;
32
+ TupleDataChunkState chunk_state;
33
+ };
34
+
35
+ enum class PartitionedTupleDataType : uint8_t {
36
+ INVALID,
37
+ //! Radix partitioning on a hash column
38
+ RADIX
39
+ };
40
+
41
+ //! Shared allocators for parallel partitioning
42
+ struct PartitionTupleDataAllocators {
43
+ mutex lock;
44
+ vector<shared_ptr<TupleDataAllocator>> allocators;
45
+ };
46
+
47
+ //! PartitionedTupleData represents partitioned row data, which serves as an interface for different types of
48
+ //! partitioning, e.g., radix, hive
49
+ class PartitionedTupleData {
50
+ public:
51
+ unique_ptr<PartitionedTupleData> CreateShared();
52
+ virtual ~PartitionedTupleData();
53
+
54
+ public:
55
+ //! Get the partitioning type of this PartitionedTupleData
56
+ PartitionedTupleDataType GetType() const;
57
+ //! Initializes a local state for parallel partitioning that can be merged into this PartitionedTupleData
58
+ void InitializeAppendState(PartitionedTupleDataAppendState &state,
59
+ TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE) const;
60
+ //! Appends a DataChunk to this PartitionedTupleData
61
+ void Append(PartitionedTupleDataAppendState &state, DataChunk &input);
62
+ //! Appends rows to this PartitionedTupleData
63
+ void Append(PartitionedTupleDataAppendState &state, TupleDataChunkState &input, idx_t count);
64
+ //! Flushes any remaining data in the append state into this PartitionedTupleData
65
+ void FlushAppendState(PartitionedTupleDataAppendState &state);
66
+ //! Combine another PartitionedTupleData into this PartitionedTupleData
67
+ void Combine(PartitionedTupleData &other);
68
+ //! Partition a TupleDataCollection
69
+ void Partition(TupleDataCollection &source,
70
+ TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE);
71
+ //! Repartition this PartitionedTupleData into the new PartitionedTupleData
72
+ void Repartition(PartitionedTupleData &new_partitioned_data);
73
+ //! Get the partitions in this PartitionedTupleData
74
+ vector<unique_ptr<TupleDataCollection>> &GetPartitions();
75
+ //! Get the count of this PartitionedTupleData
76
+ idx_t Count() const;
77
+ //! Get the size (in bytes) of this PartitionedTupleData
78
+ idx_t SizeInBytes() const;
79
+
80
+ protected:
81
+ //===--------------------------------------------------------------------===//
82
+ // Partitioning type implementation interface
83
+ //===--------------------------------------------------------------------===//
84
+ //! Initialize a PartitionedTupleDataAppendState for this type of partitioning (optional)
85
+ virtual void InitializeAppendStateInternal(PartitionedTupleDataAppendState &state,
86
+ TupleDataPinProperties properties) const {
87
+ }
88
+ //! Compute the partition indices for this type of partitioning for the input DataChunk and store them in the
89
+ //! `partition_indices` of the local state. If this type creates partitions on the fly (for, e.g., hive), this
90
+ //! function is also in charge of creating new partitions and mapping the input data to a partition index
91
+ virtual void ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) {
92
+ throw NotImplementedException("ComputePartitionIndices for this type of PartitionedTupleData");
93
+ }
94
+ //! Compute partition indices from rows (similar to function above)
95
+ virtual void ComputePartitionIndices(Vector &row_locations, idx_t count, Vector &partition_indices) const {
96
+ throw NotImplementedException("ComputePartitionIndices for this type of PartitionedTupleData");
97
+ }
98
+ //! Maximum partition index (optional)
99
+ virtual idx_t MaxPartitionIndex() const {
100
+ return DConstants::INVALID_INDEX;
101
+ }
102
+
103
+ //! Whether or not to iterate over the original partitions in reverse order when repartitioning (optional)
104
+ virtual bool RepartitionReverseOrder() const {
105
+ return false;
106
+ }
107
+ //! Finalize states while repartitioning - useful for unpinning blocks that are no longer needed (optional)
108
+ virtual void RepartitionFinalizeStates(PartitionedTupleData &old_partitioned_data,
109
+ PartitionedTupleData &new_partitioned_data,
110
+ PartitionedTupleDataAppendState &state, idx_t finished_partition_idx) const {
111
+ }
112
+
113
+ protected:
114
+ //! PartitionedTupleData can only be instantiated by derived classes
115
+ PartitionedTupleData(PartitionedTupleDataType type, BufferManager &buffer_manager, const TupleDataLayout &layout);
116
+ PartitionedTupleData(const PartitionedTupleData &other);
117
+
118
+ //! Create a new shared allocator
119
+ void CreateAllocator();
120
+ //! Builds a selection vector in the Append state for the partitions
121
+ //! - returns true if everything belongs to the same partition - stores partition index in single_partition_idx
122
+ void BuildPartitionSel(PartitionedTupleDataAppendState &state, idx_t count);
123
+ //! Builds out the buffer space in the partitions
124
+ void BuildBufferSpace(PartitionedTupleDataAppendState &state);
125
+ //! Create a collection for a specific partition
126
+ unique_ptr<TupleDataCollection> CreatePartitionCollection(idx_t partition_index) const {
127
+ return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
128
+ }
129
+
130
+ protected:
131
+ PartitionedTupleDataType type;
132
+ BufferManager &buffer_manager;
133
+ const TupleDataLayout layout;
134
+
135
+ mutex lock;
136
+ shared_ptr<PartitionTupleDataAllocators> allocators;
137
+ vector<unique_ptr<TupleDataCollection>> partitions;
138
+ };
139
+
140
+ } // namespace duckdb
@@ -1,7 +1,7 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/types/row_data_collection.hpp
4
+ // duckdb/common/types/row/row_data_collection.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
@@ -1,7 +1,7 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/types/row_data_collection_scanner.hpp
4
+ // duckdb/common/types/row/row_data_collection_scanner.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/types.hpp"
12
- #include "duckdb/common/types/row_layout.hpp"
12
+ #include "duckdb/common/types/row/row_layout.hpp"
13
13
 
14
14
  namespace duckdb {
15
15