duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
  55. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
  56. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
  57. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
  58. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
  59. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  60. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  61. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  62. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  63. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  64. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  66. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  67. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  68. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  69. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  70. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  71. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  72. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  73. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  74. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  75. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  76. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  77. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  78. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  79. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  80. package/src/duckdb/src/function/table/read_csv.cpp +124 -58
  81. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  82. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  85. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  86. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  87. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  88. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  89. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  90. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  91. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  92. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  93. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  94. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  95. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  98. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  99. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  100. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  101. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
  102. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  104. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  105. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  106. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  107. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  108. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  109. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  110. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  111. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  112. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  113. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  114. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  115. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  116. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  117. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  119. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  120. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  122. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  124. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  128. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  130. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  131. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
  134. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
  135. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
  136. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
  137. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  138. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  140. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  141. package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
  142. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
  143. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
  144. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
  145. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  147. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  148. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  149. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  150. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  151. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  152. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  153. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  154. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  155. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  156. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  157. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  158. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  159. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  160. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  162. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  163. package/src/duckdb/src/main/appender.cpp +6 -6
  164. package/src/duckdb/src/main/client_context.cpp +1 -1
  165. package/src/duckdb/src/main/connection.cpp +2 -2
  166. package/src/duckdb/src/main/query_result.cpp +13 -0
  167. package/src/duckdb/src/main/settings/settings.cpp +3 -4
  168. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  169. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  170. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  171. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  172. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  173. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  174. package/src/duckdb/src/parallel/executor.cpp +1 -1
  175. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  176. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  177. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  178. package/src/duckdb/src/parser/transformer.cpp +50 -9
  179. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
  180. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  181. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  182. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  183. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  184. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  185. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  186. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
  187. package/src/duckdb/src/planner/binder.cpp +16 -19
  188. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  189. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  190. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  191. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  192. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  193. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  194. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  195. package/src/duckdb/ub_src_common.cpp +2 -0
  196. package/src/duckdb/ub_src_common_types.cpp +0 -16
  197. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  198. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -0,0 +1,74 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/types/row/tuple_data_states.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/mutex.hpp"
12
+ #include "duckdb/common/types.hpp"
13
+
14
+ namespace duckdb {
15
+
16
+ enum class TupleDataPinProperties : uint8_t { // Pinning policy; this is the type of TupleDataPinState::properties
17
+ INVALID, //!< Value TupleDataPinState::properties is initialised to; presumably "not configured yet" (confirm)
18
+ //! Keeps all passed blocks pinned while scanning/iterating over the chunks (for both reading/writing)
19
+ KEEP_EVERYTHING_PINNED,
20
+ //! Unpins blocks after they are done (for both reading/writing)
21
+ UNPIN_AFTER_DONE,
22
+ //! Destroys blocks after they are done (for reading only)
23
+ DESTROY_AFTER_DONE,
24
+ //! Assumes all blocks are already pinned (for reading only)
25
+ ALREADY_PINNED
26
+ };
27
+
28
+ struct TupleDataPinState { // Buffer handles plus pinning policy; embedded in both the append and the scan state
29
+ unordered_map<uint32_t, BufferHandle> row_handles; //!< Handles for row blocks (per the name); the uint32_t key is presumably a block index/id (confirm)
30
+ unordered_map<uint32_t, BufferHandle> heap_handles; //!< Handles for heap blocks (per the name); keyed the same way as row_handles
31
+ TupleDataPinProperties properties = TupleDataPinProperties::INVALID; //!< Pinning policy; starts out as INVALID
32
+ };
33
+
34
+ struct CombinedListData { // Optional per-vector list state; owned through TupleDataVectorFormat::combined_list_data
35
+ UnifiedVectorFormat combined_data; //!< Unified format describing the combined list data
36
+ list_entry_t combined_list_entries[STANDARD_VECTOR_SIZE]; //!< Inline array with STANDARD_VECTOR_SIZE list entries (presumably one per row of a vector; confirm)
37
+ buffer_ptr<SelectionData> selection_data; //!< Shared selection buffer; NOTE(review): presumably backs combined_data's selection (confirm)
38
+ };
39
+
40
+ struct TupleDataVectorFormat { // Recursive description of one vector: its unified format plus the formats of its children
41
+ UnifiedVectorFormat data; //!< Unified format of this vector
42
+ vector<TupleDataVectorFormat> child_formats; //!< Nested formats of the same type (presumably one per child vector of a nested type; confirm)
43
+ unique_ptr<CombinedListData> combined_list_data; //!< Optional combined list state; null unless something allocates it
44
+ };
45
+
46
+ struct TupleDataChunkState { // Per-chunk working state; embedded in the append/scan states and in AggregateHTAppendState
47
+ vector<TupleDataVectorFormat> vector_data; //!< Vector formats (presumably one per column of the chunk; confirm)
48
+ vector<column_t> column_ids; //!< Column ids this state refers to
49
+
50
+ Vector row_locations = Vector(LogicalType::POINTER); //!< POINTER-typed vector (presumably one row address per tuple; confirm)
51
+ Vector heap_locations = Vector(LogicalType::POINTER); //!< POINTER-typed vector (presumably one heap address per tuple; confirm)
52
+ Vector heap_sizes = Vector(LogicalType::UBIGINT); //!< UBIGINT-typed vector (presumably the heap size of each tuple; confirm)
53
+ };
54
+
55
+ struct TupleDataAppendState { // Bundles the pin state and chunk state used while appending
56
+ TupleDataPinState pin_state; //!< Buffer handles and pinning policy for the append
57
+ TupleDataChunkState chunk_state; //!< Per-chunk vector formats and location vectors for the append
58
+ };
59
+
60
+ struct TupleDataScanState { // Pin/chunk state plus the current scan position (segment and chunk index)
61
+ TupleDataPinState pin_state; //!< Buffer handles and pinning policy for the scan
62
+ TupleDataChunkState chunk_state; //!< Per-chunk vector formats and location vectors for the scan
63
+ idx_t segment_index = DConstants::INVALID_INDEX; //!< Starts as INVALID_INDEX, i.e. no segment selected yet
64
+ idx_t chunk_index = DConstants::INVALID_INDEX; //!< Starts as INVALID_INDEX, i.e. no chunk selected yet
65
+ };
66
+
67
+ struct TupleDataParallelScanState { // A scan state paired with a mutex; passed as the global state to GroupedAggregateHashTable::Scan
68
+ TupleDataScanState scan_state; //!< The scan state held alongside the mutex
69
+ mutex lock; //!< NOTE(review): presumably guards scan_state when several threads scan together (confirm)
70
+ };
71
+
72
+ using TupleDataLocalScanState = TupleDataScanState; // Alias only: the local scan state is the plain scan state type
73
+
74
+ } // namespace duckdb
@@ -170,6 +170,9 @@ public:
170
170
  }
171
171
  return ValidityBuffer::MAX_ENTRY >> (BITS_PER_VALUE - n);
172
172
  }
173
+ static inline idx_t SizeInBytes(idx_t n) { // Returns n divided by BITS_PER_VALUE, rounded up
174
+ return (n + BITS_PER_VALUE - 1) / BITS_PER_VALUE; // NOTE(review): this is a byte count only if BITS_PER_VALUE == 8 - confirm for every instantiation
175
+ }
173
176
 
174
177
  //! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a
175
178
  //! not-null check
@@ -175,24 +175,16 @@ public:
175
175
  DUCKDB_API static Value BIT(const string &data);
176
176
 
177
177
  template <class T>
178
- T GetValue() const {
179
- throw InternalException("Unimplemented template type for Value::GetValue");
180
- }
178
+ T GetValue() const;
181
179
  template <class T>
182
- static Value CreateValue(T value) {
183
- throw InternalException("Unimplemented template type for Value::CreateValue");
184
- }
180
+ static Value CreateValue(T value);
185
181
  // Returns the internal value. Unlike GetValue(), this method does not perform casting, and assumes T matches the
186
182
  // type of the value. Only use this if you know what you are doing.
187
183
  template <class T>
188
- T GetValueUnsafe() const {
189
- throw InternalException("Unimplemented template type for Value::GetValueUnsafe");
190
- }
184
+ T GetValueUnsafe() const;
191
185
  //! Returns a reference to the internal value. This can only be used for primitive types.
192
186
  template <class T>
193
- T &GetReferenceUnsafe() {
194
- throw InternalException("Unimplemented template type for Value::GetReferenceUnsafe");
195
- }
187
+ T &GetReferenceUnsafe();
196
188
 
197
189
  //! Return a copy of this value
198
190
  Value Copy() const {
@@ -8,14 +8,14 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include "duckdb/common/types/row/tuple_data_collection.hpp"
11
12
  #include "duckdb/execution/base_aggregate_hashtable.hpp"
12
- #include "duckdb/storage/buffer/buffer_handle.hpp"
13
13
  #include "duckdb/storage/arena_allocator.hpp"
14
+ #include "duckdb/storage/buffer/buffer_handle.hpp"
14
15
 
15
16
  namespace duckdb {
16
17
  class BlockHandle;
17
18
  class BufferHandle;
18
- class RowDataCollection;
19
19
 
20
20
  struct FlushMoveState;
21
21
 
@@ -60,7 +60,7 @@ enum HtEntryType { HT_WIDTH_32, HT_WIDTH_64 };
60
60
 
61
61
  struct AggregateHTScanState {
62
62
  mutex lock;
63
- idx_t scan_position = 0;
63
+ TupleDataScanState scan_state;
64
64
  };
65
65
 
66
66
  struct AggregateHTAppendState {
@@ -75,6 +75,9 @@ struct AggregateHTAppendState {
75
75
  Vector addresses;
76
76
  unique_ptr<UnifiedVectorFormat[]> group_data;
77
77
  DataChunk group_chunk;
78
+
79
+ TupleDataChunkState chunk_state;
80
+ bool chunk_state_initialized;
78
81
  };
79
82
 
80
83
  class GroupedAggregateHashTable : public BaseAggregateHashTable {
@@ -95,9 +98,6 @@ public:
95
98
  GroupedAggregateHashTable(ClientContext &context, Allocator &allocator, vector<LogicalType> group_types);
96
99
  ~GroupedAggregateHashTable() override;
97
100
 
98
- //! The stringheap of the AggregateHashTable
99
- unique_ptr<RowDataCollection> string_heap;
100
-
101
101
  public:
102
102
  //! Add the given data to the HT, computing the aggregates grouped by the
103
103
  //! data in the group chunk. When resize = true, aggregates will not be
@@ -110,7 +110,7 @@ public:
110
110
  //! Scan the HT starting from the scan_position until the result and group
111
111
  //! chunks are filled. scan_position will be updated by this function.
112
112
  //! Returns the amount of elements found.
113
- idx_t Scan(AggregateHTScanState &scan_state, DataChunk &result);
113
+ idx_t Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate, DataChunk &result);
114
114
 
115
115
  //! Fetch the aggregates for specific groups from the HT and place them in the result
116
116
  void FetchAggregates(DataChunk &groups, DataChunk &result);
@@ -127,10 +127,15 @@ public:
127
127
  //! Executes the filter(if any) and update the aggregates
128
128
  void Combine(GroupedAggregateHashTable &other);
129
129
 
130
- static idx_t InitialCapacity();
131
- idx_t Size() {
132
- return entries;
130
+ TupleDataCollection &GetDataCollection() {
131
+ return *data_collection;
133
132
  }
133
+
134
+ idx_t Count() const {
135
+ return data_collection->Count();
136
+ }
137
+
138
+ static idx_t InitialCapacity();
134
139
  idx_t Capacity() {
135
140
  return capacity;
136
141
  }
@@ -139,24 +144,23 @@ public:
139
144
  idx_t MaxCapacity();
140
145
  static idx_t GetMaxCapacity(HtEntryType entry_type, idx_t tuple_size);
141
146
 
142
- void Partition(vector<GroupedAggregateHashTable *> &partition_hts, hash_t mask, idx_t shift);
147
+ void Partition(vector<GroupedAggregateHashTable *> &partition_hts, idx_t radix_bits);
148
+ void InitializeFirstPart();
143
149
 
144
150
  void Finalize();
145
151
 
146
152
  private:
147
153
  HtEntryType entry_type;
148
154
 
149
- //! The total tuple size
155
+ //! The capacity of the HT. This can be increased using GroupedAggregateHashTable::Resize
156
+ idx_t capacity;
157
+ //! Tuple width
150
158
  idx_t tuple_size;
151
- //! The amount of tuples that fit in a single block
159
+ //! Tuples per block
152
160
  idx_t tuples_per_block;
153
- //! The capacity of the HT. This can be increased using
154
- //! GroupedAggregateHashTable::Resize
155
- idx_t capacity;
156
- //! The amount of entries stored in the HT currently
157
- idx_t entries;
158
161
  //! The data of the HT
159
- vector<BufferHandle> payload_hds;
162
+ unique_ptr<TupleDataCollection> data_collection;
163
+ TupleDataPinState td_pin_state;
160
164
  vector<data_ptr_t> payload_hds_ptrs;
161
165
 
162
166
  //! The hashes of the HT
@@ -165,7 +169,6 @@ private:
165
169
  idx_t hash_offset; // Offset into the layout of the hash column
166
170
 
167
171
  hash_t hash_prefix_shift;
168
- idx_t payload_page_offset;
169
172
 
170
173
  //! Bitmask for getting relevant bits from the hashes to determine the position
171
174
  hash_t bitmask;
@@ -175,30 +178,30 @@ private:
175
178
  vector<ExpressionType> predicates;
176
179
 
177
180
  //! The arena allocator used by the aggregates for their internal state
178
- ArenaAllocator aggregate_allocator;
181
+ shared_ptr<ArenaAllocator> aggregate_allocator;
179
182
 
180
183
  private:
181
184
  GroupedAggregateHashTable(const GroupedAggregateHashTable &) = delete;
182
185
 
183
- //! Resize the HT to the specified size. Must be larger than the current
184
- //! size.
185
186
  void Destroy();
186
-
187
187
  void Verify();
188
-
189
- void FlushMove(FlushMoveState &state, Vector &source_addresses, Vector &source_hashes, idx_t count);
190
- void NewBlock();
191
-
192
188
  template <class ENTRY>
193
189
  void VerifyInternal();
190
+ //! Resize the HT to the specified size. Must be larger than the current size.
194
191
  template <class ENTRY>
195
192
  void Resize(idx_t size);
193
+ //! Initializes the first part of the HT
194
+ template <class ENTRY>
195
+ void InitializeHashes();
196
+ //! Does the actual group matching / creation
197
+ template <class ENTRY>
198
+ idx_t FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes_v, Vector &addresses_v,
199
+ SelectionVector &new_groups);
200
+ //! Updates payload_hds_ptrs with the new pointers (after appending to data_collection)
201
+ void UpdateBlockPointers();
196
202
  template <class ENTRY>
197
203
  idx_t FindOrCreateGroupsInternal(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
198
204
  Vector &addresses, SelectionVector &new_groups);
199
-
200
- template <class FUNC = std::function<void(idx_t, idx_t, data_ptr_t)>>
201
- void PayloadApply(FUNC fun);
202
205
  };
203
206
 
204
207
  } // namespace duckdb
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
- #include "duckdb/common/types/row_layout.hpp"
12
+ #include "duckdb/common/types/row/tuple_data_layout.hpp"
13
13
  #include "duckdb/common/types/vector.hpp"
14
14
  #include "duckdb/execution/operator/aggregate/aggregate_object.hpp"
15
15
 
@@ -27,7 +27,7 @@ protected:
27
27
  Allocator &allocator;
28
28
  BufferManager &buffer_manager;
29
29
  //! A helper for managing offsets into the data buffers
30
- RowLayout layout;
30
+ TupleDataLayout layout;
31
31
  //! The types of the payload columns stored in the hashtable
32
32
  vector<LogicalType> payload_types;
33
33
  //! Intermediate structures and data for aggregate filters
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/optional_ptr.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
  class ClientContext;
@@ -17,7 +18,7 @@ class Pipeline;
17
18
 
18
19
  class ExecutionContext {
19
20
  public:
20
- ExecutionContext(ClientContext &client_p, ThreadContext &thread_p, Pipeline *pipeline_p)
21
+ ExecutionContext(ClientContext &client_p, ThreadContext &thread_p, optional_ptr<Pipeline> pipeline_p)
21
22
  : client(client_p), thread(thread_p), pipeline(pipeline_p) {
22
23
  }
23
24
 
@@ -26,7 +27,7 @@ public:
26
27
  //! The thread-local context for this execution
27
28
  ThreadContext &thread;
28
29
  //! Reference to the pipeline for this execution; can be used, for example, by operators to determine caching strategy
29
- Pipeline *pipeline;
30
+ optional_ptr<Pipeline> pipeline;
30
31
  };
31
32
 
32
33
  } // namespace duckdb
@@ -151,7 +151,7 @@ protected:
151
151
 
152
152
  private:
153
153
  //! Client context
154
- ClientContext *context;
154
+ optional_ptr<ClientContext> context;
155
155
  //! The states of the expression executor; this holds any intermediates and temporary states of expressions
156
156
  vector<unique_ptr<ExpressionExecutorState>> states;
157
157
 
@@ -10,11 +10,11 @@
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
12
  #include "duckdb/common/radix_partitioning.hpp"
13
- #include "duckdb/common/types/column_data_consumer.hpp"
13
+ #include "duckdb/common/types/column/column_data_consumer.hpp"
14
14
  #include "duckdb/common/types/data_chunk.hpp"
15
15
  #include "duckdb/common/types/null_value.hpp"
16
- #include "duckdb/common/types/row_data_collection.hpp"
17
- #include "duckdb/common/types/row_layout.hpp"
16
+ #include "duckdb/common/types/row/tuple_data_iterator.hpp"
17
+ #include "duckdb/common/types/row/tuple_data_layout.hpp"
18
18
  #include "duckdb/common/types/vector.hpp"
19
19
  #include "duckdb/execution/aggregate_hashtable.hpp"
20
20
  #include "duckdb/planner/operator/logical_comparison_join.hpp"
@@ -30,25 +30,13 @@ struct ClientConfig;
30
30
 
31
31
  struct JoinHTScanState {
32
32
  public:
33
- JoinHTScanState() : position(0), block_position(0), total(0), scan_index(0), scanned(0) {
33
+ JoinHTScanState(TupleDataCollection &collection, idx_t chunk_idx_from, idx_t chunk_idx_to,
34
+ TupleDataPinProperties properties = TupleDataPinProperties::ALREADY_PINNED)
35
+ : iterator(collection, properties, chunk_idx_from, chunk_idx_to, false), offset_in_chunk(0) {
34
36
  }
35
37
 
36
- idx_t position;
37
- idx_t block_position;
38
-
39
- //! Used for synchronization of parallel external join
40
- idx_t total;
41
- idx_t scan_index;
42
- idx_t scanned;
43
-
44
- public:
45
- void Reset() {
46
- position = 0;
47
- block_position = 0;
48
- total = 0;
49
- scan_index = 0;
50
- scanned = 0;
51
- }
38
+ TupleDataChunkIterator iterator;
39
+ idx_t offset_in_chunk;
52
40
 
53
41
  private:
54
42
  //! Implicit copying is not allowed
@@ -130,33 +118,38 @@ public:
130
118
  ~JoinHashTable();
131
119
 
132
120
  //! Add the given data to the HT
133
- void Build(DataChunk &keys, DataChunk &input);
121
+ void Build(PartitionedTupleDataAppendState &append_state, DataChunk &keys, DataChunk &input);
134
122
  //! Merge another HT into this one
135
123
  void Merge(JoinHashTable &other);
124
+ //! Combines the partitions in sink_collection into data_collection, as if it were not partitioned
125
+ void Unpartition();
136
126
  //! Initialize the pointer table for the probe
137
127
  void InitializePointerTable();
138
128
  //! Finalize the build of the HT, constructing the actual hash table and making the HT ready for probing.
139
129
  //! Finalize must be called before any call to Probe, and after Finalize is called Build should no longer be
140
130
  //! ever called.
141
- void Finalize(idx_t block_idx_start, idx_t block_idx_end, bool parallel);
131
+ void Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool parallel);
142
132
  //! Probe the HT with the given input chunk, resulting in the given result
143
133
  unique_ptr<ScanStructure> Probe(DataChunk &keys, Vector *precomputed_hashes = nullptr);
144
- //! Scan the HT to find the rows for the full outer join and return the number of found entries
145
- idx_t ScanFullOuter(JoinHTScanState &state, Vector &addresses);
146
- //! Construct the full outer join result given the addresses and number of found entries
147
- void GatherFullOuter(DataChunk &result, Vector &addresses, idx_t found_entries);
134
+ //! Scan the HT to construct the full outer join result
135
+ void ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result);
148
136
 
149
137
  //! Fill the pointer with all the addresses from the hashtable for full scan
150
- idx_t FillWithHTOffsets(data_ptr_t *key_locations, JoinHTScanState &state);
151
- //! Pins all fixed-size blocks
152
- void PinAllBlocks();
138
+ idx_t FillWithHTOffsets(JoinHTScanState &state, Vector &addresses);
153
139
 
154
140
  idx_t Count() const {
155
- return block_collection->count;
141
+ return data_collection->Count();
142
+ }
143
+ idx_t SizeInBytes() const {
144
+ return data_collection->SizeInBytes();
145
+ }
146
+
147
+ PartitionedTupleData &GetSinkCollection() {
148
+ return *sink_collection;
156
149
  }
157
150
 
158
- const RowDataCollection &GetBlockCollection() const {
159
- return *block_collection;
151
+ TupleDataCollection &GetDataCollection() {
152
+ return *data_collection;
160
153
  }
161
154
 
162
155
  //! BufferManager
@@ -172,7 +165,7 @@ public:
172
165
  //! The comparison predicates
173
166
  vector<ExpressionType> predicates;
174
167
  //! Data column layout
175
- RowLayout layout;
168
+ TupleDataLayout layout;
176
169
  //! The size of an entry as stored in the HashTable
177
170
  idx_t entry_size;
178
171
  //! The total tuple size
@@ -222,13 +215,12 @@ private:
222
215
  idx_t PrepareKeys(DataChunk &keys, unique_ptr<UnifiedVectorFormat[]> &key_data, const SelectionVector *&current_sel,
223
216
  SelectionVector &sel, bool build_side);
224
217
 
225
- //! The RowDataCollection holding the main data of the hash table
226
- unique_ptr<RowDataCollection> block_collection;
227
- //! The stringheap of the JoinHashTable
228
- unique_ptr<RowDataCollection> string_heap;
229
- //! Pinned handles, these are pinned during finalization only
230
- mutex pinned_handles_lock;
231
- vector<BufferHandle> pinned_handles;
218
+ //! Lock for combining data_collection when merging HTs
219
+ mutex data_lock;
220
+ //! Partitioned data collection that the data is sunk into when building
221
+ unique_ptr<PartitionedTupleData> sink_collection;
222
+ //! The DataCollection holding the main data of the hash table
223
+ unique_ptr<TupleDataCollection> data_collection;
232
224
  //! The hash map of the HT, created after finalization
233
225
  AllocatedData hash_map;
234
226
  //! Whether or not NULL values are considered equal in each of the comparisons
@@ -297,34 +289,25 @@ public:
297
289
  bool external;
298
290
  //! The current number of radix bits used to partition
299
291
  idx_t radix_bits;
292
+ //! The max size of the HT
293
+ idx_t max_ht_size;
300
294
  //! Total count
301
295
  idx_t total_count;
302
- //! Number of tuples for the build-side HT per partitioned round
303
- idx_t tuples_per_round;
304
296
 
305
- //! The number of tuples that are swizzled
306
- idx_t SwizzledCount() const {
307
- return swizzled_block_collection->count;
308
- }
309
- //! Size of the in-memory data
310
- idx_t SizeInBytes() const {
311
- return block_collection->SizeInBytes() + string_heap->SizeInBytes();
312
- }
313
- //! Size of the swizzled data
314
- idx_t SwizzledSize() const {
315
- return swizzled_block_collection->SizeInBytes() + swizzled_string_heap->SizeInBytes();
316
- }
317
297
  //! Capacity of the pointer table given the ht count
318
298
  //! (minimum of 1024 to prevent collision chance for small HT's)
319
299
  static idx_t PointerTableCapacity(idx_t count) {
320
300
  return MaxValue<idx_t>(NextPowerOfTwo(count * 2), 1 << 10);
321
301
  }
302
+ //! Size of the pointer table (in bytes)
303
+ static idx_t PointerTableSize(idx_t count) {
304
+ return PointerTableCapacity(count) * sizeof(data_ptr_t);
305
+ }
322
306
 
323
- //! Swizzle the blocks in this HT (moves from block_collection and string_heap to swizzled_...)
324
- void SwizzleBlocks();
325
-
307
+ //! Whether we need to do an external join
308
+ bool RequiresExternalJoin(ClientConfig &config, vector<unique_ptr<JoinHashTable>> &local_hts);
326
309
  //! Computes partition sizes and number of radix bits (called before scheduling partition tasks)
327
- void ComputePartitionSizes(ClientConfig &config, vector<unique_ptr<JoinHashTable>> &local_hts, idx_t max_ht_size);
310
+ bool RequiresPartitioning(ClientConfig &config, vector<unique_ptr<JoinHashTable>> &local_hts);
328
311
  //! Partition this HT
329
312
  void Partition(JoinHashTable &global_ht);
330
313
 
@@ -340,15 +323,6 @@ private:
340
323
  //! First and last partition of the current probe round
341
324
  idx_t partition_start;
342
325
  idx_t partition_end;
343
-
344
- //! Swizzled row data
345
- unique_ptr<RowDataCollection> swizzled_block_collection;
346
- unique_ptr<RowDataCollection> swizzled_string_heap;
347
-
348
- //! Partitioned data
349
- mutex partitioned_data_lock;
350
- vector<unique_ptr<RowDataCollection>> partition_block_collections;
351
- vector<unique_ptr<RowDataCollection>> partition_string_heaps;
352
326
  };
353
327
 
354
328
  } // namespace duckdb
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
- #include "duckdb/common/types/column_data_collection.hpp"
12
+ #include "duckdb/common/types/column/column_data_collection.hpp"
13
13
  #include "duckdb/common/types/vector.hpp"
14
14
  #include "duckdb/planner/operator/logical_comparison_join.hpp"
15
15
 
@@ -18,9 +18,9 @@ public:
18
18
  static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::EXECUTE;
19
19
 
20
20
  public:
21
- explicit PhysicalExecute(PhysicalOperator *plan);
21
+ explicit PhysicalExecute(PhysicalOperator &plan);
22
22
 
23
- PhysicalOperator *plan;
23
+ PhysicalOperator &plan;
24
24
  unique_ptr<PhysicalOperator> owned_plan;
25
25
  shared_ptr<PreparedStatementData> prepared;
26
26
 
@@ -21,7 +21,7 @@ public:
21
21
 
22
22
  StatementType statement_type;
23
23
  StatementProperties properties;
24
- PhysicalOperator *plan;
24
+ PhysicalOperator &plan;
25
25
  vector<string> names;
26
26
 
27
27
  public:
@@ -9,9 +9,9 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/mutex.hpp"
12
- #include "duckdb/execution/physical_operator.hpp"
12
+ #include "duckdb/common/types/column/column_data_collection.hpp"
13
13
  #include "duckdb/execution/operator/join/physical_comparison_join.hpp"
14
- #include "duckdb/common/types/column_data_collection.hpp"
14
+ #include "duckdb/execution/physical_operator.hpp"
15
15
 
16
16
  namespace duckdb {
17
17
 
@@ -58,7 +58,7 @@ private:
58
58
  template <typename T>
59
59
  bool TemplatedFillSelectionVectorBuild(Vector &source, SelectionVector &sel_vec, SelectionVector &seq_sel_vec,
60
60
  idx_t count);
61
- bool FullScanHashTable(JoinHTScanState &state, LogicalType &key_type);
61
+ bool FullScanHashTable(LogicalType &key_type);
62
62
 
63
63
  private:
64
64
  const PhysicalHashJoin &join;
@@ -8,8 +8,8 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include "duckdb/common/types/column/column_data_collection.hpp"
11
12
  #include "duckdb/execution/physical_operator.hpp"
12
- #include "duckdb/common/types/column_data_collection.hpp"
13
13
 
14
14
  namespace duckdb {
15
15
 
@@ -44,8 +44,6 @@ public:
44
44
  vector<LogicalType> delim_types;
45
45
  //! Used in perfect hash join
46
46
  PerfectHashJoinStats perfect_join_statistics;
47
- //! Whether we can go external (can't yet if recursive CTE)
48
- bool can_go_external;
49
47
 
50
48
  public:
51
49
  // Operator Interface
@@ -24,7 +24,7 @@ public:
24
24
  public:
25
25
  PhysicalIndexJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left, unique_ptr<PhysicalOperator> right,
26
26
  vector<JoinCondition> cond, JoinType join_type, const vector<idx_t> &left_projection_map,
27
- vector<idx_t> right_projection_map, vector<column_t> column_ids, Index *index, bool lhs_first,
27
+ vector<idx_t> right_projection_map, vector<column_t> column_ids, Index &index, bool lhs_first,
28
28
  idx_t estimated_cardinality);
29
29
 
30
30
  //! Columns from RHS used in the query
@@ -44,7 +44,7 @@ public:
44
44
  //! The types of all conditions
45
45
  vector<LogicalType> build_types;
46
46
  //! Index used for join
47
- Index *index;
47
+ Index &index;
48
48
 
49
49
  vector<JoinCondition> conditions;
50
50
 
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/execution/physical_operator.hpp"
12
- #include "duckdb/common/types/column_data_collection.hpp"
12
+ #include "duckdb/common/types/column/column_data_collection.hpp"
13
13
 
14
14
  namespace duckdb {
15
15
 
@@ -87,7 +87,7 @@ private:
87
87
  //! Resets the stream
88
88
  void ResetStream();
89
89
  //! Reads a new buffer from the CSV file if the current one has been exhausted
90
- bool ReadBuffer(idx_t &start);
90
+ bool ReadBuffer(idx_t &start, idx_t &line_start);
91
91
  //! Jumps back to the beginning of input stream and resets necessary internal states
92
92
  bool JumpToNextSample();
93
93
  //! Initializes the TextSearchShiftArrays for complex parser
@@ -124,6 +124,9 @@ private:
124
124
  const vector<LogicalType> &requested_types,
125
125
  vector<vector<LogicalType>> &best_sql_types_candidates,
126
126
  map<LogicalTypeId, vector<string>> &best_format_candidates);
127
+
128
+ //! Skips empty lines for tables with more than one column
129
+ void SkipEmptyLines();
127
130
  };
128
131
 
129
132
  } // namespace duckdb