duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
  55. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
  56. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
  57. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
  58. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
  59. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  60. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  61. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  62. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  63. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  64. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  66. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  67. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  68. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  69. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  70. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  71. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  72. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  73. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  74. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  75. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  76. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  77. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  78. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  79. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  80. package/src/duckdb/src/function/table/read_csv.cpp +124 -58
  81. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  82. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  85. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  86. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  87. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  88. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  89. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  90. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  91. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  92. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  93. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  94. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  95. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  98. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  99. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  100. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  101. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
  102. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  104. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  105. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  106. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  107. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  108. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  109. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  110. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  111. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  112. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  113. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  114. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  115. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  116. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  117. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  119. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  120. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  122. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  124. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  128. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  130. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  131. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
  134. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
  135. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
  136. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
  137. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  138. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  140. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  141. package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
  142. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
  143. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
  144. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
  145. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  147. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  148. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  149. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  150. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  151. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  152. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  153. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  154. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  155. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  156. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  157. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  158. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  159. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  160. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  162. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  163. package/src/duckdb/src/main/appender.cpp +6 -6
  164. package/src/duckdb/src/main/client_context.cpp +1 -1
  165. package/src/duckdb/src/main/connection.cpp +2 -2
  166. package/src/duckdb/src/main/query_result.cpp +13 -0
  167. package/src/duckdb/src/main/settings/settings.cpp +3 -4
  168. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  169. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  170. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  171. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  172. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  173. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  174. package/src/duckdb/src/parallel/executor.cpp +1 -1
  175. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  176. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  177. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  178. package/src/duckdb/src/parser/transformer.cpp +50 -9
  179. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
  180. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  181. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  182. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  183. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  184. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  185. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  186. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
  187. package/src/duckdb/src/planner/binder.cpp +16 -19
  188. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  189. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  190. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  191. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  192. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  193. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  194. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  195. package/src/duckdb/ub_src_common.cpp +2 -0
  196. package/src/duckdb/ub_src_common_types.cpp +0 -16
  197. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  198. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp
@@ -25,8 +25,9 @@
  namespace duckdb {

  ParallelCSVReader::ParallelCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p,
- unique_ptr<CSVBufferRead> buffer_p, const vector<LogicalType> &requested_types)
- : BaseCSVReader(context, std::move(options_p), requested_types) {
+ unique_ptr<CSVBufferRead> buffer_p, idx_t first_pos_first_buffer_p,
+ const vector<LogicalType> &requested_types)
+ : BaseCSVReader(context, std::move(options_p), requested_types), first_pos_first_buffer(first_pos_first_buffer_p) {
  Initialize(requested_types);
  SetBufferRead(std::move(buffer_p));
  if (options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1) {
@@ -52,9 +53,34 @@ bool ParallelCSVReader::NewLineDelimiter(bool carry, bool carry_followed_by_nl,
  return (carry && carry_followed_by_nl) || (!carry && first_char);
  }

+ void ParallelCSVReader::SkipEmptyLines() {
+ idx_t new_pos_buffer = position_buffer;
+ if (parse_chunk.data.size() == 1) {
+ // Empty lines are null data.
+ return;
+ }
+ for (; new_pos_buffer < end_buffer; new_pos_buffer++) {
+ if (StringUtil::CharacterIsNewline((*buffer)[new_pos_buffer])) {
+ bool carrier_return = (*buffer)[new_pos_buffer] == '\r';
+ new_pos_buffer++;
+ if (carrier_return && new_pos_buffer < buffer_size && (*buffer)[new_pos_buffer] == '\n') {
+ position_buffer++;
+ }
+ if (new_pos_buffer > end_buffer) {
+ return;
+ }
+ position_buffer = new_pos_buffer;
+ } else if ((*buffer)[new_pos_buffer] != ' ') {
+ return;
+ }
+ }
+ }
+
  bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
  if (buffer->buffer->IsCSVFileFirstBuffer() && start_buffer == position_buffer &&
- start_buffer == buffer->buffer->GetStart()) {
+ start_buffer == first_pos_first_buffer) {
+ start_buffer = buffer->buffer->GetStart();
+ position_buffer = start_buffer;
  verification_positions.beginning_of_first_line = position_buffer;
  verification_positions.end_of_last_line = position_buffer;
  // First buffer doesn't need any setting
@@ -70,11 +96,23 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
  if (position_buffer > end_buffer) {
  return false;
  }
+ SkipEmptyLines();
+ if (verification_positions.beginning_of_first_line == 0) {
+ verification_positions.beginning_of_first_line = position_buffer;
+ }
+
+ verification_positions.end_of_last_line = position_buffer;
  return true;
  }
  }
  return false;
  }
+ SkipEmptyLines();
+ if (verification_positions.beginning_of_first_line == 0) {
+ verification_positions.beginning_of_first_line = position_buffer;
+ }
+
+ verification_positions.end_of_last_line = position_buffer;
  return true;
  }

@@ -102,6 +140,11 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
  }
  }
  }
+ SkipEmptyLines();
+
+ if (position_buffer > buffer_size) {
+ break;
+ }

  if (position_buffer >= end_buffer && !StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1])) {
  break;
@@ -113,18 +156,20 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
  }
  idx_t position_set = position_buffer;
  start_buffer = position_buffer;
-
  // We check if we can add this line
  // disable the projection pushdown while reading the first line
  // otherwise the first line parsing can be influenced by which columns we are reading
  auto column_ids = std::move(reader_data.column_ids);
  auto column_mapping = std::move(reader_data.column_mapping);
  InitializeProjection();
- successfully_read_first_line = TryParseSimpleCSV(first_line_chunk, error_message, true);
+ try {
+ successfully_read_first_line = TryParseSimpleCSV(first_line_chunk, error_message, true);
+ } catch (...) {
+ successfully_read_first_line = false;
+ }
  // restore the projection pushdown
  reader_data.column_ids = std::move(column_ids);
  reader_data.column_mapping = std::move(column_mapping);
-
  end_buffer = end_buffer_real;
  start_buffer = position_set;
  if (position_buffer >= end_buffer) {
@@ -190,27 +235,55 @@ bool ParallelCSVReader::BufferRemainder() {
  return true;
  }

+ void VerifyLineLength(idx_t line_size, idx_t max_line_size) {
+ if (line_size > max_line_size) {
+ // FIXME: this should also output the correct estimated linenumber where it broke
+ throw InvalidInputException("Maximum line size of %llu bytes exceeded!", max_line_size);
+ }
+ }
+
+ bool AllNewLine(string_t value, idx_t column_amount) {
+ auto value_str = value.GetString();
+ if (value_str.empty() && column_amount == 1) {
+ // This is a one column (empty)
+ return false;
+ }
+ for (idx_t i = 0; i < value.GetSize(); i++) {
+ if (!StringUtil::CharacterIsNewline(value_str[i])) {
+ return false;
+ }
+ }
+ return true;
+ }
+
  bool ParallelCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message, bool try_add_line) {
  // used for parsing algorithm
+ if (start_buffer == buffer_size) {
+ // Nothing to read
+ finished = true;
+ return true;
+ }
  D_ASSERT(end_buffer <= buffer_size);
  bool finished_chunk = false;
  idx_t column = 0;
  idx_t offset = 0;
  bool has_quotes = false;
+
  vector<idx_t> escape_positions;
  if ((start_buffer == buffer->buffer_start || start_buffer == buffer->buffer_end) && !try_add_line) {
  // First time reading this buffer piece
  if (!SetPosition(insert_chunk)) {
- // This means the buffer size does not contain a new line
- if (position_buffer - start_buffer == options.buffer_size) {
- error_message = "Line does not fit in one buffer. Increase the buffer size.";
- return false;
- }
  finished = true;
  return true;
  }
  }
-
+ if (position_buffer == buffer_size) {
+ // Nothing to read
+ finished = true;
+ return true;
+ }
+ // Keep track of line size
+ idx_t line_start = position_buffer;
  // start parsing the first value
  goto value_start;

@@ -242,11 +315,16 @@ normal : {
  if (c == options.delimiter[0]) {
  // delimiter: end the value and add it to the chunk
  goto add_value;
+ } else if (c == options.quote[0] && try_add_line) {
+ return false;
  } else if (StringUtil::CharacterIsNewline(c)) {
  // newline: add row
- if (column > 0 || try_add_line || insert_chunk.data.size() == 1) {
+ if (column > 0 || try_add_line || parse_chunk.data.size() == 1) {
  goto add_row;
  }
+ if (column == 0 && position_buffer == start_buffer) {
+ start_buffer++;
+ }
  }
  }
  if (!BufferRemainder()) {
@@ -285,12 +363,15 @@ add_row : {
  parse_chunk.Reset();
  return success;
  } else {
+ VerifyLineLength(position_buffer - line_start, options.maximum_line_size);
+ line_start = position_buffer;
  finished_chunk = AddRow(insert_chunk, column, error_message);
  }
  // increase position by 1 and move start to the new position
  offset = 0;
  has_quotes = false;
- start_buffer = ++position_buffer;
+ position_buffer++;
+ start_buffer = position_buffer;
  verification_positions.end_of_last_line = position_buffer;
  if (reached_remainder_state) {
  goto final_state;
@@ -309,7 +390,10 @@ add_row : {
  // newline after carriage return: skip
  // increase position by 1 and move start to the new position
  start_buffer = ++position_buffer;
+
+ SkipEmptyLines();
  verification_positions.end_of_last_line = position_buffer;
+ start_buffer = position_buffer;
  if (reached_remainder_state) {
  goto final_state;
  }
@@ -331,6 +415,9 @@ add_row : {
  error_message = "Wrong NewLine Identifier. Expecting \\r or \\n";
  return false;
  }
+ SkipEmptyLines();
+ verification_positions.end_of_last_line = position_buffer;
+ start_buffer = position_buffer;
  // \n newline, move to value start
  if (finished_chunk) {
  goto final_state;
@@ -391,7 +478,7 @@ unquote : {
  } else if (StringUtil::CharacterIsNewline(c)) {
  offset = 1;
  // FIXME: should this be an assertion?
- D_ASSERT(column == parse_chunk.ColumnCount() - 1);
+ D_ASSERT(try_add_line || (!try_add_line && column == parse_chunk.ColumnCount() - 1));
  goto add_row;
  } else if (position_buffer >= end_buffer) {
  // reached end of buffer
@@ -448,22 +535,27 @@ final_state : {
  }
  // If this is the last buffer, we have to read the last value
  if (buffer->buffer->IsCSVFileLastBuffer() || (buffer->next_buffer && buffer->next_buffer->IsCSVFileLastBuffer())) {
- if (column > 0 || try_add_line || (insert_chunk.data.size() == 1 && start_buffer != position_buffer)) {
+ if (column > 0 || start_buffer != position_buffer || try_add_line ||
+ (insert_chunk.data.size() == 1 && start_buffer != position_buffer)) {
  // remaining values to be added to the chunk
  auto str_value = buffer->GetValue(start_buffer, position_buffer, offset);
- AddValue(str_value, column, escape_positions, has_quotes);
- if (try_add_line) {
- bool success = column == return_types.size();
- if (success) {
+ if (!AllNewLine(str_value, insert_chunk.data.size()) || offset == 0) {
+ AddValue(str_value, column, escape_positions, has_quotes);
+ if (try_add_line) {
+ bool success = column == return_types.size();
+ if (success) {
+ AddRow(insert_chunk, column, error_message);
+ success = Flush(insert_chunk);
+ }
+ parse_chunk.Reset();
+ reached_remainder_state = false;
+ return success;
+ } else {
+ VerifyLineLength(position_buffer - line_start, options.maximum_line_size);
+ line_start = position_buffer;
  AddRow(insert_chunk, column, error_message);
- success = Flush(insert_chunk);
+ verification_positions.end_of_last_line = position_buffer;
  }
- parse_chunk.Reset();
- reached_remainder_state = false;
- return success;
- } else {
- AddRow(insert_chunk, column, error_message);
- verification_positions.end_of_last_line = position_buffer;
  }
  }
  }
@@ -471,11 +563,14 @@ final_state : {
  if (mode == ParserMode::PARSING) {
  Flush(insert_chunk);
  }
- if (position_buffer != verification_positions.end_of_last_line &&
- !StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1])) {
+ if (position_buffer - verification_positions.end_of_last_line > options.buffer_size) {
  error_message = "Line does not fit in one buffer. Increase the buffer size.";
  return false;
  }
+ end_buffer = buffer_size;
+ SkipEmptyLines();
+ end_buffer = buffer->buffer_end;
+ verification_positions.end_of_last_line = position_buffer;
  if (position_buffer >= end_buffer) {
  if (position_buffer >= end_buffer) {
  if (position_buffer == end_buffer && StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1]) &&
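
Note on the hunks above: the new SkipEmptyLines pass makes the parallel reader skip blank lines between records, except for single-column files, where an empty line is legitimate (NULL) data. A minimal standalone sketch of the same scanning idea, using a plain std::string in place of DuckDB's buffer types (the function and variable names here are illustrative, not DuckDB API):

#include <cstddef>
#include <iostream>
#include <string>

// Return the offset of the first byte of real data, skipping newline-only
// lines (and stray spaces) between records; "pos"/"end" play the roles of
// position_buffer/end_buffer in the reader above.
static size_t SkipEmptyLines(const std::string &buffer, size_t pos, size_t end) {
	for (size_t i = pos; i < end; i++) {
		char c = buffer[i];
		if (c == '\r' || c == '\n') {
			// treat a \r\n pair as a single newline
			if (c == '\r' && i + 1 < end && buffer[i + 1] == '\n') {
				i++;
			}
			pos = i + 1;
		} else if (c != ' ') {
			break; // first non-empty character: stop skipping
		}
	}
	return pos;
}

int main() {
	std::string csv = "\n\r\n  \na,b,c\n1,2,3\n";
	size_t start = SkipEmptyLines(csv, 0, csv.size());
	std::cout << csv.substr(start); // prints from "a,b,c" onwards
}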
package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp
@@ -3,6 +3,8 @@
  #include "duckdb/common/hive_partitioning.hpp"
  #include "duckdb/common/file_system.hpp"
  #include "duckdb/common/file_opener.hpp"
+ #include "duckdb/common/types/uuid.hpp"
+ #include "duckdb/common/string_util.hpp"

  #include <algorithm>

@@ -40,6 +42,7 @@ public:
  //===--------------------------------------------------------------------===//
  // Sink
  //===--------------------------------------------------------------------===//
+
  void MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
  auto &fs = FileSystem::GetFileSystem(context);
  auto file_path = tmp_file_path.substr(0, tmp_file_path.length() - 4);
@@ -111,10 +114,10 @@ void PhysicalCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gst
  for (idx_t i = 0; i < partitions.size(); i++) {
  string hive_path =
  CreateDirRecursive(partition_columns, names, partition_key_map[i]->values, trimmed_path, fs);
- string full_path = fs.JoinPath(hive_path, "data_" + to_string(l.writer_offset) + "." + function.extension);
- if (fs.FileExists(full_path) && !allow_overwrite) {
+ string full_path(filename_pattern.CreateFilename(fs, hive_path, function.extension, l.writer_offset));
+ if (fs.FileExists(full_path) && !overwrite_or_ignore) {
  throw IOException("failed to create " + full_path +
- ", file exists! Enable ALLOW_OVERWRITE option to force writing");
+ ", file exists! Enable OVERWRITE_OR_IGNORE option to force writing");
  }
  // Create a writer for the current file
  auto fun_data_global = function.copy_to_initialize_global(context.client, *bind_data, full_path);
@@ -184,10 +187,9 @@ unique_ptr<LocalSinkState> PhysicalCopyToFile::GetLocalSinkState(ExecutionContex
  this_file_offset = g.last_file_offset++;
  }
  auto &fs = FileSystem::GetFileSystem(context.client);
- string output_path =
- fs.JoinPath(file_path, StringUtil::Format("out_%llu", this_file_offset) + "." + function.extension);
- if (fs.FileExists(output_path) && !allow_overwrite) {
- throw IOException("%s exists! Enable ALLOW_OVERWRITE option to force writing", output_path);
+ string output_path(filename_pattern.CreateFilename(fs, file_path, function.extension, this_file_offset));
+ if (fs.FileExists(output_path) && !overwrite_or_ignore) {
+ throw IOException("%s exists! Enable OVERWRITE_OR_IGNORE option to force writing", output_path);
  }
  res->global_state = function.copy_to_initialize_global(context.client, *bind_data, output_path);
  }
@@ -199,17 +201,17 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
  if (partition_output || per_thread_output) {
  auto &fs = FileSystem::GetFileSystem(context);

- if (fs.FileExists(file_path) && !allow_overwrite) {
- throw IOException("%s exists! Enable ALLOW_OVERWRITE option to force writing", file_path);
+ if (fs.FileExists(file_path) && !overwrite_or_ignore) {
+ throw IOException("%s exists! Enable OVERWRITE_OR_IGNORE option to force writing", file_path);
  }
  if (!fs.DirectoryExists(file_path)) {
  fs.CreateDirectory(file_path);
- } else if (!allow_overwrite) {
+ } else if (!overwrite_or_ignore) {
  idx_t n_files = 0;
  fs.ListFiles(
  file_path, [&n_files](const string &path, bool) { n_files++; }, FileOpener::Get(context));
  if (n_files > 0) {
- throw IOException("Directory %s is not empty! Enable ALLOW_OVERWRITE option to force writing",
+ throw IOException("Directory %s is not empty! Enable OVERWRITE_OR_IGNORE option to force writing",
  file_path);
  }
  }
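
Note on the hunks above: ALLOW_OVERWRITE is renamed to OVERWRITE_OR_IGNORE, and output file names now come from the new FilenamePattern class (filename_pattern.cpp/hpp in the file list). A hedged sketch of how this surfaces through the embedded C++ API; the FILENAME_PATTERN tokens {i} (per-writer index) and {uuid} are assumptions based on the new uuid.hpp include, so check the COPY documentation for the exact syntax:

#include "duckdb.hpp"
#include <cstdio>

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	con.Query("CREATE TABLE t AS SELECT range AS i, range % 2 AS p FROM range(10)");
	// hive-partitioned write; OVERWRITE_OR_IGNORE replaces the old ALLOW_OVERWRITE
	auto res = con.Query("COPY t TO 'out' (FORMAT PARQUET, PARTITION_BY (p), "
	                     "OVERWRITE_OR_IGNORE 1, FILENAME_PATTERN 'data_{i}')");
	if (res->HasError()) {
		// without OVERWRITE_OR_IGNORE, a second run fails here because the
		// 'out' directory already exists and is not empty
		printf("%s\n", res->GetError().c_str());
	}
}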
package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp
@@ -1,10 +1,11 @@
  #include "duckdb/execution/operator/persistent/physical_delete.hpp"

+ #include "duckdb/common/atomic.hpp"
+ #include "duckdb/common/types/column/column_data_collection.hpp"
  #include "duckdb/execution/expression_executor.hpp"
  #include "duckdb/storage/data_table.hpp"
- #include "duckdb/transaction/duck_transaction.hpp"
- #include "duckdb/common/types/column_data_collection.hpp"
  #include "duckdb/storage/table/scan_state.hpp"
+ #include "duckdb/transaction/duck_transaction.hpp"

  namespace duckdb {

package/src/duckdb/src/execution/operator/persistent/physical_export.cpp
@@ -2,6 +2,7 @@

  #include "duckdb/catalog/catalog.hpp"
  #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
+ #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
  #include "duckdb/common/file_system.hpp"
  #include "duckdb/common/string_util.hpp"
  #include "duckdb/parallel/meta_pipeline.hpp"
@@ -16,12 +17,12 @@ namespace duckdb {

  using std::stringstream;

- static void WriteCatalogEntries(stringstream &ss, vector<CatalogEntry *> &entries) {
+ static void WriteCatalogEntries(stringstream &ss, vector<reference<CatalogEntry>> &entries) {
  for (auto &entry : entries) {
- if (entry->internal) {
+ if (entry.get().internal) {
  continue;
  }
- ss << entry->ToSQL() << std::endl;
+ ss << entry.get().ToSQL() << std::endl;
  }
  ss << std::endl;
  }
@@ -42,8 +43,8 @@ static void WriteValueAsSQL(stringstream &ss, Value &val) {
  }
  }

- static void WriteCopyStatement(FileSystem &fs, stringstream &ss, TableCatalogEntry *table, CopyInfo &info,
- ExportedTableData &exported_table, CopyFunction const &function) {
+ static void WriteCopyStatement(FileSystem &fs, stringstream &ss, CopyInfo &info, ExportedTableData &exported_table,
+ CopyFunction const &function) {
  ss << "COPY ";

  if (exported_table.schema_name != DEFAULT_SCHEMA) {
@@ -107,52 +108,53 @@ void PhysicalExport::GetData(ExecutionContext &context, DataChunk &chunk, Global
  auto *opener = FileSystem::GetFileOpener(ccontext);

  // gather all catalog types to export
- vector<CatalogEntry *> schemas;
- vector<CatalogEntry *> custom_types;
- vector<CatalogEntry *> sequences;
- vector<CatalogEntry *> tables;
- vector<CatalogEntry *> views;
- vector<CatalogEntry *> indexes;
- vector<CatalogEntry *> macros;
+ vector<reference<CatalogEntry>> schemas;
+ vector<reference<CatalogEntry>> custom_types;
+ vector<reference<CatalogEntry>> sequences;
+ vector<reference<CatalogEntry>> tables;
+ vector<reference<CatalogEntry>> views;
+ vector<reference<CatalogEntry>> indexes;
+ vector<reference<CatalogEntry>> macros;

  auto schema_list = Catalog::GetSchemas(ccontext, info->catalog);
  for (auto &schema : schema_list) {
  if (!schema->internal) {
- schemas.push_back(schema);
+ schemas.push_back(*schema);
  }
  schema->Scan(context.client, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) {
  if (entry->internal) {
  return;
  }
  if (entry->type != CatalogType::TABLE_ENTRY) {
- views.push_back(entry);
+ views.push_back(*entry);
  }
  });
  schema->Scan(context.client, CatalogType::SEQUENCE_ENTRY,
- [&](CatalogEntry *entry) { sequences.push_back(entry); });
+ [&](CatalogEntry *entry) { sequences.push_back(*entry); });
  schema->Scan(context.client, CatalogType::TYPE_ENTRY,
- [&](CatalogEntry *entry) { custom_types.push_back(entry); });
- schema->Scan(context.client, CatalogType::INDEX_ENTRY, [&](CatalogEntry *entry) { indexes.push_back(entry); });
+ [&](CatalogEntry *entry) { custom_types.push_back(*entry); });
+ schema->Scan(context.client, CatalogType::INDEX_ENTRY, [&](CatalogEntry *entry) { indexes.push_back(*entry); });
  schema->Scan(context.client, CatalogType::MACRO_ENTRY, [&](CatalogEntry *entry) {
  if (!entry->internal && entry->type == CatalogType::MACRO_ENTRY) {
- macros.push_back(entry);
+ macros.push_back(*entry);
  }
  });
  schema->Scan(context.client, CatalogType::TABLE_MACRO_ENTRY, [&](CatalogEntry *entry) {
  if (!entry->internal && entry->type == CatalogType::TABLE_MACRO_ENTRY) {
- macros.push_back(entry);
+ macros.push_back(*entry);
  }
  });
  }

  // consider the order of tables because of foreign key constraint
  for (idx_t i = 0; i < exported_tables.data.size(); i++) {
- tables.push_back((CatalogEntry *)exported_tables.data[i].entry);
+ tables.push_back(exported_tables.data[i].entry);
  }

  // order macro's by timestamp so nested macro's are imported nicely
- sort(macros.begin(), macros.end(),
- [](const CatalogEntry *lhs, const CatalogEntry *rhs) { return lhs->oid < rhs->oid; });
+ sort(macros.begin(), macros.end(), [](const reference<CatalogEntry> &lhs, const reference<CatalogEntry> &rhs) {
+ return lhs.get().oid < rhs.get().oid;
+ });

  // write the schema.sql file
  // export order is SCHEMA -> SEQUENCE -> TABLE -> VIEW -> INDEX
@@ -172,9 +174,8 @@ void PhysicalExport::GetData(ExecutionContext &context, DataChunk &chunk, Global
  // for every table, we write COPY INTO statement with the specified options
  stringstream load_ss;
  for (idx_t i = 0; i < exported_tables.data.size(); i++) {
- auto &table = exported_tables.data[i].entry;
  auto exported_table_info = exported_tables.data[i].table_data;
- WriteCopyStatement(fs, load_ss, table, *info, exported_table_info, function);
+ WriteCopyStatement(fs, load_ss, *info, exported_table_info, function);
  }
  WriteStringStreamToFile(fs, opener, load_ss, fs.JoinPath(info->file_path, "load.sql"));
  state.finished = true;
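
Note on the hunks above: the export path now collects vector<reference<CatalogEntry>> instead of vector<CatalogEntry *>, making it explicit that entries are non-owning and never null. Assuming duckdb's reference<T> is an alias for std::reference_wrapper<T> (consistent with the .get() calls above), the idiom boils down to this self-contained sketch:

#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

struct EntryLike { // stand-in for CatalogEntry
	int oid;
	bool internal;
};

int main() {
	std::vector<EntryLike> storage = {{3, false}, {1, false}, {2, true}};
	std::vector<std::reference_wrapper<EntryLike>> macros;
	for (auto &entry : storage) {
		if (!entry.internal) {
			macros.push_back(entry); // wraps in place, like macros.push_back(*entry)
		}
	}
	// same sort-by-oid idiom as the rewritten PhysicalExport::GetData
	std::sort(macros.begin(), macros.end(),
	          [](const std::reference_wrapper<EntryLike> &lhs,
	             const std::reference_wrapper<EntryLike> &rhs) { return lhs.get().oid < rhs.get().oid; });
	for (auto &m : macros) {
		std::cout << m.get().oid << "\n"; // prints 1, then 3
	}
}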
package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp
@@ -1,7 +1,7 @@
  #include "duckdb/execution/operator/persistent/physical_insert.hpp"
  #include "duckdb/parallel/thread_context.hpp"
  #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
- #include "duckdb/common/types/column_data_collection.hpp"
+ #include "duckdb/common/types/column/column_data_collection.hpp"
  #include "duckdb/common/vector_operations/vector_operations.hpp"
  #include "duckdb/execution/expression_executor.hpp"
  #include "duckdb/storage/data_table.hpp"
package/src/duckdb/src/execution/operator/persistent/physical_update.cpp
@@ -1,12 +1,13 @@
  #include "duckdb/execution/operator/persistent/physical_update.hpp"
- #include "duckdb/parallel/thread_context.hpp"
+
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
- #include "duckdb/common/types/column_data_collection.hpp"
+ #include "duckdb/common/types/column/column_data_collection.hpp"
  #include "duckdb/common/vector_operations/vector_operations.hpp"
  #include "duckdb/execution/expression_executor.hpp"
+ #include "duckdb/main/client_context.hpp"
+ #include "duckdb/parallel/thread_context.hpp"
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
  #include "duckdb/storage/data_table.hpp"
- #include "duckdb/main/client_context.hpp"

  namespace duckdb {

package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp
@@ -103,7 +103,7 @@ idx_t PhysicalTableScan::GetBatchIndex(ExecutionContext &context, DataChunk &chu
  }

  string PhysicalTableScan::GetName() const {
- return StringUtil::Upper(function.name);
+ return StringUtil::Upper(function.name + " " + function.extra_info);
  }

  string PhysicalTableScan::ParamsToString() const {
package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp
@@ -1,7 +1,7 @@
  #include "duckdb/execution/operator/schema/physical_create_type.hpp"

  #include "duckdb/catalog/catalog.hpp"
- #include "duckdb/common/types/column_data_collection.hpp"
+ #include "duckdb/common/types/column/column_data_collection.hpp"

  namespace duckdb {

package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp
@@ -1,6 +1,6 @@
  #include "duckdb/execution/operator/set/physical_recursive_cte.hpp"

- #include "duckdb/common/types/column_data_collection.hpp"
+ #include "duckdb/common/types/column/column_data_collection.hpp"
  #include "duckdb/common/vector_operations/vector_operations.hpp"
  #include "duckdb/execution/aggregate_hashtable.hpp"
  #include "duckdb/execution/executor.hpp"
@@ -183,12 +183,12 @@ void PhysicalRecursiveCTE::BuildPipelines(Pipeline &current, MetaPipeline &meta_

  // the LHS of the recursive CTE is our initial state
  auto initial_state_pipeline = meta_pipeline.CreateChildMetaPipeline(current, this);
- initial_state_pipeline->Build(children[0].get());
+ initial_state_pipeline->Build(*children[0]);

  // the RHS is the recursive pipeline
  recursive_meta_pipeline = make_shared<MetaPipeline>(executor, state, this);
  recursive_meta_pipeline->SetRecursiveCTE();
- recursive_meta_pipeline->Build(children[1].get());
+ recursive_meta_pipeline->Build(*children[1]);
  }

  vector<const PhysicalOperator *> PhysicalRecursiveCTE::GetSources() const {
package/src/duckdb/src/execution/partitionable_hashtable.cpp
@@ -1,46 +1,17 @@
  #include "duckdb/execution/partitionable_hashtable.hpp"

- namespace duckdb {
-
- static idx_t PartitionInfoNPartitions(const idx_t n_partitions_upper_bound) {
- idx_t n_partitions = 1;
- while (n_partitions <= n_partitions_upper_bound / 2) {
- n_partitions *= 2;
- if (n_partitions >= 256) {
- break;
- }
- }
- return n_partitions;
- }
-
- static idx_t PartitionInfoRadixBits(const idx_t n_partitions) {
- idx_t radix_bits = 0;
- auto radix_partitions_copy = n_partitions;
- while (radix_partitions_copy - 1) {
- radix_bits++;
- radix_partitions_copy >>= 1;
- }
- return radix_bits;
- }
+ #include "duckdb/common/radix_partitioning.hpp"

- static hash_t PartitionInfoRadixMask(const idx_t radix_bits, const idx_t radix_shift) {
- hash_t radix_mask = 0;
- // we use the fifth byte of the 64 bit hash as radix source
- for (idx_t i = 0; i < radix_bits; i++) {
- radix_mask = (radix_mask << 1) | 1;
- }
- radix_mask <<= radix_shift;
- return radix_mask;
- }
+ namespace duckdb {

  RadixPartitionInfo::RadixPartitionInfo(const idx_t n_partitions_upper_bound)
- : n_partitions(PartitionInfoNPartitions(n_partitions_upper_bound)),
- radix_bits(PartitionInfoRadixBits(n_partitions)), radix_mask(PartitionInfoRadixMask(radix_bits, RADIX_SHIFT)) {
+ : n_partitions(PreviousPowerOfTwo(n_partitions_upper_bound)),
+ radix_bits(RadixPartitioning::RadixBits(n_partitions)), radix_mask(RadixPartitioning::Mask(radix_bits)),
+ radix_shift(RadixPartitioning::Shift(radix_bits)) {

- // finalize_threads needs to be a power of 2
  D_ASSERT(n_partitions > 0);
  D_ASSERT(n_partitions <= 256);
- D_ASSERT((n_partitions & (n_partitions - 1)) == 0);
+ D_ASSERT(IsPowerOfTwo(n_partitions));
  D_ASSERT(radix_bits <= 8);
  }

@@ -80,7 +51,7 @@ idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &group
  DataChunk &payload, const vector<idx_t> &filter) {
  // If this is false, a single AddChunk would overflow the max capacity
  D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
- if (list.empty() || list.back()->Size() + groups.size() >= list.back()->MaxCapacity()) {
+ if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity()) {
  idx_t new_capacity = GroupedAggregateHashTable::InitialCapacity();
  if (!list.empty()) {
  new_capacity = list.back()->Capacity();
@@ -159,7 +130,7 @@ void PartitionableHashTable::Partition() {
  context, allocator, group_types, payload_types, bindings, GetHTEntrySize()));
  partition_hts[r] = radix_partitioned_hts[r].back().get();
  }
- unpartitioned_ht->Partition(partition_hts, partition_info.radix_mask, partition_info.RADIX_SHIFT);
+ unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits);
  unpartitioned_ht.reset();
  }
  unpartitioned_hts.clear();
@@ -176,6 +147,7 @@ HashTableList PartitionableHashTable::GetPartition(idx_t partition) {
  D_ASSERT(radix_partitioned_hts.size() > partition);
  return std::move(radix_partitioned_hts[partition]);
  }
+
  HashTableList PartitionableHashTable::GetUnpartitioned() {
  D_ASSERT(!IsPartitioned());
  return std::move(unpartitioned_hts);
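
Note on the hunks above: the hand-rolled partition-count/bits/mask helpers are replaced by the shared RadixPartitioning utilities (radix_partitioning.cpp/hpp in the file list). A sketch of the mask/shift arithmetic being centralized; the shift constant below is illustrative (the removed helper shifted the mask up by a fixed RADIX_SHIFT so that an upper byte of the 64-bit hash selects the partition), not necessarily the library's actual value:

#include <cassert>
#include <cstdint>
#include <cstdio>

static constexpr uint64_t kRadixShift = 24; // assumed, for illustration only

// ((1 << radix_bits) - 1) << shift: radix_bits consecutive hash bits
static uint64_t RadixMask(uint64_t radix_bits) {
	return ((uint64_t(1) << radix_bits) - 1) << kRadixShift;
}

// extract the partition index from a hash, as the partitioning consumers do
static uint64_t PartitionIndex(uint64_t hash, uint64_t radix_bits) {
	return (hash & RadixMask(radix_bits)) >> kRadixShift;
}

int main() {
	const uint64_t radix_bits = 4; // 2^4 = 16 partitions, a power of two by construction
	const uint64_t hash = 0xDEADBEEFCAFEF00DULL;
	const uint64_t part = PartitionIndex(hash, radix_bits);
	assert(part < (uint64_t(1) << radix_bits));
	printf("hash %llx -> partition %llu of 16\n", (unsigned long long)hash, (unsigned long long)part);
}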
package/src/duckdb/src/execution/physical_operator.cpp
@@ -138,7 +138,7 @@ void PhysicalOperator::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipe

  // we create a new pipeline starting from the child
  auto child_meta_pipeline = meta_pipeline.CreateChildMetaPipeline(current, this);
- child_meta_pipeline->Build(children[0].get());
+ child_meta_pipeline->Build(*children[0]);
  } else {
  // operator is not a sink! recurse in children
  if (children.empty()) {