duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/read_csv.cpp +124 -58
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/main/settings/settings.cpp +3 -4
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp:

```diff
@@ -25,8 +25,9 @@
 namespace duckdb {
 
 ParallelCSVReader::ParallelCSVReader(ClientContext &context, BufferedCSVReaderOptions options_p,
-                                     unique_ptr<CSVBufferRead> buffer_p,
-
+                                     unique_ptr<CSVBufferRead> buffer_p, idx_t first_pos_first_buffer_p,
+                                     const vector<LogicalType> &requested_types)
+    : BaseCSVReader(context, std::move(options_p), requested_types), first_pos_first_buffer(first_pos_first_buffer_p) {
     Initialize(requested_types);
     SetBufferRead(std::move(buffer_p));
     if (options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1) {
@@ -52,9 +53,34 @@ bool ParallelCSVReader::NewLineDelimiter(bool carry, bool carry_followed_by_nl,
     return (carry && carry_followed_by_nl) || (!carry && first_char);
 }
 
+void ParallelCSVReader::SkipEmptyLines() {
+    idx_t new_pos_buffer = position_buffer;
+    if (parse_chunk.data.size() == 1) {
+        // Empty lines are null data.
+        return;
+    }
+    for (; new_pos_buffer < end_buffer; new_pos_buffer++) {
+        if (StringUtil::CharacterIsNewline((*buffer)[new_pos_buffer])) {
+            bool carrier_return = (*buffer)[new_pos_buffer] == '\r';
+            new_pos_buffer++;
+            if (carrier_return && new_pos_buffer < buffer_size && (*buffer)[new_pos_buffer] == '\n') {
+                position_buffer++;
+            }
+            if (new_pos_buffer > end_buffer) {
+                return;
+            }
+            position_buffer = new_pos_buffer;
+        } else if ((*buffer)[new_pos_buffer] != ' ') {
+            return;
+        }
+    }
+}
+
 bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
     if (buffer->buffer->IsCSVFileFirstBuffer() && start_buffer == position_buffer &&
-        start_buffer ==
+        start_buffer == first_pos_first_buffer) {
+        start_buffer = buffer->buffer->GetStart();
+        position_buffer = start_buffer;
         verification_positions.beginning_of_first_line = position_buffer;
         verification_positions.end_of_last_line = position_buffer;
         // First buffer doesn't need any setting
@@ -70,11 +96,23 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
                 if (position_buffer > end_buffer) {
                     return false;
                 }
+                SkipEmptyLines();
+                if (verification_positions.beginning_of_first_line == 0) {
+                    verification_positions.beginning_of_first_line = position_buffer;
+                }
+
+                verification_positions.end_of_last_line = position_buffer;
                 return true;
             }
         }
         return false;
     }
+    SkipEmptyLines();
+    if (verification_positions.beginning_of_first_line == 0) {
+        verification_positions.beginning_of_first_line = position_buffer;
+    }
+
+    verification_positions.end_of_last_line = position_buffer;
     return true;
 }
 
@@ -102,6 +140,11 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
             }
         }
     }
+    SkipEmptyLines();
+
+    if (position_buffer > buffer_size) {
+        break;
+    }
 
     if (position_buffer >= end_buffer && !StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1])) {
         break;
@@ -113,18 +156,20 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
     }
     idx_t position_set = position_buffer;
     start_buffer = position_buffer;
-
     // We check if we can add this line
     // disable the projection pushdown while reading the first line
     // otherwise the first line parsing can be influenced by which columns we are reading
     auto column_ids = std::move(reader_data.column_ids);
     auto column_mapping = std::move(reader_data.column_mapping);
     InitializeProjection();
-
+    try {
+        successfully_read_first_line = TryParseSimpleCSV(first_line_chunk, error_message, true);
+    } catch (...) {
+        successfully_read_first_line = false;
+    }
     // restore the projection pushdown
     reader_data.column_ids = std::move(column_ids);
     reader_data.column_mapping = std::move(column_mapping);
-
     end_buffer = end_buffer_real;
     start_buffer = position_set;
     if (position_buffer >= end_buffer) {
@@ -190,27 +235,55 @@ bool ParallelCSVReader::BufferRemainder() {
     return true;
 }
 
+void VerifyLineLength(idx_t line_size, idx_t max_line_size) {
+    if (line_size > max_line_size) {
+        // FIXME: this should also output the correct estimated linenumber where it broke
+        throw InvalidInputException("Maximum line size of %llu bytes exceeded!", max_line_size);
+    }
+}
+
+bool AllNewLine(string_t value, idx_t column_amount) {
+    auto value_str = value.GetString();
+    if (value_str.empty() && column_amount == 1) {
+        // This is a one column (empty)
+        return false;
+    }
+    for (idx_t i = 0; i < value.GetSize(); i++) {
+        if (!StringUtil::CharacterIsNewline(value_str[i])) {
+            return false;
+        }
+    }
+    return true;
+}
+
 bool ParallelCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message, bool try_add_line) {
     // used for parsing algorithm
+    if (start_buffer == buffer_size) {
+        // Nothing to read
+        finished = true;
+        return true;
+    }
     D_ASSERT(end_buffer <= buffer_size);
     bool finished_chunk = false;
     idx_t column = 0;
     idx_t offset = 0;
     bool has_quotes = false;
+
    vector<idx_t> escape_positions;
     if ((start_buffer == buffer->buffer_start || start_buffer == buffer->buffer_end) && !try_add_line) {
         // First time reading this buffer piece
         if (!SetPosition(insert_chunk)) {
-            // This means the buffer size does not contain a new line
-            if (position_buffer - start_buffer == options.buffer_size) {
-                error_message = "Line does not fit in one buffer. Increase the buffer size.";
-                return false;
-            }
             finished = true;
             return true;
         }
     }
-
+    if (position_buffer == buffer_size) {
+        // Nothing to read
+        finished = true;
+        return true;
+    }
+    // Keep track of line size
+    idx_t line_start = position_buffer;
     // start parsing the first value
     goto value_start;
 
@@ -242,11 +315,16 @@ normal : {
     if (c == options.delimiter[0]) {
         // delimiter: end the value and add it to the chunk
         goto add_value;
+    } else if (c == options.quote[0] && try_add_line) {
+        return false;
     } else if (StringUtil::CharacterIsNewline(c)) {
         // newline: add row
-        if (column > 0 || try_add_line ||
+        if (column > 0 || try_add_line || parse_chunk.data.size() == 1) {
             goto add_row;
         }
+        if (column == 0 && position_buffer == start_buffer) {
+            start_buffer++;
+        }
     }
 }
 if (!BufferRemainder()) {
@@ -285,12 +363,15 @@ add_row : {
         parse_chunk.Reset();
         return success;
     } else {
+        VerifyLineLength(position_buffer - line_start, options.maximum_line_size);
+        line_start = position_buffer;
         finished_chunk = AddRow(insert_chunk, column, error_message);
     }
     // increase position by 1 and move start to the new position
     offset = 0;
     has_quotes = false;
-
+    position_buffer++;
+    start_buffer = position_buffer;
     verification_positions.end_of_last_line = position_buffer;
     if (reached_remainder_state) {
         goto final_state;
@@ -309,7 +390,10 @@ add_row : {
     // newline after carriage return: skip
     // increase position by 1 and move start to the new position
     start_buffer = ++position_buffer;
+
+    SkipEmptyLines();
     verification_positions.end_of_last_line = position_buffer;
+    start_buffer = position_buffer;
     if (reached_remainder_state) {
         goto final_state;
     }
@@ -331,6 +415,9 @@ add_row : {
     error_message = "Wrong NewLine Identifier. Expecting \\r or \\n";
     return false;
 }
+SkipEmptyLines();
+verification_positions.end_of_last_line = position_buffer;
+start_buffer = position_buffer;
 // \n newline, move to value start
 if (finished_chunk) {
     goto final_state;
@@ -391,7 +478,7 @@ unquote : {
 } else if (StringUtil::CharacterIsNewline(c)) {
     offset = 1;
     // FIXME: should this be an assertion?
-    D_ASSERT(column == parse_chunk.ColumnCount() - 1);
+    D_ASSERT(try_add_line || (!try_add_line && column == parse_chunk.ColumnCount() - 1));
     goto add_row;
 } else if (position_buffer >= end_buffer) {
     // reached end of buffer
@@ -448,22 +535,27 @@ final_state : {
 }
 // If this is the last buffer, we have to read the last value
 if (buffer->buffer->IsCSVFileLastBuffer() || (buffer->next_buffer && buffer->next_buffer->IsCSVFileLastBuffer())) {
-    if (column > 0 ||
+    if (column > 0 || start_buffer != position_buffer || try_add_line ||
+        (insert_chunk.data.size() == 1 && start_buffer != position_buffer)) {
         // remaining values to be added to the chunk
         auto str_value = buffer->GetValue(start_buffer, position_buffer, offset);
-
-
-
-
+        if (!AllNewLine(str_value, insert_chunk.data.size()) || offset == 0) {
+            AddValue(str_value, column, escape_positions, has_quotes);
+            if (try_add_line) {
+                bool success = column == return_types.size();
+                if (success) {
+                    AddRow(insert_chunk, column, error_message);
+                    success = Flush(insert_chunk);
+                }
+                parse_chunk.Reset();
+                reached_remainder_state = false;
+                return success;
+            } else {
+                VerifyLineLength(position_buffer - line_start, options.maximum_line_size);
+                line_start = position_buffer;
                 AddRow(insert_chunk, column, error_message);
-
+                verification_positions.end_of_last_line = position_buffer;
             }
-        parse_chunk.Reset();
-        reached_remainder_state = false;
-        return success;
-    } else {
-        AddRow(insert_chunk, column, error_message);
-        verification_positions.end_of_last_line = position_buffer;
         }
     }
 }
@@ -471,11 +563,14 @@ final_state : {
 if (mode == ParserMode::PARSING) {
     Flush(insert_chunk);
 }
-if (position_buffer
-    !StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1])) {
+if (position_buffer - verification_positions.end_of_last_line > options.buffer_size) {
     error_message = "Line does not fit in one buffer. Increase the buffer size.";
     return false;
 }
+end_buffer = buffer_size;
+SkipEmptyLines();
+end_buffer = buffer->buffer_end;
+verification_positions.end_of_last_line = position_buffer;
 if (position_buffer >= end_buffer) {
     if (position_buffer >= end_buffer) {
         if (position_buffer == end_buffer && StringUtil::CharacterIsNewline((*buffer)[position_buffer - 1]) &&
```
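Most of the churn in this file is the new `SkipEmptyLines()` plumbing: wherever a row ends, the reader now also consumes `\r\n` pairs and blank (space-only) lines before parsing resumes. Below is a self-contained sketch of just that scanning idea, with hypothetical buffer/position names rather than the reader's actual chunk and verification state:

```cpp
#include <cstddef>
#include <cstring>
#include <iostream>

// Illustrative only: advance `pos` past empty lines in buf[0, end),
// mirroring the \r, \r\n, \n, and blank-line handling in the hunks above.
static void SkipEmptyLines(const char *buf, std::size_t end, std::size_t &pos) {
    std::size_t p = pos;
    while (p < end) {
        char c = buf[p];
        if (c == '\r') {
            p++;
            if (p < end && buf[p] == '\n') {
                p++; // a \r\n pair counts as a single newline
            }
            pos = p; // consumed an empty line
        } else if (c == '\n') {
            pos = ++p;
        } else if (c == ' ') {
            p++; // line is blank so far; keep scanning
        } else {
            return; // real data starts here; pos stays at its last line start
        }
    }
}

int main() {
    const char *buf = "\r\n\n   \nvalue,1\n";
    std::size_t pos = 0;
    SkipEmptyLines(buf, std::strlen(buf), pos);
    std::cout << (buf + pos); // prints the first real data line: "value,1"
}
```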
package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp:

```diff
@@ -3,6 +3,8 @@
 #include "duckdb/common/hive_partitioning.hpp"
 #include "duckdb/common/file_system.hpp"
 #include "duckdb/common/file_opener.hpp"
+#include "duckdb/common/types/uuid.hpp"
+#include "duckdb/common/string_util.hpp"
 
 #include <algorithm>
 
@@ -40,6 +42,7 @@ public:
 //===--------------------------------------------------------------------===//
 // Sink
 //===--------------------------------------------------------------------===//
+
 void MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
     auto &fs = FileSystem::GetFileSystem(context);
     auto file_path = tmp_file_path.substr(0, tmp_file_path.length() - 4);
@@ -111,10 +114,10 @@ void PhysicalCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gst
     for (idx_t i = 0; i < partitions.size(); i++) {
         string hive_path =
             CreateDirRecursive(partition_columns, names, partition_key_map[i]->values, trimmed_path, fs);
-        string full_path
-        if (fs.FileExists(full_path) && !
+        string full_path(filename_pattern.CreateFilename(fs, hive_path, function.extension, l.writer_offset));
+        if (fs.FileExists(full_path) && !overwrite_or_ignore) {
             throw IOException("failed to create " + full_path +
-                              ", file exists! Enable
+                              ", file exists! Enable OVERWRITE_OR_IGNORE option to force writing");
         }
         // Create a writer for the current file
         auto fun_data_global = function.copy_to_initialize_global(context.client, *bind_data, full_path);
@@ -184,10 +187,9 @@ unique_ptr<LocalSinkState> PhysicalCopyToFile::GetLocalSinkState(ExecutionContex
             this_file_offset = g.last_file_offset++;
         }
         auto &fs = FileSystem::GetFileSystem(context.client);
-        string output_path
-
-
-        throw IOException("%s exists! Enable ALLOW_OVERWRITE option to force writing", output_path);
+        string output_path(filename_pattern.CreateFilename(fs, file_path, function.extension, this_file_offset));
+        if (fs.FileExists(output_path) && !overwrite_or_ignore) {
+            throw IOException("%s exists! Enable OVERWRITE_OR_IGNORE option to force writing", output_path);
         }
         res->global_state = function.copy_to_initialize_global(context.client, *bind_data, output_path);
     }
@@ -199,17 +201,17 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
     if (partition_output || per_thread_output) {
         auto &fs = FileSystem::GetFileSystem(context);
 
-        if (fs.FileExists(file_path) && !
-            throw IOException("%s exists! Enable
+        if (fs.FileExists(file_path) && !overwrite_or_ignore) {
+            throw IOException("%s exists! Enable OVERWRITE_OR_IGNORE option to force writing", file_path);
         }
         if (!fs.DirectoryExists(file_path)) {
             fs.CreateDirectory(file_path);
-        } else if (!
+        } else if (!overwrite_or_ignore) {
             idx_t n_files = 0;
             fs.ListFiles(
                 file_path, [&n_files](const string &path, bool) { n_files++; }, FileOpener::Get(context));
             if (n_files > 0) {
-                throw IOException("Directory %s is not empty! Enable
+                throw IOException("Directory %s is not empty! Enable OVERWRITE_OR_IGNORE option to force writing",
                                   file_path);
             }
         }
```
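These hunks route every output name through `filename_pattern.CreateFilename(fs, path, extension, offset)`, backed by the new `filename_pattern.{cpp,hpp}` files in the list above; the diff does not show that class's internals. Here is only a rough sketch of the idea, assuming a placeholder token is substituted with a per-writer offset (`SimpleFilenamePattern` and the `{i}` token are illustrative, not DuckDB's actual API):

```cpp
#include <cstddef>
#include <iostream>
#include <string>

// Hypothetical simplification: "{i}" in the pattern is replaced by a
// per-writer offset so parallel and partitioned COPY writers never collide.
struct SimpleFilenamePattern {
    std::string base = "data_{i}"; // assumed default pattern

    std::string CreateFilename(const std::string &dir, const std::string &ext, std::size_t offset) const {
        std::string name = base;
        auto token = name.find("{i}");
        if (token != std::string::npos) {
            name.replace(token, 3, std::to_string(offset));
        }
        return dir + "/" + name + "." + ext;
    }
};

int main() {
    SimpleFilenamePattern pattern;
    // Each writer thread/partition passes its own offset
    // (cf. l.writer_offset and this_file_offset in the hunks above).
    std::cout << pattern.CreateFilename("/tmp/out", "parquet", 0) << "\n"; // /tmp/out/data_0.parquet
    std::cout << pattern.CreateFilename("/tmp/out", "parquet", 7) << "\n"; // /tmp/out/data_7.parquet
}
```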
package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp:

```diff
@@ -1,10 +1,11 @@
 #include "duckdb/execution/operator/persistent/physical_delete.hpp"
 
+#include "duckdb/common/atomic.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/storage/data_table.hpp"
-#include "duckdb/transaction/duck_transaction.hpp"
-#include "duckdb/common/types/column_data_collection.hpp"
 #include "duckdb/storage/table/scan_state.hpp"
+#include "duckdb/transaction/duck_transaction.hpp"
 
 namespace duckdb {
 
```
package/src/duckdb/src/execution/operator/persistent/physical_export.cpp:

```diff
@@ -2,6 +2,7 @@
 
 #include "duckdb/catalog/catalog.hpp"
 #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
+#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
 #include "duckdb/common/file_system.hpp"
 #include "duckdb/common/string_util.hpp"
 #include "duckdb/parallel/meta_pipeline.hpp"
@@ -16,12 +17,12 @@ namespace duckdb {
 
 using std::stringstream;
 
-static void WriteCatalogEntries(stringstream &ss, vector<CatalogEntry
+static void WriteCatalogEntries(stringstream &ss, vector<reference<CatalogEntry>> &entries) {
     for (auto &entry : entries) {
-        if (entry
+        if (entry.get().internal) {
             continue;
         }
-        ss << entry
+        ss << entry.get().ToSQL() << std::endl;
     }
     ss << std::endl;
 }
@@ -42,8 +43,8 @@ static void WriteValueAsSQL(stringstream &ss, Value &val) {
     }
 }
 
-static void WriteCopyStatement(FileSystem &fs, stringstream &ss,
-
+static void WriteCopyStatement(FileSystem &fs, stringstream &ss, CopyInfo &info, ExportedTableData &exported_table,
+                               CopyFunction const &function) {
     ss << "COPY ";
 
     if (exported_table.schema_name != DEFAULT_SCHEMA) {
@@ -107,52 +108,53 @@ void PhysicalExport::GetData(ExecutionContext &context, DataChunk &chunk, Global
     auto *opener = FileSystem::GetFileOpener(ccontext);
 
     // gather all catalog types to export
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
-    vector<CatalogEntry
+    vector<reference<CatalogEntry>> schemas;
+    vector<reference<CatalogEntry>> custom_types;
+    vector<reference<CatalogEntry>> sequences;
+    vector<reference<CatalogEntry>> tables;
+    vector<reference<CatalogEntry>> views;
+    vector<reference<CatalogEntry>> indexes;
+    vector<reference<CatalogEntry>> macros;
 
     auto schema_list = Catalog::GetSchemas(ccontext, info->catalog);
     for (auto &schema : schema_list) {
         if (!schema->internal) {
-            schemas.push_back(schema);
+            schemas.push_back(*schema);
         }
         schema->Scan(context.client, CatalogType::TABLE_ENTRY, [&](CatalogEntry *entry) {
             if (entry->internal) {
                 return;
             }
             if (entry->type != CatalogType::TABLE_ENTRY) {
-                views.push_back(entry);
+                views.push_back(*entry);
             }
         });
         schema->Scan(context.client, CatalogType::SEQUENCE_ENTRY,
-                     [&](CatalogEntry *entry) { sequences.push_back(entry); });
+                     [&](CatalogEntry *entry) { sequences.push_back(*entry); });
         schema->Scan(context.client, CatalogType::TYPE_ENTRY,
-                     [&](CatalogEntry *entry) { custom_types.push_back(entry); });
-        schema->Scan(context.client, CatalogType::INDEX_ENTRY, [&](CatalogEntry *entry) { indexes.push_back(entry); });
+                     [&](CatalogEntry *entry) { custom_types.push_back(*entry); });
+        schema->Scan(context.client, CatalogType::INDEX_ENTRY, [&](CatalogEntry *entry) { indexes.push_back(*entry); });
         schema->Scan(context.client, CatalogType::MACRO_ENTRY, [&](CatalogEntry *entry) {
             if (!entry->internal && entry->type == CatalogType::MACRO_ENTRY) {
-                macros.push_back(entry);
+                macros.push_back(*entry);
             }
         });
         schema->Scan(context.client, CatalogType::TABLE_MACRO_ENTRY, [&](CatalogEntry *entry) {
             if (!entry->internal && entry->type == CatalogType::TABLE_MACRO_ENTRY) {
-                macros.push_back(entry);
+                macros.push_back(*entry);
             }
         });
     }
 
     // consider the order of tables because of foreign key constraint
     for (idx_t i = 0; i < exported_tables.data.size(); i++) {
-        tables.push_back(
+        tables.push_back(exported_tables.data[i].entry);
     }
 
     // order macro's by timestamp so nested macro's are imported nicely
-    sort(macros.begin(), macros.end(),
-
+    sort(macros.begin(), macros.end(), [](const reference<CatalogEntry> &lhs, const reference<CatalogEntry> &rhs) {
+        return lhs.get().oid < rhs.get().oid;
+    });
 
     // write the schema.sql file
     // export order is SCHEMA -> SEQUENCE -> TABLE -> VIEW -> INDEX
@@ -172,9 +174,8 @@ void PhysicalExport::GetData(ExecutionContext &context, DataChunk &chunk, Global
     // for every table, we write COPY INTO statement with the specified options
     stringstream load_ss;
     for (idx_t i = 0; i < exported_tables.data.size(); i++) {
-        auto &table = exported_tables.data[i].entry;
         auto exported_table_info = exported_tables.data[i].table_data;
-        WriteCopyStatement(fs, load_ss,
+        WriteCopyStatement(fs, load_ss, *info, exported_table_info, function);
     }
     WriteStringStreamToFile(fs, opener, load_ss, fs.JoinPath(info->file_path, "load.sql"));
     state.finished = true;
```
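The recurring change above is `vector<CatalogEntry *>` becoming `vector<reference<CatalogEntry>>`, with `*entry` on insert and `.get()` on access; DuckDB's `reference<T>` comes from the new `reference_map.hpp` header in the file list. The same pattern, sketched with the standard library's wrapper and a hypothetical `Entry` type standing in for `CatalogEntry`:

```cpp
#include <algorithm>
#include <functional> // std::reference_wrapper
#include <iostream>
#include <vector>

// Hypothetical stand-in for CatalogEntry; a reference is non-null by construction.
struct Entry {
    int oid;
    bool internal;
};

int main() {
    Entry a{2, false}, b{1, false}, c{3, true};
    // Unlike plain T&, reference_wrapper is copyable and can live in a vector.
    std::vector<std::reference_wrapper<Entry>> macros{a, b, c};
    // Same sort-by-oid idiom as the macro-ordering hunk above.
    std::sort(macros.begin(), macros.end(),
              [](const std::reference_wrapper<Entry> &lhs, const std::reference_wrapper<Entry> &rhs) {
                  return lhs.get().oid < rhs.get().oid;
              });
    for (auto &entry : macros) {
        if (entry.get().internal) {
            continue; // mirrors the entry.get().internal check
        }
        std::cout << entry.get().oid << "\n"; // prints 1, then 2
    }
}
```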
package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp:

```diff
@@ -1,7 +1,7 @@
 #include "duckdb/execution/operator/persistent/physical_insert.hpp"
 #include "duckdb/parallel/thread_context.hpp"
 #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
-#include "duckdb/common/types/column_data_collection.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/storage/data_table.hpp"
```
package/src/duckdb/src/execution/operator/persistent/physical_update.cpp:

```diff
@@ -1,12 +1,13 @@
 #include "duckdb/execution/operator/persistent/physical_update.hpp"
-
+
 #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
-#include "duckdb/common/types/column_data_collection.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/main/client_context.hpp"
+#include "duckdb/parallel/thread_context.hpp"
 #include "duckdb/planner/expression/bound_reference_expression.hpp"
 #include "duckdb/storage/data_table.hpp"
-#include "duckdb/main/client_context.hpp"
 
 namespace duckdb {
 
```
package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp:

```diff
@@ -103,7 +103,7 @@ idx_t PhysicalTableScan::GetBatchIndex(ExecutionContext &context, DataChunk &chu
 }
 
 string PhysicalTableScan::GetName() const {
-    return StringUtil::Upper(function.name);
+    return StringUtil::Upper(function.name + " " + function.extra_info);
 }
 
 string PhysicalTableScan::ParamsToString() const {
```
package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp:

```diff
@@ -1,6 +1,6 @@
 #include "duckdb/execution/operator/set/physical_recursive_cte.hpp"
 
-#include "duckdb/common/types/column_data_collection.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/execution/aggregate_hashtable.hpp"
 #include "duckdb/execution/executor.hpp"
@@ -183,12 +183,12 @@ void PhysicalRecursiveCTE::BuildPipelines(Pipeline &current, MetaPipeline &meta_
 
     // the LHS of the recursive CTE is our initial state
     auto initial_state_pipeline = meta_pipeline.CreateChildMetaPipeline(current, this);
-    initial_state_pipeline->Build(children[0]
+    initial_state_pipeline->Build(*children[0]);
 
     // the RHS is the recursive pipeline
     recursive_meta_pipeline = make_shared<MetaPipeline>(executor, state, this);
     recursive_meta_pipeline->SetRecursiveCTE();
-    recursive_meta_pipeline->Build(children[1]
+    recursive_meta_pipeline->Build(*children[1]);
 }
 
 vector<const PhysicalOperator *> PhysicalRecursiveCTE::GetSources() const {
```
package/src/duckdb/src/execution/partitionable_hashtable.cpp:

```diff
@@ -1,46 +1,17 @@
 #include "duckdb/execution/partitionable_hashtable.hpp"
 
-
-
-static idx_t PartitionInfoNPartitions(const idx_t n_partitions_upper_bound) {
-    idx_t n_partitions = 1;
-    while (n_partitions <= n_partitions_upper_bound / 2) {
-        n_partitions *= 2;
-        if (n_partitions >= 256) {
-            break;
-        }
-    }
-    return n_partitions;
-}
-
-static idx_t PartitionInfoRadixBits(const idx_t n_partitions) {
-    idx_t radix_bits = 0;
-    auto radix_partitions_copy = n_partitions;
-    while (radix_partitions_copy - 1) {
-        radix_bits++;
-        radix_partitions_copy >>= 1;
-    }
-    return radix_bits;
-}
+#include "duckdb/common/radix_partitioning.hpp"
 
-
-    hash_t radix_mask = 0;
-    // we use the fifth byte of the 64 bit hash as radix source
-    for (idx_t i = 0; i < radix_bits; i++) {
-        radix_mask = (radix_mask << 1) | 1;
-    }
-    radix_mask <<= radix_shift;
-    return radix_mask;
-}
+namespace duckdb {
 
 RadixPartitionInfo::RadixPartitionInfo(const idx_t n_partitions_upper_bound)
-    : n_partitions(
-      radix_bits(
+    : n_partitions(PreviousPowerOfTwo(n_partitions_upper_bound)),
+      radix_bits(RadixPartitioning::RadixBits(n_partitions)), radix_mask(RadixPartitioning::Mask(radix_bits)),
+      radix_shift(RadixPartitioning::Shift(radix_bits)) {
 
-    // finalize_threads needs to be a power of 2
     D_ASSERT(n_partitions > 0);
     D_ASSERT(n_partitions <= 256);
-    D_ASSERT((n_partitions
+    D_ASSERT(IsPowerOfTwo(n_partitions));
     D_ASSERT(radix_bits <= 8);
 }
 
@@ -80,7 +51,7 @@ idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &group
                                            DataChunk &payload, const vector<idx_t> &filter) {
     // If this is false, a single AddChunk would overflow the max capacity
     D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
-    if (list.empty() || list.back()->
+    if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity()) {
         idx_t new_capacity = GroupedAggregateHashTable::InitialCapacity();
         if (!list.empty()) {
             new_capacity = list.back()->Capacity();
@@ -159,7 +130,7 @@ void PartitionableHashTable::Partition() {
         context, allocator, group_types, payload_types, bindings, GetHTEntrySize()));
         partition_hts[r] = radix_partitioned_hts[r].back().get();
     }
-    unpartitioned_ht->Partition(partition_hts, partition_info.
+    unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits);
     unpartitioned_ht.reset();
 }
 unpartitioned_hts.clear();
@@ -176,6 +147,7 @@ HashTableList PartitionableHashTable::GetPartition(idx_t partition) {
     D_ASSERT(radix_partitioned_hts.size() > partition);
     return std::move(radix_partitioned_hts[partition]);
 }
+
 HashTableList PartitionableHashTable::GetUnpartitioned() {
     D_ASSERT(!IsPartitioned());
     return std::move(unpartitioned_hts);
```
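The rewritten constructor delegates its arithmetic to the shared `RadixPartitioning` helpers instead of the three deleted local functions. The math itself is small; here is a sketch with plain-function equivalents (the shift placement is an assumption carried over from the deleted comment about using the fifth byte of the hash; `RadixPartitioning`'s actual constants may differ):

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>

using hash_t = uint64_t;
using idx_t = uint64_t;

// Largest power of two <= n, for n > 0 (cf. PreviousPowerOfTwo above).
static idx_t PreviousPowerOfTwo(idx_t n) {
    idx_t result = 1;
    while (result * 2 <= n) {
        result *= 2;
    }
    return result;
}

// log2 of a power-of-two partition count (cf. RadixPartitioning::RadixBits).
static idx_t RadixBits(idx_t n_partitions) {
    idx_t bits = 0;
    while (n_partitions >>= 1) {
        bits++;
    }
    return bits;
}

// A mask of radix_bits ones positioned at `shift` bits (assumed placement).
static hash_t Mask(idx_t radix_bits, idx_t shift) {
    return ((hash_t(1) << radix_bits) - 1) << shift;
}

int main() {
    idx_t n_partitions = PreviousPowerOfTwo(100); // 64: counts stay powers of two
    idx_t bits = RadixBits(n_partitions);         // 6
    assert(n_partitions == 64 && bits == 6);
    std::cout << std::hex << Mask(bits, 32) << "\n"; // 3f00000000
}
```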
package/src/duckdb/src/execution/physical_operator.cpp:

```diff
@@ -138,7 +138,7 @@ void PhysicalOperator::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipe
 
     // we create a new pipeline starting from the child
     auto child_meta_pipeline = meta_pipeline.CreateChildMetaPipeline(current, this);
-    child_meta_pipeline->Build(children[0]
+    child_meta_pipeline->Build(*children[0]);
 } else {
     // operator is not a sink! recurse in children
     if (children.empty()) {
```
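Both `BuildPipelines` hunks (here and in `physical_recursive_cte.cpp`) change `Build(children[...]` into `Build(*children[...])`; the old argument is truncated in this diff, so the exact previous signature is unknown, but the visible shift is from passing the owning pointer toward dereferencing it into a reference parameter. A minimal sketch of that calling convention:

```cpp
#include <iostream>
#include <memory>
#include <vector>

struct Op {
    const char *name;
};

// Reference parameter: a null child is impossible at the call site.
static void Build(Op &op) {
    std::cout << "building " << op.name << "\n";
}

int main() {
    std::vector<std::unique_ptr<Op>> children;
    children.push_back(std::make_unique<Op>(Op{"lhs"}));
    children.push_back(std::make_unique<Op>(Op{"rhs"}));
    // Mirrors child_meta_pipeline->Build(*children[0]) above:
    // dereference the owning unique_ptr at the call site.
    Build(*children[0]);
    Build(*children[1]);
}
```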
|