duckdb 0.7.2-dev1901.0 → 0.7.2-dev2144.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +22 -4
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
package/binding.gyp
CHANGED
@@ -22,6 +22,8 @@
         "src/duckdb/ub_src_common_serializer.cpp",
         "src/duckdb/ub_src_common_sort.cpp",
         "src/duckdb/ub_src_common_types.cpp",
+        "src/duckdb/ub_src_common_types_column.cpp",
+        "src/duckdb/ub_src_common_types_row.cpp",
         "src/duckdb/ub_src_common_value_operations.cpp",
         "src/duckdb/src/common/vector_operations/boolean_operators.cpp",
         "src/duckdb/src/common/vector_operations/comparison_operators.cpp",
package/package.json
CHANGED
package/src/duckdb/extension/parquet/include/parquet_writer.hpp
CHANGED
@@ -14,7 +14,7 @@
 #include "duckdb/common/exception.hpp"
 #include "duckdb/common/mutex.hpp"
 #include "duckdb/common/serializer/buffered_file_writer.hpp"
-#include "duckdb/common/types/column_data_collection.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #endif
 
 #include "parquet_types.h"
package/src/duckdb/extension/parquet/parquet_metadata.cpp
CHANGED
@@ -1,12 +1,14 @@
 #include "parquet_metadata.hpp"
+
 #include "parquet_statistics.hpp"
+
 #include <sstream>
 
 #ifndef DUCKDB_AMALGAMATION
+#include "duckdb/common/multi_file_reader.hpp"
 #include "duckdb/common/types/blob.hpp"
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/main/config.hpp"
-#include "duckdb/common/types/column_data_collection.hpp"
-#include "duckdb/common/multi_file_reader.hpp"
 #endif
 
 namespace duckdb {
package/src/duckdb/src/common/arrow/arrow_appender.cpp
CHANGED
@@ -12,7 +12,7 @@ namespace duckdb {
 // Arrow append data
 //===--------------------------------------------------------------------===//
 typedef void (*initialize_t)(ArrowAppendData &result, const LogicalType &type, idx_t capacity);
-typedef void (*append_vector_t)(ArrowAppendData &append_data, Vector &input, idx_t
+typedef void (*append_vector_t)(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size);
 typedef void (*finalize_t)(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result);
 
 struct ArrowAppendData {
@@ -84,8 +84,9 @@ static void SetNull(ArrowAppendData &append_data, uint8_t *validity_data, idx_t
     append_data.null_count++;
 }
 
-static void AppendValidity(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t
+static void AppendValidity(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t from, idx_t to) {
     // resize the buffer, filling the validity buffer with all valid values
+    idx_t size = to - from;
     ResizeValidity(append_data.validity, append_data.row_count + size);
     if (format.validity.AllValid()) {
         // if all values are valid we don't need to do anything else
@@ -97,7 +98,7 @@ static void AppendValidity(ArrowAppendData &append_data, UnifiedVectorFormat &fo
     uint8_t current_bit;
     idx_t current_byte;
     GetBitPosition(append_data.row_count, current_byte, current_bit);
-    for (idx_t i =
+    for (idx_t i = from; i < to; i++) {
         auto source_idx = format.sel->get_index(i);
         // append the validity mask
         if (!format.validity.RowIsValid(source_idx)) {
@@ -146,21 +147,22 @@ struct ArrowIntervalConverter {
 
 template <class TGT, class SRC = TGT, class OP = ArrowScalarConverter>
 struct ArrowScalarBaseData {
-    static void Append(ArrowAppendData &append_data, Vector &input, idx_t
+    static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
+        idx_t size = to - from;
         UnifiedVectorFormat format;
-        input.ToUnifiedFormat(
+        input.ToUnifiedFormat(input_size, format);
 
         // append the validity mask
-        AppendValidity(append_data, format,
+        AppendValidity(append_data, format, from, to);
 
         // append the main data
         append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(TGT) * size);
         auto data = (SRC *)format.data;
         auto result_data = (TGT *)append_data.main_buffer.data();
 
-        for (idx_t i =
+        for (idx_t i = from; i < to; i++) {
             auto source_idx = format.sel->get_index(i);
-            auto result_idx = append_data.row_count + i;
+            auto result_idx = append_data.row_count + i - from;
 
             if (OP::SkipNulls() && !format.validity.RowIsValid(source_idx)) {
                 OP::template SetNull<TGT>(result_data[result_idx]);
@@ -254,9 +256,10 @@ struct ArrowBoolData {
         result.main_buffer.reserve(byte_count);
     }
 
-    static void Append(ArrowAppendData &append_data, Vector &input, idx_t
+    static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
+        idx_t size = to - from;
         UnifiedVectorFormat format;
-        input.ToUnifiedFormat(
+        input.ToUnifiedFormat(input_size, format);
 
         // we initialize both the validity and the bit set to 1's
         ResizeValidity(append_data.validity, append_data.row_count + size);
@@ -268,7 +271,7 @@ struct ArrowBoolData {
         uint8_t current_bit;
         idx_t current_byte;
         GetBitPosition(append_data.row_count, current_byte, current_bit);
-        for (idx_t i =
+        for (idx_t i = from; i < to; i++) {
             auto source_idx = format.sel->get_index(i);
             // append the validity mask
             if (!format.validity.RowIsValid(source_idx)) {
@@ -321,9 +324,10 @@ struct ArrowVarcharData {
         result.aux_buffer.reserve(capacity);
     }
 
-    static void Append(ArrowAppendData &append_data, Vector &input, idx_t
+    static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
+        idx_t size = to - from;
         UnifiedVectorFormat format;
-        input.ToUnifiedFormat(
+        input.ToUnifiedFormat(input_size, format);
 
         // resize the validity mask and set up the validity buffer for iteration
         ResizeValidity(append_data.validity, append_data.row_count + size);
@@ -340,14 +344,14 @@ struct ArrowVarcharData {
         // now append the string data to the auxiliary buffer
         // the auxiliary buffer's length depends on the string lengths, so we resize as required
         auto last_offset = offset_data[append_data.row_count];
-        for (idx_t i =
+        for (idx_t i = from; i < to; i++) {
             auto source_idx = format.sel->get_index(i);
-            auto offset_idx = append_data.row_count + i + 1;
+            auto offset_idx = append_data.row_count + i + 1 - from;
 
             if (!format.validity.RowIsValid(source_idx)) {
                 uint8_t current_bit;
                 idx_t current_byte;
-                GetBitPosition(append_data.row_count + i, current_byte, current_bit);
+                GetBitPosition(append_data.row_count + i - from, current_byte, current_bit);
                 SetNull(append_data, validity_data, current_byte, current_bit);
                 offset_data[offset_idx] = last_offset;
                 continue;
@@ -387,17 +391,17 @@ struct ArrowStructData {
         }
     }
 
-    static void Append(ArrowAppendData &append_data, Vector &input, idx_t
+    static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
         UnifiedVectorFormat format;
-        input.ToUnifiedFormat(
-
-        AppendValidity(append_data, format,
+        input.ToUnifiedFormat(input_size, format);
+        idx_t size = to - from;
+        AppendValidity(append_data, format, from, to);
         // append the children of the struct
         auto &children = StructVector::GetEntries(input);
         for (idx_t child_idx = 0; child_idx < children.size(); child_idx++) {
             auto &child = children[child_idx];
             auto &child_data = *append_data.child_data[child_idx];
-            child_data.append_vector(child_data, *child, size);
+            child_data.append_vector(child_data, *child, from, to, size);
         }
         append_data.row_count += size;
     }
@@ -419,9 +423,10 @@ struct ArrowStructData {
 //===--------------------------------------------------------------------===//
 // Lists
 //===--------------------------------------------------------------------===//
-void AppendListOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t
+void AppendListOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format, idx_t from, idx_t to,
                        vector<sel_t> &child_sel) {
     // resize the offset buffer - the offset buffer holds the offsets into the child array
+    idx_t size = to - from;
     append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(uint32_t) * (size + 1));
     auto data = (list_entry_t *)format.data;
     auto offset_data = (uint32_t *)append_data.main_buffer.data();
@@ -431,9 +436,9 @@ void AppendListOffsets(ArrowAppendData &append_data, UnifiedVectorFormat &format
     }
     // set up the offsets using the list entries
     auto last_offset = offset_data[append_data.row_count];
-    for (idx_t i =
+    for (idx_t i = from; i < to; i++) {
         auto source_idx = format.sel->get_index(i);
-        auto offset_idx = append_data.row_count + i + 1;
+        auto offset_idx = append_data.row_count + i + 1 - from;
 
         if (!format.validity.RowIsValid(source_idx)) {
             offset_data[offset_idx] = last_offset;
@@ -459,21 +464,28 @@ struct ArrowListData {
         result.child_data.push_back(std::move(child_buffer));
     }
 
-    static void Append(ArrowAppendData &append_data, Vector &input, idx_t
+    static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
         UnifiedVectorFormat format;
-        input.ToUnifiedFormat(
-
+        input.ToUnifiedFormat(input_size, format);
+        idx_t size = to - from;
         vector<sel_t> child_indices;
-        AppendValidity(append_data, format,
-        AppendListOffsets(append_data, format,
+        AppendValidity(append_data, format, from, to);
+        AppendListOffsets(append_data, format, from, to, child_indices);
 
         // append the child vector of the list
         SelectionVector child_sel(child_indices.data());
         auto &child = ListVector::GetEntry(input);
         auto child_size = child_indices.size();
-
-
-
+        if (size != input_size) {
+            // Let's avoid doing this
+            Vector child_copy(child.GetType());
+            child_copy.Slice(child, child_sel, child_size);
+            append_data.child_data[0]->append_vector(*append_data.child_data[0], child_copy, 0, child_size, child_size);
+        } else {
+            // We don't care about the vector, slice it
+            child.Slice(child_sel, child_size);
+            append_data.child_data[0]->append_vector(*append_data.child_data[0], child, 0, child_size, child_size);
+        }
         append_data.row_count += size;
     }
 
@@ -508,26 +520,39 @@ struct ArrowMapData {
         result.child_data.push_back(std::move(internal_struct));
     }
 
-    static void Append(ArrowAppendData &append_data, Vector &input, idx_t
+    static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size) {
         UnifiedVectorFormat format;
-        input.ToUnifiedFormat(
-
-        AppendValidity(append_data, format,
+        input.ToUnifiedFormat(input_size, format);
+        idx_t size = to - from;
+        AppendValidity(append_data, format, from, to);
         vector<sel_t> child_indices;
-        AppendListOffsets(append_data, format,
+        AppendListOffsets(append_data, format, from, to, child_indices);
 
         SelectionVector child_sel(child_indices.data());
         auto &key_vector = MapVector::GetKeys(input);
         auto &value_vector = MapVector::GetValues(input);
         auto list_size = child_indices.size();
-        key_vector.Slice(child_sel, list_size);
-        value_vector.Slice(child_sel, list_size);
 
         auto &struct_data = *append_data.child_data[0];
         auto &key_data = *struct_data.child_data[0];
         auto &value_data = *struct_data.child_data[1];
-
-
+
+        if (size != input_size) {
+            // Let's avoid doing this
+            Vector key_vector_copy(key_vector.GetType());
+            key_vector_copy.Slice(key_vector, child_sel, list_size);
+            Vector value_vector_copy(value_vector.GetType());
+            value_vector_copy.Slice(value_vector, child_sel, list_size);
+            key_data.append_vector(key_data, key_vector_copy, 0, list_size, list_size);
+            value_data.append_vector(value_data, value_vector_copy, 0, list_size, list_size);
+        } else {
+            // We don't care about the vector, slice it
+            key_vector.Slice(child_sel, list_size);
+            value_vector.Slice(child_sel, list_size);
+            key_data.append_vector(key_data, key_vector, 0, list_size, list_size);
+            value_data.append_vector(value_data, value_vector, 0, list_size, list_size);
+        }
+
         append_data.row_count += size;
         struct_data.row_count += size;
     }
@@ -567,12 +592,12 @@ struct ArrowMapData {
 };
 
 //! Append a data chunk to the underlying arrow array
-void ArrowAppender::Append(DataChunk &input) {
+void ArrowAppender::Append(DataChunk &input, idx_t from, idx_t to, idx_t input_size) {
     D_ASSERT(types == input.GetTypes());
     for (idx_t i = 0; i < input.ColumnCount(); i++) {
-        root_data[i]->append_vector(*root_data[i], input.data[i],
+        root_data[i]->append_vector(*root_data[i], input.data[i], from, to, input_size);
     }
-    row_count +=
+    row_count += to - from;
 }
 //===--------------------------------------------------------------------===//
 // Initialize Arrow Child
package/src/duckdb/src/common/arrow/arrow_converter.cpp
CHANGED
@@ -17,7 +17,7 @@ namespace duckdb {
 
 void ArrowConverter::ToArrowArray(DataChunk &input, ArrowArray *out_array) {
     ArrowAppender appender(input.GetTypes(), input.size());
-    appender.Append(input);
+    appender.Append(input, 0, input.size(), input.size());
     *out_array = appender.Finalize();
 }
 
package/src/duckdb/src/common/arrow/arrow_wrapper.cpp
CHANGED
@@ -187,6 +187,15 @@ bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray
                               PreservedError &error) {
     count = 0;
     ArrowAppender appender(result->types, chunk_size);
+    auto &current_chunk = result->current_chunk;
+    if (current_chunk.Valid()) {
+        // We start by scanning the non-finished current chunk
+        idx_t cur_consumption = current_chunk.RemainingSize() > chunk_size ? chunk_size : current_chunk.RemainingSize();
+        count += cur_consumption;
+        appender.Append(*current_chunk.data_chunk, current_chunk.position, current_chunk.position + cur_consumption,
+                        current_chunk.data_chunk->size());
+        current_chunk.position += cur_consumption;
+    }
     while (count < chunk_size) {
         unique_ptr<DataChunk> data_chunk;
         if (!TryFetchNext(*result, data_chunk, error)) {
@@ -198,8 +207,17 @@ bool ArrowUtil::TryFetchChunk(QueryResult *result, idx_t chunk_size, ArrowArray
         if (!data_chunk || data_chunk->size() == 0) {
             break;
         }
-        count
-
+        if (count + data_chunk->size() > chunk_size) {
+            // We have to split the chunk between this and the next batch
+            idx_t available_space = chunk_size - count;
+            appender.Append(*data_chunk, 0, available_space, data_chunk->size());
+            count += available_space;
+            current_chunk.data_chunk = std::move(data_chunk);
+            current_chunk.position = available_space;
+        } else {
+            count += data_chunk->size();
+            appender.Append(*data_chunk, 0, data_chunk->size(), data_chunk->size());
+        }
     }
     if (count > 0) {
         *out = appender.Finalize();
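The two hunks above let ArrowUtil::TryFetchChunk split a DataChunk across Arrow batches: ArrowAppender::Append now takes an explicit [from, to) row range plus the total size of the source chunk. A minimal sketch of that range-based API, outside the diff and with a hypothetical helper function and variable names invented for illustration:

```cpp
// Hypothetical sketch of the new range-based ArrowAppender::Append.
// Splits one populated DataChunk across two Arrow arrays; `chunk` is assumed
// to already contain data matching the appender's types.
#include "duckdb/common/arrow/arrow_appender.hpp"

using namespace duckdb;

void SplitChunkIntoTwoArrays(DataChunk &chunk, ArrowArray &first, ArrowArray &second) {
	idx_t half = chunk.size() / 2;

	ArrowAppender first_appender(chunk.GetTypes(), half);
	// Append rows [0, half); the last argument is the full size of the source chunk.
	first_appender.Append(chunk, 0, half, chunk.size());
	first = first_appender.Finalize();

	ArrowAppender second_appender(chunk.GetTypes(), chunk.size() - half);
	// The remaining rows [half, chunk.size()) go into a second array.
	second_appender.Append(chunk, half, chunk.size(), chunk.size());
	second = second_appender.Finalize();
}
```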
package/src/duckdb/src/common/box_renderer.cpp
CHANGED
@@ -1,8 +1,10 @@
 #include "duckdb/common/box_renderer.hpp"
-
+
 #include "duckdb/common/printer.hpp"
-#include "
+#include "duckdb/common/types/column/column_data_collection.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
+#include "utf8proc_wrapper.hpp"
+
 #include <sstream>
 
 namespace duckdb {
package/src/duckdb/src/common/constants.cpp
CHANGED
@@ -1,6 +1,7 @@
 #include "duckdb/common/constants.hpp"
-
+
 #include "duckdb/common/limits.hpp"
+#include "duckdb/common/vector_size.hpp"
 
 namespace duckdb {
 
@@ -15,6 +16,10 @@ const transaction_t MAX_TRANSACTION_ID = NumericLimits<transaction_t>::Maximum()
 const transaction_t NOT_DELETED_ID = NumericLimits<transaction_t>::Maximum() - 1; // 2^64 - 1
 const transaction_t MAXIMUM_QUERY_ID = NumericLimits<transaction_t>::Maximum();   // 2^64
 
+bool IsPowerOfTwo(uint64_t v) {
+    return (v & (v - 1)) == 0;
+}
+
 uint64_t NextPowerOfTwo(uint64_t v) {
     v--;
     v |= v >> 1;
@@ -27,6 +32,10 @@ uint64_t NextPowerOfTwo(uint64_t v) {
     return v;
 }
 
+uint64_t PreviousPowerOfTwo(uint64_t v) {
+    return NextPowerOfTwo((v / 2) + 1);
+}
+
 bool IsInvalidSchema(const string &str) {
     return str.empty();
 }
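constants.cpp gains IsPowerOfTwo and PreviousPowerOfTwo next to the existing NextPowerOfTwo. A self-contained sketch with worked values follows; the NextPowerOfTwo body here is the standard bit-twiddling round-up, which is only partly visible in the hunk above.

```cpp
// Standalone illustration of the power-of-two helpers and what they return.
#include <cassert>
#include <cstdint>

static bool IsPowerOfTwo(uint64_t v) {
	return (v & (v - 1)) == 0; // note: also returns true for v == 0
}

static uint64_t NextPowerOfTwo(uint64_t v) {
	// round v up to the next power of two by smearing the highest set bit downwards
	v--;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	v |= v >> 32;
	v++;
	return v;
}

static uint64_t PreviousPowerOfTwo(uint64_t v) {
	// NextPowerOfTwo(v / 2 + 1) rounds down: 20 -> NextPowerOfTwo(11) -> 16,
	// and an exact power of two maps to itself: 16 -> NextPowerOfTwo(9) -> 16.
	return NextPowerOfTwo((v / 2) + 1);
}

int main() {
	assert(IsPowerOfTwo(64) && !IsPowerOfTwo(65));
	assert(NextPowerOfTwo(65) == 128);
	assert(PreviousPowerOfTwo(20) == 16);
	assert(PreviousPowerOfTwo(16) == 16);
	return 0;
}
```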
package/src/duckdb/src/common/filename_pattern.cpp
ADDED
@@ -0,0 +1,41 @@
+#include "duckdb/common/filename_pattern.hpp"
+#include "duckdb/common/string_util.hpp"
+
+namespace duckdb {
+
+void FilenamePattern::SetFilenamePattern(const string &pattern) {
+    const string id_format {"{i}"};
+    const string uuid_format {"{uuid}"};
+
+    _base = pattern;
+
+    _pos = _base.find(id_format);
+    if (_pos != string::npos) {
+        _base = StringUtil::Replace(_base, id_format, "");
+        _uuid = false;
+    }
+
+    _pos = _base.find(uuid_format);
+    if (_pos != string::npos) {
+        _base = StringUtil::Replace(_base, uuid_format, "");
+        _uuid = true;
+    }
+
+    _pos = std::min(_pos, (idx_t)_base.length());
+}
+
+string FilenamePattern::CreateFilename(const FileSystem &fs, const string &path, const string &extension,
+                                       idx_t offset) const {
+    string result(_base);
+    string replacement;
+
+    if (_uuid) {
+        replacement = UUID::ToString(UUID::GenerateRandomUUID());
+    } else {
+        replacement = std::to_string(offset);
+    }
+    result.insert(_pos, replacement);
+    return fs.JoinPath(path, result + "." + extension);
+}
+
+} // namespace duckdb
package/src/duckdb/src/common/hive_partitioning.cpp
CHANGED
@@ -1,11 +1,12 @@
 #include "duckdb/common/hive_partitioning.hpp"
-
+
 #include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/optimizer/filter_combiner.hpp"
-#include "duckdb/planner/expression_iterator.hpp"
-#include "duckdb/planner/expression/bound_constant_expression.hpp"
 #include "duckdb/planner/expression/bound_columnref_expression.hpp"
+#include "duckdb/planner/expression/bound_constant_expression.hpp"
 #include "duckdb/planner/expression/bound_reference_expression.hpp"
+#include "duckdb/planner/expression_iterator.hpp"
+#include "duckdb/planner/table_filter.hpp"
 #include "re2/re2.h"
 
 namespace duckdb {
@@ -140,29 +141,157 @@ void HivePartitioning::ApplyFiltersToFileList(ClientContext &context, vector<str
 }
 
 HivePartitionedColumnData::HivePartitionedColumnData(const HivePartitionedColumnData &other)
-    : PartitionedColumnData(other) {
+    : PartitionedColumnData(other), hashes_v(LogicalType::HASH) {
     // Synchronize to ensure consistency of shared partition map
     if (other.global_state) {
         global_state = other.global_state;
         unique_lock<mutex> lck(global_state->lock);
         SynchronizeLocalMap();
     }
+    InitializeKeys();
 }
 
-void HivePartitionedColumnData::
-
-
-
-
-
-
-
-
-
+void HivePartitionedColumnData::InitializeKeys() {
+    keys.resize(STANDARD_VECTOR_SIZE);
+    for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) {
+        keys[i].values.resize(group_by_columns.size());
+    }
+}
+
+template <class T>
+static inline Value GetHiveKeyValue(const T &val) {
+    return Value::CreateValue<T>(val);
+}
+
+template <class T>
+static inline Value GetHiveKeyValue(const T &val, const LogicalType &type) {
+    auto result = GetHiveKeyValue(val);
+    result.Reinterpret(type);
+    return result;
+}
+
+static inline Value GetHiveKeyNullValue(const LogicalType &type) {
+    Value result;
+    result.Reinterpret(type);
+    return result;
+}
+
+template <class T>
+static void TemplatedGetHivePartitionValues(Vector &input, vector<HivePartitionKey> &keys, const idx_t col_idx,
+                                            const idx_t count) {
+    UnifiedVectorFormat format;
+    input.ToUnifiedFormat(count, format);
+
+    const auto &sel = *format.sel;
+    const auto data = (T *)format.data;
+    const auto &validity = format.validity;
+
+    const auto &type = input.GetType();
+
+    const auto reinterpret = Value::CreateValue<T>(data[0]).GetTypeMutable() != type;
+    if (reinterpret) {
+        for (idx_t i = 0; i < count; i++) {
+            auto &key = keys[i];
+            const auto idx = sel.get_index(i);
+            if (validity.RowIsValid(idx)) {
+                key.values[col_idx] = GetHiveKeyValue(data[idx], type);
+            } else {
+                key.values[col_idx] = GetHiveKeyNullValue(type);
+            }
+        }
+    } else {
+        for (idx_t i = 0; i < count; i++) {
+            auto &key = keys[i];
+            const auto idx = sel.get_index(i);
+            if (validity.RowIsValid(idx)) {
+                key.values[col_idx] = GetHiveKeyValue(data[idx]);
+            } else {
+                key.values[col_idx] = GetHiveKeyNullValue(type);
+            }
         }
+    }
+}
+
+static void GetNestedHivePartitionValues(Vector &input, vector<HivePartitionKey> &keys, const idx_t col_idx,
+                                         const idx_t count) {
+    for (idx_t i = 0; i < count; i++) {
+        auto &key = keys[i];
+        key.values[col_idx] = input.GetValue(i);
+    }
+}
+
+static void GetHivePartitionValuesTypeSwitch(Vector &input, vector<HivePartitionKey> &keys, const idx_t col_idx,
+                                             const idx_t count) {
+    const auto &type = input.GetType();
+    switch (type.InternalType()) {
+    case PhysicalType::BOOL:
+        TemplatedGetHivePartitionValues<bool>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::INT8:
+        TemplatedGetHivePartitionValues<int8_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::INT16:
+        TemplatedGetHivePartitionValues<int16_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::INT32:
+        TemplatedGetHivePartitionValues<int32_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::INT64:
+        TemplatedGetHivePartitionValues<int64_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::INT128:
+        TemplatedGetHivePartitionValues<hugeint_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::UINT8:
+        TemplatedGetHivePartitionValues<uint8_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::UINT16:
+        TemplatedGetHivePartitionValues<uint16_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::UINT32:
+        TemplatedGetHivePartitionValues<uint32_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::UINT64:
+        TemplatedGetHivePartitionValues<uint64_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::FLOAT:
+        TemplatedGetHivePartitionValues<float>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::DOUBLE:
+        TemplatedGetHivePartitionValues<double>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::INTERVAL:
+        TemplatedGetHivePartitionValues<interval_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::VARCHAR:
+        TemplatedGetHivePartitionValues<string_t>(input, keys, col_idx, count);
+        break;
+    case PhysicalType::STRUCT:
+    case PhysicalType::LIST:
+        GetNestedHivePartitionValues(input, keys, col_idx, count);
+        break;
+    default:
+        throw InternalException("Unsupported type for HivePartitionedColumnData::ComputePartitionIndices");
+    }
+}
+
+void HivePartitionedColumnData::ComputePartitionIndices(PartitionedColumnDataAppendState &state, DataChunk &input) {
+    const auto count = input.size();
+
+    input.Hash(group_by_columns, hashes_v);
+    hashes_v.Flatten(count);
+
+    for (idx_t col_idx = 0; col_idx < group_by_columns.size(); col_idx++) {
+        auto &group_by_col = input.data[group_by_columns[col_idx]];
+        GetHivePartitionValuesTypeSwitch(group_by_col, keys, col_idx, count);
+    }
 
+    const auto hashes = FlatVector::GetData<hash_t>(hashes_v);
+    const auto partition_indices = FlatVector::GetData<idx_t>(state.partition_indices);
+    for (idx_t i = 0; i < count; i++) {
+        auto &key = keys[i];
+        key.hash = hashes[i];
         auto lookup = local_partition_map.find(key);
-        const auto partition_indices = FlatVector::GetData<idx_t>(state.partition_indices);
         if (lookup == local_partition_map.end()) {
             idx_t new_partition_id = RegisterNewPartition(key, state);
             partition_indices[i] = new_partition_id;
|