duckdb 0.7.2-dev2320.0 → 0.7.2-dev2410.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/data_chunk.cpp +1 -1
- package/src/duckdb/extension/icu/icu-extension.cpp +2 -2
- package/src/duckdb/extension/icu/icu-makedate.cpp +52 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +4 -0
- package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.cpp +28 -28
- package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.h +4 -4
- package/src/duckdb/extension/json/include/json_common.hpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +2 -2
- package/src/duckdb/extension/json/json_serializer.cpp +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/column_writer.cpp +3 -3
- package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +2 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +2 -2
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/file_buffer.cpp +8 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +24 -25
- package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
- package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/bit.cpp +18 -18
- package/src/duckdb/src/common/types/blob.cpp +7 -7
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +1 -1
- package/src/duckdb/src/common/types/hash.cpp +1 -1
- package/src/duckdb/src/common/types/hyperloglog.cpp +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/string_heap.cpp +2 -2
- package/src/duckdb/src/common/types/string_type.cpp +2 -2
- package/src/duckdb/src/common/types/timestamp.cpp +1 -1
- package/src/duckdb/src/common/types/vector.cpp +7 -7
- package/src/duckdb/src/execution/index/art/art_key.cpp +2 -2
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
- package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/first.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/kurtosis.cpp +3 -2
- package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/skew.cpp +5 -1
- package/src/duckdb/src/function/aggregate/distributive/string_agg.cpp +1 -1
- package/src/duckdb/src/function/cast/list_casts.cpp +1 -1
- package/src/duckdb/src/function/cast/struct_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -3
- package/src/duckdb/src/function/scalar/bit/bitstring.cpp +1 -1
- package/src/duckdb/src/function/scalar/blob/encode.cpp +1 -1
- package/src/duckdb/src/function/scalar/date/strftime.cpp +3 -3
- package/src/duckdb/src/function/scalar/generic/current_setting.cpp +1 -1
- package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
- package/src/duckdb/src/function/scalar/string/ascii.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/concat.cpp +6 -6
- package/src/duckdb/src/function/scalar/string/contains.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/hex.cpp +4 -4
- package/src/duckdb/src/function/scalar/string/instr.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/jaccard.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/jaro_winkler.cpp +5 -5
- package/src/duckdb/src/function/scalar/string/length.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/levenshtein.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/like.cpp +10 -11
- package/src/duckdb/src/function/scalar/string/mismatches.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/pad.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/prefix.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/printf.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +4 -4
- package/src/duckdb/src/function/scalar/string/repeat.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/replace.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/reverse.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/starts_with.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/string_split.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/strip_accents.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/substring.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/suffix.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/translate.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/trim.cpp +3 -3
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +1 -1
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -7
- package/src/duckdb/src/function/scalar/union/union_extract.cpp +1 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/crypto/md5.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +8 -6
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +17 -0
- package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
- package/src/duckdb/src/include/duckdb/optimizer/cse_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/aggregate_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/alter_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/check_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/constant_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/insert_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/relation_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/returning_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/update_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +12 -9
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
- package/src/duckdb/src/main/capi/cast/from_decimal-c.cpp +1 -1
- package/src/duckdb/src/main/capi/result-c.cpp +2 -2
- package/src/duckdb/src/main/config.cpp +26 -0
- package/src/duckdb/src/main/settings/settings.cpp +31 -8
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +9 -8
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +33 -29
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +8 -10
- package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +8 -7
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +6 -6
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +8 -8
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +5 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/plan_expressionlistref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +4 -4
- package/src/duckdb/src/planner/expression.cpp +2 -1
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/check_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/where_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder.cpp +12 -12
- package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +2 -2
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
- package/src/duckdb/src/storage/compression/fsst.cpp +3 -3
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
- package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
- package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
- package/src/duckdb/src/storage/statistics/string_stats.cpp +2 -2
- package/src/duckdb/src/storage/storage_manager.cpp +7 -2
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
- package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
- package/src/statement.cpp +3 -3
@@ -0,0 +1,698 @@
|
|
1
|
+
#include "duckdb/execution/operator/join/physical_asof_join.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/fast_mem.hpp"
|
4
|
+
#include "duckdb/common/operator/comparison_operators.hpp"
|
5
|
+
#include "duckdb/common/row_operations/row_operations.hpp"
|
6
|
+
#include "duckdb/common/sort/comparators.hpp"
|
7
|
+
#include "duckdb/common/sort/partition_state.hpp"
|
8
|
+
#include "duckdb/common/sort/sort.hpp"
|
9
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
10
|
+
#include "duckdb/execution/expression_executor.hpp"
|
11
|
+
#include "duckdb/execution/operator/join/outer_join_marker.hpp"
|
12
|
+
#include "duckdb/main/client_context.hpp"
|
13
|
+
#include "duckdb/parallel/event.hpp"
|
14
|
+
#include "duckdb/parallel/thread_context.hpp"
|
15
|
+
|
16
|
+
namespace duckdb {
|
17
|
+
|
18
|
+
// Builds the physical AsOf join from its logical counterpart.
// Every join condition is classified as either
//   - an ordering condition (>=), which contributes a sort key to lhs_orders / rhs_orders, or
//   - a partitioning condition (= or IS NOT DISTINCT FROM), which contributes to
//     lhs_partitions / rhs_partitions.
// Any other comparison raises NotImplementedException.
// op:    the logical join; its conditions are moved into this operator.
// left:  the probe-side child (stored as children[0]).
// right: the build-side child (stored as children[1]).
PhysicalAsOfJoin::PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
                                   unique_ptr<PhysicalOperator> right)
    : PhysicalComparisonJoin(op, PhysicalOperatorType::ASOF_JOIN, std::move(op.conditions), op.join_type,
                             op.estimated_cardinality) {

	// Split the conditions into partition expressions and sort orders
	for (auto &cond : conditions) {
		D_ASSERT(cond.left->return_type == cond.right->return_type);
		join_key_types.push_back(cond.left->return_type);

		auto lhs_expr = cond.left->Copy();
		auto rhs_expr = cond.right->Copy();
		const auto comparison = cond.comparison;
		if (comparison == ExpressionType::COMPARE_GREATERTHANOREQUALTO) {
			// The inequality is NULL-sensitive and defines the sort order on both sides
			null_sensitive.emplace_back(lhs_orders.size());
			lhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(lhs_expr));
			rhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(rhs_expr));
		} else if (comparison == ExpressionType::COMPARE_EQUAL ||
		           comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
			// Only plain equality is NULL-sensitive; both kinds partition the data
			if (comparison == ExpressionType::COMPARE_EQUAL) {
				null_sensitive.emplace_back(lhs_orders.size());
			}
			lhs_partitions.emplace_back(std::move(lhs_expr));
			rhs_partitions.emplace_back(std::move(rhs_expr));
		} else {
			throw NotImplementedException("Unsupported join condition for ASOF join");
		}
	}
	D_ASSERT(!lhs_orders.empty());
	D_ASSERT(!rhs_orders.empty());

	children.push_back(std::move(left));
	children.push_back(std::move(right));

	// Use the projection map of the logical operator;
	// when it is empty, project every column of the right child.
	right_projection_map = op.right_projection_map;
	if (right_projection_map.empty()) {
		const auto right_count = children[1]->types.size();
		right_projection_map.reserve(right_count);
		column_t col = 0;
		while (col < right_count) {
			right_projection_map.emplace_back(col);
			++col;
		}
	}
}
|
63
|
+
|
64
|
+
//===--------------------------------------------------------------------===//
|
65
|
+
// Sink
|
66
|
+
//===--------------------------------------------------------------------===//
|
67
|
+
class AsOfGlobalSinkState : public GlobalSinkState {
|
68
|
+
public:
|
69
|
+
AsOfGlobalSinkState(ClientContext &context, const PhysicalAsOfJoin &op)
|
70
|
+
: global_partition(context, op.rhs_partitions, op.rhs_orders, op.children[1]->types, {},
|
71
|
+
op.estimated_cardinality),
|
72
|
+
is_outer(IsRightOuterJoin(op.join_type)), has_null(false) {
|
73
|
+
}
|
74
|
+
|
75
|
+
idx_t Count() const {
|
76
|
+
return global_partition.count;
|
77
|
+
}
|
78
|
+
|
79
|
+
PartitionGlobalSinkState global_partition;
|
80
|
+
|
81
|
+
// One per partition
|
82
|
+
const bool is_outer;
|
83
|
+
vector<OuterJoinMarker> right_outers;
|
84
|
+
bool has_null;
|
85
|
+
};
|
86
|
+
|
87
|
+
class AsOfLocalSinkState : public LocalSinkState {
|
88
|
+
public:
|
89
|
+
explicit AsOfLocalSinkState(ClientContext &context, PartitionGlobalSinkState &gstate_p)
|
90
|
+
: local_partition(context, gstate_p) {
|
91
|
+
}
|
92
|
+
|
93
|
+
void Sink(DataChunk &input_chunk) {
|
94
|
+
local_partition.Sink(input_chunk);
|
95
|
+
}
|
96
|
+
|
97
|
+
void Combine() {
|
98
|
+
local_partition.Combine();
|
99
|
+
}
|
100
|
+
|
101
|
+
PartitionLocalSinkState local_partition;
|
102
|
+
};
|
103
|
+
|
104
|
+
// Creates the global sink state for this operator.
unique_ptr<GlobalSinkState> PhysicalAsOfJoin::GetGlobalSinkState(ClientContext &context) const {
	unique_ptr<GlobalSinkState> result = make_uniq<AsOfGlobalSinkState>(context, *this);
	return result;
}
|
107
|
+
|
108
|
+
// Creates a thread-local sink state bound to the global partition state.
// Only the right-hand side is sunk.
unique_ptr<LocalSinkState> PhysicalAsOfJoin::GetLocalSinkState(ExecutionContext &context) const {
	auto &rhs_partition = sink_state->Cast<AsOfGlobalSinkState>().global_partition;
	unique_ptr<LocalSinkState> result = make_uniq<AsOfLocalSinkState>(context.client, rhs_partition);
	return result;
}
|
113
|
+
|
114
|
+
// Sinks one chunk into the thread-local state.
// Always asks for more input.
SinkResultType PhysicalAsOfJoin::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
                                      DataChunk &input) const {
	lstate_p.Cast<AsOfLocalSinkState>().Sink(input);
	return SinkResultType::NEED_MORE_INPUT;
}
|
122
|
+
|
123
|
+
// Runs the combine step of the thread-local sink state.
void PhysicalAsOfJoin::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
	lstate_p.Cast<AsOfLocalSinkState>().Combine();
}
|
127
|
+
|
128
|
+
//===--------------------------------------------------------------------===//
|
129
|
+
// Finalize
|
130
|
+
//===--------------------------------------------------------------------===//
|
131
|
+
// Finishes the sink phase.
// Returns NO_OUTPUT_POSSIBLE when there are no partitions and EmptyResultIfRHSIsEmpty() holds;
// otherwise inserts a PartitionMergeEvent into the event and returns READY.
SinkFinalizeType PhysicalAsOfJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
                                            GlobalSinkState &gstate_p) const {
	auto &sink = gstate_p.Cast<AsOfGlobalSinkState>();
	auto &rhs_partition = sink.global_partition;

	// Nothing was sunk: bail out early if that means the result is empty
	const bool rhs_is_empty = rhs_partition.grouping_data->GetPartitions().empty();
	if (rhs_is_empty && EmptyResultIfRHSIsEmpty()) {
		return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
	}

	// Schedule all the sorts for maximum thread utilisation
	event.InsertEvent(make_shared<PartitionMergeEvent>(rhs_partition, pipeline));
	return SinkFinalizeType::READY;
}
|
148
|
+
|
149
|
+
//===--------------------------------------------------------------------===//
|
150
|
+
// Operator
|
151
|
+
//===--------------------------------------------------------------------===//
|
152
|
+
class AsOfGlobalState : public GlobalOperatorState {
|
153
|
+
public:
|
154
|
+
explicit AsOfGlobalState(AsOfGlobalSinkState &gsink) {
|
155
|
+
// for FULL/RIGHT OUTER JOIN, initialize right_outers to false for every tuple
|
156
|
+
auto &global_partition = gsink.global_partition;
|
157
|
+
auto &right_outers = gsink.right_outers;
|
158
|
+
right_outers.reserve(global_partition.hash_groups.size());
|
159
|
+
for (const auto &hash_group : global_partition.hash_groups) {
|
160
|
+
right_outers.emplace_back(OuterJoinMarker(gsink.is_outer));
|
161
|
+
right_outers.back().Initialize(hash_group->count);
|
162
|
+
}
|
163
|
+
}
|
164
|
+
};
|
165
|
+
|
166
|
+
// Creates the global operator state from the (already populated) global sink state.
unique_ptr<GlobalOperatorState> PhysicalAsOfJoin::GetGlobalOperatorState(ClientContext &context) const {
	unique_ptr<GlobalOperatorState> result = make_uniq<AsOfGlobalState>(sink_state->Cast<AsOfGlobalSinkState>());
	return result;
}
|
170
|
+
|
171
|
+
class AsOfLocalState : public CachingOperatorState {
|
172
|
+
public:
|
173
|
+
using Orders = vector<BoundOrderByNode>;
|
174
|
+
using Match = std::pair<hash_t, idx_t>;
|
175
|
+
|
176
|
+
AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op, bool force_external);
|
177
|
+
|
178
|
+
public:
|
179
|
+
void ResolveJoin(DataChunk &input, bool *found_matches, Match *matches = nullptr);
|
180
|
+
|
181
|
+
void ResolveJoinKeys(DataChunk &input);
|
182
|
+
|
183
|
+
ClientContext &context;
|
184
|
+
Allocator &allocator;
|
185
|
+
const PhysicalAsOfJoin &op;
|
186
|
+
BufferManager &buffer_manager;
|
187
|
+
const bool force_external;
|
188
|
+
Orders lhs_orders;
|
189
|
+
|
190
|
+
// LHS sorting
|
191
|
+
ExpressionExecutor lhs_executor;
|
192
|
+
DataChunk lhs_keys;
|
193
|
+
ValidityMask lhs_valid_mask;
|
194
|
+
SelectionVector lhs_sel;
|
195
|
+
idx_t lhs_valid;
|
196
|
+
RowLayout lhs_layout;
|
197
|
+
unique_ptr<GlobalSortState> lhs_global_state;
|
198
|
+
DataChunk lhs_sorted;
|
199
|
+
|
200
|
+
// LHS binning
|
201
|
+
Vector hash_vector;
|
202
|
+
Vector bin_vector;
|
203
|
+
|
204
|
+
// Output
|
205
|
+
idx_t lhs_match_count;
|
206
|
+
SelectionVector lhs_matched;
|
207
|
+
OuterJoinMarker left_outer;
|
208
|
+
bool fetch_next_left;
|
209
|
+
DataChunk group_payload;
|
210
|
+
DataChunk rhs_payload;
|
211
|
+
};
|
212
|
+
|
213
|
+
// Sets up the thread-local probe state.
// context:        client context used for the allocator, buffer manager and expression executor.
// op:             the AsOf join operator this state belongs to.
// force_external: stored and later copied into the sort state's `external` flag.
// The counters lhs_valid and lhs_match_count are zero-initialized so that the state
// never exposes indeterminate values before the first chunk has been resolved.
AsOfLocalState::AsOfLocalState(ClientContext &context, const PhysicalAsOfJoin &op, bool force_external)
    : context(context), allocator(Allocator::Get(context)), op(op),
      buffer_manager(BufferManager::GetBufferManager(context)), force_external(force_external), lhs_executor(context),
      lhs_valid(0), hash_vector(LogicalType::HASH), bin_vector(LogicalType::HASH), lhs_match_count(0),
      left_outer(IsLeftOuterJoin(op.join_type)), fetch_next_left(true) {
	// Derive the LHS sort orders from the partition and order expressions
	vector<unique_ptr<BaseStatistics>> partition_stats;
	Orders partitions; // Not used.
	PartitionGlobalSinkState::GenerateOrderings(partitions, lhs_orders, op.lhs_partitions, op.lhs_orders,
	                                            partition_stats);

	// We sort the row numbers of the incoming block, not the rows
	lhs_layout.Initialize({LogicalType::UINTEGER});
	lhs_sorted.Initialize(allocator, lhs_layout.GetTypes());

	// One key column per join condition, evaluated from the left-hand expressions
	lhs_keys.Initialize(allocator, op.join_key_types);
	for (const auto &cond : op.conditions) {
		lhs_executor.AddExpression(*cond.left);
	}

	// Both payload chunks use the column types of the right child
	group_payload.Initialize(allocator, op.children[1]->types);
	rhs_payload.Initialize(allocator, op.children[1]->types);

	lhs_matched.Initialize();
	lhs_sel.Initialize();
	left_outer.Initialize(STANDARD_VECTOR_SIZE);
}
|
239
|
+
|
240
|
+
// Prepares one probe (left-hand) chunk for matching:
//  1. evaluates the left-hand condition expressions of `input` into lhs_keys;
//  2. collects in lhs_sel the input row indices whose bit is set in the combined validity
//     mask of the columns listed in op.null_sensitive (lhs_valid = number of such rows);
//  3. fills bin_vector with the hash-group bin of the rows (constant 0 without partition keys);
//  4. sorts the selected row indices on lhs_orders and scans them into lhs_sorted.
// input: the probe chunk whose join keys are computed.
void AsOfLocalState::ResolveJoinKeys(DataChunk &input) {
|
241
|
+
// Compute the join keys
|
242
|
+
lhs_keys.Reset();
|
243
|
+
lhs_executor.Execute(input, lhs_keys);
|
244
|
+
|
245
|
+
// Extract the NULLs: fold the validity of each NULL-sensitive key column into lhs_valid_mask
|
246
|
+
const auto count = input.size();
|
247
|
+
lhs_valid_mask.Reset();
|
248
|
+
for (auto col_idx : op.null_sensitive) {
|
249
|
+
auto &col = lhs_keys.data[col_idx];
|
250
|
+
UnifiedVectorFormat unified;
|
251
|
+
col.ToUnifiedFormat(count, unified);
|
252
|
+
lhs_valid_mask.Combine(unified.validity, count);
|
253
|
+
}
|
254
|
+
|
255
|
+
// Convert the mask to a selection vector.
|
256
|
+
// We need this anyway for sorting
|
257
|
+
lhs_valid = 0;
|
258
|
+
const auto entry_count = lhs_valid_mask.EntryCount(count);
|
259
|
+
idx_t base_idx = 0;
|
260
|
+
// Walk the mask one validity entry (up to BITS_PER_VALUE rows) at a time
for (idx_t entry_idx = 0; entry_idx < entry_count;) {
|
261
|
+
const auto validity_entry = lhs_valid_mask.GetValidityEntry(entry_idx++);
|
262
|
+
// next is one past the last row covered by this entry, clamped to the chunk size
const auto next = MinValue<idx_t>(base_idx + ValidityMask::BITS_PER_VALUE, count);
|
263
|
+
// Every row of this entry is valid: select them all
if (ValidityMask::AllValid(validity_entry)) {
|
264
|
+
for (; base_idx < next; ++base_idx) {
|
265
|
+
lhs_sel.set_index(lhs_valid++, base_idx);
|
266
|
+
}
|
267
|
+
// No row of this entry is valid: skip the whole entry
} else if (ValidityMask::NoneValid(validity_entry)) {
|
268
|
+
base_idx = next;
|
269
|
+
// Mixed entry: test each row's bit (the bit position is relative to the entry's first row)
} else {
|
270
|
+
const auto start = base_idx;
|
271
|
+
for (; base_idx < next; ++base_idx) {
|
272
|
+
if (ValidityMask::RowIsValid(validity_entry, base_idx - start)) {
|
273
|
+
lhs_sel.set_index(lhs_valid++, base_idx);
|
274
|
+
}
|
275
|
+
}
|
276
|
+
}
|
277
|
+
}
|
278
|
+
|
279
|
+
// Slice the keys to the ones we can match
|
280
|
+
if (lhs_valid < count) {
|
281
|
+
lhs_keys.Slice(lhs_sel, lhs_valid);
|
282
|
+
}
|
283
|
+
|
284
|
+
// Hash to assign the partitions
|
285
|
+
auto &global_partition = op.sink_state->Cast<AsOfGlobalSinkState>().global_partition;
|
286
|
+
if (op.lhs_partitions.empty()) {
|
287
|
+
// Only one hash group
|
288
|
+
bin_vector.Reference(Value::HASH(0));
|
289
|
+
} else {
|
290
|
+
// Hash to determine the partitions.
|
291
|
+
// The first lhs_partitions.size() key columns are hashed together.
// NOTE(review): this presumes the partition keys are the leading columns of lhs_keys — confirm.
// NOTE(review): lhs_keys may already have been sliced with lhs_sel above and is hashed
// with lhs_sel again here — confirm the selection is not applied twice.
VectorOperations::Hash(lhs_keys.data[0], hash_vector, lhs_sel, lhs_valid);
|
292
|
+
for (size_t prt_idx = 1; prt_idx < op.lhs_partitions.size(); ++prt_idx) {
|
293
|
+
VectorOperations::CombineHash(hash_vector, lhs_keys.data[prt_idx], lhs_sel, lhs_valid);
|
294
|
+
}
|
295
|
+
|
296
|
+
// Convert hashes to hash groups
|
297
|
+
const auto radix_bits = global_partition.grouping_data->GetRadixBits();
|
298
|
+
RadixPartitioning::HashesToBins(hash_vector, radix_bits, bin_vector, count);
|
299
|
+
}
|
300
|
+
|
301
|
+
// Sort the selection vector on the valid keys (a fresh sort state is built for every chunk)
|
302
|
+
lhs_global_state = make_uniq<GlobalSortState>(buffer_manager, lhs_orders, lhs_layout);
|
303
|
+
auto &global_state = *lhs_global_state;
|
304
|
+
LocalSortState local_sort;
|
305
|
+
local_sort.Initialize(*lhs_global_state, buffer_manager);
|
306
|
+
|
307
|
+
DataChunk payload_chunk;
|
308
|
+
payload_chunk.InitializeEmpty({LogicalType::UINTEGER});
|
309
|
+
// The payload column points directly at lhs_sel's buffer, so the sorted payload is the input row indices
FlatVector::SetData(payload_chunk.data[0], (data_ptr_t)lhs_sel.data());
|
310
|
+
payload_chunk.SetCardinality(lhs_valid);
|
311
|
+
local_sort.SinkChunk(lhs_keys, payload_chunk);
|
312
|
+
|
313
|
+
// Set external (can be forced with the PRAGMA)
|
314
|
+
global_state.external = force_external;
|
315
|
+
global_state.AddLocalState(local_sort);
|
316
|
+
global_state.PrepareMergePhase();
|
317
|
+
// Merge until a single sorted block remains
while (global_state.sorted_blocks.size() > 1) {
|
318
|
+
MergeSorter merge_sorter(*lhs_global_state, buffer_manager);
|
319
|
+
merge_sorter.PerformInMergeRound();
|
320
|
+
global_state.CompleteMergeRound();
|
321
|
+
}
|
322
|
+
|
323
|
+
// Scan the sorted selection into lhs_sorted
|
324
|
+
D_ASSERT(global_state.sorted_blocks.size() == 1);
|
325
|
+
|
326
|
+
auto scanner = make_uniq<PayloadScanner>(*global_state.sorted_blocks[0]->payload_data, global_state, false);
|
327
|
+
lhs_sorted.Reset();
|
328
|
+
// NOTE(review): only one Scan call is issued; presumably a probe chunk always fits in one scan — confirm
scanner->Scan(lhs_sorted);
|
329
|
+
}
|
330
|
+
|
331
|
+
// Finds, for every valid probe row of `input`, its match in the right-hand hash group.
// Probe rows are visited in sorted order. The match is the last row of the group's sort
// order that compares <= the probe row, located with an exponential search followed by
// a binary search.
// input:       the probe chunk; its keys are resolved and sorted by ResolveJoinKeys first.
// found_match: optional array indexed by input row; the entries of matched rows are set to true.
// matches:     optional array indexed by input row; receives (bin, row index within the hash group).
// Each match is also recorded in left_outer and in the hash group's right outer marker,
// and the matched input row is appended to lhs_matched (lhs_match_count entries in total).
void AsOfLocalState::ResolveJoin(DataChunk &input, bool *found_match, std::pair<hash_t, idx_t> *matches) {
|
332
|
+
// Sort the input row indices into lhs_sorted, radix keys in lhs_global_state
|
333
|
+
ResolveJoinKeys(input);
|
334
|
+
|
335
|
+
auto &gsink = op.sink_state->Cast<AsOfGlobalSinkState>();
|
336
|
+
auto &global_partition = gsink.global_partition;
|
337
|
+
|
338
|
+
// The bins are contiguous from sorting, so load them one at a time
|
339
|
+
// But they may be constant, so unify.
|
340
|
+
UnifiedVectorFormat bin_unified;
|
341
|
+
bin_vector.ToUnifiedFormat(lhs_valid, bin_unified);
|
342
|
+
const auto bins = (hash_t *)bin_unified.data;
|
343
|
+
|
344
|
+
// Initial value is one past the last index of bin_groups
hash_t prev_bin = global_partition.bin_groups.size();
|
345
|
+
optional_ptr<PartitionGlobalHashGroup> hash_group;
|
346
|
+
optional_ptr<OuterJoinMarker> right_outer;
|
347
|
+
// Searching for right <= left
|
348
|
+
SBIterator left(*lhs_global_state, ExpressionType::COMPARE_LESSTHANOREQUALTO);
|
349
|
+
unique_ptr<SBIterator> right;
|
350
|
+
lhs_match_count = 0;
|
351
|
+
const auto sorted_sel = FlatVector::GetData<sel_t>(lhs_sorted.data[0]);
|
352
|
+
for (idx_t i = 0; i < lhs_valid; ++i) {
|
353
|
+
// idx is the index in the input; i is the index in the sorted keys
|
354
|
+
const auto idx = sorted_sel[i];
|
355
|
+
const auto curr_bin = bins[bin_unified.sel->get_index(idx)];
|
356
|
+
if (!hash_group || curr_bin != prev_bin) {
|
357
|
+
// Grab the next group
|
358
|
+
prev_bin = curr_bin;
|
359
|
+
const auto group_idx = global_partition.bin_groups[curr_bin];
|
360
|
+
if (group_idx >= global_partition.hash_groups.size()) {
|
361
|
+
// No matching partition (hash_group stays null, so the lookup is repeated for the next row)
|
362
|
+
hash_group = nullptr;
|
363
|
+
right_outer = nullptr;
|
364
|
+
right.reset();
|
365
|
+
continue;
|
366
|
+
}
|
367
|
+
hash_group = global_partition.hash_groups[group_idx].get();
|
368
|
+
right_outer = gsink.right_outers.data() + group_idx;
|
369
|
+
right = make_uniq<SBIterator>(*(hash_group->global_sort), ExpressionType::COMPARE_LESSTHANOREQUALTO);
|
370
|
+
}
|
371
|
+
left.SetIndex(i);
|
372
|
+
|
373
|
+
// If right > left, then there is no match
|
374
|
+
if (!right->Compare(left)) {
|
375
|
+
continue;
|
376
|
+
}
|
377
|
+
|
378
|
+
// Exponential search forward for a non-matching value using radix iterators
|
379
|
+
// (We use exponential search to avoid thrashing the block manager on large probes)
|
380
|
+
idx_t bound = 1;
|
381
|
+
// Start from wherever the iterator currently is: the previous match in this group,
// or the position a freshly created iterator starts at
idx_t begin = right->GetIndex();
|
382
|
+
right->SetIndex(begin + bound);
|
383
|
+
while (right->GetIndex() < hash_group->count) {
|
384
|
+
if (right->Compare(left)) {
|
385
|
+
// If right <= left, jump ahead
|
386
|
+
bound *= 2;
|
387
|
+
right->SetIndex(begin + bound);
|
388
|
+
} else {
|
389
|
+
break;
|
390
|
+
}
|
391
|
+
}
|
392
|
+
|
393
|
+
// Binary search for the first non-matching value using radix iterators
|
394
|
+
// The previous value (which we know exists) is the match
|
395
|
+
// The search window is [begin + bound / 2, min(begin + bound, hash_group->count))
auto first = begin + bound / 2;
|
396
|
+
auto last = MinValue<idx_t>(begin + bound, hash_group->count);
|
397
|
+
while (first < last) {
|
398
|
+
const auto mid = first + (last - first) / 2;
|
399
|
+
right->SetIndex(mid);
|
400
|
+
if (right->Compare(left)) {
|
401
|
+
// If right <= left, new lower bound
|
402
|
+
first = mid + 1;
|
403
|
+
} else {
|
404
|
+
last = mid;
|
405
|
+
}
|
406
|
+
}
|
407
|
+
// first is now the first non-matching row: step back to the match and leave the iterator there
right->SetIndex(--first);
|
408
|
+
|
409
|
+
// Check partitions for strict equality
// (presumably a non-zero ComparePartitions result means the partition keys differ — confirm)
|
410
|
+
if (!op.lhs_partitions.empty() && hash_group->ComparePartitions(left, *right)) {
|
411
|
+
continue;
|
412
|
+
}
|
413
|
+
|
414
|
+
// Emit match data
|
415
|
+
right_outer->SetMatch(first);
|
416
|
+
left_outer.SetMatch(idx);
|
417
|
+
if (found_match) {
|
418
|
+
found_match[idx] = true;
|
419
|
+
}
|
420
|
+
if (matches) {
|
421
|
+
matches[idx] = Match(curr_bin, first);
|
422
|
+
}
|
423
|
+
lhs_matched.set_index(lhs_match_count++, idx);
|
424
|
+
}
|
425
|
+
}
|
426
|
+
|
427
|
+
//! Creates the AsOfLocalState that serves as this operator's OperatorState.
unique_ptr<OperatorState> PhysicalAsOfJoin::GetOperatorState(ExecutionContext &context) const {
	// The client's force_external setting is forwarded to the local state
	auto &client_config = ClientConfig::GetConfig(context.client);
	const auto force_external = client_config.force_external;
	return make_uniq<AsOfLocalState>(context.client, *this, force_external);
}
|
431
|
+
|
432
|
+
//! Probes one input chunk and builds the output for the MARK, SEMI and ANTI join types.
//! Any other join type raises a NotImplementedException.
void PhysicalAsOfJoin::ResolveSimpleJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
                                         OperatorState &lstate_p) const {
	auto &local_state = lstate_p.Cast<AsOfLocalState>();
	auto &global_sink = sink_state->Cast<AsOfGlobalSinkState>();

	// One flag per input row, all initially false; the array is handed to the probe
	// and afterwards to the result constructor for the requested join type
	bool found_match[STANDARD_VECTOR_SIZE] = {false};
	local_state.ResolveJoin(input, found_match);

	// Build the output from the match flags
	if (join_type == JoinType::MARK) {
		PhysicalJoin::ConstructMarkJoinResult(local_state.lhs_keys, input, chunk, found_match, global_sink.has_null);
	} else if (join_type == JoinType::SEMI) {
		PhysicalJoin::ConstructSemiJoinResult(input, chunk, found_match);
	} else if (join_type == JoinType::ANTI) {
		PhysicalJoin::ConstructAntiJoinResult(input, chunk, found_match);
	} else {
		throw NotImplementedException("Unimplemented join type for AsOf join");
	}
}
|
457
|
+
|
458
|
+
//! Probes one input chunk for the join types that emit right-hand payload columns.
//!
//! The function is entered up to twice per input chunk:
//!  - first pass: resolves the matches, copies the matched right-hand values into
//!    `chunk` and slices the matched left-hand rows in front of them;
//!  - second pass (only when the left outer marker is enabled): emits the left join
//!    result for the same input and then asks for more input.
//! `fetch_next_left` in the local state records which pass comes next.
OperatorResultType PhysicalAsOfJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
                                                        OperatorState &lstate_p) const {
	auto &local_state = lstate_p.Cast<AsOfLocalState>();
	auto &global_sink = sink_state->Cast<AsOfGlobalSinkState>();

	// Second pass over the same input chunk
	if (!local_state.fetch_next_left) {
		local_state.fetch_next_left = true;
		auto &left_marker = local_state.left_outer;
		if (left_marker.Enabled()) {
			// left join: before moving on to the next chunk, output the rows that found no match
			left_marker.ConstructLeftJoinResult(input, chunk);
			left_marker.Reset();
		}
		return OperatorResultType::NEED_MORE_INPUT;
	}

	// First pass: resolve the matches for this chunk
	AsOfLocalState::Match matches[STANDARD_VECTOR_SIZE];
	local_state.ResolveJoin(input, nullptr, matches);
	local_state.group_payload.Reset();
	local_state.rhs_payload.Reset();

	auto &partitions = global_sink.global_partition;
	// Start from a bin equal to bin_groups.size() (one past the last index into bin_groups),
	// so the first match differs from it and loads a group
	hash_t current_bin = partitions.bin_groups.size();
	optional_ptr<PartitionGlobalHashGroup> current_group;
	unique_ptr<PayloadScanner> payload_scanner;

	const auto match_count = local_state.lhs_match_count;
	for (idx_t out_row = 0; out_row < match_count; ++out_row) {
		const auto input_row = local_state.lhs_matched[out_row];
		const auto match_bin = matches[input_row].first;
		const auto match_pos = matches[input_row].second;

		if (match_bin != current_bin) {
			// The match lives in a different group: open a fresh scanner on it
			const auto group_idx = partitions.bin_groups[match_bin];
			current_group = partitions.hash_groups[group_idx].get();
			current_bin = match_bin;
			payload_scanner = make_uniq<PayloadScanner>(*current_group->global_sort, false);
			local_state.group_payload.Reset();
		}

		// Scan forward until the buffered payload chunk covers the matched position
		while (match_pos >= payload_scanner->Scanned()) {
			local_state.group_payload.Reset();
			payload_scanner->Scan(local_state.group_payload);
		}

		// Translate the group-wide position into an offset inside the buffered chunk
		const auto buffered_begin = payload_scanner->Scanned() - local_state.group_payload.size();
		const auto source_offset = match_pos - buffered_begin;

		// Copy the single matched row of every projected column into output row `out_row`
		// TODO: Batch the copies
		const auto projected_count = right_projection_map.size();
		for (idx_t proj_idx = 0; proj_idx < projected_count; ++proj_idx) {
			const auto rhs_column = right_projection_map[proj_idx];
			auto &source = local_state.group_payload.data[rhs_column];
			auto &target = chunk.data[input.ColumnCount() + proj_idx];
			VectorOperations::Copy(source, target, source_offset + 1, source_offset, out_row);
		}
	}

	// The left-hand columns are the matched rows of the input
	chunk.Slice(input, local_state.lhs_matched, local_state.lhs_match_count);

	if (!local_state.left_outer.Enabled()) {
		return OperatorResultType::NEED_MORE_INPUT;
	}

	// Left join: come back with the same input for the NULL-padded rows
	local_state.fetch_next_left = false;
	return OperatorResultType::HAVE_MORE_OUTPUT;
}
|
523
|
+
|
524
|
+
//! Operator entry point: handles an empty right-hand side, then dispatches on the join type.
//! SEMI/ANTI/MARK go through ResolveSimpleJoin, LEFT/INNER/RIGHT/OUTER through
//! ResolveComplexJoin; every other join type raises a NotImplementedException.
OperatorResultType PhysicalAsOfJoin::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
                                                     GlobalOperatorState &gstate, OperatorState &lstate) const {
	auto &global_sink = sink_state->Cast<AsOfGlobalSinkState>();

	const bool rhs_is_empty = global_sink.global_partition.count == 0;
	if (rhs_is_empty) {
		if (EmptyResultIfRHSIsEmpty()) {
			return OperatorResultType::FINISHED;
		}
		// The join type still produces rows with an empty RHS
		ConstructEmptyJoinResult(join_type, global_sink.has_null, input, chunk);
		return OperatorResultType::NEED_MORE_INPUT;
	}

	input.Verify();
	switch (join_type) {
	case JoinType::LEFT:
	case JoinType::INNER:
	case JoinType::RIGHT:
	case JoinType::OUTER:
		return ResolveComplexJoin(context, input, chunk, lstate);
	case JoinType::SEMI:
	case JoinType::ANTI:
	case JoinType::MARK:
		// simple joins can have max STANDARD_VECTOR_SIZE matches per chunk
		ResolveSimpleJoin(context, input, chunk, lstate);
		return OperatorResultType::NEED_MORE_INPUT;
	default:
		throw NotImplementedException("Unimplemented type for as-of join!");
	}
}
|
555
|
+
|
556
|
+
//===--------------------------------------------------------------------===//
|
557
|
+
// Source
|
558
|
+
//===--------------------------------------------------------------------===//
|
559
|
+
class AsOfGlobalSourceState : public GlobalSourceState {
|
560
|
+
public:
|
561
|
+
explicit AsOfGlobalSourceState(PartitionGlobalSinkState &gsink_p) : gsink(gsink_p), next_bin(0) {
|
562
|
+
}
|
563
|
+
|
564
|
+
PartitionGlobalSinkState &gsink;
|
565
|
+
//! The output read position.
|
566
|
+
atomic<idx_t> next_bin;
|
567
|
+
|
568
|
+
public:
|
569
|
+
idx_t MaxThreads() override {
|
570
|
+
// If there is only one partition, we have to process it on one thread.
|
571
|
+
if (!gsink.grouping_data) {
|
572
|
+
return 1;
|
573
|
+
}
|
574
|
+
|
575
|
+
// If there is not a lot of data, process serially.
|
576
|
+
if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
|
577
|
+
return 1;
|
578
|
+
}
|
579
|
+
|
580
|
+
return gsink.hash_groups.size();
|
581
|
+
}
|
582
|
+
};
|
583
|
+
|
584
|
+
//! Creates the global source state on top of the sink's global partition.
unique_ptr<GlobalSourceState> PhysicalAsOfJoin::GetGlobalSourceState(ClientContext &context) const {
	auto &partition = sink_state->Cast<AsOfGlobalSinkState>().global_partition;
	return make_uniq<AsOfGlobalSourceState>(partition);
}
|
588
|
+
|
589
|
+
class AsOfLocalSourceState : public LocalSourceState {
|
590
|
+
public:
|
591
|
+
using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
|
592
|
+
|
593
|
+
explicit AsOfLocalSourceState(AsOfGlobalSinkState &gstate_p);
|
594
|
+
|
595
|
+
idx_t GeneratePartition(const idx_t hash_bin);
|
596
|
+
|
597
|
+
AsOfGlobalSinkState &gstate;
|
598
|
+
|
599
|
+
//! The read partition
|
600
|
+
idx_t hash_bin;
|
601
|
+
HashGroupPtr hash_group;
|
602
|
+
|
603
|
+
//! The read cursor
|
604
|
+
unique_ptr<PayloadScanner> scanner;
|
605
|
+
//! Buffer for the inputs
|
606
|
+
DataChunk input_chunk;
|
607
|
+
//! Pointer to the matches
|
608
|
+
const bool *found_match;
|
609
|
+
};
|
610
|
+
|
611
|
+
//! Binds the state to the global sink and sizes the input buffer for the
//! global partition's payload types.
AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSinkState &gstate_p) : gstate(gstate_p) {
	auto &partition = gstate.global_partition;
	input_chunk.Initialize(partition.allocator, partition.payload_types);
}
|
614
|
+
|
615
|
+
//! Points this state at bin `hash_bin_p`: takes over that hash group from the
//! global partition, opens a payload scanner on its sorted data and picks up the
//! match flags recorded for it.
//! Returns the number of rows the new scanner still has to produce.
//! NOTE(review): `hash_bin_p` is used as an index without any bounds or null check,
//! so the caller must pass a bin that still holds a hash group.
idx_t AsOfLocalSourceState::GeneratePartition(const idx_t hash_bin_p) {
	hash_bin = hash_bin_p;

	auto &partition = gstate.global_partition;
	// Ownership moves into this state; the slot in the global list is moved-from afterwards
	hash_group = std::move(partition.hash_groups[hash_bin]);
	scanner = make_uniq<PayloadScanner>(*hash_group->global_sort);

	auto &outer_marker = gstate.right_outers[hash_bin];
	found_match = outer_marker.GetMatches();

	return scanner->Remaining();
}
|
625
|
+
|
626
|
+
//! Creates a local source state bound to this operator's global sink state.
unique_ptr<LocalSourceState> PhysicalAsOfJoin::GetLocalSourceState(ExecutionContext &context,
                                                                   GlobalSourceState &gstate) const {
	auto &global_sink = sink_state->Cast<AsOfGlobalSinkState>();
	return make_uniq<AsOfLocalSourceState>(global_sink);
}
|
631
|
+
|
632
|
+
//! Source side of the right/full outer as-of join: emits the right-hand rows that
//! were never matched, with constant NULLs in the left-hand columns.
//!
//! Each call returns as soon as it has produced a non-empty `result`, or leaves
//! `result` empty once all bins have been handed out. Bins are claimed by
//! incrementing the shared `next_bin` counter of the global source state.
void PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk &result, GlobalSourceState &gstate_p,
                               LocalSourceState &lstate_p) const {
	D_ASSERT(IsRightOuterJoin(join_type));

	auto &gsource = gstate_p.Cast<AsOfGlobalSourceState>();
	auto &lsource = lstate_p.Cast<AsOfLocalSourceState>();
	auto &gsink = gsource.gsink;

	auto &hash_groups = gsink.hash_groups;
	const auto bin_count = hash_groups.size();

	DataChunk rhs_chunk;
	rhs_chunk.Initialize(Allocator::Get(context.client), gsink.payload_types);
	SelectionVector rsel(STANDARD_VECTOR_SIZE);

	while (result.size() == 0) {
		// Move to the next bin if we are done.
		while (!lsource.scanner || !lsource.scanner->Remaining()) {
			lsource.scanner.reset();
			lsource.hash_group.reset();

			// Claim bins until one still holds a hash group
			auto hash_bin = gsource.next_bin++;
			while (hash_bin < bin_count && !hash_groups[hash_bin]) {
				hash_bin = gsource.next_bin++;
			}
			// Either the first claim or the skipping above ran past the last bin: nothing
			// is left to read. Checking after the skip loop keeps an out-of-range bin from
			// ever reaching GeneratePartition, which indexes hash_groups unchecked.
			if (hash_bin >= bin_count) {
				return;
			}
			lsource.GeneratePartition(hash_bin);
		}

		// Position of the first row of the upcoming chunk within the hash group
		const auto rhs_position = lsource.scanner->Scanned();
		lsource.scanner->Scan(rhs_chunk);

		const auto count = rhs_chunk.size();
		if (count == 0) {
			return;
		}

		// figure out which tuples didn't find a match in the RHS
		// (the flags are indexed by position within the hash group, hence the offset)
		auto found_match = lsource.found_match;
		idx_t result_count = 0;
		for (idx_t i = 0; i < count; i++) {
			if (!found_match[rhs_position + i]) {
				rsel.set_index(result_count++, i);
			}
		}

		if (result_count > 0) {
			// if there were any tuples that didn't find a match, output them
			const idx_t left_column_count = children[0]->types.size();
			// Left-hand side: constant NULL in every column
			for (idx_t col_idx = 0; col_idx < left_column_count; ++col_idx) {
				result.data[col_idx].SetVectorType(VectorType::CONSTANT_VECTOR);
				ConstantVector::SetNull(result.data[col_idx], true);
			}
			// Right-hand side: the projected columns, restricted to the unmatched rows
			for (idx_t col_idx = 0; col_idx < right_projection_map.size(); ++col_idx) {
				const auto rhs_idx = right_projection_map[col_idx];
				result.data[left_column_count + col_idx].Slice(rhs_chunk.data[rhs_idx], rsel, result_count);
			}
			result.SetCardinality(result_count);
			return;
		}
	}
}
|
697
|
+
|
698
|
+
} // namespace duckdb
|
@@ -381,7 +381,7 @@ void BaseCSVReader::VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, i
|
|
381
381
|
|
382
382
|
auto parse_data = FlatVector::GetData<string_t>(chunk.data[col_idx]);
|
383
383
|
auto s = parse_data[row_idx];
|
384
|
-
auto utf_type = Utf8Proc::Analyze(s.
|
384
|
+
auto utf_type = Utf8Proc::Analyze(s.GetData(), s.GetSize());
|
385
385
|
if (utf_type == UnicodeType::INVALID) {
|
386
386
|
string col_name = to_string(col_idx);
|
387
387
|
if (col_idx < names.size()) {
|