duckdb 1.4.2 → 1.4.3
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- package/package.json +1 -1
- package/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +14 -5
- package/src/duckdb/extension/parquet/column_writer.cpp +4 -4
- package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +12 -4
- package/src/duckdb/src/common/encryption_key_manager.cpp +4 -0
- package/src/duckdb/src/common/local_file_system.cpp +23 -0
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +6 -0
- package/src/duckdb/src/common/types/conflict_manager.cpp +1 -1
- package/src/duckdb/src/execution/index/art/base_node.cpp +3 -1
- package/src/duckdb/src/execution/index/art/prefix.cpp +5 -8
- package/src/duckdb/src/execution/index/bound_index.cpp +68 -25
- package/src/duckdb/src/execution/index/unbound_index.cpp +21 -10
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +4 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +36 -28
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +3 -2
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +12 -6
- package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +8 -4
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +4 -3
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +3 -2
- package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +0 -1
- package/src/duckdb/src/execution/physical_plan/plan_window.cpp +6 -8
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +4 -3
- package/src/duckdb/src/function/macro_function.cpp +20 -2
- package/src/duckdb/src/function/table/system/duckdb_log.cpp +3 -0
- package/src/duckdb/src/function/table/system/test_all_types.cpp +26 -13
- package/src/duckdb/src/function/table/table_scan.cpp +72 -38
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/table_function.cpp +24 -0
- package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/limits.hpp +4 -2
- package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +41 -7
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +15 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -1
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +4 -4
- package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +2 -6
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +4 -1
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
- package/src/duckdb/src/logging/log_storage.cpp +17 -23
- package/src/duckdb/src/main/capi/duckdb-c.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +0 -5
- package/src/duckdb/src/main/database_manager.cpp +12 -9
- package/src/duckdb/src/main/db_instance_cache.cpp +15 -1
- package/src/duckdb/src/main/extension/extension_alias.cpp +1 -0
- package/src/duckdb/src/optimizer/filter_combiner.cpp +38 -4
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -15
- package/src/duckdb/src/optimizer/late_materialization.cpp +5 -0
- package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +6 -3
- package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +3 -2
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +4 -1
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +17 -10
- package/src/duckdb/src/planner/binder.cpp +3 -3
- package/src/duckdb/src/planner/bound_result_modifier.cpp +22 -5
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +4 -1
- package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder.cpp +1 -2
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +57 -24
- package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +5 -3
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +9 -0
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/table/chunk_info.cpp +3 -3
- package/src/duckdb/src/storage/table/column_data.cpp +5 -1
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +1 -1
- package/src/duckdb/src/storage/table/column_segment.cpp +3 -1
- package/src/duckdb/src/storage/table/row_group.cpp +6 -8
- package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -1
- package/src/duckdb/src/storage/table/row_version_manager.cpp +37 -23
- package/src/duckdb/src/storage/table/standard_column_data.cpp +5 -5
- package/src/duckdb/src/storage/table/validity_column_data.cpp +17 -0

package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
@@ -462,24 +462,30 @@ void CSVSniffer::DetectTypes() {
 		idx_t varchar_cols = 0;
 		for (idx_t col = 0; col < info_sql_types_candidates.size(); col++) {
 			auto &col_type_candidates = info_sql_types_candidates[col];
-			// check number of varchar columns
+			// check the number of varchar columns
 			const auto &col_type = col_type_candidates.back();
 			if (col_type == LogicalType::VARCHAR) {
 				varchar_cols++;
 			}
 		}
 
-		// it's good if the dialect creates more non-varchar columns
-
+		// it's good if the dialect creates more non-varchar columns
+		const bool has_less_varchar_cols = varchar_cols < min_varchar_cols;
+		// but only if we sacrifice < 30% of best_num_cols.
+		const bool acceptable_best_num_cols =
+		    static_cast<double>(info_sql_types_candidates.size()) > static_cast<double>(max_columns_found) * 0.7;
 		const idx_t number_of_errors = candidate->error_handler->GetSize();
-
-
-
+		const bool better_strictness = best_candidate_is_strict ? !candidate->used_unstrictness : true;
+		const bool acceptable_candidate = has_less_varchar_cols && acceptable_best_num_cols && better_strictness;
+		// If we escaped an unquoted character when strict is false.
+		if (!best_candidate ||
+		    (acceptable_candidate && (!options.ignore_errors.GetValue() || number_of_errors < min_errors))) {
 			min_errors = number_of_errors;
 			best_header_row.clear();
 			// we have a new best_options candidate
 			best_candidate = std::move(candidate);
 			min_varchar_cols = varchar_cols;
+			best_candidate_is_strict = !best_candidate->used_unstrictness;
 			best_sql_types_candidates_per_column_idx = info_sql_types_candidates;
 			for (auto &format_candidate : format_candidates) {
 				best_format_candidates[format_candidate.first] = format_candidate.second.format;
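
The acceptance test introduced above combines three conditions before a new dialect candidate replaces the current best one. The following standalone sketch restates that rule outside of DuckDB; Candidate and IsBetterCandidate are illustrative stand-ins, not the sniffer's real classes, and only mirror the logic visible in the hunk (fewer VARCHAR columns, at least ~70% of the widest column count kept, and never trading a strict candidate for an unstrict one).

#include <cstddef>
#include <cstdio>

// Simplified stand-ins for the sniffer state; names are illustrative only.
struct Candidate {
	std::size_t varchar_cols;   // columns that stayed VARCHAR (nothing better was detected)
	std::size_t num_cols;       // columns produced by this dialect
	std::size_t errors;         // parse errors reported by the error handler
	bool used_unstrictness;     // dialect only parsed with strict mode disabled
};

// Mirrors the acceptance test in CSVSniffer::DetectTypes (sketch, not the real signature).
static bool IsBetterCandidate(const Candidate &cand, std::size_t min_varchar_cols, std::size_t max_columns_found,
                              std::size_t min_errors, bool best_is_strict, bool ignore_errors, bool have_best) {
	if (!have_best) {
		return true; // the first candidate always wins
	}
	const bool has_less_varchar_cols = cand.varchar_cols < min_varchar_cols;
	// Fewer VARCHAR columns only count if we keep more than 70% of the widest candidate.
	const bool acceptable_num_cols =
	    static_cast<double>(cand.num_cols) > static_cast<double>(max_columns_found) * 0.7;
	// Never replace a strict candidate with one that needed unstrict parsing.
	const bool better_strictness = best_is_strict ? !cand.used_unstrictness : true;
	const bool acceptable = has_less_varchar_cols && acceptable_num_cols && better_strictness;
	return acceptable && (!ignore_errors || cand.errors < min_errors);
}

int main() {
	Candidate narrow_but_typed {/*varchar_cols=*/1, /*num_cols=*/8, /*errors=*/0, /*used_unstrictness=*/false};
	// 8 of 10 columns kept (80% > 70%), fewer VARCHARs, still strict: accepted (prints 1).
	std::printf("%d\n", IsBetterCandidate(narrow_but_typed, /*min_varchar_cols=*/3, /*max_columns_found=*/10,
	                                      /*min_errors=*/0, /*best_is_strict=*/true, /*ignore_errors=*/false,
	                                      /*have_best=*/true));
	return 0;
}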

package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp
@@ -67,10 +67,14 @@ public:
 
 		InterruptState interrupt_state;
 		OperatorSourceInput source_input {global_state, *local_state, interrupt_state};
-		auto source_result =
-
-
-
+		auto source_result = SourceResultType::HAVE_MORE_OUTPUT;
+		while (source_result == SourceResultType::HAVE_MORE_OUTPUT && source.size() == 0) {
+			// TODO: this could as well just be propagated further, but for now iterating it is
+			source_result = table.GetData(context, source, source_input);
+			if (source_result == SourceResultType::BLOCKED) {
+				throw NotImplementedException(
+				    "Unexpected interrupt from table Source in PositionalTableScanner refill");
+			}
 		}
 	}
 	source_offset = 0;
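
The refill loop above keeps pulling from the child table source until it either produces rows or reports that it is finished, and treats a BLOCKED result as unsupported. Below is a minimal self-contained sketch of the same pattern; FakeSource and the locally defined SourceResultType are stand-ins for DuckDB's operator interfaces, not the real API.

#include <cstdio>
#include <stdexcept>
#include <vector>

enum class SourceResultType { HAVE_MORE_OUTPUT, FINISHED, BLOCKED };

// Fake source: returns empty batches a few times before yielding data (illustrative only).
struct FakeSource {
	int empty_batches_left = 3;
	SourceResultType GetData(std::vector<int> &chunk) {
		chunk.clear();
		if (empty_batches_left > 0) {
			empty_batches_left--;
			return SourceResultType::HAVE_MORE_OUTPUT; // empty chunk, but more may follow
		}
		chunk = {1, 2, 3};
		return SourceResultType::FINISHED;
	}
};

// Same shape as the PositionalTableScanner refill: loop until we have rows or the source is done.
static std::vector<int> Refill(FakeSource &source) {
	std::vector<int> chunk;
	auto result = SourceResultType::HAVE_MORE_OUTPUT;
	while (result == SourceResultType::HAVE_MORE_OUTPUT && chunk.empty()) {
		result = source.GetData(chunk);
		if (result == SourceResultType::BLOCKED) {
			throw std::runtime_error("unexpected interrupt while refilling");
		}
	}
	return chunk;
}

int main() {
	FakeSource source;
	auto chunk = Refill(source);
	std::printf("got %zu rows after skipping empty batches\n", chunk.size());
	return 0;
}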

package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp
@@ -259,7 +259,7 @@ bool PhysicalTableScan::Equals(const PhysicalOperator &other_p) const {
 		return false;
 	}
 	auto &other = other_p.Cast<PhysicalTableScan>();
-	if (function
+	if (function != other.function) {
 		return false;
 	}
 	if (column_ids != other.column_ids) {

package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp
@@ -236,7 +236,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalAggregate &op) {
 	D_ASSERT(op.children.size() == 1);
 
 	reference<PhysicalOperator> plan = CreatePlan(*op.children[0]);
-	plan = ExtractAggregateExpressions(plan, op.expressions, op.groups);
+	plan = ExtractAggregateExpressions(plan, op.expressions, op.groups, op.grouping_sets);
 
 	bool can_use_simple_aggregation = true;
 	for (auto &expression : op.expressions) {
@@ -305,7 +305,8 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalAggregate &op) {
 
 PhysicalOperator &PhysicalPlanGenerator::ExtractAggregateExpressions(PhysicalOperator &child,
                                                                      vector<unique_ptr<Expression>> &aggregates,
-                                                                     vector<unique_ptr<Expression>> &groups
+                                                                     vector<unique_ptr<Expression>> &groups,
+                                                                     optional_ptr<vector<GroupingSet>> grouping_sets) {
 	vector<unique_ptr<Expression>> expressions;
 	vector<LogicalType> types;
 
@@ -314,7 +315,7 @@ PhysicalOperator &PhysicalPlanGenerator::ExtractAggregateExpressions(PhysicalOpe
 		auto &bound_aggr = aggr->Cast<BoundAggregateExpression>();
 		if (bound_aggr.order_bys) {
 			// sorted aggregate!
-			FunctionBinder::BindSortedAggregate(context, bound_aggr, groups);
+			FunctionBinder::BindSortedAggregate(context, bound_aggr, groups, grouping_sets);
 		}
 	}
 	for (auto &group : groups) {

package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp
@@ -65,7 +65,8 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
 
 		if (ClientConfig::GetConfig(context).enable_optimizer) {
 			bool changes_made = false;
-			auto new_expr =
+			auto new_expr =
+			    OrderedAggregateOptimizer::Apply(context, *first_aggregate, groups, nullptr, changes_made);
 			if (new_expr) {
 				D_ASSERT(new_expr->return_type == first_aggregate->return_type);
 				D_ASSERT(new_expr->GetExpressionType() == ExpressionType::BOUND_AGGREGATE);
@@ -81,7 +82,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
 		}
 	}
 
-	child = ExtractAggregateExpressions(child, aggregates, groups);
+	child = ExtractAggregateExpressions(child, aggregates, groups, nullptr);
 
 	// we add a physical hash aggregation in the plan to select the distinct groups
 	auto &group_by = Make<PhysicalHashAggregate>(context, aggregate_types, std::move(aggregates), std::move(groups),

package/src/duckdb/src/execution/physical_plan/plan_filter.cpp
@@ -14,7 +14,6 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalFilter &op) {
 	D_ASSERT(op.children.size() == 1);
 	reference<PhysicalOperator> plan = CreatePlan(*op.children[0]);
 	if (!op.expressions.empty()) {
-		D_ASSERT(!plan.get().GetTypes().empty());
 		// create a filter if there is anything to filter
 		auto &filter = Make<PhysicalFilter>(plan.get().GetTypes(), std::move(op.expressions), op.estimated_cardinality);
 		filter.children.push_back(plan);

package/src/duckdb/src/execution/physical_plan/plan_window.cpp
@@ -2,13 +2,11 @@
 #include "duckdb/execution/operator/aggregate/physical_window.hpp"
 #include "duckdb/execution/operator/projection/physical_projection.hpp"
 #include "duckdb/execution/physical_plan_generator.hpp"
-#include "duckdb/main/
+#include "duckdb/main/client_config.hpp"
 #include "duckdb/planner/expression/bound_reference_expression.hpp"
 #include "duckdb/planner/expression/bound_window_expression.hpp"
 #include "duckdb/planner/operator/logical_window.hpp"
 
-#include <numeric>
-
 namespace duckdb {
 
 PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalWindow &op) {
@@ -44,12 +42,12 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalWindow &op) {
 	// Process the window functions by sharing the partition/order definitions
 	unordered_map<idx_t, idx_t> projection_map;
 	vector<vector<idx_t>> window_expressions;
-	idx_t
+	idx_t streaming_count = 0;
 	auto output_pos = input_width;
 	while (!blocking_windows.empty() || !streaming_windows.empty()) {
-		const bool
-		auto &remaining =
-
+		const bool process_blocking = streaming_windows.empty();
+		auto &remaining = process_blocking ? blocking_windows : streaming_windows;
+		streaming_count += process_blocking ? 0 : 1;
 
 		// Find all functions that share the partitioning of the first remaining expression
 		auto over_idx = remaining[0];
@@ -122,7 +120,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalWindow &op) {
 		}
 
 		// Chain the new window operator on top of the plan
-		if (i
+		if (i >= streaming_count) {
 			auto &window = Make<PhysicalWindow>(types, std::move(select_list), op.estimated_cardinality);
 			window.children.push_back(plan);
 			plan = window;
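
The streaming_count bookkeeping above emits the streaming-capable window groups first and counts them, so a group with index i is planned as a buffering PhysicalWindow exactly when i >= streaming_count. Below is a simplified sketch of that scheduling order, assuming the streaming operator choice happens in the branch not shown in the hunk; the deque contents and the printed operator labels are illustrative.

#include <cstddef>
#include <cstdio>
#include <deque>
#include <vector>

// Sketch of the loop in CreatePlan(LogicalWindow &): streaming groups are emitted first and
// counted, so that group i is planned as a streaming operator exactly when i < streaming_count.
int main() {
	std::deque<int> blocking_windows = {0, 2};  // expression indexes that need full partitioning/sorting
	std::deque<int> streaming_windows = {1, 3}; // expressions that can be computed on the fly

	std::vector<std::vector<int>> window_groups;
	std::size_t streaming_count = 0;
	while (!blocking_windows.empty() || !streaming_windows.empty()) {
		const bool process_blocking = streaming_windows.empty();
		auto &remaining = process_blocking ? blocking_windows : streaming_windows;
		streaming_count += process_blocking ? 0 : 1;
		window_groups.push_back({remaining.front()}); // the real code groups all expressions sharing a partitioning
		remaining.pop_front();
	}

	for (std::size_t i = 0; i < window_groups.size(); i++) {
		const char *op = (i >= streaming_count) ? "PhysicalWindow (buffering)" : "streaming window operator";
		std::printf("group %zu -> %s\n", i, op);
	}
	return 0;
}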

package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp
@@ -677,14 +677,15 @@ struct SortedAggregateFunction {
 } // namespace
 
 void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
-                                         const vector<unique_ptr<Expression>> &groups
+                                         const vector<unique_ptr<Expression>> &groups,
+                                         optional_ptr<vector<GroupingSet>> grouping_sets) {
 	if (!expr.order_bys || expr.order_bys->orders.empty() || expr.children.empty()) {
 		// not a sorted aggregate: return
 		return;
 	}
 	// Remove unnecessary ORDER BY clauses and return if nothing remains
 	if (context.config.enable_optimizer) {
-		if (expr.order_bys->Simplify(groups)) {
+		if (expr.order_bys->Simplify(groups, grouping_sets)) {
 			expr.order_bys.reset();
 			return;
 		}
@@ -741,7 +742,7 @@ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundWindowExpr
 	}
 	// Remove unnecessary ORDER BY clauses and return if nothing remains
 	if (context.config.enable_optimizer) {
-		if (BoundOrderModifier::Simplify(expr.arg_orders, expr.partitions)) {
+		if (BoundOrderModifier::Simplify(expr.arg_orders, expr.partitions, nullptr)) {
 			expr.arg_orders.clear();
 			return;
 		}
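
Passing the grouping sets into Simplify matters because an ORDER BY inside an aggregate can only be dropped as redundant when the grouping really fixes the sort key in every group. Below is a hedged sketch of that idea on plain index sets; CanDropOrderKey is illustrative only, and DuckDB's actual Simplify works on bound expressions rather than indexes.

#include <cstddef>
#include <cstdio>
#include <set>
#include <vector>

using GroupingSet = std::set<std::size_t>; // indexes into the GROUP BY list

// Sketch: an ORDER BY key that is a group column can be dropped only if that column
// is present in *every* grouping set (otherwise some sets leave it un-grouped).
static bool CanDropOrderKey(std::size_t group_idx, const std::vector<GroupingSet> *grouping_sets) {
	if (!grouping_sets || grouping_sets->empty()) {
		return true; // plain GROUP BY: every group column is constant within a group
	}
	for (auto &grouping_set : *grouping_sets) {
		if (grouping_set.count(group_idx) == 0) {
			return false;
		}
	}
	return true;
}

int main() {
	// GROUPING SETS ((a, b), (a)): column 0 (a) is in both sets, column 1 (b) is not.
	std::vector<GroupingSet> sets = {{0, 1}, {0}};
	std::printf("drop ORDER BY a: %d\n", CanDropOrderKey(0, &sets)); // 1
	std::printf("drop ORDER BY b: %d\n", CanDropOrderKey(1, &sets)); // 0
	return 0;
}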

package/src/duckdb/src/function/macro_function.cpp
@@ -48,13 +48,31 @@ MacroBindResult MacroFunction::BindMacroFunction(
 
 	ExpressionBinder expr_binder(binder, binder.context);
 	expr_binder.lambda_bindings = binder.lambda_bindings;
+
+	// Figure out whether we even need to bind arguments
+	bool requires_bind = false;
+	for (auto &function : functions) {
+		for (const auto &type : function->types) {
+			if (type.id() != LogicalTypeId::UNKNOWN) {
+				requires_bind = true;
+				break;
+			}
+		}
+		if (requires_bind) {
+			break;
+		}
+	}
+
 	// Find argument types and separate positional and default arguments
 	vector<LogicalType> positional_arg_types;
 	InsertionOrderPreservingMap<LogicalType> named_arg_types;
 	for (auto &arg : function_expr.children) {
 		auto arg_copy = arg->Copy();
-
-
+		LogicalType arg_type = LogicalType::UNKNOWN;
+		if (requires_bind) {
+			const auto arg_bind_result = expr_binder.BindExpression(arg_copy, depth + 1);
+			arg_type = arg_bind_result.HasError() ? LogicalType::UNKNOWN : arg_bind_result.expression->return_type;
+		}
 		if (!arg->GetAlias().empty()) {
 			// Default argument
 			if (named_arguments.find(arg->GetAlias()) != named_arguments.end()) {
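
The new requires_bind pre-check skips binding the macro's argument expressions when no overload declares a concrete parameter type, since the argument types then cannot affect overload selection. Below is a standalone sketch of that short-circuit; MacroOverload and TypeId are simplified stand-ins for DuckDB's function and LogicalType classes.

#include <cstdio>
#include <vector>

enum class TypeId { UNKNOWN, INTEGER, VARCHAR };

struct MacroOverload {
	std::vector<TypeId> parameter_types;
};

// Sketch of the requires_bind check: binding argument expressions is only worth the cost
// if at least one overload has a concrete (non-UNKNOWN) parameter type to match against.
static bool RequiresArgumentBinding(const std::vector<MacroOverload> &overloads) {
	for (auto &overload : overloads) {
		for (auto type : overload.parameter_types) {
			if (type != TypeId::UNKNOWN) {
				return true;
			}
		}
	}
	return false;
}

int main() {
	std::vector<MacroOverload> untyped = {{{TypeId::UNKNOWN, TypeId::UNKNOWN}}};
	std::vector<MacroOverload> typed = {{{TypeId::UNKNOWN}}, {{TypeId::INTEGER}}};
	std::printf("untyped macro needs binding: %d\n", RequiresArgumentBinding(untyped)); // 0
	std::printf("typed macro needs binding:   %d\n", RequiresArgumentBinding(typed));   // 1
	return 0;
}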

package/src/duckdb/src/function/table/system/duckdb_log.cpp
@@ -62,6 +62,9 @@ unique_ptr<TableRef> DuckDBLogBindReplace(ClientContext &context, TableFunctionB
 	bool denormalized_table = false;
 	auto denormalized_table_setting = input.named_parameters.find("denormalized_table");
 	if (denormalized_table_setting != input.named_parameters.end()) {
+		if (denormalized_table_setting->second.IsNull()) {
+			throw InvalidInputException("denormalized_table cannot be NULL");
+		}
 		denormalized_table = denormalized_table_setting->second.GetValue<bool>();
 	}
 

package/src/duckdb/src/function/table/system/test_all_types.cpp
@@ -19,9 +19,10 @@ struct TestAllTypesData : public GlobalTableFunctionState {
 	idx_t offset;
 };
 
-vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_large_bignum) {
+vector<TestType> TestAllTypesFun::GetTestTypes(const bool use_large_enum, const bool use_large_bignum) {
 	vector<TestType> result;
-
+
+	// Numeric types.
 	result.emplace_back(LogicalType::BOOLEAN, "bool");
 	result.emplace_back(LogicalType::TINYINT, "tinyint");
 	result.emplace_back(LogicalType::SMALLINT, "smallint");
@@ -33,24 +34,31 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
 	result.emplace_back(LogicalType::USMALLINT, "usmallint");
 	result.emplace_back(LogicalType::UINTEGER, "uint");
 	result.emplace_back(LogicalType::UBIGINT, "ubigint");
+
+	// BIGNUM.
 	if (use_large_bignum) {
 		string data;
-		idx_t total_data_size = Bignum::BIGNUM_HEADER_SIZE + Bignum::MAX_DATA_SIZE;
+		constexpr idx_t total_data_size = Bignum::BIGNUM_HEADER_SIZE + Bignum::MAX_DATA_SIZE;
 		data.resize(total_data_size);
-
+
+		// Let's set the max header.
 		Bignum::SetHeader(&data[0], Bignum::MAX_DATA_SIZE, false);
-		// Set all
+		// Set all other max bits.
 		memset(&data[Bignum::BIGNUM_HEADER_SIZE], 0xFF, Bignum::MAX_DATA_SIZE);
 		auto max = Value::BIGNUM(data);
-
+
+		// Let's set the min header.
 		Bignum::SetHeader(&data[0], Bignum::MAX_DATA_SIZE, true);
-		// Set all
+		// Set all other min bits.
 		memset(&data[Bignum::BIGNUM_HEADER_SIZE], 0x00, Bignum::MAX_DATA_SIZE);
 		auto min = Value::BIGNUM(data);
 		result.emplace_back(LogicalType::BIGNUM, "bignum", min, max);
+
 	} else {
 		result.emplace_back(LogicalType::BIGNUM, "bignum");
 	}
+
+	// Time-types.
 	result.emplace_back(LogicalType::DATE, "date");
 	result.emplace_back(LogicalType::TIME, "time");
 	result.emplace_back(LogicalType::TIMESTAMP, "timestamp");
@@ -59,15 +67,19 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
 	result.emplace_back(LogicalType::TIMESTAMP_NS, "timestamp_ns");
 	result.emplace_back(LogicalType::TIME_TZ, "time_tz");
 	result.emplace_back(LogicalType::TIMESTAMP_TZ, "timestamp_tz");
-
-
+
+	// More complex numeric types.
+	result.emplace_back(LogicalType::FLOAT, "float", Value::FLOAT(std::numeric_limits<float>::lowest()),
+	                    Value::FLOAT(std::numeric_limits<float>::max()));
+	result.emplace_back(LogicalType::DOUBLE, "double", Value::DOUBLE(std::numeric_limits<double>::lowest()),
+	                    Value::DOUBLE(std::numeric_limits<double>::max()));
 	result.emplace_back(LogicalType::DECIMAL(4, 1), "dec_4_1");
 	result.emplace_back(LogicalType::DECIMAL(9, 4), "dec_9_4");
 	result.emplace_back(LogicalType::DECIMAL(18, 6), "dec_18_6");
 	result.emplace_back(LogicalType::DECIMAL(38, 10), "dec38_10");
 	result.emplace_back(LogicalType::UUID, "uuid");
 
-	//
+	// Interval.
 	interval_t min_interval;
 	min_interval.months = 0;
 	min_interval.days = 0;
@@ -79,14 +91,15 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
 	max_interval.micros = 999999999;
 	result.emplace_back(LogicalType::INTERVAL, "interval", Value::INTERVAL(min_interval),
 	                    Value::INTERVAL(max_interval));
-
+
+	// VARCHAR / BLOB / Bitstrings.
 	result.emplace_back(LogicalType::VARCHAR, "varchar", Value("🦆🦆🦆🦆🦆🦆"),
 	                    Value(string("goo\x00se", 6)));
 	result.emplace_back(LogicalType::BLOB, "blob", Value::BLOB("thisisalongblob\\x00withnullbytes"),
 	                    Value::BLOB("\\x00\\x00\\x00a"));
 	result.emplace_back(LogicalType::BIT, "bit", Value::BIT("0010001001011100010101011010111"), Value::BIT("10101"));
 
-	//
+	// ENUMs.
 	Vector small_enum(LogicalType::VARCHAR, 2);
 	auto small_enum_ptr = FlatVector::GetData<string_t>(small_enum);
 	small_enum_ptr[0] = StringVector::AddStringOrBlob(small_enum, "DUCK_DUCK_ENUM");
@@ -116,7 +129,7 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
 		result.emplace_back(LogicalType::ENUM(large_enum, 2), "large_enum");
 	}
 
-	//
+	// ARRAYs.
 	auto int_list_type = LogicalType::LIST(LogicalType::INTEGER);
 	auto empty_int_list = Value::LIST(LogicalType::INTEGER, vector<Value>());
 	auto int_list =

package/src/duckdb/src/function/table/table_scan.cpp
@@ -54,6 +54,7 @@ struct IndexScanLocalState : public LocalTableFunctionState {
 	TableScanState scan_state;
 	//! The column IDs of the local storage scan.
 	vector<StorageIndex> column_ids;
+	bool in_charge_of_final_stretch {false};
 };
 
 static StorageIndex TransformStorageIndex(const ColumnIndex &column_id) {
@@ -114,7 +115,7 @@ class DuckIndexScanState : public TableScanGlobalState {
 public:
	DuckIndexScanState(ClientContext &context, const FunctionData *bind_data_p)
 	    : TableScanGlobalState(context, bind_data_p), next_batch_index(0), arena(Allocator::Get(context)),
-	      row_ids(nullptr), row_id_count(0),
+	      row_ids(nullptr), row_id_count(0), finished_first_phase(false), started_last_phase(false) {
 	}
 
 	//! The batch index of the next Sink.
@@ -129,7 +130,8 @@ public:
 	//! The column IDs of the to-be-scanned columns.
 	vector<StorageIndex> column_ids;
 	//! True, if no more row IDs must be scanned.
-	bool
+	bool finished_first_phase;
+	bool started_last_phase;
 	//! Synchronize changes to the global index scan state.
 	mutex index_scan_lock;
 
@@ -163,44 +165,75 @@ public:
 		auto &storage = duck_table.GetStorage();
 		auto &l_state = data_p.local_state->Cast<IndexScanLocalState>();
 
-
-
-
-		{
-
-
-
-
-
-
-
-
-
-
+		enum class ExecutionPhase { NONE = 0, STORAGE = 1, LOCAL_STORAGE = 2 };
+
+		// We might need to loop back, so while (true)
+		while (true) {
+			idx_t scan_count = 0;
+			idx_t offset = 0;
+
+			// Phase selection
+			auto phase_to_be_performed = ExecutionPhase::NONE;
+			{
+				// Synchronize changes to the shared global state.
+				lock_guard<mutex> l(index_scan_lock);
+				if (!finished_first_phase) {
+					l_state.batch_index = next_batch_index;
+					next_batch_index++;
+
+					offset = l_state.batch_index * STANDARD_VECTOR_SIZE;
+					auto remaining = row_id_count - offset;
+					scan_count = remaining <= STANDARD_VECTOR_SIZE ? remaining : STANDARD_VECTOR_SIZE;
+					finished_first_phase = remaining <= STANDARD_VECTOR_SIZE ? true : false;
+					phase_to_be_performed = ExecutionPhase::STORAGE;
+				} else if (!started_last_phase) {
+					// First thread to get last phase, great, set l_state's in_charge_of_final_stretch, so same thread
+					// will be on again
+					started_last_phase = true;
+					l_state.in_charge_of_final_stretch = true;
+					phase_to_be_performed = ExecutionPhase::LOCAL_STORAGE;
+				} else if (l_state.in_charge_of_final_stretch) {
+					phase_to_be_performed = ExecutionPhase::LOCAL_STORAGE;
+				}
 			}
-		}
 
-
-
-
-
-
-
-
-
-
-
+			switch (phase_to_be_performed) {
+			case ExecutionPhase::NONE: {
+				// No work to be picked up
+				return;
+			}
+			case ExecutionPhase::STORAGE: {
+				// Scan (in parallel) storage
+				auto row_id_data = reinterpret_cast<data_ptr_t>(row_ids + offset);
+				Vector local_vector(LogicalType::ROW_TYPE, row_id_data);
+
+				if (CanRemoveFilterColumns()) {
+					l_state.all_columns.Reset();
+					storage.Fetch(tx, l_state.all_columns, column_ids, local_vector, scan_count, l_state.fetch_state);
+					output.ReferenceColumns(l_state.all_columns, projection_ids);
+				} else {
+					storage.Fetch(tx, output, column_ids, local_vector, scan_count, l_state.fetch_state);
+				}
+				if (output.size() == 0) {
+					// output is empty, loop back, since there might be results to be picked up from LOCAL_STORAGE phase
+					continue;
+				}
+				return;
+			}
+			case ExecutionPhase::LOCAL_STORAGE: {
+				// Scan (sequentially, always same logical thread) local_storage
+				auto &local_storage = LocalStorage::Get(tx);
+				{
+					if (CanRemoveFilterColumns()) {
+						l_state.all_columns.Reset();
+						local_storage.Scan(l_state.scan_state.local_state, column_ids, l_state.all_columns);
+						output.ReferenceColumns(l_state.all_columns, projection_ids);
+					} else {
+						local_storage.Scan(l_state.scan_state.local_state, column_ids, output);
+					}
+				}
+				return;
 			}
-			}
-
-		if (output.size() == 0) {
-			auto &local_storage = LocalStorage::Get(tx);
-			if (CanRemoveFilterColumns()) {
-				l_state.all_columns.Reset();
-				local_storage.Scan(l_state.scan_state.local_state, column_ids, l_state.all_columns);
-				output.ReferenceColumns(l_state.all_columns, projection_ids);
-			} else {
-				local_storage.Scan(l_state.scan_state.local_state, column_ids, output);
 			}
 		}
 	}
@@ -350,7 +383,8 @@ unique_ptr<GlobalTableFunctionState> DuckTableScanInitGlobal(ClientContext &cont
 unique_ptr<GlobalTableFunctionState> DuckIndexScanInitGlobal(ClientContext &context, TableFunctionInitInput &input,
                                                              const TableScanBindData &bind_data, set<row_t> &row_ids) {
 	auto g_state = make_uniq<DuckIndexScanState>(context, input.bind_data.get());
-	g_state->
+	g_state->finished_first_phase = row_ids.empty() ? true : false;
+	g_state->started_last_phase = false;
 
 	if (!row_ids.empty()) {
 		auto row_id_ptr = g_state->arena.AllocateAligned(row_ids.size() * sizeof(row_t));
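
The rewritten index scan above splits work into two phases: row-id batches from persistent storage are handed out to threads in parallel, and the transaction-local storage is then scanned by exactly one thread, the first one to arrive after the batches run out. Below is a condensed, standalone sketch of that scheduling; ScanScheduler and the worker setup are illustrative and omit the actual data fetching.

#include <cstddef>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

// Condensed version of the DuckIndexScanState phase logic (illustrative only).
struct ScanScheduler {
	std::mutex lock;
	std::size_t next_batch = 0;
	std::size_t total_batches;
	bool finished_first_phase = false;
	bool started_last_phase = false;

	explicit ScanScheduler(std::size_t batches) : total_batches(batches), finished_first_phase(batches == 0) {
	}

	enum class Phase { NONE, STORAGE, LOCAL_STORAGE };

	// Each worker keeps calling NextPhase until it returns NONE.
	Phase NextPhase(bool &in_charge_of_final_stretch, std::size_t &batch) {
		std::lock_guard<std::mutex> guard(lock);
		if (!finished_first_phase) {
			batch = next_batch++;
			finished_first_phase = next_batch >= total_batches;
			return Phase::STORAGE; // parallel: scan one row-id batch from persistent storage
		}
		if (!started_last_phase) {
			started_last_phase = true;
			in_charge_of_final_stretch = true; // this worker owns the local-storage scan
			return Phase::LOCAL_STORAGE;
		}
		return in_charge_of_final_stretch ? Phase::LOCAL_STORAGE : Phase::NONE;
	}
};

int main() {
	ScanScheduler scheduler(8);
	std::vector<std::thread> workers;
	for (int t = 0; t < 4; t++) {
		workers.emplace_back([&scheduler, t] {
			bool in_charge = false;
			std::size_t batch = 0;
			while (true) {
				auto phase = scheduler.NextPhase(in_charge, batch);
				if (phase == ScanScheduler::Phase::NONE) {
					return;
				}
				if (phase == ScanScheduler::Phase::STORAGE) {
					std::printf("thread %d scans batch %zu\n", t, batch);
				} else {
					std::printf("thread %d scans local storage\n", t);
					return; // the real code keeps rescheduling until local storage is exhausted
				}
			}
		});
	}
	for (auto &w : workers) {
		w.join();
	}
	return 0;
}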

package/src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "
+#define DUCKDB_PATCH_VERSION "3"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 4
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.4.
+#define DUCKDB_VERSION "v1.4.3"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "d1dc88f950"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"

package/src/duckdb/src/function/table_function.cpp
@@ -37,6 +37,30 @@ TableFunction::TableFunction(const vector<LogicalType> &arguments, table_functio
 TableFunction::TableFunction() : TableFunction("", {}, nullptr, nullptr, nullptr, nullptr) {
 }
 
+bool TableFunction::operator==(const TableFunction &rhs) const {
+	return name == rhs.name && arguments == rhs.arguments && varargs == rhs.varargs && bind == rhs.bind &&
+	       bind_replace == rhs.bind_replace && bind_operator == rhs.bind_operator && init_global == rhs.init_global &&
+	       init_local == rhs.init_local && function == rhs.function && in_out_function == rhs.in_out_function &&
+	       in_out_function_final == rhs.in_out_function_final && statistics == rhs.statistics &&
+	       dependency == rhs.dependency && cardinality == rhs.cardinality &&
+	       pushdown_complex_filter == rhs.pushdown_complex_filter && pushdown_expression == rhs.pushdown_expression &&
+	       to_string == rhs.to_string && dynamic_to_string == rhs.dynamic_to_string &&
+	       table_scan_progress == rhs.table_scan_progress && get_partition_data == rhs.get_partition_data &&
+	       get_bind_info == rhs.get_bind_info && type_pushdown == rhs.type_pushdown &&
+	       get_multi_file_reader == rhs.get_multi_file_reader && supports_pushdown_type == rhs.supports_pushdown_type &&
+	       get_partition_info == rhs.get_partition_info && get_partition_stats == rhs.get_partition_stats &&
+	       get_virtual_columns == rhs.get_virtual_columns && get_row_id_columns == rhs.get_row_id_columns &&
+	       serialize == rhs.serialize && deserialize == rhs.deserialize &&
+	       verify_serialization == rhs.verify_serialization && projection_pushdown == rhs.projection_pushdown &&
+	       filter_pushdown == rhs.filter_pushdown && filter_prune == rhs.filter_prune &&
+	       sampling_pushdown == rhs.sampling_pushdown && late_materialization == rhs.late_materialization &&
+	       global_initialization == rhs.global_initialization;
+}
+
+bool TableFunction::operator!=(const TableFunction &rhs) const {
+	return !(*this == rhs);
+}
+
 bool TableFunction::Equal(const TableFunction &rhs) const {
 	// number of types
 	if (this->arguments.size() != rhs.arguments.size()) {
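
The new operator== is a member-wise comparison of the callback pointers and other fields, which is what lets PhysicalTableScan::Equals above simply write function != other.function. Below is a reduced sketch of the same idea; MiniTableFunction is illustrative and keeps only a few of the members the real operator compares.

#include <cstdio>
#include <string>

// Reduced stand-in for duckdb::TableFunction: the callbacks are plain function pointers,
// so equality can be decided member-wise (two functions are equal if all callbacks match).
struct MiniTableFunction {
	std::string name;
	void (*bind)() = nullptr;
	void (*init_global)() = nullptr;
	void (*function)() = nullptr;

	bool operator==(const MiniTableFunction &rhs) const {
		return name == rhs.name && bind == rhs.bind && init_global == rhs.init_global && function == rhs.function;
	}
	bool operator!=(const MiniTableFunction &rhs) const {
		return !(*this == rhs);
	}
};

static void BindA() {
}
static void ScanA() {
}
static void ScanB() {
}

int main() {
	MiniTableFunction a {"my_scan", BindA, nullptr, ScanA};
	MiniTableFunction b {"my_scan", BindA, nullptr, ScanA};
	MiniTableFunction c {"my_scan", BindA, nullptr, ScanB};
	std::printf("a == b: %d\n", a == b); // 1: same callbacks
	std::printf("a != c: %d\n", a != c); // 1: different scan callback
	return 0;
}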

package/src/duckdb/src/include/duckdb/common/limits.hpp
@@ -24,10 +24,12 @@ namespace duckdb {
 template <class T>
 struct NumericLimits {
 	static constexpr T Minimum() {
-		return std::numeric_limits<T>::
+		return std::numeric_limits<T>::has_infinity ? -std::numeric_limits<T>::infinity()
+		                                            : std::numeric_limits<T>::lowest();
 	}
 	static constexpr T Maximum() {
-		return std::numeric_limits<T>::
+		return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity()
+		                                            : std::numeric_limits<T>::max();
 	}
 	static constexpr bool IsSigned() {
 		return std::is_signed<T>::value;
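
With this change, Minimum and Maximum return negative and positive infinity for floating-point types (where has_infinity is true) and keep returning lowest() and max() for integer types. Below is a minimal standalone re-creation of the pattern for illustration; the real header has more members.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

// Minimal re-creation of the pattern used in duckdb/common/limits.hpp after the change.
template <class T>
struct MiniNumericLimits {
	static constexpr T Minimum() {
		return std::numeric_limits<T>::has_infinity ? -std::numeric_limits<T>::infinity()
		                                            : std::numeric_limits<T>::lowest();
	}
	static constexpr T Maximum() {
		return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity()
		                                            : std::numeric_limits<T>::max();
	}
};

int main() {
	// Floating-point limits are now the infinities...
	std::printf("double max is inf: %d\n", std::isinf(MiniNumericLimits<double>::Maximum()));
	std::printf("float min is -inf: %d\n", std::isinf(MiniNumericLimits<float>::Minimum()));
	// ...while integer limits are unchanged.
	std::printf("int32 max: %d\n", MiniNumericLimits<int32_t>::Maximum());
	return 0;
}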

package/src/duckdb/src/include/duckdb/common/local_file_system.hpp
@@ -38,6 +38,8 @@ public:
 	int64_t GetFileSize(FileHandle &handle) override;
 	//! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error
 	timestamp_t GetLastModifiedTime(FileHandle &handle) override;
+	//! Returns a tag that uniquely identifies the version of the file
+	string GetVersionTag(FileHandle &handle) override;
 	//! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error
 	FileType GetFileType(FileHandle &handle) override;
 	//! Truncate a file to a maximum size of new_size, new_size should be smaller than or equal to the current size of
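
The header now declares GetVersionTag, a string intended to change whenever the underlying file changes. The diff only shows the declaration; purely as a hypothetical illustration of what such a tag could be derived from, the sketch below combines file size and last-write time via std::filesystem. This is not DuckDB's implementation.

#include <cstdio>
#include <filesystem>
#include <string>
#include <system_error>

// Hypothetical version tag: combines file size and last-write time, so the tag changes
// whenever the file is rewritten. Purely illustrative; not LocalFileSystem::GetVersionTag.
static std::string VersionTagFor(const std::filesystem::path &path) {
	std::error_code ec;
	const auto size = std::filesystem::file_size(path, ec);
	if (ec) {
		return std::string(); // unknown version
	}
	const auto mtime = std::filesystem::last_write_time(path, ec);
	if (ec) {
		return std::string();
	}
	const auto ticks = mtime.time_since_epoch().count();
	return std::to_string(size) + "-" + std::to_string(ticks);
}

int main(int argc, char **argv) {
	if (argc < 2) {
		std::printf("usage: %s <file>\n", argv[0]);
		return 1;
	}
	std::printf("version tag: %s\n", VersionTagFor(argv[1]).c_str());
	return 0;
}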

package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp
@@ -256,6 +256,8 @@ public:
 		if (parent.get().GetType() == NType::PREFIX) {
 			// We might have to compress:
 			// PREFIX (greatgrandparent) - Node4 (grandparent) - PREFIX - INLINED_LEAF.
+			// The parent does not have to be passed in, as it is a child of the possibly being compressed N4.
+			// Then, when we delete that child, we also free it.
 			Node::DeleteChild(art, grandparent, greatgrandparent, current_key.get()[grandparent_depth], status,
 			                  row_id);
 			return;

package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp
@@ -48,7 +48,7 @@ public:
 
 	//! Concatenates parent -> prev_node4 -> child.
 	static void Concat(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte,
-	                   const GateStatus node4_status);
+	                   const GateStatus node4_status, const GateStatus status);
 
 	//! Removes up to pos bytes from the prefix.
 	//! Shifts all subsequent bytes by pos. Frees empty nodes.
@@ -72,7 +72,7 @@ private:
 	static Prefix GetTail(ART &art, const Node &node);
 
 	static void ConcatInternal(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte,
-	                           const
+	                           const GateStatus status);
 	static void ConcatNode4WasGate(ART &art, Node &node4, const Node child, uint8_t byte);
 	static void ConcatChildIsGate(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte);
 	static void ConcatOutsideGate(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte);

package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp
@@ -169,7 +169,7 @@ public:
 	//! Replay index insert and delete operations buffered during WAL replay.
 	//! table_types has the physical types of the table in the order they appear, not logical (no generated columns).
 	//! mapped_column_ids contains the sorted order of Indexed physical column ID's (see unbound_index.hpp comments).
-	void ApplyBufferedReplays(const vector<LogicalType> &table_types,
+	void ApplyBufferedReplays(const vector<LogicalType> &table_types, BufferedIndexReplays &buffered_replays,
 	                          const vector<StorageIndex> &mapped_column_ids);
 
 protected:
|