duckdb 1.4.3-dev0.0 → 1.4.4-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +14 -5
  4. package/src/duckdb/extension/parquet/column_writer.cpp +4 -4
  5. package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +12 -4
  6. package/src/duckdb/src/common/encryption_key_manager.cpp +4 -0
  7. package/src/duckdb/src/common/local_file_system.cpp +23 -0
  8. package/src/duckdb/src/common/types/column/column_data_collection.cpp +6 -0
  9. package/src/duckdb/src/common/types/conflict_manager.cpp +1 -1
  10. package/src/duckdb/src/execution/index/art/base_node.cpp +3 -1
  11. package/src/duckdb/src/execution/index/art/prefix.cpp +5 -8
  12. package/src/duckdb/src/execution/index/bound_index.cpp +68 -25
  13. package/src/duckdb/src/execution/index/unbound_index.cpp +21 -10
  14. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +4 -0
  15. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +36 -28
  16. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +3 -2
  17. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +12 -6
  18. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +8 -4
  19. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  20. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +4 -3
  21. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +3 -2
  22. package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +0 -1
  23. package/src/duckdb/src/execution/physical_plan/plan_window.cpp +6 -8
  24. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +4 -3
  25. package/src/duckdb/src/function/macro_function.cpp +20 -2
  26. package/src/duckdb/src/function/table/system/duckdb_log.cpp +3 -0
  27. package/src/duckdb/src/function/table/system/test_all_types.cpp +26 -13
  28. package/src/duckdb/src/function/table/table_scan.cpp +72 -38
  29. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  30. package/src/duckdb/src/function/table_function.cpp +24 -0
  31. package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +1 -0
  32. package/src/duckdb/src/include/duckdb/common/limits.hpp +4 -2
  33. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +2 -0
  36. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +41 -7
  39. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +15 -1
  40. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -0
  41. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -1
  42. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -1
  43. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  47. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +4 -4
  49. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +3 -1
  50. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +3 -0
  51. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +4 -2
  53. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -2
  54. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +3 -1
  56. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -3
  57. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +2 -6
  58. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +4 -1
  59. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
  60. package/src/duckdb/src/logging/log_storage.cpp +17 -23
  61. package/src/duckdb/src/main/capi/duckdb-c.cpp +1 -1
  62. package/src/duckdb/src/main/connection.cpp +0 -5
  63. package/src/duckdb/src/main/database_manager.cpp +12 -9
  64. package/src/duckdb/src/main/db_instance_cache.cpp +15 -1
  65. package/src/duckdb/src/main/extension/extension_alias.cpp +1 -0
  66. package/src/duckdb/src/optimizer/filter_combiner.cpp +38 -4
  67. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -15
  68. package/src/duckdb/src/optimizer/late_materialization.cpp +5 -0
  69. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +6 -3
  70. package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +3 -2
  71. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +1 -1
  72. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +1 -1
  73. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +4 -1
  74. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +17 -10
  75. package/src/duckdb/src/planner/binder.cpp +3 -3
  76. package/src/duckdb/src/planner/bound_result_modifier.cpp +22 -5
  77. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +4 -1
  78. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +1 -1
  79. package/src/duckdb/src/planner/expression_binder.cpp +1 -2
  80. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +57 -24
  81. package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +5 -3
  82. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +9 -0
  83. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  84. package/src/duckdb/src/storage/table/chunk_info.cpp +3 -3
  85. package/src/duckdb/src/storage/table/column_data.cpp +5 -1
  86. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +1 -1
  87. package/src/duckdb/src/storage/table/column_segment.cpp +3 -1
  88. package/src/duckdb/src/storage/table/row_group.cpp +6 -8
  89. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -1
  90. package/src/duckdb/src/storage/table/row_version_manager.cpp +37 -23
  91. package/src/duckdb/src/storage/table/standard_column_data.cpp +5 -5
  92. package/src/duckdb/src/storage/table/validity_column_data.cpp +17 -0
@@ -2,6 +2,7 @@
2
2
 
3
3
  #include "duckdb/common/enums/expression_type.hpp"
4
4
  #include "duckdb/execution/expression_executor.hpp"
5
+ #include "duckdb/function/scalar/string_common.hpp"
5
6
  #include "duckdb/optimizer/optimizer.hpp"
6
7
  #include "duckdb/planner/expression.hpp"
7
8
  #include "duckdb/planner/expression/bound_between_expression.hpp"
@@ -24,6 +25,7 @@
24
25
  #include "duckdb/optimizer/column_lifetime_analyzer.hpp"
25
26
  #include "duckdb/planner/expression_iterator.hpp"
26
27
  #include "duckdb/planner/operator/logical_get.hpp"
28
+ #include "utf8proc_wrapper.hpp"
27
29
 
28
30
  namespace duckdb {
29
31
 
@@ -282,6 +284,35 @@ static bool SupportedFilterComparison(ExpressionType expression_type) {
282
284
  }
283
285
  }
284
286
 
287
+ bool FilterCombiner::FindNextLegalUTF8(string &prefix_string) {
288
+ // find the start of the last codepoint
289
+ idx_t last_codepoint_start;
290
+ for (last_codepoint_start = prefix_string.size(); last_codepoint_start > 0; last_codepoint_start--) {
291
+ if (IsCharacter(prefix_string[last_codepoint_start - 1])) {
292
+ break;
293
+ }
294
+ }
295
+ if (last_codepoint_start == 0) {
296
+ throw InvalidInputException("Invalid UTF8 found in string \"%s\"", prefix_string);
297
+ }
298
+ last_codepoint_start--;
299
+ int codepoint_size;
300
+ auto codepoint = Utf8Proc::UTF8ToCodepoint(prefix_string.c_str() + last_codepoint_start, codepoint_size) + 1;
301
+ if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
302
+ // next codepoint falls within surrogate range increment to next valid character
303
+ codepoint = 0xE000;
304
+ }
305
+ char next_codepoint_text[4];
306
+ int next_codepoint_size;
307
+ if (!Utf8Proc::CodepointToUtf8(codepoint, next_codepoint_size, next_codepoint_text)) {
308
+ // invalid codepoint
309
+ return false;
310
+ }
311
+ auto s = static_cast<idx_t>(next_codepoint_size);
312
+ prefix_string = prefix_string.substr(0, last_codepoint_start) + string(next_codepoint_text, s);
313
+ return true;
314
+ }
315
+
285
316
  bool TypeSupportsConstantFilter(const LogicalType &type) {
286
317
  if (TypeIsNumeric(type.InternalType())) {
287
318
  return true;
@@ -397,11 +428,14 @@ FilterPushdownResult FilterCombiner::TryPushdownPrefixFilter(TableFilterSet &tab
397
428
  auto &column_index = column_ids[column_ref.binding.column_index];
398
429
  //! Replace prefix with a set of comparisons
399
430
  auto lower_bound = make_uniq<ConstantFilter>(ExpressionType::COMPARE_GREATERTHANOREQUALTO, Value(prefix_string));
400
- prefix_string[prefix_string.size() - 1]++;
401
- auto upper_bound = make_uniq<ConstantFilter>(ExpressionType::COMPARE_LESSTHAN, Value(prefix_string));
402
431
  table_filters.PushFilter(column_index, std::move(lower_bound));
403
- table_filters.PushFilter(column_index, std::move(upper_bound));
404
- return FilterPushdownResult::PUSHED_DOWN_FULLY;
432
+ if (FilterCombiner::FindNextLegalUTF8(prefix_string)) {
433
+ auto upper_bound = make_uniq<ConstantFilter>(ExpressionType::COMPARE_LESSTHAN, Value(prefix_string));
434
+ table_filters.PushFilter(column_index, std::move(upper_bound));
435
+ return FilterPushdownResult::PUSHED_DOWN_FULLY;
436
+ }
437
+ // could not find next legal utf8 string - skip upper bound
438
+ return FilterPushdownResult::NO_PUSHDOWN;
405
439
  }
406
440
 
407
441
  FilterPushdownResult FilterCombiner::TryPushdownLikeFilter(TableFilterSet &table_filters,
@@ -54,12 +54,9 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr<LogicalOpera
54
54
  auto relation_id = relations.size();
55
55
 
56
56
  auto table_indexes = op.GetTableIndex();
57
- bool is_unnest_or_get_with_unnest = op.type == LogicalOperatorType::LOGICAL_UNNEST;
57
+ bool get_all_child_bindings = op.type == LogicalOperatorType::LOGICAL_UNNEST;
58
58
  if (op.type == LogicalOperatorType::LOGICAL_GET) {
59
- auto &get = op.Cast<LogicalGet>();
60
- if (get.function.name == "unnest") {
61
- is_unnest_or_get_with_unnest = true;
62
- }
59
+ get_all_child_bindings = !op.children.empty();
63
60
  }
64
61
  if (table_indexes.empty()) {
65
62
  // relation represents a non-reorderable relation, most likely a join relation
@@ -72,9 +69,9 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr<LogicalOpera
72
69
  D_ASSERT(relation_mapping.find(reference) == relation_mapping.end());
73
70
  relation_mapping[reference] = relation_id;
74
71
  }
75
- } else if (is_unnest_or_get_with_unnest) {
76
- // logical unnest has a logical_unnest index, but other bindings can refer to
77
- // columns that are not unnested.
72
+ } else if (get_all_child_bindings) {
73
+ // logical get has a logical_get index, but if a function is present other bindings can refer to
74
+ // columns that are not unnested, and from the child of the logical get.
78
75
  auto bindings = op.GetColumnBindings();
79
76
  for (auto &binding : bindings) {
80
77
  relation_mapping[binding.table_index] = relation_id;
@@ -189,10 +186,10 @@ static void ModifyStatsIfLimit(optional_ptr<LogicalOperator> limit_op, RelationS
189
186
  }
190
187
  }
191
188
 
192
- void RelationManager::AddUnnestRelation(JoinOrderOptimizer &optimizer, LogicalOperator &op, LogicalOperator &input_op,
193
- optional_ptr<LogicalOperator> parent, RelationStats &child_stats,
194
- optional_ptr<LogicalOperator> limit_op,
195
- vector<reference<LogicalOperator>> &datasource_filters) {
189
+ void RelationManager::AddRelationWithChildren(JoinOrderOptimizer &optimizer, LogicalOperator &op,
190
+ LogicalOperator &input_op, optional_ptr<LogicalOperator> parent,
191
+ RelationStats &child_stats, optional_ptr<LogicalOperator> limit_op,
192
+ vector<reference<LogicalOperator>> &datasource_filters) {
196
193
  D_ASSERT(!op.children.empty());
197
194
  auto child_optimizer = optimizer.CreateChildOptimizer();
198
195
  op.children[0] = child_optimizer.Optimize(std::move(op.children[0]), &child_stats);
@@ -301,7 +298,7 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica
301
298
  case LogicalOperatorType::LOGICAL_UNNEST: {
302
299
  // optimize children of unnest
303
300
  RelationStats child_stats;
304
- AddUnnestRelation(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
301
+ AddRelationWithChildren(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
305
302
  return true;
306
303
  }
307
304
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
@@ -359,9 +356,12 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica
359
356
  case LogicalOperatorType::LOGICAL_GET: {
360
357
  // TODO: Get stats from a logical GET
361
358
  auto &get = op->Cast<LogicalGet>();
362
- if (get.function.name == "unnest" && !op->children.empty()) {
359
+ // this is a get that *most likely* has a function (like unnest or json_each).
360
+ // there are new bindings for output of the function, but child bindings also exist, and can
361
+ // be used in joins
362
+ if (!op->children.empty()) {
363
363
  RelationStats child_stats;
364
- AddUnnestRelation(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
364
+ AddRelationWithChildren(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
365
365
  return true;
366
366
  }
367
367
  auto stats = RelationStatisticsHelper::ExtractGetStats(get, context);
@@ -432,6 +432,11 @@ bool LateMaterialization::OptimizeLargeLimit(LogicalLimit &limit, idx_t limit_va
432
432
  }
433
433
  current_op = *current_op.get().children[0];
434
434
  }
435
+ // if there are any filters we shouldn't do large limit optimization
436
+ auto &get = current_op.get().Cast<LogicalGet>();
437
+ if (!get.table_filters.filters.empty()) {
438
+ return false;
439
+ }
435
440
  return true;
436
441
  }
437
442
 
@@ -17,7 +17,9 @@ OrderedAggregateOptimizer::OrderedAggregateOptimizer(ExpressionRewriter &rewrite
17
17
  }
18
18
 
19
19
  unique_ptr<Expression> OrderedAggregateOptimizer::Apply(ClientContext &context, BoundAggregateExpression &aggr,
20
- vector<unique_ptr<Expression>> &groups, bool &changes_made) {
20
+ vector<unique_ptr<Expression>> &groups,
21
+ optional_ptr<vector<GroupingSet>> grouping_sets,
22
+ bool &changes_made) {
21
23
  if (!aggr.order_bys) {
22
24
  // no ORDER BYs defined
23
25
  return nullptr;
@@ -30,7 +32,7 @@ unique_ptr<Expression> OrderedAggregateOptimizer::Apply(ClientContext &context,
30
32
  }
31
33
 
32
34
  // Remove unnecessary ORDER BY clauses and return if nothing remains
33
- if (aggr.order_bys->Simplify(groups)) {
35
+ if (aggr.order_bys->Simplify(groups, grouping_sets)) {
34
36
  aggr.order_bys.reset();
35
37
  changes_made = true;
36
38
  return nullptr;
@@ -90,7 +92,8 @@ unique_ptr<Expression> OrderedAggregateOptimizer::Apply(ClientContext &context,
90
92
  unique_ptr<Expression> OrderedAggregateOptimizer::Apply(LogicalOperator &op, vector<reference<Expression>> &bindings,
91
93
  bool &changes_made, bool is_root) {
92
94
  auto &aggr = bindings[0].get().Cast<BoundAggregateExpression>();
93
- return Apply(rewriter.context, aggr, op.Cast<LogicalAggregate>().groups, changes_made);
95
+ return Apply(rewriter.context, aggr, op.Cast<LogicalAggregate>().groups, op.Cast<LogicalAggregate>().grouping_sets,
96
+ changes_made);
94
97
  }
95
98
 
96
99
  } // namespace duckdb
@@ -44,8 +44,9 @@ unique_ptr<SampleOptions> Transformer::TransformSampleOptions(optional_ptr<duckd
44
44
  } else {
45
45
  // sample size is given in rows: use reservoir sampling
46
46
  auto rows = sample_value.GetValue<int64_t>();
47
- if (rows < 0) {
48
- throw ParserException("Sample rows %lld out of range, must be bigger than or equal to 0", rows);
47
+ if (rows < 0 || sample_value.GetValue<uint64_t>() > SampleOptions::MAX_SAMPLE_ROWS) {
48
+ throw ParserException("Sample rows %lld out of range, must be between 0 and %lld", rows,
49
+ SampleOptions::MAX_SAMPLE_ROWS);
49
50
  }
50
51
  result->sample_size = Value::BIGINT(rows);
51
52
  result->method = SampleMethod::RESERVOIR_SAMPLE;
@@ -216,7 +216,7 @@ void TryTransformStarLike(unique_ptr<ParsedExpression> &root) {
216
216
  child_expr = std::move(list_filter);
217
217
  }
218
218
 
219
- auto columns_expr = make_uniq<StarExpression>();
219
+ auto columns_expr = make_uniq<StarExpression>(star.relation_name);
220
220
  columns_expr->columns = true;
221
221
  columns_expr->expr = std::move(child_expr);
222
222
  columns_expr->SetAlias(std::move(original_alias));
@@ -30,7 +30,7 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundSelectNode &statement) {
30
30
  root = PlanFilter(std::move(statement.where_clause), std::move(root));
31
31
  }
32
32
 
33
- if (!statement.aggregates.empty() || !statement.groups.group_expressions.empty()) {
33
+ if (!statement.aggregates.empty() || !statement.groups.group_expressions.empty() || statement.having) {
34
34
  if (!statement.groups.group_expressions.empty()) {
35
35
  // visit the groups
36
36
  for (auto &group : statement.groups.group_expressions) {
@@ -423,7 +423,10 @@ vector<Value> BindCopyOption(ClientContext &context, TableFunctionBinder &option
423
423
  }
424
424
  }
425
425
  auto bound_expr = option_binder.Bind(expr);
426
- auto val = ExpressionExecutor::EvaluateScalar(context, *bound_expr);
426
+ if (bound_expr->HasParameter()) {
427
+ throw ParameterNotResolvedException();
428
+ }
429
+ auto val = ExpressionExecutor::EvaluateScalar(context, *bound_expr, true);
427
430
  if (val.IsNull()) {
428
431
  throw BinderException("NULL is not supported as a valid option for COPY option \"" + name + "\"");
429
432
  }
@@ -465,7 +465,11 @@ unique_ptr<MergeIntoStatement> Binder::GenerateMergeInto(InsertStatement &stmt,
465
465
 
466
466
  if (on_conflict_info.action_type == OnConflictAction::REPLACE) {
467
467
  D_ASSERT(!on_conflict_info.set_info);
468
- on_conflict_info.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
468
+ // For BY POSITION, create explicit SET information
469
+ // For BY NAME, leave it empty and let bind_merge_into handle it automatically
470
+ if (stmt.column_order != InsertColumnOrder::INSERT_BY_NAME) {
471
+ on_conflict_info.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
472
+ }
469
473
  on_conflict_info.action_type = OnConflictAction::UPDATE;
470
474
  }
471
475
  // now set up the merge actions
@@ -484,16 +488,19 @@ unique_ptr<MergeIntoStatement> Binder::GenerateMergeInto(InsertStatement &stmt,
484
488
  // when doing UPDATE set up the when matched action
485
489
  auto update_action = make_uniq<MergeIntoAction>();
486
490
  update_action->action_type = MergeActionType::MERGE_UPDATE;
487
- for (auto &col : on_conflict_info.set_info->expressions) {
488
- vector<unordered_set<string>> lambda_params;
489
- DoUpdateSetQualify(col, table_name, lambda_params);
490
- }
491
- if (on_conflict_info.set_info->condition) {
492
- vector<unordered_set<string>> lambda_params;
493
- DoUpdateSetQualify(on_conflict_info.set_info->condition, table_name, lambda_params);
494
- update_action->condition = std::move(on_conflict_info.set_info->condition);
491
+ update_action->column_order = stmt.column_order;
492
+ if (on_conflict_info.set_info) {
493
+ for (auto &col : on_conflict_info.set_info->expressions) {
494
+ vector<unordered_set<string>> lambda_params;
495
+ DoUpdateSetQualify(col, table_name, lambda_params);
496
+ }
497
+ if (on_conflict_info.set_info->condition) {
498
+ vector<unordered_set<string>> lambda_params;
499
+ DoUpdateSetQualify(on_conflict_info.set_info->condition, table_name, lambda_params);
500
+ update_action->condition = std::move(on_conflict_info.set_info->condition);
501
+ }
502
+ update_action->update_info = std::move(on_conflict_info.set_info);
495
503
  }
496
- update_action->update_info = std::move(on_conflict_info.set_info);
497
504
 
498
505
  merge_into->actions[MergeActionCondition::WHEN_MATCHED].push_back(std::move(update_action));
499
506
  }
@@ -70,7 +70,7 @@ Binder::Binder(ClientContext &context, shared_ptr<Binder> parent_p, BinderType b
70
70
  }
71
71
  }
72
72
 
73
- unique_ptr<BoundCTENode> Binder::BindMaterializedCTE(CommonTableExpressionMap &cte_map) {
73
+ unique_ptr<BoundCTENode> Binder::BindMaterializedCTE(CommonTableExpressionMap &cte_map, unique_ptr<CTENode> &cte_root) {
74
74
  // Extract materialized CTEs from cte_map
75
75
  vector<unique_ptr<CTENode>> materialized_ctes;
76
76
  for (auto &cte : cte_map.map) {
@@ -87,7 +87,6 @@ unique_ptr<BoundCTENode> Binder::BindMaterializedCTE(CommonTableExpressionMap &c
87
87
  return nullptr;
88
88
  }
89
89
 
90
- unique_ptr<CTENode> cte_root = nullptr;
91
90
  while (!materialized_ctes.empty()) {
92
91
  unique_ptr<CTENode> node_result;
93
92
  node_result = std::move(materialized_ctes.back());
@@ -110,7 +109,8 @@ unique_ptr<BoundCTENode> Binder::BindMaterializedCTE(CommonTableExpressionMap &c
110
109
  template <class T>
111
110
  BoundStatement Binder::BindWithCTE(T &statement) {
112
111
  BoundStatement bound_statement;
113
- auto bound_cte = BindMaterializedCTE(statement.template Cast<T>().cte_map);
112
+ unique_ptr<CTENode> cte_root;
113
+ auto bound_cte = BindMaterializedCTE(statement.template Cast<T>().cte_map, cte_root);
114
114
  if (bound_cte) {
115
115
  reference<BoundCTENode> tail_ref = *bound_cte;
116
116
 
@@ -101,14 +101,17 @@ bool BoundOrderModifier::Equals(const unique_ptr<BoundOrderModifier> &left,
101
101
  return BoundOrderModifier::Equals(*left, *right);
102
102
  }
103
103
 
104
- bool BoundOrderModifier::Simplify(vector<BoundOrderByNode> &orders, const vector<unique_ptr<Expression>> &groups) {
104
+ bool BoundOrderModifier::Simplify(vector<BoundOrderByNode> &orders, const vector<unique_ptr<Expression>> &groups,
105
+ optional_ptr<vector<GroupingSet>> grouping_sets) {
105
106
  // for each ORDER BY - check if it is actually necessary
106
107
  // expressions that are in the groups do not need to be ORDERED BY
107
108
  // `ORDER BY` on a group has no effect, because for each aggregate, the group is unique
108
109
  // similarly, we only need to ORDER BY each aggregate once
110
+ expression_map_t<idx_t> group_expressions;
109
111
  expression_set_t seen_expressions;
112
+ idx_t i = 0;
110
113
  for (auto &target : groups) {
111
- seen_expressions.insert(*target);
114
+ group_expressions.insert({*target, i++});
112
115
  }
113
116
  vector<BoundOrderByNode> new_order_nodes;
114
117
  for (auto &order_node : orders) {
@@ -116,16 +119,30 @@ bool BoundOrderModifier::Simplify(vector<BoundOrderByNode> &orders, const vector
116
119
  // we do not need to order by this node
117
120
  continue;
118
121
  }
122
+ auto it = group_expressions.find(*order_node.expression);
123
+ bool add_to_new_order = it == group_expressions.end();
124
+ if (!add_to_new_order && grouping_sets) {
125
+ idx_t group_idx = it->second;
126
+ for (auto &grouping_set : *grouping_sets) {
127
+ if (grouping_set.find(group_idx) == grouping_set.end()) {
128
+ add_to_new_order = true;
129
+ break;
130
+ }
131
+ }
132
+ }
119
133
  seen_expressions.insert(*order_node.expression);
120
- new_order_nodes.push_back(std::move(order_node));
134
+ if (add_to_new_order) {
135
+ new_order_nodes.push_back(std::move(order_node));
136
+ }
121
137
  }
122
138
  orders.swap(new_order_nodes);
123
139
 
124
140
  return orders.empty(); // NOLINT
125
141
  }
126
142
 
127
- bool BoundOrderModifier::Simplify(const vector<unique_ptr<Expression>> &groups) {
128
- return Simplify(orders, groups);
143
+ bool BoundOrderModifier::Simplify(const vector<unique_ptr<Expression>> &groups,
144
+ optional_ptr<vector<GroupingSet>> grouping_sets) {
145
+ return Simplify(orders, groups, grouping_sets);
129
146
  }
130
147
 
131
148
  BoundLimitNode::BoundLimitNode(LimitNodeType type, idx_t constant_integer, double constant_percentage,
@@ -39,7 +39,10 @@ bool BoundFunctionExpression::IsFoldable() const {
39
39
  }
40
40
  }
41
41
  }
42
- return function.stability == FunctionStability::VOLATILE ? false : Expression::IsFoldable();
42
+ if (function.stability == FunctionStability::VOLATILE) {
43
+ return false;
44
+ }
45
+ return Expression::IsFoldable();
43
46
  }
44
47
 
45
48
  bool BoundFunctionExpression::CanThrow() const {
@@ -19,7 +19,7 @@ BindResult ConstantBinder::BindExpression(unique_ptr<ParsedExpression> &expr_ptr
19
19
  return BindExpression(expr_ptr, depth, root_expression);
20
20
  }
21
21
  }
22
- return BindUnsupportedExpression(expr, depth, clause + " cannot contain column names");
22
+ throw BinderException::Unsupported(expr, clause + " cannot contain column names");
23
23
  }
24
24
  case ExpressionClass::SUBQUERY:
25
25
  throw BinderException(clause + " cannot contain subqueries");
@@ -1,6 +1,5 @@
1
1
  #include "duckdb/planner/expression_binder.hpp"
2
2
 
3
- #include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp"
4
3
  #include "duckdb/parser/expression/list.hpp"
5
4
  #include "duckdb/parser/parsed_expression_iterator.hpp"
6
5
  #include "duckdb/planner/binder.hpp"
@@ -166,7 +165,7 @@ static bool CombineMissingColumns(ErrorData &current, ErrorData new_error) {
166
165
  }
167
166
  auto score = StringUtil::SimilarityRating(candidate_column, column_name);
168
167
  candidates.insert(candidate);
169
- scores.emplace_back(make_pair(std::move(candidate), score));
168
+ scores.emplace_back(std::move(candidate), score);
170
169
  }
171
170
  // get a new top-n
172
171
  auto top_candidates = StringUtil::TopNStrings(scores);
@@ -236,6 +236,16 @@ bool FlattenDependentJoins::DetectCorrelatedExpressions(LogicalOperator &op, boo
236
236
  if (DetectCorrelatedExpressions(*child, lateral, new_lateral_depth, condition)) {
237
237
  has_correlation = true;
238
238
  }
239
+
240
+ if (op.type == LogicalOperatorType::LOGICAL_MATERIALIZED_CTE && child_idx == 0) {
241
+ auto &setop = op.Cast<LogicalCTE>();
242
+ binder.recursive_ctes[setop.table_index] = &setop;
243
+ has_correlated_expressions[op] = has_correlation;
244
+ if (has_correlation) {
245
+ setop.correlated_columns = correlated_columns;
246
+ }
247
+ }
248
+
239
249
  child_idx++;
240
250
  }
241
251
 
@@ -261,6 +271,7 @@ bool FlattenDependentJoins::DetectCorrelatedExpressions(LogicalOperator &op, boo
261
271
  return true;
262
272
  }
263
273
  // Found a materialized CTE, subtree correlation depends on the CTE node
274
+ has_correlated_expressions[op] = has_correlated_expressions[*cte_node];
264
275
  return has_correlated_expressions[*cte_node];
265
276
  }
266
277
  // No CTE found: subtree is correlated
@@ -279,47 +290,32 @@ bool FlattenDependentJoins::DetectCorrelatedExpressions(LogicalOperator &op, boo
279
290
  binder.recursive_ctes[setop.table_index] = &setop;
280
291
  if (has_correlation) {
281
292
  setop.correlated_columns = correlated_columns;
282
- MarkSubtreeCorrelated(*op.children[1].get());
283
- }
284
- }
285
-
286
- if (op.type == LogicalOperatorType::LOGICAL_MATERIALIZED_CTE) {
287
- auto &setop = op.Cast<LogicalCTE>();
288
- binder.recursive_ctes[setop.table_index] = &setop;
289
- // only mark the entire subtree as correlated if the materializing side is correlated
290
- auto entry = has_correlated_expressions.find(*op.children[0]);
291
- if (entry != has_correlated_expressions.end()) {
292
- if (has_correlation && entry->second) {
293
- setop.correlated_columns = correlated_columns;
294
- MarkSubtreeCorrelated(*op.children[1].get());
295
- }
293
+ MarkSubtreeCorrelated(*op.children[1].get(), setop.table_index);
296
294
  }
297
295
  }
298
296
 
299
297
  return has_correlation;
300
298
  }
301
299
 
302
- bool FlattenDependentJoins::MarkSubtreeCorrelated(LogicalOperator &op) {
300
+ bool FlattenDependentJoins::MarkSubtreeCorrelated(LogicalOperator &op, idx_t cte_index) {
303
301
  // Do not mark base table scans as correlated
304
302
  auto entry = has_correlated_expressions.find(op);
305
303
  D_ASSERT(entry != has_correlated_expressions.end());
306
304
  bool has_correlation = entry->second;
307
305
  for (auto &child : op.children) {
308
- has_correlation |= MarkSubtreeCorrelated(*child.get());
306
+ has_correlation |= MarkSubtreeCorrelated(*child.get(), cte_index);
309
307
  }
310
308
  if (op.type != LogicalOperatorType::LOGICAL_GET || op.children.size() == 1) {
311
309
  if (op.type == LogicalOperatorType::LOGICAL_CTE_REF) {
312
310
  // There may be multiple recursive CTEs. Only mark CTE_REFs as correlated,
313
311
  // IFF the CTE that we are reading from is correlated.
314
312
  auto &cteref = op.Cast<LogicalCTERef>();
315
- auto cte = binder.recursive_ctes.find(cteref.cte_index);
316
- bool has_correlation = false;
317
- if (cte != binder.recursive_ctes.end()) {
318
- auto &rec_cte = cte->second->Cast<LogicalCTE>();
319
- has_correlation = !rec_cte.correlated_columns.empty();
313
+ if (cteref.cte_index != cte_index) {
314
+ has_correlated_expressions[op] = has_correlation;
315
+ return has_correlation;
320
316
  }
321
- has_correlated_expressions[op] = has_correlation;
322
- return has_correlation;
317
+ has_correlated_expressions[op] = true;
318
+ return true;
323
319
  } else {
324
320
  has_correlated_expressions[op] = has_correlation;
325
321
  }
@@ -695,6 +691,42 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
695
691
  return plan;
696
692
  }
697
693
  } else if (join.join_type == JoinType::MARK) {
694
+ if (!left_has_correlation && right_has_correlation) {
695
+ // found a MARK join where the left side has no correlation
696
+
697
+ ColumnBinding right_binding;
698
+
699
+ // there may still be correlation on the right side that we have to deal with
700
+ // push into the right side if necessary or decorrelate it independently otherwise
701
+ plan->children[1] = PushDownDependentJoinInternal(std::move(plan->children[1]),
702
+ parent_propagate_null_values, lateral_depth);
703
+ right_binding = this->base_binding;
704
+
705
+ // now push into the left side of the MARK join even though it has no correlation
706
+ // this is necessary to add the correlated columns to the column bindings and allow
707
+ // the join condition to be rewritten correctly
708
+ plan->children[0] = PushDownDependentJoinInternal(std::move(plan->children[0]),
709
+ parent_propagate_null_values, lateral_depth);
710
+
711
+ auto left_binding = this->base_binding;
712
+
713
+ // add the correlated columns to the join conditions
714
+ for (idx_t i = 0; i < correlated_columns.size(); i++) {
715
+ JoinCondition cond;
716
+ cond.left = make_uniq<BoundColumnRefExpression>(
717
+ correlated_columns[i].type,
718
+ ColumnBinding(left_binding.table_index, left_binding.column_index + i));
719
+ cond.right = make_uniq<BoundColumnRefExpression>(
720
+ correlated_columns[i].type,
721
+ ColumnBinding(right_binding.table_index, right_binding.column_index + i));
722
+ cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
723
+
724
+ auto &comparison_join = join.Cast<LogicalComparisonJoin>();
725
+ comparison_join.conditions.push_back(std::move(cond));
726
+ }
727
+ return plan;
728
+ }
729
+
698
730
  // push the child into the LHS
699
731
  plan->children[0] = PushDownDependentJoinInternal(std::move(plan->children[0]),
700
732
  parent_propagate_null_values, lateral_depth);
@@ -1031,7 +1063,8 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
1031
1063
  }
1032
1064
  }
1033
1065
 
1034
- RewriteCTEScan cte_rewriter(table_index, correlated_columns);
1066
+ RewriteCTEScan cte_rewriter(table_index, correlated_columns,
1067
+ plan->type == LogicalOperatorType::LOGICAL_RECURSIVE_CTE);
1035
1068
  cte_rewriter.VisitOperator(*plan->children[1]);
1036
1069
 
1037
1070
  parent_propagate_null_values = false;
@@ -14,8 +14,10 @@
14
14
 
15
15
  namespace duckdb {
16
16
 
17
- RewriteCTEScan::RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns)
18
- : table_index(table_index), correlated_columns(correlated_columns) {
17
+ RewriteCTEScan::RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns,
18
+ bool rewrite_dependent_joins)
19
+ : table_index(table_index), correlated_columns(correlated_columns),
20
+ rewrite_dependent_joins(rewrite_dependent_joins) {
19
21
  }
20
22
 
21
23
  void RewriteCTEScan::VisitOperator(LogicalOperator &op) {
@@ -29,7 +31,7 @@ void RewriteCTEScan::VisitOperator(LogicalOperator &op) {
29
31
  }
30
32
  cteref.correlated_columns += correlated_columns.size();
31
33
  }
32
- } else if (op.type == LogicalOperatorType::LOGICAL_DEPENDENT_JOIN) {
34
+ } else if (op.type == LogicalOperatorType::LOGICAL_DEPENDENT_JOIN && rewrite_dependent_joins) {
33
35
  // There is another DependentJoin below the correlated recursive CTE.
34
36
  // We have to add the correlated columns of the recursive CTE to the
35
37
  // set of columns of this operator.
@@ -118,6 +118,15 @@ void SingleFileTableDataWriter::FinalizeTable(const TableStatistics &global_stat
118
118
  if (!v1_0_0_storage) {
119
119
  options.emplace("v1_0_0_storage", v1_0_0_storage);
120
120
  }
121
+
122
+ // If there is a context available, bind indexes before serialization.
123
+ // This is necessary so that buffered index operations are replayed before we checkpoint, otherwise
124
+ // we would lose them if there was a restart after this.
125
+ if (context && context->transaction.HasActiveTransaction()) {
126
+ info.BindIndexes(*context);
127
+ }
128
+ // FIXME: If we do not have a context, however, the unbound indexes have to be serialized to disk.
129
+
121
130
  auto index_storage_infos = info.GetIndexes().SerializeToDisk(context, options);
122
131
 
123
132
  auto debug_verify_blocks = DBConfig::GetSetting<DebugVerifyBlocksSetting>(GetDatabase());
@@ -85,6 +85,7 @@ static const StorageVersionInfo storage_version_info[] = {
85
85
  {"v1.4.0", 67},
86
86
  {"v1.4.1", 67},
87
87
  {"v1.4.2", 67},
88
+ {"v1.4.3", 67},
88
89
  {nullptr, 0}
89
90
  };
90
91
  // END OF STORAGE VERSION INFO
@@ -112,6 +113,7 @@ static const SerializationVersionInfo serialization_version_info[] = {
112
113
  {"v1.4.0", 6},
113
114
  {"v1.4.1", 6},
114
115
  {"v1.4.2", 6},
116
+ {"v1.4.3", 6},
115
117
  {"latest", 6},
116
118
  {nullptr, 0}
117
119
  };
@@ -32,7 +32,7 @@ static bool UseVersion(TransactionData transaction, transaction_t id) {
32
32
  return TransactionVersionOperator::UseInsertedVersion(transaction.start_time, transaction.transaction_id, id);
33
33
  }
34
34
 
35
- bool ChunkInfo::Cleanup(transaction_t lowest_transaction, unique_ptr<ChunkInfo> &result) const {
35
+ bool ChunkInfo::Cleanup(transaction_t lowest_transaction) const {
36
36
  return false;
37
37
  }
38
38
 
@@ -99,7 +99,7 @@ idx_t ChunkConstantInfo::GetCommittedDeletedCount(idx_t max_count) {
99
99
  return delete_id < TRANSACTION_ID_START ? max_count : 0;
100
100
  }
101
101
 
102
- bool ChunkConstantInfo::Cleanup(transaction_t lowest_transaction, unique_ptr<ChunkInfo> &result) const {
102
+ bool ChunkConstantInfo::Cleanup(transaction_t lowest_transaction) const {
103
103
  if (delete_id != NOT_DELETED_ID) {
104
104
  // the chunk info is labeled as deleted - we need to keep it around
105
105
  return false;
@@ -253,7 +253,7 @@ void ChunkVectorInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t e
253
253
  }
254
254
  }
255
255
 
256
- bool ChunkVectorInfo::Cleanup(transaction_t lowest_transaction, unique_ptr<ChunkInfo> &result) const {
256
+ bool ChunkVectorInfo::Cleanup(transaction_t lowest_transaction) const {
257
257
  if (any_deleted) {
258
258
  // if any rows are deleted we can't clean-up
259
259
  return false;
@@ -536,6 +536,11 @@ void ColumnData::RevertAppend(row_t start_row_p) {
536
536
  if (segment->start == start_row) {
537
537
  // we are truncating exactly this segment - erase it entirely
538
538
  data.EraseSegments(l, segment_index);
539
+ if (segment_index > 0) {
540
+ // if we have a previous segment, we need to update the next pointer
541
+ auto previous_segment = data.GetSegmentByIndex(l, UnsafeNumericCast<int64_t>(segment_index - 1));
542
+ previous_segment->next = nullptr;
543
+ }
539
544
  } else {
540
545
  // we need to truncate within the segment
541
546
  // remove any segments AFTER this segment: they should be deleted entirely
@@ -583,7 +588,6 @@ void ColumnData::Update(TransactionData transaction, DataTable &data_table, idx_
583
588
  Vector base_vector(type);
584
589
  ColumnScanState state;
585
590
  FetchUpdateData(state, row_ids, base_vector);
586
-
587
591
  UpdateInternal(transaction, data_table, column_index, update_vector, row_ids, update_count, base_vector);
588
592
  }
589
593
 
@@ -363,7 +363,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
363
363
  }
364
364
 
365
365
  bool ColumnDataCheckpointer::HasChanges(ColumnData &col_data) {
366
- return col_data.HasChanges();
366
+ return col_data.HasAnyChanges();
367
367
  }
368
368
 
369
369
  void ColumnDataCheckpointer::WritePersistentSegments(ColumnCheckpointState &state) {
@@ -242,7 +242,9 @@ void ColumnSegment::ConvertToPersistent(QueryContext context, optional_ptr<Block
242
242
  // Thus, we set the compression function to constant and reset the block buffer.
243
243
  D_ASSERT(stats.statistics.IsConstant());
244
244
  auto &config = DBConfig::GetConfig(db);
245
- function = *config.GetCompressionFunction(CompressionType::COMPRESSION_CONSTANT, type.InternalType());
245
+ if (GetCompressionFunction().type != CompressionType::COMPRESSION_EMPTY) {
246
+ function = *config.GetCompressionFunction(CompressionType::COMPRESSION_CONSTANT, type.InternalType());
247
+ }
246
248
  block.reset();
247
249
  }
248
250