duckdb 1.4.3-dev0.0 → 1.4.4-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +14 -5
  4. package/src/duckdb/extension/parquet/column_writer.cpp +4 -4
  5. package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +12 -4
  6. package/src/duckdb/src/common/encryption_key_manager.cpp +4 -0
  7. package/src/duckdb/src/common/local_file_system.cpp +23 -0
  8. package/src/duckdb/src/common/types/column/column_data_collection.cpp +6 -0
  9. package/src/duckdb/src/common/types/conflict_manager.cpp +1 -1
  10. package/src/duckdb/src/execution/index/art/base_node.cpp +3 -1
  11. package/src/duckdb/src/execution/index/art/prefix.cpp +5 -8
  12. package/src/duckdb/src/execution/index/bound_index.cpp +68 -25
  13. package/src/duckdb/src/execution/index/unbound_index.cpp +21 -10
  14. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +4 -0
  15. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +36 -28
  16. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +3 -2
  17. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +12 -6
  18. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +8 -4
  19. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  20. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +4 -3
  21. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +3 -2
  22. package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +0 -1
  23. package/src/duckdb/src/execution/physical_plan/plan_window.cpp +6 -8
  24. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +4 -3
  25. package/src/duckdb/src/function/macro_function.cpp +20 -2
  26. package/src/duckdb/src/function/table/system/duckdb_log.cpp +3 -0
  27. package/src/duckdb/src/function/table/system/test_all_types.cpp +26 -13
  28. package/src/duckdb/src/function/table/table_scan.cpp +72 -38
  29. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  30. package/src/duckdb/src/function/table_function.cpp +24 -0
  31. package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +1 -0
  32. package/src/duckdb/src/include/duckdb/common/limits.hpp +4 -2
  33. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +2 -0
  36. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +41 -7
  39. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +15 -1
  40. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -0
  41. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -1
  42. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -1
  43. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  47. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +4 -4
  49. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +3 -1
  50. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +3 -0
  51. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +4 -2
  53. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -2
  54. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +3 -1
  56. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -3
  57. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +2 -6
  58. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +4 -1
  59. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
  60. package/src/duckdb/src/logging/log_storage.cpp +17 -23
  61. package/src/duckdb/src/main/capi/duckdb-c.cpp +1 -1
  62. package/src/duckdb/src/main/connection.cpp +0 -5
  63. package/src/duckdb/src/main/database_manager.cpp +12 -9
  64. package/src/duckdb/src/main/db_instance_cache.cpp +15 -1
  65. package/src/duckdb/src/main/extension/extension_alias.cpp +1 -0
  66. package/src/duckdb/src/optimizer/filter_combiner.cpp +38 -4
  67. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -15
  68. package/src/duckdb/src/optimizer/late_materialization.cpp +5 -0
  69. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +6 -3
  70. package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +3 -2
  71. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +1 -1
  72. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +1 -1
  73. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +4 -1
  74. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +17 -10
  75. package/src/duckdb/src/planner/binder.cpp +3 -3
  76. package/src/duckdb/src/planner/bound_result_modifier.cpp +22 -5
  77. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +4 -1
  78. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +1 -1
  79. package/src/duckdb/src/planner/expression_binder.cpp +1 -2
  80. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +57 -24
  81. package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +5 -3
  82. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +9 -0
  83. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  84. package/src/duckdb/src/storage/table/chunk_info.cpp +3 -3
  85. package/src/duckdb/src/storage/table/column_data.cpp +5 -1
  86. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +1 -1
  87. package/src/duckdb/src/storage/table/column_segment.cpp +3 -1
  88. package/src/duckdb/src/storage/table/row_group.cpp +6 -8
  89. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -1
  90. package/src/duckdb/src/storage/table/row_version_manager.cpp +37 -23
  91. package/src/duckdb/src/storage/table/standard_column_data.cpp +5 -5
  92. package/src/duckdb/src/storage/table/validity_column_data.cpp +17 -0
@@ -462,24 +462,30 @@ void CSVSniffer::DetectTypes() {
  idx_t varchar_cols = 0;
  for (idx_t col = 0; col < info_sql_types_candidates.size(); col++) {
  auto &col_type_candidates = info_sql_types_candidates[col];
- // check number of varchar columns
+ // check the number of varchar columns
  const auto &col_type = col_type_candidates.back();
  if (col_type == LogicalType::VARCHAR) {
  varchar_cols++;
  }
  }

- // it's good if the dialect creates more non-varchar columns, but only if we sacrifice < 30% of
- // best_num_cols.
+ // it's good if the dialect creates more non-varchar columns
+ const bool has_less_varchar_cols = varchar_cols < min_varchar_cols;
+ // but only if we sacrifice < 30% of best_num_cols.
+ const bool acceptable_best_num_cols =
+ static_cast<double>(info_sql_types_candidates.size()) > static_cast<double>(max_columns_found) * 0.7;
  const idx_t number_of_errors = candidate->error_handler->GetSize();
- if (!best_candidate || (varchar_cols < min_varchar_cols && static_cast<double>(info_sql_types_candidates.size()) > (
- static_cast<double>(max_columns_found) * 0.7) &&
- (!options.ignore_errors.GetValue() || number_of_errors < min_errors))) {
+ const bool better_strictness = best_candidate_is_strict ? !candidate->used_unstrictness : true;
+ const bool acceptable_candidate = has_less_varchar_cols && acceptable_best_num_cols && better_strictness;
+ // If we escaped an unquoted character when strict is false.
+ if (!best_candidate ||
+ (acceptable_candidate && (!options.ignore_errors.GetValue() || number_of_errors < min_errors))) {
  min_errors = number_of_errors;
  best_header_row.clear();
  // we have a new best_options candidate
  best_candidate = std::move(candidate);
  min_varchar_cols = varchar_cols;
+ best_candidate_is_strict = !best_candidate->used_unstrictness;
  best_sql_types_candidates_per_column_idx = info_sql_types_candidates;
  for (auto &format_candidate : format_candidates) {
  best_format_candidates[format_candidate.first] = format_candidate.second.format;
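
Illustrative sketch (not part of the package diff): the hunk above replaces one long candidate-selection condition with named booleans. The standalone C++ below approximates that predicate under hypothetical names (CandidateSummary, IsAcceptableCandidate); it mirrors, but is not, DuckDB's sniffer code.

// Sketch only: standalone approximation of the dialect-acceptance predicate above.
#include <cstddef>

struct CandidateSummary {
	std::size_t varchar_cols;    // columns that stayed VARCHAR
	std::size_t num_cols;        // columns detected by this dialect
	std::size_t num_errors;      // errors produced while sniffing
	bool used_unstrictness;      // dialect had to relax strict parsing
};

bool IsAcceptableCandidate(const CandidateSummary &c, std::size_t min_varchar_cols, std::size_t max_columns_found,
                           std::size_t min_errors, bool best_is_strict, bool ignore_errors) {
	// it's good if the dialect creates more non-varchar columns
	const bool has_less_varchar_cols = c.varchar_cols < min_varchar_cols;
	// but only if we sacrifice < 30% of the best column count seen so far
	const bool acceptable_num_cols =
	    static_cast<double>(c.num_cols) > static_cast<double>(max_columns_found) * 0.7;
	// never trade a strict candidate for one that needed unstrict parsing
	const bool better_strictness = best_is_strict ? !c.used_unstrictness : true;
	const bool acceptable = has_less_varchar_cols && acceptable_num_cols && better_strictness;
	return acceptable && (!ignore_errors || c.num_errors < min_errors);
}

int main() {
	CandidateSummary candidate {1, 10, 0, false};
	return IsAcceptableCandidate(candidate, 3, 10, 5, true, false) ? 0 : 1;
}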
@@ -67,10 +67,14 @@ public:

  InterruptState interrupt_state;
  OperatorSourceInput source_input {global_state, *local_state, interrupt_state};
- auto source_result = table.GetData(context, source, source_input);
- if (source_result == SourceResultType::BLOCKED) {
- throw NotImplementedException(
- "Unexpected interrupt from table Source in PositionalTableScanner refill");
+ auto source_result = SourceResultType::HAVE_MORE_OUTPUT;
+ while (source_result == SourceResultType::HAVE_MORE_OUTPUT && source.size() == 0) {
+ // TODO: this could as well just be propagated further, but for now iterating it is
+ source_result = table.GetData(context, source, source_input);
+ if (source_result == SourceResultType::BLOCKED) {
+ throw NotImplementedException(
+ "Unexpected interrupt from table Source in PositionalTableScanner refill");
+ }
  }
  }
  source_offset = 0;
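
Illustrative sketch (not part of the package diff): the change above keeps calling GetData until the source either produces rows or stops reporting HAVE_MORE_OUTPUT, instead of giving up after one empty chunk. The standalone C++ below models that refill loop with hypothetical types (Source, SourceResultType); it is not the DuckDB implementation.

// Sketch only: a generic "pull until non-empty or exhausted" refill loop.
#include <stdexcept>
#include <vector>

enum class SourceResultType { HAVE_MORE_OUTPUT, FINISHED, BLOCKED };

struct Source {
	int remaining = 3;
	// Emits an empty chunk twice, then one row, to mimic a source that can return empty chunks.
	SourceResultType GetData(std::vector<int> &chunk) {
		chunk.clear();
		if (remaining == 0) {
			return SourceResultType::FINISHED;
		}
		--remaining;
		if (remaining == 0) {
			chunk.push_back(42);
		}
		return SourceResultType::HAVE_MORE_OUTPUT;
	}
};

int main() {
	Source source;
	std::vector<int> chunk;
	auto result = SourceResultType::HAVE_MORE_OUTPUT;
	// Loop instead of calling GetData once: empty chunks no longer end the scan early.
	while (result == SourceResultType::HAVE_MORE_OUTPUT && chunk.empty()) {
		result = source.GetData(chunk);
		if (result == SourceResultType::BLOCKED) {
			throw std::runtime_error("unexpected interrupt during refill");
		}
	}
	return chunk.empty() ? 1 : 0;
}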
@@ -259,7 +259,7 @@ bool PhysicalTableScan::Equals(const PhysicalOperator &other_p) const {
  return false;
  }
  auto &other = other_p.Cast<PhysicalTableScan>();
- if (function.function != other.function.function) {
+ if (function != other.function) {
  return false;
  }
  if (column_ids != other.column_ids) {
@@ -236,7 +236,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalAggregate &op) {
  D_ASSERT(op.children.size() == 1);

  reference<PhysicalOperator> plan = CreatePlan(*op.children[0]);
- plan = ExtractAggregateExpressions(plan, op.expressions, op.groups);
+ plan = ExtractAggregateExpressions(plan, op.expressions, op.groups, op.grouping_sets);

  bool can_use_simple_aggregation = true;
  for (auto &expression : op.expressions) {
@@ -305,7 +305,8 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalAggregate &op) {

  PhysicalOperator &PhysicalPlanGenerator::ExtractAggregateExpressions(PhysicalOperator &child,
  vector<unique_ptr<Expression>> &aggregates,
- vector<unique_ptr<Expression>> &groups) {
+ vector<unique_ptr<Expression>> &groups,
+ optional_ptr<vector<GroupingSet>> grouping_sets) {
  vector<unique_ptr<Expression>> expressions;
  vector<LogicalType> types;

@@ -314,7 +315,7 @@ PhysicalOperator &PhysicalPlanGenerator::ExtractAggregateExpressions(PhysicalOpe
  auto &bound_aggr = aggr->Cast<BoundAggregateExpression>();
  if (bound_aggr.order_bys) {
  // sorted aggregate!
- FunctionBinder::BindSortedAggregate(context, bound_aggr, groups);
+ FunctionBinder::BindSortedAggregate(context, bound_aggr, groups, grouping_sets);
  }
  }
  for (auto &group : groups) {
@@ -65,7 +65,8 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {

  if (ClientConfig::GetConfig(context).enable_optimizer) {
  bool changes_made = false;
- auto new_expr = OrderedAggregateOptimizer::Apply(context, *first_aggregate, groups, changes_made);
+ auto new_expr =
+ OrderedAggregateOptimizer::Apply(context, *first_aggregate, groups, nullptr, changes_made);
  if (new_expr) {
  D_ASSERT(new_expr->return_type == first_aggregate->return_type);
  D_ASSERT(new_expr->GetExpressionType() == ExpressionType::BOUND_AGGREGATE);
@@ -81,7 +82,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
  }
  }

- child = ExtractAggregateExpressions(child, aggregates, groups);
+ child = ExtractAggregateExpressions(child, aggregates, groups, nullptr);

  // we add a physical hash aggregation in the plan to select the distinct groups
  auto &group_by = Make<PhysicalHashAggregate>(context, aggregate_types, std::move(aggregates), std::move(groups),
@@ -14,7 +14,6 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalFilter &op) {
  D_ASSERT(op.children.size() == 1);
  reference<PhysicalOperator> plan = CreatePlan(*op.children[0]);
  if (!op.expressions.empty()) {
- D_ASSERT(!plan.get().GetTypes().empty());
  // create a filter if there is anything to filter
  auto &filter = Make<PhysicalFilter>(plan.get().GetTypes(), std::move(op.expressions), op.estimated_cardinality);
  filter.children.push_back(plan);
@@ -2,13 +2,11 @@
  #include "duckdb/execution/operator/aggregate/physical_window.hpp"
  #include "duckdb/execution/operator/projection/physical_projection.hpp"
  #include "duckdb/execution/physical_plan_generator.hpp"
- #include "duckdb/main/client_context.hpp"
+ #include "duckdb/main/client_config.hpp"
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
  #include "duckdb/planner/expression/bound_window_expression.hpp"
  #include "duckdb/planner/operator/logical_window.hpp"

- #include <numeric>
-
  namespace duckdb {

  PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalWindow &op) {
@@ -44,12 +42,12 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalWindow &op) {
  // Process the window functions by sharing the partition/order definitions
  unordered_map<idx_t, idx_t> projection_map;
  vector<vector<idx_t>> window_expressions;
- idx_t blocking_count = 0;
+ idx_t streaming_count = 0;
  auto output_pos = input_width;
  while (!blocking_windows.empty() || !streaming_windows.empty()) {
- const bool process_streaming = blocking_windows.empty();
- auto &remaining = process_streaming ? streaming_windows : blocking_windows;
- blocking_count += process_streaming ? 0 : 1;
+ const bool process_blocking = streaming_windows.empty();
+ auto &remaining = process_blocking ? blocking_windows : streaming_windows;
+ streaming_count += process_blocking ? 0 : 1;

  // Find all functions that share the partitioning of the first remaining expression
  auto over_idx = remaining[0];
@@ -122,7 +120,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalWindow &op) {
  }

  // Chain the new window operator on top of the plan
- if (i < blocking_count) {
+ if (i >= streaming_count) {
  auto &window = Make<PhysicalWindow>(types, std::move(select_list), op.estimated_cardinality);
  window.children.push_back(plan);
  plan = window;
@@ -677,14 +677,15 @@ struct SortedAggregateFunction {
  } // namespace

  void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
- const vector<unique_ptr<Expression>> &groups) {
+ const vector<unique_ptr<Expression>> &groups,
+ optional_ptr<vector<GroupingSet>> grouping_sets) {
  if (!expr.order_bys || expr.order_bys->orders.empty() || expr.children.empty()) {
  // not a sorted aggregate: return
  return;
  }
  // Remove unnecessary ORDER BY clauses and return if nothing remains
  if (context.config.enable_optimizer) {
- if (expr.order_bys->Simplify(groups)) {
+ if (expr.order_bys->Simplify(groups, grouping_sets)) {
  expr.order_bys.reset();
  return;
  }
@@ -741,7 +742,7 @@ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundWindowExpr
  }
  // Remove unnecessary ORDER BY clauses and return if nothing remains
  if (context.config.enable_optimizer) {
- if (BoundOrderModifier::Simplify(expr.arg_orders, expr.partitions)) {
+ if (BoundOrderModifier::Simplify(expr.arg_orders, expr.partitions, nullptr)) {
  expr.arg_orders.clear();
  return;
  }
@@ -48,13 +48,31 @@ MacroBindResult MacroFunction::BindMacroFunction(

  ExpressionBinder expr_binder(binder, binder.context);
  expr_binder.lambda_bindings = binder.lambda_bindings;
+
+ // Figure out whether we even need to bind arguments
+ bool requires_bind = false;
+ for (auto &function : functions) {
+ for (const auto &type : function->types) {
+ if (type.id() != LogicalTypeId::UNKNOWN) {
+ requires_bind = true;
+ break;
+ }
+ }
+ if (requires_bind) {
+ break;
+ }
+ }
+
  // Find argument types and separate positional and default arguments
  vector<LogicalType> positional_arg_types;
  InsertionOrderPreservingMap<LogicalType> named_arg_types;
  for (auto &arg : function_expr.children) {
  auto arg_copy = arg->Copy();
- const auto arg_bind_result = expr_binder.BindExpression(arg_copy, depth + 1);
- auto arg_type = arg_bind_result.HasError() ? LogicalType::UNKNOWN : arg_bind_result.expression->return_type;
+ LogicalType arg_type = LogicalType::UNKNOWN;
+ if (requires_bind) {
+ const auto arg_bind_result = expr_binder.BindExpression(arg_copy, depth + 1);
+ arg_type = arg_bind_result.HasError() ? LogicalType::UNKNOWN : arg_bind_result.expression->return_type;
+ }
  if (!arg->GetAlias().empty()) {
  // Default argument
  if (named_arguments.find(arg->GetAlias()) != named_arguments.end()) {
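
Illustrative sketch (not part of the package diff): the hunk above skips binding the macro's call arguments entirely when every overload declares only UNKNOWN (untyped) parameters, since the argument types would be unused. The standalone C++ below models that short-circuit with hypothetical types (MacroOverload, RequiresBind); it is not DuckDB code.

// Sketch only: skip expensive argument binding when no overload declares a concrete parameter type.
#include <string>
#include <vector>

struct MacroOverload {
	std::vector<std::string> parameter_types; // "UNKNOWN" means untyped
};

bool RequiresBind(const std::vector<MacroOverload> &overloads) {
	// Only bind the call arguments if at least one overload can use their types.
	for (const auto &overload : overloads) {
		for (const auto &type : overload.parameter_types) {
			if (type != "UNKNOWN") {
				return true;
			}
		}
	}
	return false;
}

int main() {
	std::vector<MacroOverload> untyped {{{"UNKNOWN", "UNKNOWN"}}};
	std::vector<MacroOverload> typed {{{"UNKNOWN"}}, {{"INTEGER"}}};
	return (!RequiresBind(untyped) && RequiresBind(typed)) ? 0 : 1;
}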
@@ -62,6 +62,9 @@ unique_ptr<TableRef> DuckDBLogBindReplace(ClientContext &context, TableFunctionB
  bool denormalized_table = false;
  auto denormalized_table_setting = input.named_parameters.find("denormalized_table");
  if (denormalized_table_setting != input.named_parameters.end()) {
+ if (denormalized_table_setting->second.IsNull()) {
+ throw InvalidInputException("denormalized_table cannot be NULL");
+ }
  denormalized_table = denormalized_table_setting->second.GetValue<bool>();
  }

@@ -19,9 +19,10 @@ struct TestAllTypesData : public GlobalTableFunctionState {
  idx_t offset;
  };

- vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_large_bignum) {
+ vector<TestType> TestAllTypesFun::GetTestTypes(const bool use_large_enum, const bool use_large_bignum) {
  vector<TestType> result;
- // scalar types/numerics
+
+ // Numeric types.
  result.emplace_back(LogicalType::BOOLEAN, "bool");
  result.emplace_back(LogicalType::TINYINT, "tinyint");
  result.emplace_back(LogicalType::SMALLINT, "smallint");
@@ -33,24 +34,31 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
  result.emplace_back(LogicalType::USMALLINT, "usmallint");
  result.emplace_back(LogicalType::UINTEGER, "uint");
  result.emplace_back(LogicalType::UBIGINT, "ubigint");
+
+ // BIGNUM.
  if (use_large_bignum) {
  string data;
- idx_t total_data_size = Bignum::BIGNUM_HEADER_SIZE + Bignum::MAX_DATA_SIZE;
+ constexpr idx_t total_data_size = Bignum::BIGNUM_HEADER_SIZE + Bignum::MAX_DATA_SIZE;
  data.resize(total_data_size);
- // Let's set our header
+
+ // Let's set the max header.
  Bignum::SetHeader(&data[0], Bignum::MAX_DATA_SIZE, false);
- // Set all our other bits
+ // Set all other max bits.
  memset(&data[Bignum::BIGNUM_HEADER_SIZE], 0xFF, Bignum::MAX_DATA_SIZE);
  auto max = Value::BIGNUM(data);
- // Let's set our header
+
+ // Let's set the min header.
  Bignum::SetHeader(&data[0], Bignum::MAX_DATA_SIZE, true);
- // Set all our other bits
+ // Set all other min bits.
  memset(&data[Bignum::BIGNUM_HEADER_SIZE], 0x00, Bignum::MAX_DATA_SIZE);
  auto min = Value::BIGNUM(data);
  result.emplace_back(LogicalType::BIGNUM, "bignum", min, max);
+
  } else {
  result.emplace_back(LogicalType::BIGNUM, "bignum");
  }
+
+ // Time-types.
  result.emplace_back(LogicalType::DATE, "date");
  result.emplace_back(LogicalType::TIME, "time");
  result.emplace_back(LogicalType::TIMESTAMP, "timestamp");
@@ -59,15 +67,19 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
  result.emplace_back(LogicalType::TIMESTAMP_NS, "timestamp_ns");
  result.emplace_back(LogicalType::TIME_TZ, "time_tz");
  result.emplace_back(LogicalType::TIMESTAMP_TZ, "timestamp_tz");
- result.emplace_back(LogicalType::FLOAT, "float");
- result.emplace_back(LogicalType::DOUBLE, "double");
+
+ // More complex numeric types.
+ result.emplace_back(LogicalType::FLOAT, "float", Value::FLOAT(std::numeric_limits<float>::lowest()),
+ Value::FLOAT(std::numeric_limits<float>::max()));
+ result.emplace_back(LogicalType::DOUBLE, "double", Value::DOUBLE(std::numeric_limits<double>::lowest()),
+ Value::DOUBLE(std::numeric_limits<double>::max()));
  result.emplace_back(LogicalType::DECIMAL(4, 1), "dec_4_1");
  result.emplace_back(LogicalType::DECIMAL(9, 4), "dec_9_4");
  result.emplace_back(LogicalType::DECIMAL(18, 6), "dec_18_6");
  result.emplace_back(LogicalType::DECIMAL(38, 10), "dec38_10");
  result.emplace_back(LogicalType::UUID, "uuid");

- // interval
+ // Interval.
  interval_t min_interval;
  min_interval.months = 0;
  min_interval.days = 0;
@@ -79,14 +91,15 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
  max_interval.micros = 999999999;
  result.emplace_back(LogicalType::INTERVAL, "interval", Value::INTERVAL(min_interval),
  Value::INTERVAL(max_interval));
- // strings/blobs/bitstrings
+
+ // VARCHAR / BLOB / Bitstrings.
  result.emplace_back(LogicalType::VARCHAR, "varchar", Value("🦆🦆🦆🦆🦆🦆"),
  Value(string("goo\x00se", 6)));
  result.emplace_back(LogicalType::BLOB, "blob", Value::BLOB("thisisalongblob\\x00withnullbytes"),
  Value::BLOB("\\x00\\x00\\x00a"));
  result.emplace_back(LogicalType::BIT, "bit", Value::BIT("0010001001011100010101011010111"), Value::BIT("10101"));

- // enums
+ // ENUMs.
  Vector small_enum(LogicalType::VARCHAR, 2);
  auto small_enum_ptr = FlatVector::GetData<string_t>(small_enum);
  small_enum_ptr[0] = StringVector::AddStringOrBlob(small_enum, "DUCK_DUCK_ENUM");
@@ -116,7 +129,7 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum, bool use_lar
  result.emplace_back(LogicalType::ENUM(large_enum, 2), "large_enum");
  }

- // arrays
+ // ARRAYs.
  auto int_list_type = LogicalType::LIST(LogicalType::INTEGER);
  auto empty_int_list = Value::LIST(LogicalType::INTEGER, vector<Value>());
  auto int_list =
@@ -54,6 +54,7 @@ struct IndexScanLocalState : public LocalTableFunctionState {
  TableScanState scan_state;
  //! The column IDs of the local storage scan.
  vector<StorageIndex> column_ids;
+ bool in_charge_of_final_stretch {false};
  };

  static StorageIndex TransformStorageIndex(const ColumnIndex &column_id) {
@@ -114,7 +115,7 @@ class DuckIndexScanState : public TableScanGlobalState {
  public:
  DuckIndexScanState(ClientContext &context, const FunctionData *bind_data_p)
  : TableScanGlobalState(context, bind_data_p), next_batch_index(0), arena(Allocator::Get(context)),
- row_ids(nullptr), row_id_count(0), finished(false) {
+ row_ids(nullptr), row_id_count(0), finished_first_phase(false), started_last_phase(false) {
  }

  //! The batch index of the next Sink.
@@ -129,7 +130,8 @@ public:
  //! The column IDs of the to-be-scanned columns.
  vector<StorageIndex> column_ids;
  //! True, if no more row IDs must be scanned.
- bool finished;
+ bool finished_first_phase;
+ bool started_last_phase;
  //! Synchronize changes to the global index scan state.
  mutex index_scan_lock;

@@ -163,44 +165,75 @@ public:
  auto &storage = duck_table.GetStorage();
  auto &l_state = data_p.local_state->Cast<IndexScanLocalState>();

- idx_t scan_count = 0;
- idx_t offset = 0;
-
- {
- // Synchronize changes to the shared global state.
- lock_guard<mutex> l(index_scan_lock);
- if (!finished) {
- l_state.batch_index = next_batch_index;
- next_batch_index++;
-
- offset = l_state.batch_index * STANDARD_VECTOR_SIZE;
- auto remaining = row_id_count - offset;
- scan_count = remaining < STANDARD_VECTOR_SIZE ? remaining : STANDARD_VECTOR_SIZE;
- finished = remaining < STANDARD_VECTOR_SIZE ? true : false;
+ enum class ExecutionPhase { NONE = 0, STORAGE = 1, LOCAL_STORAGE = 2 };
+
+ // We might need to loop back, so while (true)
+ while (true) {
+ idx_t scan_count = 0;
+ idx_t offset = 0;
+
+ // Phase selection
+ auto phase_to_be_performed = ExecutionPhase::NONE;
+ {
+ // Synchronize changes to the shared global state.
+ lock_guard<mutex> l(index_scan_lock);
+ if (!finished_first_phase) {
+ l_state.batch_index = next_batch_index;
+ next_batch_index++;
+
+ offset = l_state.batch_index * STANDARD_VECTOR_SIZE;
+ auto remaining = row_id_count - offset;
+ scan_count = remaining <= STANDARD_VECTOR_SIZE ? remaining : STANDARD_VECTOR_SIZE;
+ finished_first_phase = remaining <= STANDARD_VECTOR_SIZE ? true : false;
+ phase_to_be_performed = ExecutionPhase::STORAGE;
+ } else if (!started_last_phase) {
+ // First thread to get last phase, great, set l_state's in_charge_of_final_stretch, so same thread
+ // will be on again
+ started_last_phase = true;
+ l_state.in_charge_of_final_stretch = true;
+ phase_to_be_performed = ExecutionPhase::LOCAL_STORAGE;
+ } else if (l_state.in_charge_of_final_stretch) {
+ phase_to_be_performed = ExecutionPhase::LOCAL_STORAGE;
+ }
  }
- }

- if (scan_count != 0) {
- auto row_id_data = reinterpret_cast<data_ptr_t>(row_ids + offset);
- Vector local_vector(LogicalType::ROW_TYPE, row_id_data);
-
- if (CanRemoveFilterColumns()) {
- l_state.all_columns.Reset();
- storage.Fetch(tx, l_state.all_columns, column_ids, local_vector, scan_count, l_state.fetch_state);
- output.ReferenceColumns(l_state.all_columns, projection_ids);
- } else {
- storage.Fetch(tx, output, column_ids, local_vector, scan_count, l_state.fetch_state);
+ switch (phase_to_be_performed) {
+ case ExecutionPhase::NONE: {
+ // No work to be picked up
+ return;
+ }
+ case ExecutionPhase::STORAGE: {
+ // Scan (in parallel) storage
+ auto row_id_data = reinterpret_cast<data_ptr_t>(row_ids + offset);
+ Vector local_vector(LogicalType::ROW_TYPE, row_id_data);
+
+ if (CanRemoveFilterColumns()) {
+ l_state.all_columns.Reset();
+ storage.Fetch(tx, l_state.all_columns, column_ids, local_vector, scan_count, l_state.fetch_state);
+ output.ReferenceColumns(l_state.all_columns, projection_ids);
+ } else {
+ storage.Fetch(tx, output, column_ids, local_vector, scan_count, l_state.fetch_state);
+ }
+ if (output.size() == 0) {
+ // output is empty, loop back, since there might be results to be picked up from LOCAL_STORAGE phase
+ continue;
+ }
+ return;
+ }
+ case ExecutionPhase::LOCAL_STORAGE: {
+ // Scan (sequentially, always same logical thread) local_storage
+ auto &local_storage = LocalStorage::Get(tx);
+ {
+ if (CanRemoveFilterColumns()) {
+ l_state.all_columns.Reset();
+ local_storage.Scan(l_state.scan_state.local_state, column_ids, l_state.all_columns);
+ output.ReferenceColumns(l_state.all_columns, projection_ids);
+ } else {
+ local_storage.Scan(l_state.scan_state.local_state, column_ids, output);
+ }
+ }
+ return;
  }
- }
-
- if (output.size() == 0) {
- auto &local_storage = LocalStorage::Get(tx);
- if (CanRemoveFilterColumns()) {
- l_state.all_columns.Reset();
- local_storage.Scan(l_state.scan_state.local_state, column_ids, l_state.all_columns);
- output.ReferenceColumns(l_state.all_columns, projection_ids);
- } else {
- local_storage.Scan(l_state.scan_state.local_state, column_ids, output);
  }
  }
  }
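
Illustrative sketch (not part of the package diff): the rewrite above turns the index scan into a small state machine in which many threads drain row-ID batches (STORAGE) and exactly one thread then finishes with the transaction-local data (LOCAL_STORAGE). The standalone C++ below models only the mutex-guarded phase selection, with hypothetical names (GlobalScanState, PickPhase); it is not the DuckDB implementation.

// Sketch only: phase selection approximating the DuckIndexScanState change above.
#include <cstddef>
#include <mutex>

enum class ExecutionPhase { NONE, STORAGE, LOCAL_STORAGE };

struct GlobalScanState {
	std::mutex lock;
	std::size_t next_batch = 0;
	std::size_t total_batches = 0;
	bool finished_first_phase = false;
	bool started_last_phase = false;
};

struct LocalScanState {
	std::size_t batch_index = 0;
	bool in_charge_of_final_stretch = false; // the same thread keeps the local-storage scan
};

ExecutionPhase PickPhase(GlobalScanState &g, LocalScanState &l) {
	std::lock_guard<std::mutex> guard(g.lock);
	if (!g.finished_first_phase) {
		// Hand out the next row-ID batch; the last batch ends the first phase.
		l.batch_index = g.next_batch++;
		g.finished_first_phase = g.next_batch >= g.total_batches;
		return ExecutionPhase::STORAGE;
	}
	if (!g.started_last_phase) {
		// First thread to arrive here owns the sequential local-storage scan.
		g.started_last_phase = true;
		l.in_charge_of_final_stretch = true;
		return ExecutionPhase::LOCAL_STORAGE;
	}
	// Only the owning thread may continue the local-storage scan; everyone else is done.
	return l.in_charge_of_final_stretch ? ExecutionPhase::LOCAL_STORAGE : ExecutionPhase::NONE;
}

int main() {
	GlobalScanState g;
	g.total_batches = 2;
	LocalScanState a, b;
	bool ok = PickPhase(g, a) == ExecutionPhase::STORAGE && PickPhase(g, b) == ExecutionPhase::STORAGE &&
	          PickPhase(g, a) == ExecutionPhase::LOCAL_STORAGE && PickPhase(g, b) == ExecutionPhase::NONE;
	return ok ? 0 : 1;
}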
@@ -350,7 +383,8 @@ unique_ptr<GlobalTableFunctionState> DuckTableScanInitGlobal(ClientContext &cont
  unique_ptr<GlobalTableFunctionState> DuckIndexScanInitGlobal(ClientContext &context, TableFunctionInitInput &input,
  const TableScanBindData &bind_data, set<row_t> &row_ids) {
  auto g_state = make_uniq<DuckIndexScanState>(context, input.bind_data.get());
- g_state->finished = row_ids.empty() ? true : false;
+ g_state->finished_first_phase = row_ids.empty() ? true : false;
+ g_state->started_last_phase = false;

  if (!row_ids.empty()) {
  auto row_id_ptr = g_state->arena.AllocateAligned(row_ids.size() * sizeof(row_t));
@@ -1,5 +1,5 @@
  #ifndef DUCKDB_PATCH_VERSION
- #define DUCKDB_PATCH_VERSION "2"
+ #define DUCKDB_PATCH_VERSION "3"
  #endif
  #ifndef DUCKDB_MINOR_VERSION
  #define DUCKDB_MINOR_VERSION 4
@@ -8,10 +8,10 @@
  #define DUCKDB_MAJOR_VERSION 1
  #endif
  #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "v1.4.2"
+ #define DUCKDB_VERSION "v1.4.3"
  #endif
  #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "68d7555f68"
+ #define DUCKDB_SOURCE_ID "d1dc88f950"
  #endif
  #include "duckdb/function/table/system_functions.hpp"
  #include "duckdb/main/database.hpp"
@@ -37,6 +37,30 @@ TableFunction::TableFunction(const vector<LogicalType> &arguments, table_functio
  TableFunction::TableFunction() : TableFunction("", {}, nullptr, nullptr, nullptr, nullptr) {
  }

+ bool TableFunction::operator==(const TableFunction &rhs) const {
+ return name == rhs.name && arguments == rhs.arguments && varargs == rhs.varargs && bind == rhs.bind &&
+ bind_replace == rhs.bind_replace && bind_operator == rhs.bind_operator && init_global == rhs.init_global &&
+ init_local == rhs.init_local && function == rhs.function && in_out_function == rhs.in_out_function &&
+ in_out_function_final == rhs.in_out_function_final && statistics == rhs.statistics &&
+ dependency == rhs.dependency && cardinality == rhs.cardinality &&
+ pushdown_complex_filter == rhs.pushdown_complex_filter && pushdown_expression == rhs.pushdown_expression &&
+ to_string == rhs.to_string && dynamic_to_string == rhs.dynamic_to_string &&
+ table_scan_progress == rhs.table_scan_progress && get_partition_data == rhs.get_partition_data &&
+ get_bind_info == rhs.get_bind_info && type_pushdown == rhs.type_pushdown &&
+ get_multi_file_reader == rhs.get_multi_file_reader && supports_pushdown_type == rhs.supports_pushdown_type &&
+ get_partition_info == rhs.get_partition_info && get_partition_stats == rhs.get_partition_stats &&
+ get_virtual_columns == rhs.get_virtual_columns && get_row_id_columns == rhs.get_row_id_columns &&
+ serialize == rhs.serialize && deserialize == rhs.deserialize &&
+ verify_serialization == rhs.verify_serialization && projection_pushdown == rhs.projection_pushdown &&
+ filter_pushdown == rhs.filter_pushdown && filter_prune == rhs.filter_prune &&
+ sampling_pushdown == rhs.sampling_pushdown && late_materialization == rhs.late_materialization &&
+ global_initialization == rhs.global_initialization;
+ }
+
+ bool TableFunction::operator!=(const TableFunction &rhs) const {
+ return !(*this == rhs);
+ }
+
  bool TableFunction::Equal(const TableFunction &rhs) const {
  // number of types
  if (this->arguments.size() != rhs.arguments.size()) {
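
Illustrative sketch (not part of the package diff): the new operator== above compares every callback member-wise and operator!= simply negates it, which is what lets PhysicalTableScan::Equals (earlier hunk) compare whole TableFunction objects with `function != other.function`. The standalone C++ below shows that pattern on a hypothetical struct (ScanFunction); it is not DuckDB code.

// Sketch only: member-wise equality with operator!= delegating to operator==.
#include <string>

using scan_callback_t = void (*)();

struct ScanFunction {
	std::string name;
	scan_callback_t function = nullptr;
	scan_callback_t init_global = nullptr;

	bool operator==(const ScanFunction &rhs) const {
		// Function pointers compare equal only if they point at the same callback.
		return name == rhs.name && function == rhs.function && init_global == rhs.init_global;
	}
	bool operator!=(const ScanFunction &rhs) const {
		return !(*this == rhs);
	}
};

void ScanA() {}
void ScanB() {}

int main() {
	ScanFunction a {"scan", &ScanA, nullptr};
	ScanFunction b {"scan", &ScanB, nullptr};
	return (a != b && a == a) ? 0 : 1;
}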
@@ -76,6 +76,7 @@ public:
  static constexpr idx_t DERIVED_KEY_LENGTH = 32;

  private:
+ mutable mutex lock;
  std::unordered_map<std::string, EncryptionKey> derived_keys;
  };

@@ -24,10 +24,12 @@ namespace duckdb {
  template <class T>
  struct NumericLimits {
  static constexpr T Minimum() {
- return std::numeric_limits<T>::lowest();
+ return std::numeric_limits<T>::has_infinity ? -std::numeric_limits<T>::infinity()
+ : std::numeric_limits<T>::lowest();
  }
  static constexpr T Maximum() {
- return std::numeric_limits<T>::max();
+ return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity()
+ : std::numeric_limits<T>::max();
  }
  static constexpr bool IsSigned() {
  return std::is_signed<T>::value;
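
Illustrative sketch (not part of the package diff): with the change above, NumericLimits reports ±infinity for types where std::numeric_limits<T>::has_infinity is true (float, double) and keeps lowest()/max() for integer types. The standalone C++ below reproduces the same constexpr dispatch on a local template (Limits) purely for demonstration.

// Sketch only: the has_infinity dispatch used by the NumericLimits change above.
#include <cassert>
#include <cmath>
#include <limits>

template <class T>
struct Limits {
	static constexpr T Minimum() {
		return std::numeric_limits<T>::has_infinity ? -std::numeric_limits<T>::infinity()
		                                            : std::numeric_limits<T>::lowest();
	}
	static constexpr T Maximum() {
		return std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity()
		                                            : std::numeric_limits<T>::max();
	}
};

int main() {
	// Floating-point types now report infinities as their extreme values ...
	assert(std::isinf(Limits<double>::Maximum()) && Limits<double>::Maximum() > 0);
	assert(std::isinf(Limits<float>::Minimum()) && Limits<float>::Minimum() < 0);
	// ... while integer types are unchanged.
	static_assert(Limits<int>::Maximum() == std::numeric_limits<int>::max(), "unchanged for integers");
	return 0;
}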
@@ -38,6 +38,8 @@ public:
  int64_t GetFileSize(FileHandle &handle) override;
  //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error
  timestamp_t GetLastModifiedTime(FileHandle &handle) override;
+ //! Returns a tag that uniquely identifies the version of the file
+ string GetVersionTag(FileHandle &handle) override;
  //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error
  FileType GetFileType(FileHandle &handle) override;
  //! Truncate a file to a maximum size of new_size, new_size should be smaller than or equal to the current size of
@@ -31,6 +31,8 @@ template <class BLOCK_ITERATOR_STATE>
  class BlockIteratorStateBase {
  protected:
  friend BLOCK_ITERATOR_STATE;
+
+ private:
  explicit BlockIteratorStateBase(const idx_t tuple_count_p) : tuple_count(tuple_count_p) {
  }

@@ -256,6 +256,8 @@ public:
  if (parent.get().GetType() == NType::PREFIX) {
  // We might have to compress:
  // PREFIX (greatgrandparent) - Node4 (grandparent) - PREFIX - INLINED_LEAF.
+ // The parent does not have to be passed in, as it is a child of the possibly being compressed N4.
+ // Then, when we delete that child, we also free it.
  Node::DeleteChild(art, grandparent, greatgrandparent, current_key.get()[grandparent_depth], status,
  row_id);
  return;
@@ -48,7 +48,7 @@ public:

  //! Concatenates parent -> prev_node4 -> child.
  static void Concat(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte,
- const GateStatus node4_status);
+ const GateStatus node4_status, const GateStatus status);

  //! Removes up to pos bytes from the prefix.
  //! Shifts all subsequent bytes by pos. Frees empty nodes.
@@ -72,7 +72,7 @@ private:
  static Prefix GetTail(ART &art, const Node &node);

  static void ConcatInternal(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte,
- const bool inside_gate);
+ const GateStatus status);
  static void ConcatNode4WasGate(ART &art, Node &node4, const Node child, uint8_t byte);
  static void ConcatChildIsGate(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte);
  static void ConcatOutsideGate(ART &art, Node &parent, Node &node4, const Node child, uint8_t byte);
@@ -169,7 +169,7 @@ public:
  //! Replay index insert and delete operations buffered during WAL replay.
  //! table_types has the physical types of the table in the order they appear, not logical (no generated columns).
  //! mapped_column_ids contains the sorted order of Indexed physical column ID's (see unbound_index.hpp comments).
- void ApplyBufferedReplays(const vector<LogicalType> &table_types, vector<BufferedIndexData> &buffered_replays,
+ void ApplyBufferedReplays(const vector<LogicalType> &table_types, BufferedIndexReplays &buffered_replays,
  const vector<StorageIndex> &mapped_column_ids);

  protected: