duckdb 1.3.2-dev0.0 → 1.3.3-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/.github/workflows/NodeJS.yml +2 -2
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-timezone.cpp +10 -6
  4. package/src/duckdb/extension/parquet/column_reader.cpp +2 -0
  5. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +7 -2
  6. package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +9 -5
  7. package/src/duckdb/extension/parquet/parquet_extension.cpp +5 -2
  8. package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -2
  9. package/src/duckdb/src/catalog/catalog.cpp +1 -1
  10. package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +4 -2
  11. package/src/duckdb/src/common/error_data.cpp +7 -0
  12. package/src/duckdb/src/common/operator/string_cast.cpp +3 -0
  13. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +5 -0
  14. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +2 -2
  15. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +10 -2
  16. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -2
  17. package/src/duckdb/src/function/function_list.cpp +1 -1
  18. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +6 -6
  19. package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +1 -1
  20. package/src/duckdb/src/function/table/table_scan.cpp +43 -84
  21. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  22. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +1 -0
  23. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +3 -1
  24. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +2 -2
  25. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  26. package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +3 -0
  27. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +5 -0
  28. package/src/duckdb/src/include/duckdb/main/settings.hpp +2 -2
  29. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -0
  30. package/src/duckdb/src/include/duckdb/parallel/concurrentqueue.hpp +8 -0
  31. package/src/duckdb/src/include/duckdb/planner/filter/expression_filter.hpp +2 -0
  32. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +1 -1
  33. package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
  34. package/src/duckdb/src/main/database_manager.cpp +7 -2
  35. package/src/duckdb/src/main/database_path_and_type.cpp +1 -1
  36. package/src/duckdb/src/main/settings/autogenerated_settings.cpp +0 -34
  37. package/src/duckdb/src/main/settings/custom_settings.cpp +49 -0
  38. package/src/duckdb/src/optimizer/compressed_materialization.cpp +4 -4
  39. package/src/duckdb/src/optimizer/filter_combiner.cpp +1 -1
  40. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +9 -1
  41. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +3 -0
  42. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +68 -3
  43. package/src/duckdb/src/parser/statement/set_statement.cpp +1 -1
  44. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +2 -3
  45. package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -3
  46. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +2 -2
  47. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +4 -4
  48. package/src/duckdb/src/planner/filter/expression_filter.cpp +4 -3
  49. package/src/duckdb/src/planner/table_filter.cpp +2 -1
  50. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +5 -3
  51. package/src/duckdb/src/storage/compression/fsst.cpp +20 -10
  52. package/src/duckdb/src/storage/compression/roaring/compress.cpp +15 -9
  53. package/src/duckdb/src/storage/compression/roaring/scan.cpp +10 -1
  54. package/src/duckdb/src/storage/data_table.cpp +1 -1
  55. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +2 -1
  56. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  57. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -1
  58. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +6 -3
@@ -221,6 +221,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
221
221
  {"iceberg_metadata", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
222
222
  {"iceberg_scan", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
223
223
  {"iceberg_snapshots", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
224
+ {"iceberg_to_ducklake", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
224
225
  {"icu_calendar_names", "icu", CatalogType::TABLE_FUNCTION_ENTRY},
225
226
  {"icu_collate_af", "icu", CatalogType::SCALAR_FUNCTION_ENTRY},
226
227
  {"icu_collate_am", "icu", CatalogType::SCALAR_FUNCTION_ENTRY},
@@ -624,6 +625,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
624
625
  {"st_linestring2dfromwkb", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
625
626
  {"st_linesubstring", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
626
627
  {"st_m", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
628
+ {"st_makebox2d", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
627
629
  {"st_makeenvelope", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
628
630
  {"st_makeline", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
629
631
  {"st_makepolygon", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
@@ -996,6 +998,9 @@ static constexpr ExtensionEntry EXTENSION_SETTINGS[] = {
996
998
  {"ca_cert_file", "httpfs"},
997
999
  {"calendar", "icu"},
998
1000
  {"disable_parquet_prefetching", "parquet"},
1001
+ {"ducklake_max_retry_count", "ducklake"},
1002
+ {"ducklake_retry_backoff", "ducklake"},
1003
+ {"ducklake_retry_wait_ms", "ducklake"},
999
1004
  {"enable_geoparquet_conversion", "parquet"},
1000
1005
  {"enable_server_cert_verification", "httpfs"},
1001
1006
  {"force_download", "httpfs"},
@@ -560,7 +560,7 @@ struct EnableFSSTVectorsSetting {
560
560
  struct EnableHTTPLoggingSetting {
561
561
  using RETURN_TYPE = bool;
562
562
  static constexpr const char *Name = "enable_http_logging";
563
- static constexpr const char *Description = "Enables HTTP logging";
563
+ static constexpr const char *Description = "(deprecated) Enables HTTP logging";
564
564
  static constexpr const char *InputType = "BOOLEAN";
565
565
  static void SetLocal(ClientContext &context, const Value &parameter);
566
566
  static void ResetLocal(ClientContext &context);
@@ -750,7 +750,7 @@ struct HTTPLoggingOutputSetting {
750
750
  using RETURN_TYPE = string;
751
751
  static constexpr const char *Name = "http_logging_output";
752
752
  static constexpr const char *Description =
753
- "The file to which HTTP logging output should be saved, or empty to print to the terminal";
753
+ "(deprecated) The file to which HTTP logging output should be saved, or empty to print to the terminal";
754
754
  static constexpr const char *InputType = "VARCHAR";
755
755
  static void SetLocal(ClientContext &context, const Value &parameter);
756
756
  static void ResetLocal(ClientContext &context);
@@ -60,6 +60,7 @@ private:
60
60
  unordered_map<idx_t, RelationStats> materialized_cte_stats;
61
61
  //! Stats of Delim Scans of the Delim Join that is currently being optimized
62
62
  optional_ptr<RelationStats> delim_scan_stats;
63
+ idx_t depth;
63
64
  };
64
65
 
65
66
  } // namespace duckdb
@@ -84,6 +84,14 @@ public:
84
84
  }
85
85
  return max;
86
86
  }
87
+
88
+ template <typename It>
89
+ bool enqueue_bulk(It itemFirst, size_t count) {
90
+ for (size_t i = 0; i < count; i++) {
91
+ q.push(std::move(*itemFirst++));
92
+ }
93
+ return true;
94
+ }
87
95
  };
88
96
 
89
97
  } // namespace duckdb_moodycamel
@@ -35,6 +35,8 @@ public:
35
35
  unique_ptr<Expression> ToExpression(const Expression &column) const override;
36
36
  void Serialize(Serializer &serializer) const override;
37
37
  static unique_ptr<TableFilter> Deserialize(Deserializer &deserializer);
38
+ static void ReplaceExpressionRecursive(unique_ptr<Expression> &expr, const Expression &column,
39
+ ExpressionType replace_type = ExpressionType::BOUND_REF);
38
40
  };
39
41
 
40
42
  } // namespace duckdb
@@ -198,7 +198,7 @@ public:
198
198
  error += StringUtil::Format("Node %lld: Start %lld, Count %lld", i, nodes[i].row_start,
199
199
  nodes[i].node->count.load());
200
200
  }
201
- throw InternalException("Could not find node in column segment tree!\n%s%s", error, Exception::GetStackTrace());
201
+ throw InternalException("Could not find node in column segment tree!\n%s", error);
202
202
  }
203
203
 
204
204
  bool TryGetSegmentIndex(SegmentLock &l, idx_t row_number, idx_t &result) {
@@ -24,6 +24,10 @@ optional_ptr<DatabaseInstance> ClientContextFileOpener::TryGetDatabase() {
24
24
  return context.db.get();
25
25
  }
26
26
 
27
+ shared_ptr<HTTPUtil> &ClientContextFileOpener::GetHTTPUtil() {
28
+ return TryGetDatabase()->config.http_util;
29
+ }
30
+
27
31
  unique_ptr<CatalogTransaction> FileOpener::TryGetCatalogTransaction(optional_ptr<FileOpener> opener) {
28
32
  if (!opener) {
29
33
  return nullptr;
@@ -256,8 +256,13 @@ vector<reference<AttachedDatabase>> DatabaseManager::GetDatabases(ClientContext
256
256
  return true;
257
257
  });
258
258
 
259
- result.push_back(*system);
260
- result.push_back(*context.client_data->temporary_objects);
259
+ if (!max_db_count.IsValid() || max_db_count.GetIndex() >= 1) {
260
+ result.push_back(*system);
261
+ }
262
+ if (!max_db_count.IsValid() || max_db_count.GetIndex() >= 2) {
263
+ result.push_back(*context.client_data->temporary_objects);
264
+ }
265
+
261
266
  return result;
262
267
  }
263
268
 
@@ -26,7 +26,7 @@ void DBPathAndType::CheckMagicBytes(FileSystem &fs, string &path, string &db_typ
26
26
  case DataFileType::PARQUET_FILE:
27
27
  case DataFileType::UNKNOWN_FILE: {
28
28
  // FIXME: we should get this from the registered replacement scans instead of hardcoding it here
29
- vector<string> supported_suffixes {"parquet", "csv", "json", "jsonl", "ndjson"};
29
+ vector<string> supported_suffixes {"parquet", "csv", "tsv", "json", "jsonl", "ndjson"};
30
30
  if (ReplacementScan::CanReplace(path, supported_suffixes)) {
31
31
  db_type = "__open_file__";
32
32
  break;
@@ -505,23 +505,6 @@ Value EnableFSSTVectorsSetting::GetSetting(const ClientContext &context) {
505
505
  return Value::BOOLEAN(config.options.enable_fsst_vectors);
506
506
  }
507
507
 
508
- //===----------------------------------------------------------------------===//
509
- // Enable H T T P Logging
510
- //===----------------------------------------------------------------------===//
511
- void EnableHTTPLoggingSetting::SetLocal(ClientContext &context, const Value &input) {
512
- auto &config = ClientConfig::GetConfig(context);
513
- config.enable_http_logging = input.GetValue<bool>();
514
- }
515
-
516
- void EnableHTTPLoggingSetting::ResetLocal(ClientContext &context) {
517
- ClientConfig::GetConfig(context).enable_http_logging = ClientConfig().enable_http_logging;
518
- }
519
-
520
- Value EnableHTTPLoggingSetting::GetSetting(const ClientContext &context) {
521
- auto &config = ClientConfig::GetConfig(context);
522
- return Value::BOOLEAN(config.enable_http_logging);
523
- }
524
-
525
508
  //===----------------------------------------------------------------------===//
526
509
  // Enable H T T P Metadata Cache
527
510
  //===----------------------------------------------------------------------===//
@@ -678,23 +661,6 @@ Value HomeDirectorySetting::GetSetting(const ClientContext &context) {
678
661
  return Value(config.home_directory);
679
662
  }
680
663
 
681
- //===----------------------------------------------------------------------===//
682
- // H T T P Logging Output
683
- //===----------------------------------------------------------------------===//
684
- void HTTPLoggingOutputSetting::SetLocal(ClientContext &context, const Value &input) {
685
- auto &config = ClientConfig::GetConfig(context);
686
- config.http_logging_output = input.GetValue<string>();
687
- }
688
-
689
- void HTTPLoggingOutputSetting::ResetLocal(ClientContext &context) {
690
- ClientConfig::GetConfig(context).http_logging_output = ClientConfig().http_logging_output;
691
- }
692
-
693
- Value HTTPLoggingOutputSetting::GetSetting(const ClientContext &context) {
694
- auto &config = ClientConfig::GetConfig(context);
695
- return Value(config.http_logging_output);
696
- }
697
-
698
664
  //===----------------------------------------------------------------------===//
699
665
  // H T T P Proxy
700
666
  //===----------------------------------------------------------------------===//
@@ -1088,6 +1088,55 @@ void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input)
1088
1088
  config.home_directory = input.IsNull() ? string() : input.ToString();
1089
1089
  }
1090
1090
 
1091
+ //===----------------------------------------------------------------------===//
1092
+ // Enable H T T P Logging
1093
+ //===----------------------------------------------------------------------===//
1094
+ void EnableHTTPLoggingSetting::SetLocal(ClientContext &context, const Value &input) {
1095
+ auto &config = ClientConfig::GetConfig(context);
1096
+ config.enable_http_logging = input.GetValue<bool>();
1097
+
1098
+ // NOTE: this is a deprecated setting: we mimic the old behaviour by setting the log storage output to STDOUT and
1099
+ // enabling logging for http only. Note that this behaviour is slightly wonky in that it sets all sorts of logging
1100
+ // config
1101
+ auto &log_manager = LogManager::Get(context);
1102
+ if (config.enable_http_logging) {
1103
+ log_manager.SetEnableLogging(true);
1104
+ log_manager.SetLogLevel(HTTPLogType::LEVEL);
1105
+ unordered_set<string> enabled_log_types = {HTTPLogType::NAME};
1106
+ log_manager.SetEnabledLogTypes(enabled_log_types);
1107
+ log_manager.SetLogStorage(*context.db, LogConfig::STDOUT_STORAGE_NAME);
1108
+ } else {
1109
+ log_manager.SetEnableLogging(false);
1110
+ }
1111
+ }
1112
+
1113
+ void EnableHTTPLoggingSetting::ResetLocal(ClientContext &context) {
1114
+ ClientConfig::GetConfig(context).enable_http_logging = ClientConfig().enable_http_logging;
1115
+ }
1116
+
1117
+ Value EnableHTTPLoggingSetting::GetSetting(const ClientContext &context) {
1118
+ auto &config = ClientConfig::GetConfig(context);
1119
+ return Value::BOOLEAN(config.enable_http_logging);
1120
+ }
1121
+
1122
+ //===----------------------------------------------------------------------===//
1123
+ // H T T P Logging Output
1124
+ //===----------------------------------------------------------------------===//
1125
+ void HTTPLoggingOutputSetting::SetLocal(ClientContext &context, const Value &input) {
1126
+ throw NotImplementedException("This setting is deprecated and can no longer be used. Check out the DuckDB docs on "
1127
+ "logging for more information");
1128
+ }
1129
+
1130
+ void HTTPLoggingOutputSetting::ResetLocal(ClientContext &context) {
1131
+ throw NotImplementedException("This setting is deprecated and can no longer be used. Check out the DuckDB docs on "
1132
+ "logging for more information");
1133
+ }
1134
+
1135
+ Value HTTPLoggingOutputSetting::GetSetting(const ClientContext &context) {
1136
+ auto &config = ClientConfig::GetConfig(context);
1137
+ return Value(config.http_logging_output);
1138
+ }
1139
+
1091
1140
  //===----------------------------------------------------------------------===//
1092
1141
  // Index Scan Percentage
1093
1142
  //===----------------------------------------------------------------------===//
@@ -330,7 +330,7 @@ static Value GetIntegralRangeValue(ClientContext &context, const LogicalType &ty
330
330
  auto min = NumericStats::Min(stats);
331
331
  auto max = NumericStats::Max(stats);
332
332
  if (max < min) {
333
- return Value::HUGEINT(NumericLimits<hugeint_t>::Maximum());
333
+ return Value::UHUGEINT(NumericLimits<uhugeint_t>::Maximum());
334
334
  }
335
335
 
336
336
  vector<unique_ptr<Expression>> arguments;
@@ -342,8 +342,8 @@ static Value GetIntegralRangeValue(ClientContext &context, const LogicalType &ty
342
342
  if (ExpressionExecutor::TryEvaluateScalar(context, sub, result)) {
343
343
  return result;
344
344
  } else {
345
- // Couldn't evaluate: Return max hugeint as range so GetIntegralCompress will return nullptr
346
- return Value::HUGEINT(NumericLimits<hugeint_t>::Maximum());
345
+ // Couldn't evaluate: Return max uhugeint as range so GetIntegralCompress will return nullptr
346
+ return Value::UHUGEINT(NumericLimits<uhugeint_t>::Maximum());
347
347
  }
348
348
  }
349
349
 
@@ -354,7 +354,7 @@ unique_ptr<CompressExpression> CompressedMaterialization::GetIntegralCompress(un
354
354
  return nullptr;
355
355
  }
356
356
 
357
- // Get range and cast to UBIGINT (might fail for HUGEINT, in which case we just return)
357
+ // Get range and cast to UBIGINT (might fail for UHUGEINT, in which case we just return)
358
358
  Value range_value = GetIntegralRangeValue(context, type, stats);
359
359
  if (!range_value.DefaultTryCastAs(LogicalType::UBIGINT)) {
360
360
  return nullptr;
@@ -242,7 +242,7 @@ bool FilterCombiner::IsDenseRange(vector<Value> &in_list) {
242
242
  if (in_list.empty()) {
243
243
  return true;
244
244
  }
245
- if (!in_list[0].type().IsIntegral()) {
245
+ if (!in_list[0].type().IsIntegral() || in_list[0].type() == LogicalType::UHUGEINT) {
246
246
  return false;
247
247
  }
248
248
  // sort the input list
@@ -10,19 +10,27 @@
10
10
 
11
11
  namespace duckdb {
12
12
 
13
- JoinOrderOptimizer::JoinOrderOptimizer(ClientContext &context) : context(context), query_graph_manager(context) {
13
+ JoinOrderOptimizer::JoinOrderOptimizer(ClientContext &context)
14
+ : context(context), query_graph_manager(context), depth(1) {
14
15
  }
15
16
 
16
17
  JoinOrderOptimizer JoinOrderOptimizer::CreateChildOptimizer() {
17
18
  JoinOrderOptimizer child_optimizer(context);
18
19
  child_optimizer.materialized_cte_stats = materialized_cte_stats;
19
20
  child_optimizer.delim_scan_stats = delim_scan_stats;
21
+ child_optimizer.depth = depth + 1;
20
22
  return child_optimizer;
21
23
  }
22
24
 
23
25
  unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOperator> plan,
24
26
  optional_ptr<RelationStats> stats) {
25
27
 
28
+ if (depth > query_graph_manager.context.config.max_expression_depth) {
29
+ // Very deep plans will eventually consume quite some stack space
30
+ // Returning the current plan is always a valid choice
31
+ return plan;
32
+ }
33
+
26
34
  // make sure query graph manager has not extracted a relation graph already
27
35
  LogicalOperator *op = plan.get();
28
36
 
@@ -256,6 +256,9 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
256
256
  ColumnBinding filter_binding(get.table_index, index.GetIndex());
257
257
  auto column_ref = make_uniq<BoundColumnRefExpression>(std::move(column_type), filter_binding);
258
258
  auto filter_expr = filter.second->ToExpression(*column_ref);
259
+ if (filter_expr->IsScalar()) {
260
+ filter_expr = std::move(column_ref);
261
+ }
259
262
  VisitExpression(&filter_expr);
260
263
  filter_expressions.push_back(std::move(filter_expr));
261
264
  }
@@ -1,26 +1,51 @@
1
1
  #include "duckdb/common/helper.hpp"
2
2
  #include "duckdb/optimizer/statistics_propagator.hpp"
3
3
  #include "duckdb/planner/expression/bound_columnref_expression.hpp"
4
+ #include "duckdb/planner/expression_iterator.hpp"
4
5
  #include "duckdb/planner/filter/conjunction_filter.hpp"
5
6
  #include "duckdb/planner/filter/constant_filter.hpp"
7
+ #include "duckdb/planner/expression/bound_function_expression.hpp"
8
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
6
9
  #include "duckdb/planner/filter/expression_filter.hpp"
7
10
  #include "duckdb/planner/filter/null_filter.hpp"
8
11
  #include "duckdb/planner/operator/logical_get.hpp"
9
12
  #include "duckdb/planner/table_filter.hpp"
13
+ #include "duckdb/function/scalar/generic_common.hpp"
14
+ #include "duckdb/function/scalar/generic_functions.hpp"
10
15
 
11
16
  namespace duckdb {
12
17
 
18
+ static void GetColumnIndex(unique_ptr<Expression> &expr, idx_t &index) {
19
+ if (expr->type == ExpressionType::BOUND_REF) {
20
+ auto &bound_ref = expr->Cast<BoundReferenceExpression>();
21
+ index = bound_ref.index;
22
+ return;
23
+ }
24
+ ExpressionIterator::EnumerateChildren(*expr, [&](unique_ptr<Expression> &child) { GetColumnIndex(child, index); });
25
+ }
26
+
13
27
  FilterPropagateResult StatisticsPropagator::PropagateTableFilter(ColumnBinding stats_binding, BaseStatistics &stats,
14
28
  TableFilter &filter) {
15
29
  if (filter.filter_type == TableFilterType::EXPRESSION_FILTER) {
16
30
  auto &expr_filter = filter.Cast<ExpressionFilter>();
31
+
32
+ // get physical storage index of the filter
33
+ // since it is a table filter, every storage index is the same
34
+ idx_t physical_index = DConstants::INVALID_INDEX;
35
+ GetColumnIndex(expr_filter.expr, physical_index);
36
+ D_ASSERT(physical_index != DConstants::INVALID_INDEX);
37
+
17
38
  auto column_ref = make_uniq<BoundColumnRefExpression>(stats.GetType(), stats_binding);
18
39
  auto filter_expr = expr_filter.ToExpression(*column_ref);
19
40
  // handle the filter before updating the statistics
20
41
  // otherwise the filter can be pruned by the updated statistics
21
- auto copy_expr = filter_expr->Copy();
22
42
  auto propagate_result = HandleFilter(filter_expr);
23
- UpdateFilterStatistics(*copy_expr);
43
+ auto colref = make_uniq<BoundReferenceExpression>(stats.GetType(), physical_index);
44
+ UpdateFilterStatistics(*filter_expr);
45
+
46
+ // replace BoundColumnRefs with BoundRefs
47
+ ExpressionFilter::ReplaceExpressionRecursive(filter_expr, *colref, ExpressionType::BOUND_COLUMN_REF);
48
+ expr_filter.expr = std::move(filter_expr);
24
49
  return propagate_result;
25
50
  }
26
51
  return filter.CheckStatistics(stats);
@@ -46,6 +71,41 @@ void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &input, TableFi
46
71
  }
47
72
  }
48
73
 
74
+ static bool IsConstantOrNullFilter(TableFilter &table_filter) {
75
+ if (table_filter.filter_type != TableFilterType::EXPRESSION_FILTER) {
76
+ return false;
77
+ }
78
+ auto &expr_filter = table_filter.Cast<ExpressionFilter>();
79
+ if (expr_filter.expr->type != ExpressionType::BOUND_FUNCTION) {
80
+ return false;
81
+ }
82
+ auto &func = expr_filter.expr->Cast<BoundFunctionExpression>();
83
+ return ConstantOrNull::IsConstantOrNull(func, Value::BOOLEAN(true));
84
+ }
85
+
86
+ static bool CanReplaceConstantOrNull(TableFilter &table_filter) {
87
+ if (!IsConstantOrNullFilter(table_filter)) {
88
+ throw InternalException("CanReplaceConstantOrNull() called on unexpected Table Filter");
89
+ }
90
+ D_ASSERT(table_filter.filter_type == TableFilterType::EXPRESSION_FILTER);
91
+ auto &expr_filter = table_filter.Cast<ExpressionFilter>();
92
+ auto &func = expr_filter.expr->Cast<BoundFunctionExpression>();
93
+ if (ConstantOrNull::IsConstantOrNull(func, Value::BOOLEAN(true))) {
94
+ for (auto child = ++func.children.begin(); child != func.children.end(); child++) {
95
+ switch (child->get()->type) {
96
+ case ExpressionType::BOUND_REF:
97
+ case ExpressionType::VALUE_CONSTANT:
98
+ continue;
99
+ default:
100
+ // expression type could be a function like Coalesce
101
+ return false;
102
+ }
103
+ }
104
+ }
105
+ // all children of constant or null are bound refs to the table filter column
106
+ return true;
107
+ }
108
+
49
109
  unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalGet &get,
50
110
  unique_ptr<LogicalOperator> &node_ptr) {
51
111
  if (get.function.cardinality) {
@@ -99,10 +159,15 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalGet
99
159
  // erase this condition
100
160
  get.table_filters.filters.erase(table_filter_column);
101
161
  break;
102
- case FilterPropagateResult::FILTER_TRUE_OR_NULL:
162
+ case FilterPropagateResult::FILTER_TRUE_OR_NULL: {
163
+ if (IsConstantOrNullFilter(*get.table_filters.filters[table_filter_column]) &&
164
+ !CanReplaceConstantOrNull(*get.table_filters.filters[table_filter_column])) {
165
+ break;
166
+ }
103
167
  // filter is true or null; we can replace this with a not null filter
104
168
  get.table_filters.filters[table_filter_column] = make_uniq<IsNotNullFilter>();
105
169
  break;
170
+ }
106
171
  case FilterPropagateResult::FILTER_FALSE_OR_NULL:
107
172
  case FilterPropagateResult::FILTER_ALWAYS_FALSE:
108
173
  // filter is always false; this entire filter should be replaced by an empty result block
@@ -14,7 +14,7 @@ SetVariableStatement::SetVariableStatement(string name_p, unique_ptr<ParsedExpre
14
14
  }
15
15
 
16
16
  SetVariableStatement::SetVariableStatement(const SetVariableStatement &other)
17
- : SetVariableStatement(other.name, other.value->Copy(), other.scope) {
17
+ : SetStatement(other), value(other.value->Copy()) {
18
18
  }
19
19
 
20
20
  unique_ptr<SQLStatement> SetVariableStatement::Copy() const {
@@ -445,10 +445,9 @@ unique_ptr<ParsedExpression> ExpressionBinder::QualifyColumnNameWithManyDots(Col
445
445
  }
446
446
 
447
447
  unique_ptr<ParsedExpression> ExpressionBinder::QualifyColumnName(ColumnRefExpression &col_ref, ErrorData &error) {
448
-
449
- // try binding as a lambda parameter
450
448
  if (!col_ref.IsQualified()) {
451
- auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetName());
449
+ // Try binding as a lambda parameter.
450
+ auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName());
452
451
  if (lambda_ref) {
453
452
  return lambda_ref;
454
453
  }
@@ -38,10 +38,9 @@ string AlterBinder::UnsupportedAggregateMessage() {
38
38
  }
39
39
 
40
40
  BindResult AlterBinder::BindColumnReference(ColumnRefExpression &col_ref, idx_t depth) {
41
-
42
- // try binding as a lambda parameter
43
41
  if (!col_ref.IsQualified()) {
44
- auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetName());
42
+ // Try binding as a lambda parameter.
43
+ auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName());
45
44
  if (lambda_ref) {
46
45
  return BindLambdaReference(lambda_ref->Cast<LambdaRefExpression>(), depth);
47
46
  }
@@ -44,9 +44,9 @@ BindResult HavingBinder::BindColumnRef(unique_ptr<ParsedExpression> &expr_ptr, i
44
44
  auto col_ref = expr_ptr->Cast<ColumnRefExpression>();
45
45
  const auto &column_name = col_ref.GetColumnName();
46
46
 
47
- // Try binding as a lambda parameter
48
47
  if (!col_ref.IsQualified()) {
49
- auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetName());
48
+ // Try binding as a lambda parameter.
49
+ auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName());
50
50
  if (lambda_ref) {
51
51
  return BindLambdaReference(lambda_ref->Cast<LambdaRefExpression>(), depth);
52
52
  }
@@ -18,18 +18,18 @@ BindResult TableFunctionBinder::BindLambdaReference(LambdaRefExpression &expr, i
18
18
 
19
19
  BindResult TableFunctionBinder::BindColumnReference(unique_ptr<ParsedExpression> &expr_ptr, idx_t depth,
20
20
  bool root_expression) {
21
- // try binding as a lambda parameter
22
21
  auto &col_ref = expr_ptr->Cast<ColumnRefExpression>();
23
22
  if (!col_ref.IsQualified()) {
24
- auto column_name = col_ref.GetName();
25
- auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, column_name);
23
+ // Try binding as a lambda parameter.
24
+ auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName());
26
25
  if (lambda_ref) {
27
26
  return BindLambdaReference(lambda_ref->Cast<LambdaRefExpression>(), depth);
28
27
  }
29
- if (binder.macro_binding && binder.macro_binding->HasMatchingBinding(column_name)) {
28
+ if (binder.macro_binding && binder.macro_binding->HasMatchingBinding(col_ref.GetName())) {
30
29
  throw ParameterNotResolvedException();
31
30
  }
32
31
  }
32
+
33
33
  auto query_location = col_ref.GetQueryLocation();
34
34
  auto column_names = col_ref.column_names;
35
35
  auto result_name = StringUtil::Join(column_names, ".");
@@ -36,13 +36,14 @@ string ExpressionFilter::ToString(const string &column_name) const {
36
36
  return ToExpression(*name_expr)->ToString();
37
37
  }
38
38
 
39
- static void ReplaceExpressionRecursive(unique_ptr<Expression> &expr, const Expression &column) {
40
- if (expr->type == ExpressionType::BOUND_REF) {
39
+ void ExpressionFilter::ReplaceExpressionRecursive(unique_ptr<Expression> &expr, const Expression &column,
40
+ ExpressionType replace_type) {
41
+ if (expr->type == replace_type) {
41
42
  expr = column.Copy();
42
43
  return;
43
44
  }
44
45
  ExpressionIterator::EnumerateChildren(
45
- *expr, [&](unique_ptr<Expression> &child) { ReplaceExpressionRecursive(child, column); });
46
+ *expr, [&](unique_ptr<Expression> &child) { ReplaceExpressionRecursive(child, column, replace_type); });
46
47
  }
47
48
 
48
49
  unique_ptr<Expression> ExpressionFilter::ToExpression(const Expression &column) const {
@@ -58,7 +58,8 @@ bool DynamicTableFilterSet::HasFilters() const {
58
58
  unique_ptr<TableFilterSet>
59
59
  DynamicTableFilterSet::GetFinalTableFilters(const PhysicalTableScan &scan,
60
60
  optional_ptr<TableFilterSet> existing_filters) const {
61
- D_ASSERT(HasFilters());
61
+ lock_guard<mutex> l(lock);
62
+ D_ASSERT(!filters.empty());
62
63
  auto result = make_uniq<TableFilterSet>();
63
64
  if (existing_filters) {
64
65
  for (auto &entry : existing_filters->filters) {
@@ -211,7 +211,7 @@ void EvictionQueue::PurgeIteration(const idx_t purge_size) {
211
211
  }
212
212
 
213
213
  // bulk purge
214
- idx_t actually_dequeued = q.try_dequeue_bulk(purge_nodes.begin(), purge_size);
214
+ const idx_t actually_dequeued = q.try_dequeue_bulk(purge_nodes.begin(), purge_size);
215
215
 
216
216
  // retrieve all alive nodes that have been wrongly dequeued
217
217
  idx_t alive_nodes = 0;
@@ -219,11 +219,13 @@ void EvictionQueue::PurgeIteration(const idx_t purge_size) {
219
219
  auto &node = purge_nodes[i];
220
220
  auto handle = node.TryGetBlockHandle();
221
221
  if (handle) {
222
- q.enqueue(std::move(node));
223
- alive_nodes++;
222
+ purge_nodes[alive_nodes++] = std::move(node);
224
223
  }
225
224
  }
226
225
 
226
+ // bulk re-add (TODO order them by timestamp to better retain the LRU behavior)
227
+ q.enqueue_bulk(purge_nodes.begin(), alive_nodes);
228
+
227
229
  total_dead_nodes -= actually_dequeued - alive_nodes;
228
230
  }
229
231
 
@@ -64,7 +64,7 @@ struct FSSTStorage {
64
64
  static char *FetchStringPointer(StringDictionaryContainer dict, data_ptr_t baseptr, int32_t dict_offset);
65
65
  static bp_delta_offsets_t CalculateBpDeltaOffsets(int64_t last_known_row, idx_t start, idx_t scan_count);
66
66
  static bool ParseFSSTSegmentHeader(data_ptr_t base_ptr, duckdb_fsst_decoder_t *decoder_out,
67
- bitpacking_width_t *width_out);
67
+ bitpacking_width_t *width_out, const idx_t block_size);
68
68
  static bp_delta_offsets_t StartScan(FSSTScanState &scan_state, data_ptr_t base_data, idx_t start,
69
69
  idx_t vector_count);
70
70
  static void EndScan(FSSTScanState &scan_state, bp_delta_offsets_t &offsets, idx_t start, idx_t scan_count);
@@ -335,14 +335,15 @@ public:
335
335
  idx_t Finalize() {
336
336
  auto &buffer_manager = BufferManager::GetBufferManager(current_segment->db);
337
337
  auto handle = buffer_manager.Pin(current_segment->block);
338
- D_ASSERT(current_dictionary.end == info.GetBlockSize());
338
+ if (current_dictionary.end != info.GetBlockSize()) {
339
+ throw InternalException("dictionary end does not match the block size in FSSTCompressionState::Finalize");
340
+ }
339
341
 
340
342
  // calculate sizes
341
343
  auto compressed_index_buffer_size =
342
344
  BitpackingPrimitives::GetRequiredSize(current_segment->count, current_width);
343
345
  auto total_size = sizeof(fsst_compression_header_t) + compressed_index_buffer_size + current_dictionary.size +
344
346
  fsst_serialized_symbol_table_size;
345
-
346
347
  if (total_size != last_fitting_size) {
347
348
  throw InternalException("FSST string compression failed due to incorrect size calculation");
348
349
  }
@@ -365,8 +366,12 @@ public:
365
366
  memset(base_ptr + symbol_table_offset, 0, fsst_serialized_symbol_table_size);
366
367
  }
367
368
 
368
- Store<uint32_t>(NumericCast<uint32_t>(symbol_table_offset),
369
- data_ptr_cast(&header_ptr->fsst_symbol_table_offset));
369
+ auto cast_symbol_table_offset = NumericCast<uint32_t>(symbol_table_offset);
370
+ if (cast_symbol_table_offset > info.GetBlockSize()) {
371
+ throw InternalException("invalid fsst_symbol_table_offset in FSSTCompressionState::Finalize");
372
+ }
373
+
374
+ Store<uint32_t>(cast_symbol_table_offset, data_ptr_cast(&header_ptr->fsst_symbol_table_offset));
370
375
  Store<uint32_t>((uint32_t)current_width, data_ptr_cast(&header_ptr->bitpacking_width));
371
376
 
372
377
  if (total_size >= info.GetCompactionFlushLimit()) {
@@ -563,15 +568,16 @@ struct FSSTScanState : public StringScanState {
563
568
  };
564
569
 
565
570
  unique_ptr<SegmentScanState> FSSTStorage::StringInitScan(ColumnSegment &segment) {
566
- auto string_block_limit = StringUncompressed::GetStringBlockLimit(segment.GetBlockManager().GetBlockSize());
571
+ auto block_size = segment.GetBlockManager().GetBlockSize();
572
+ auto string_block_limit = StringUncompressed::GetStringBlockLimit(block_size);
567
573
  auto state = make_uniq<FSSTScanState>(string_block_limit);
568
574
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
569
575
  state->handle = buffer_manager.Pin(segment.block);
570
576
  auto base_ptr = state->handle.Ptr() + segment.GetBlockOffset();
571
577
 
572
578
  state->duckdb_fsst_decoder = make_buffer<duckdb_fsst_decoder_t>();
573
- auto retval = ParseFSSTSegmentHeader(
574
- base_ptr, reinterpret_cast<duckdb_fsst_decoder_t *>(state->duckdb_fsst_decoder.get()), &state->current_width);
579
+ auto decoder = reinterpret_cast<duckdb_fsst_decoder_t *>(state->duckdb_fsst_decoder.get());
580
+ auto retval = ParseFSSTSegmentHeader(base_ptr, decoder, &state->current_width, block_size);
575
581
  if (!retval) {
576
582
  state->duckdb_fsst_decoder = nullptr;
577
583
  }
@@ -736,7 +742,8 @@ void FSSTStorage::StringFetchRow(ColumnSegment &segment, ColumnFetchState &state
736
742
 
737
743
  duckdb_fsst_decoder_t decoder;
738
744
  bitpacking_width_t width;
739
- auto have_symbol_table = ParseFSSTSegmentHeader(base_ptr, &decoder, &width);
745
+ auto block_size = segment.GetBlockManager().GetBlockSize();
746
+ auto have_symbol_table = ParseFSSTSegmentHeader(base_ptr, &decoder, &width, block_size);
740
747
 
741
748
  auto result_data = FlatVector::GetData<string_t>(result);
742
749
  if (!have_symbol_table) {
@@ -814,9 +821,12 @@ char *FSSTStorage::FetchStringPointer(StringDictionaryContainer dict, data_ptr_t
814
821
 
815
822
  // Returns false if no symbol table was found. This means all strings are either empty or null
816
823
  bool FSSTStorage::ParseFSSTSegmentHeader(data_ptr_t base_ptr, duckdb_fsst_decoder_t *decoder_out,
817
- bitpacking_width_t *width_out) {
824
+ bitpacking_width_t *width_out, const idx_t block_size) {
818
825
  auto header_ptr = reinterpret_cast<fsst_compression_header_t *>(base_ptr);
819
826
  auto fsst_symbol_table_offset = Load<uint32_t>(data_ptr_cast(&header_ptr->fsst_symbol_table_offset));
827
+ if (fsst_symbol_table_offset > block_size) {
828
+ throw InternalException("invalid fsst_symbol_table_offset in FSSTStorage::ParseFSSTSegmentHeader");
829
+ }
820
830
  *width_out = (bitpacking_width_t)(Load<uint32_t>(data_ptr_cast(&header_ptr->bitpacking_width)));
821
831
  return duckdb_fsst_import(decoder_out, base_ptr + fsst_symbol_table_offset);
822
832
  }