duckdb 1.3.2-dev0.0 → 1.3.3-dev0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +2 -2
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +10 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +2 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +7 -2
- package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +9 -5
- package/src/duckdb/extension/parquet/parquet_extension.cpp +5 -2
- package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -2
- package/src/duckdb/src/catalog/catalog.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +4 -2
- package/src/duckdb/src/common/error_data.cpp +7 -0
- package/src/duckdb/src/common/operator/string_cast.cpp +3 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +5 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +10 -2
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -2
- package/src/duckdb/src/function/function_list.cpp +1 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +6 -6
- package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +1 -1
- package/src/duckdb/src/function/table/table_scan.cpp +43 -84
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +3 -1
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/concurrentqueue.hpp +8 -0
- package/src/duckdb/src/include/duckdb/planner/filter/expression_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +1 -1
- package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
- package/src/duckdb/src/main/database_manager.cpp +7 -2
- package/src/duckdb/src/main/database_path_and_type.cpp +1 -1
- package/src/duckdb/src/main/settings/autogenerated_settings.cpp +0 -34
- package/src/duckdb/src/main/settings/custom_settings.cpp +49 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +4 -4
- package/src/duckdb/src/optimizer/filter_combiner.cpp +1 -1
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +9 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +3 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +68 -3
- package/src/duckdb/src/parser/statement/set_statement.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +2 -3
- package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -3
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +4 -4
- package/src/duckdb/src/planner/filter/expression_filter.cpp +4 -3
- package/src/duckdb/src/planner/table_filter.cpp +2 -1
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +5 -3
- package/src/duckdb/src/storage/compression/fsst.cpp +20 -10
- package/src/duckdb/src/storage/compression/roaring/compress.cpp +15 -9
- package/src/duckdb/src/storage/compression/roaring/scan.cpp +10 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +2 -1
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -1
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +6 -3
@@ -221,6 +221,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
|
|
221
221
|
{"iceberg_metadata", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
|
222
222
|
{"iceberg_scan", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
|
223
223
|
{"iceberg_snapshots", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
|
224
|
+
{"iceberg_to_ducklake", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
|
224
225
|
{"icu_calendar_names", "icu", CatalogType::TABLE_FUNCTION_ENTRY},
|
225
226
|
{"icu_collate_af", "icu", CatalogType::SCALAR_FUNCTION_ENTRY},
|
226
227
|
{"icu_collate_am", "icu", CatalogType::SCALAR_FUNCTION_ENTRY},
|
@@ -624,6 +625,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
|
|
624
625
|
{"st_linestring2dfromwkb", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
|
625
626
|
{"st_linesubstring", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
|
626
627
|
{"st_m", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
|
628
|
+
{"st_makebox2d", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
|
627
629
|
{"st_makeenvelope", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
|
628
630
|
{"st_makeline", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
|
629
631
|
{"st_makepolygon", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
|
@@ -996,6 +998,9 @@ static constexpr ExtensionEntry EXTENSION_SETTINGS[] = {
|
|
996
998
|
{"ca_cert_file", "httpfs"},
|
997
999
|
{"calendar", "icu"},
|
998
1000
|
{"disable_parquet_prefetching", "parquet"},
|
1001
|
+
{"ducklake_max_retry_count", "ducklake"},
|
1002
|
+
{"ducklake_retry_backoff", "ducklake"},
|
1003
|
+
{"ducklake_retry_wait_ms", "ducklake"},
|
999
1004
|
{"enable_geoparquet_conversion", "parquet"},
|
1000
1005
|
{"enable_server_cert_verification", "httpfs"},
|
1001
1006
|
{"force_download", "httpfs"},
|
@@ -560,7 +560,7 @@ struct EnableFSSTVectorsSetting {
|
|
560
560
|
struct EnableHTTPLoggingSetting {
|
561
561
|
using RETURN_TYPE = bool;
|
562
562
|
static constexpr const char *Name = "enable_http_logging";
|
563
|
-
static constexpr const char *Description = "Enables HTTP logging";
|
563
|
+
static constexpr const char *Description = "(deprecated) Enables HTTP logging";
|
564
564
|
static constexpr const char *InputType = "BOOLEAN";
|
565
565
|
static void SetLocal(ClientContext &context, const Value &parameter);
|
566
566
|
static void ResetLocal(ClientContext &context);
|
@@ -750,7 +750,7 @@ struct HTTPLoggingOutputSetting {
|
|
750
750
|
using RETURN_TYPE = string;
|
751
751
|
static constexpr const char *Name = "http_logging_output";
|
752
752
|
static constexpr const char *Description =
|
753
|
-
"The file to which HTTP logging output should be saved, or empty to print to the terminal";
|
753
|
+
"(deprecated) The file to which HTTP logging output should be saved, or empty to print to the terminal";
|
754
754
|
static constexpr const char *InputType = "VARCHAR";
|
755
755
|
static void SetLocal(ClientContext &context, const Value &parameter);
|
756
756
|
static void ResetLocal(ClientContext &context);
|
@@ -84,6 +84,14 @@ public:
|
|
84
84
|
}
|
85
85
|
return max;
|
86
86
|
}
|
87
|
+
|
88
|
+
template <typename It>
|
89
|
+
bool enqueue_bulk(It itemFirst, size_t count) {
|
90
|
+
for (size_t i = 0; i < count; i++) {
|
91
|
+
q.push(std::move(*itemFirst++));
|
92
|
+
}
|
93
|
+
return true;
|
94
|
+
}
|
87
95
|
};
|
88
96
|
|
89
97
|
} // namespace duckdb_moodycamel
|
@@ -35,6 +35,8 @@ public:
|
|
35
35
|
unique_ptr<Expression> ToExpression(const Expression &column) const override;
|
36
36
|
void Serialize(Serializer &serializer) const override;
|
37
37
|
static unique_ptr<TableFilter> Deserialize(Deserializer &deserializer);
|
38
|
+
static void ReplaceExpressionRecursive(unique_ptr<Expression> &expr, const Expression &column,
|
39
|
+
ExpressionType replace_type = ExpressionType::BOUND_REF);
|
38
40
|
};
|
39
41
|
|
40
42
|
} // namespace duckdb
|
@@ -198,7 +198,7 @@ public:
|
|
198
198
|
error += StringUtil::Format("Node %lld: Start %lld, Count %lld", i, nodes[i].row_start,
|
199
199
|
nodes[i].node->count.load());
|
200
200
|
}
|
201
|
-
throw InternalException("Could not find node in column segment tree!\n%s
|
201
|
+
throw InternalException("Could not find node in column segment tree!\n%s", error);
|
202
202
|
}
|
203
203
|
|
204
204
|
bool TryGetSegmentIndex(SegmentLock &l, idx_t row_number, idx_t &result) {
|
@@ -24,6 +24,10 @@ optional_ptr<DatabaseInstance> ClientContextFileOpener::TryGetDatabase() {
|
|
24
24
|
return context.db.get();
|
25
25
|
}
|
26
26
|
|
27
|
+
shared_ptr<HTTPUtil> &ClientContextFileOpener::GetHTTPUtil() {
|
28
|
+
return TryGetDatabase()->config.http_util;
|
29
|
+
}
|
30
|
+
|
27
31
|
unique_ptr<CatalogTransaction> FileOpener::TryGetCatalogTransaction(optional_ptr<FileOpener> opener) {
|
28
32
|
if (!opener) {
|
29
33
|
return nullptr;
|
@@ -256,8 +256,13 @@ vector<reference<AttachedDatabase>> DatabaseManager::GetDatabases(ClientContext
|
|
256
256
|
return true;
|
257
257
|
});
|
258
258
|
|
259
|
-
|
260
|
-
|
259
|
+
if (!max_db_count.IsValid() || max_db_count.GetIndex() >= 1) {
|
260
|
+
result.push_back(*system);
|
261
|
+
}
|
262
|
+
if (!max_db_count.IsValid() || max_db_count.GetIndex() >= 2) {
|
263
|
+
result.push_back(*context.client_data->temporary_objects);
|
264
|
+
}
|
265
|
+
|
261
266
|
return result;
|
262
267
|
}
|
263
268
|
|
@@ -26,7 +26,7 @@ void DBPathAndType::CheckMagicBytes(FileSystem &fs, string &path, string &db_typ
|
|
26
26
|
case DataFileType::PARQUET_FILE:
|
27
27
|
case DataFileType::UNKNOWN_FILE: {
|
28
28
|
// FIXME: we should get this from the registered replacement scans instead of hardcoding it here
|
29
|
-
vector<string> supported_suffixes {"parquet", "csv", "json", "jsonl", "ndjson"};
|
29
|
+
vector<string> supported_suffixes {"parquet", "csv", "tsv", "json", "jsonl", "ndjson"};
|
30
30
|
if (ReplacementScan::CanReplace(path, supported_suffixes)) {
|
31
31
|
db_type = "__open_file__";
|
32
32
|
break;
|
@@ -505,23 +505,6 @@ Value EnableFSSTVectorsSetting::GetSetting(const ClientContext &context) {
|
|
505
505
|
return Value::BOOLEAN(config.options.enable_fsst_vectors);
|
506
506
|
}
|
507
507
|
|
508
|
-
//===----------------------------------------------------------------------===//
|
509
|
-
// Enable H T T P Logging
|
510
|
-
//===----------------------------------------------------------------------===//
|
511
|
-
void EnableHTTPLoggingSetting::SetLocal(ClientContext &context, const Value &input) {
|
512
|
-
auto &config = ClientConfig::GetConfig(context);
|
513
|
-
config.enable_http_logging = input.GetValue<bool>();
|
514
|
-
}
|
515
|
-
|
516
|
-
void EnableHTTPLoggingSetting::ResetLocal(ClientContext &context) {
|
517
|
-
ClientConfig::GetConfig(context).enable_http_logging = ClientConfig().enable_http_logging;
|
518
|
-
}
|
519
|
-
|
520
|
-
Value EnableHTTPLoggingSetting::GetSetting(const ClientContext &context) {
|
521
|
-
auto &config = ClientConfig::GetConfig(context);
|
522
|
-
return Value::BOOLEAN(config.enable_http_logging);
|
523
|
-
}
|
524
|
-
|
525
508
|
//===----------------------------------------------------------------------===//
|
526
509
|
// Enable H T T P Metadata Cache
|
527
510
|
//===----------------------------------------------------------------------===//
|
@@ -678,23 +661,6 @@ Value HomeDirectorySetting::GetSetting(const ClientContext &context) {
|
|
678
661
|
return Value(config.home_directory);
|
679
662
|
}
|
680
663
|
|
681
|
-
//===----------------------------------------------------------------------===//
|
682
|
-
// H T T P Logging Output
|
683
|
-
//===----------------------------------------------------------------------===//
|
684
|
-
void HTTPLoggingOutputSetting::SetLocal(ClientContext &context, const Value &input) {
|
685
|
-
auto &config = ClientConfig::GetConfig(context);
|
686
|
-
config.http_logging_output = input.GetValue<string>();
|
687
|
-
}
|
688
|
-
|
689
|
-
void HTTPLoggingOutputSetting::ResetLocal(ClientContext &context) {
|
690
|
-
ClientConfig::GetConfig(context).http_logging_output = ClientConfig().http_logging_output;
|
691
|
-
}
|
692
|
-
|
693
|
-
Value HTTPLoggingOutputSetting::GetSetting(const ClientContext &context) {
|
694
|
-
auto &config = ClientConfig::GetConfig(context);
|
695
|
-
return Value(config.http_logging_output);
|
696
|
-
}
|
697
|
-
|
698
664
|
//===----------------------------------------------------------------------===//
|
699
665
|
// H T T P Proxy
|
700
666
|
//===----------------------------------------------------------------------===//
|
@@ -1088,6 +1088,55 @@ void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input)
|
|
1088
1088
|
config.home_directory = input.IsNull() ? string() : input.ToString();
|
1089
1089
|
}
|
1090
1090
|
|
1091
|
+
//===----------------------------------------------------------------------===//
|
1092
|
+
// Enable H T T P Logging
|
1093
|
+
//===----------------------------------------------------------------------===//
|
1094
|
+
void EnableHTTPLoggingSetting::SetLocal(ClientContext &context, const Value &input) {
|
1095
|
+
auto &config = ClientConfig::GetConfig(context);
|
1096
|
+
config.enable_http_logging = input.GetValue<bool>();
|
1097
|
+
|
1098
|
+
// NOTE: this is a deprecated setting: we mimick the old behaviour by setting the log storage output to STDOUT and
|
1099
|
+
// enabling logging for http only. Note that this behaviour is slightly wonky in that it sets all sorts of logging
|
1100
|
+
// config
|
1101
|
+
auto &log_manager = LogManager::Get(context);
|
1102
|
+
if (config.enable_http_logging) {
|
1103
|
+
log_manager.SetEnableLogging(true);
|
1104
|
+
log_manager.SetLogLevel(HTTPLogType::LEVEL);
|
1105
|
+
unordered_set<string> enabled_log_types = {HTTPLogType::NAME};
|
1106
|
+
log_manager.SetEnabledLogTypes(enabled_log_types);
|
1107
|
+
log_manager.SetLogStorage(*context.db, LogConfig::STDOUT_STORAGE_NAME);
|
1108
|
+
} else {
|
1109
|
+
log_manager.SetEnableLogging(false);
|
1110
|
+
}
|
1111
|
+
}
|
1112
|
+
|
1113
|
+
void EnableHTTPLoggingSetting::ResetLocal(ClientContext &context) {
|
1114
|
+
ClientConfig::GetConfig(context).enable_http_logging = ClientConfig().enable_http_logging;
|
1115
|
+
}
|
1116
|
+
|
1117
|
+
Value EnableHTTPLoggingSetting::GetSetting(const ClientContext &context) {
|
1118
|
+
auto &config = ClientConfig::GetConfig(context);
|
1119
|
+
return Value::BOOLEAN(config.enable_http_logging);
|
1120
|
+
}
|
1121
|
+
|
1122
|
+
//===----------------------------------------------------------------------===//
|
1123
|
+
// H T T P Logging Output
|
1124
|
+
//===----------------------------------------------------------------------===//
|
1125
|
+
void HTTPLoggingOutputSetting::SetLocal(ClientContext &context, const Value &input) {
|
1126
|
+
throw NotImplementedException("This setting is deprecated and can no longer be used. Check out the DuckDB docs on "
|
1127
|
+
"logging for more information");
|
1128
|
+
}
|
1129
|
+
|
1130
|
+
void HTTPLoggingOutputSetting::ResetLocal(ClientContext &context) {
|
1131
|
+
throw NotImplementedException("This setting is deprecated and can no longer be used. Check out the DuckDB docs on "
|
1132
|
+
"logging for more information");
|
1133
|
+
}
|
1134
|
+
|
1135
|
+
Value HTTPLoggingOutputSetting::GetSetting(const ClientContext &context) {
|
1136
|
+
auto &config = ClientConfig::GetConfig(context);
|
1137
|
+
return Value(config.http_logging_output);
|
1138
|
+
}
|
1139
|
+
|
1091
1140
|
//===----------------------------------------------------------------------===//
|
1092
1141
|
// Index Scan Percentage
|
1093
1142
|
//===----------------------------------------------------------------------===//
|
@@ -330,7 +330,7 @@ static Value GetIntegralRangeValue(ClientContext &context, const LogicalType &ty
|
|
330
330
|
auto min = NumericStats::Min(stats);
|
331
331
|
auto max = NumericStats::Max(stats);
|
332
332
|
if (max < min) {
|
333
|
-
return Value::
|
333
|
+
return Value::UHUGEINT(NumericLimits<uhugeint_t>::Maximum());
|
334
334
|
}
|
335
335
|
|
336
336
|
vector<unique_ptr<Expression>> arguments;
|
@@ -342,8 +342,8 @@ static Value GetIntegralRangeValue(ClientContext &context, const LogicalType &ty
|
|
342
342
|
if (ExpressionExecutor::TryEvaluateScalar(context, sub, result)) {
|
343
343
|
return result;
|
344
344
|
} else {
|
345
|
-
// Couldn't evaluate: Return max
|
346
|
-
return Value::
|
345
|
+
// Couldn't evaluate: Return max uhugeint as range so GetIntegralCompress will return nullptr
|
346
|
+
return Value::UHUGEINT(NumericLimits<uhugeint_t>::Maximum());
|
347
347
|
}
|
348
348
|
}
|
349
349
|
|
@@ -354,7 +354,7 @@ unique_ptr<CompressExpression> CompressedMaterialization::GetIntegralCompress(un
|
|
354
354
|
return nullptr;
|
355
355
|
}
|
356
356
|
|
357
|
-
// Get range and cast to UBIGINT (might fail for
|
357
|
+
// Get range and cast to UBIGINT (might fail for UHUGEINT, in which case we just return)
|
358
358
|
Value range_value = GetIntegralRangeValue(context, type, stats);
|
359
359
|
if (!range_value.DefaultTryCastAs(LogicalType::UBIGINT)) {
|
360
360
|
return nullptr;
|
@@ -242,7 +242,7 @@ bool FilterCombiner::IsDenseRange(vector<Value> &in_list) {
|
|
242
242
|
if (in_list.empty()) {
|
243
243
|
return true;
|
244
244
|
}
|
245
|
-
if (!in_list[0].type().IsIntegral()) {
|
245
|
+
if (!in_list[0].type().IsIntegral() || in_list[0].type() == LogicalType::UHUGEINT) {
|
246
246
|
return false;
|
247
247
|
}
|
248
248
|
// sort the input list
|
@@ -10,19 +10,27 @@
|
|
10
10
|
|
11
11
|
namespace duckdb {
|
12
12
|
|
13
|
-
JoinOrderOptimizer::JoinOrderOptimizer(ClientContext &context)
|
13
|
+
JoinOrderOptimizer::JoinOrderOptimizer(ClientContext &context)
|
14
|
+
: context(context), query_graph_manager(context), depth(1) {
|
14
15
|
}
|
15
16
|
|
16
17
|
JoinOrderOptimizer JoinOrderOptimizer::CreateChildOptimizer() {
|
17
18
|
JoinOrderOptimizer child_optimizer(context);
|
18
19
|
child_optimizer.materialized_cte_stats = materialized_cte_stats;
|
19
20
|
child_optimizer.delim_scan_stats = delim_scan_stats;
|
21
|
+
child_optimizer.depth = depth + 1;
|
20
22
|
return child_optimizer;
|
21
23
|
}
|
22
24
|
|
23
25
|
unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOperator> plan,
|
24
26
|
optional_ptr<RelationStats> stats) {
|
25
27
|
|
28
|
+
if (depth > query_graph_manager.context.config.max_expression_depth) {
|
29
|
+
// Very deep plans will eventually consume quite some stack space
|
30
|
+
// Returning the current plan is always a valid choice
|
31
|
+
return plan;
|
32
|
+
}
|
33
|
+
|
26
34
|
// make sure query graph manager has not extracted a relation graph already
|
27
35
|
LogicalOperator *op = plan.get();
|
28
36
|
|
@@ -256,6 +256,9 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
|
|
256
256
|
ColumnBinding filter_binding(get.table_index, index.GetIndex());
|
257
257
|
auto column_ref = make_uniq<BoundColumnRefExpression>(std::move(column_type), filter_binding);
|
258
258
|
auto filter_expr = filter.second->ToExpression(*column_ref);
|
259
|
+
if (filter_expr->IsScalar()) {
|
260
|
+
filter_expr = std::move(column_ref);
|
261
|
+
}
|
259
262
|
VisitExpression(&filter_expr);
|
260
263
|
filter_expressions.push_back(std::move(filter_expr));
|
261
264
|
}
|
@@ -1,26 +1,51 @@
|
|
1
1
|
#include "duckdb/common/helper.hpp"
|
2
2
|
#include "duckdb/optimizer/statistics_propagator.hpp"
|
3
3
|
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
|
4
|
+
#include "duckdb/planner/expression_iterator.hpp"
|
4
5
|
#include "duckdb/planner/filter/conjunction_filter.hpp"
|
5
6
|
#include "duckdb/planner/filter/constant_filter.hpp"
|
7
|
+
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
8
|
+
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
6
9
|
#include "duckdb/planner/filter/expression_filter.hpp"
|
7
10
|
#include "duckdb/planner/filter/null_filter.hpp"
|
8
11
|
#include "duckdb/planner/operator/logical_get.hpp"
|
9
12
|
#include "duckdb/planner/table_filter.hpp"
|
13
|
+
#include "duckdb/function/scalar/generic_common.hpp"
|
14
|
+
#include "duckdb/function/scalar/generic_functions.hpp"
|
10
15
|
|
11
16
|
namespace duckdb {
|
12
17
|
|
18
|
+
static void GetColumnIndex(unique_ptr<Expression> &expr, idx_t &index) {
|
19
|
+
if (expr->type == ExpressionType::BOUND_REF) {
|
20
|
+
auto &bound_ref = expr->Cast<BoundReferenceExpression>();
|
21
|
+
index = bound_ref.index;
|
22
|
+
return;
|
23
|
+
}
|
24
|
+
ExpressionIterator::EnumerateChildren(*expr, [&](unique_ptr<Expression> &child) { GetColumnIndex(child, index); });
|
25
|
+
}
|
26
|
+
|
13
27
|
FilterPropagateResult StatisticsPropagator::PropagateTableFilter(ColumnBinding stats_binding, BaseStatistics &stats,
|
14
28
|
TableFilter &filter) {
|
15
29
|
if (filter.filter_type == TableFilterType::EXPRESSION_FILTER) {
|
16
30
|
auto &expr_filter = filter.Cast<ExpressionFilter>();
|
31
|
+
|
32
|
+
// get physical storage index of the filter
|
33
|
+
// since it is a table filter, every storage index is the same
|
34
|
+
idx_t physical_index = DConstants::INVALID_INDEX;
|
35
|
+
GetColumnIndex(expr_filter.expr, physical_index);
|
36
|
+
D_ASSERT(physical_index != DConstants::INVALID_INDEX);
|
37
|
+
|
17
38
|
auto column_ref = make_uniq<BoundColumnRefExpression>(stats.GetType(), stats_binding);
|
18
39
|
auto filter_expr = expr_filter.ToExpression(*column_ref);
|
19
40
|
// handle the filter before updating the statistics
|
20
41
|
// otherwise the filter can be pruned by the updated statistics
|
21
|
-
auto copy_expr = filter_expr->Copy();
|
22
42
|
auto propagate_result = HandleFilter(filter_expr);
|
23
|
-
|
43
|
+
auto colref = make_uniq<BoundReferenceExpression>(stats.GetType(), physical_index);
|
44
|
+
UpdateFilterStatistics(*filter_expr);
|
45
|
+
|
46
|
+
// replace BoundColumnRefs with BoundRefs
|
47
|
+
ExpressionFilter::ReplaceExpressionRecursive(filter_expr, *colref, ExpressionType::BOUND_COLUMN_REF);
|
48
|
+
expr_filter.expr = std::move(filter_expr);
|
24
49
|
return propagate_result;
|
25
50
|
}
|
26
51
|
return filter.CheckStatistics(stats);
|
@@ -46,6 +71,41 @@ void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &input, TableFi
|
|
46
71
|
}
|
47
72
|
}
|
48
73
|
|
74
|
+
static bool IsConstantOrNullFilter(TableFilter &table_filter) {
|
75
|
+
if (table_filter.filter_type != TableFilterType::EXPRESSION_FILTER) {
|
76
|
+
return false;
|
77
|
+
}
|
78
|
+
auto &expr_filter = table_filter.Cast<ExpressionFilter>();
|
79
|
+
if (expr_filter.expr->type != ExpressionType::BOUND_FUNCTION) {
|
80
|
+
return false;
|
81
|
+
}
|
82
|
+
auto &func = expr_filter.expr->Cast<BoundFunctionExpression>();
|
83
|
+
return ConstantOrNull::IsConstantOrNull(func, Value::BOOLEAN(true));
|
84
|
+
}
|
85
|
+
|
86
|
+
static bool CanReplaceConstantOrNull(TableFilter &table_filter) {
|
87
|
+
if (!IsConstantOrNullFilter(table_filter)) {
|
88
|
+
throw InternalException("CanReplaceConstantOrNull() called on unexepected Table Filter");
|
89
|
+
}
|
90
|
+
D_ASSERT(table_filter.filter_type == TableFilterType::EXPRESSION_FILTER);
|
91
|
+
auto &expr_filter = table_filter.Cast<ExpressionFilter>();
|
92
|
+
auto &func = expr_filter.expr->Cast<BoundFunctionExpression>();
|
93
|
+
if (ConstantOrNull::IsConstantOrNull(func, Value::BOOLEAN(true))) {
|
94
|
+
for (auto child = ++func.children.begin(); child != func.children.end(); child++) {
|
95
|
+
switch (child->get()->type) {
|
96
|
+
case ExpressionType::BOUND_REF:
|
97
|
+
case ExpressionType::VALUE_CONSTANT:
|
98
|
+
continue;
|
99
|
+
default:
|
100
|
+
// expression type could be a function like Coalesce
|
101
|
+
return false;
|
102
|
+
}
|
103
|
+
}
|
104
|
+
}
|
105
|
+
// all children of constant or null are bound refs to the table filter column
|
106
|
+
return true;
|
107
|
+
}
|
108
|
+
|
49
109
|
unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalGet &get,
|
50
110
|
unique_ptr<LogicalOperator> &node_ptr) {
|
51
111
|
if (get.function.cardinality) {
|
@@ -99,10 +159,15 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalGet
|
|
99
159
|
// erase this condition
|
100
160
|
get.table_filters.filters.erase(table_filter_column);
|
101
161
|
break;
|
102
|
-
case FilterPropagateResult::FILTER_TRUE_OR_NULL:
|
162
|
+
case FilterPropagateResult::FILTER_TRUE_OR_NULL: {
|
163
|
+
if (IsConstantOrNullFilter(*get.table_filters.filters[table_filter_column]) &&
|
164
|
+
!CanReplaceConstantOrNull(*get.table_filters.filters[table_filter_column])) {
|
165
|
+
break;
|
166
|
+
}
|
103
167
|
// filter is true or null; we can replace this with a not null filter
|
104
168
|
get.table_filters.filters[table_filter_column] = make_uniq<IsNotNullFilter>();
|
105
169
|
break;
|
170
|
+
}
|
106
171
|
case FilterPropagateResult::FILTER_FALSE_OR_NULL:
|
107
172
|
case FilterPropagateResult::FILTER_ALWAYS_FALSE:
|
108
173
|
// filter is always false; this entire filter should be replaced by an empty result block
|
@@ -14,7 +14,7 @@ SetVariableStatement::SetVariableStatement(string name_p, unique_ptr<ParsedExpre
|
|
14
14
|
}
|
15
15
|
|
16
16
|
SetVariableStatement::SetVariableStatement(const SetVariableStatement &other)
|
17
|
-
:
|
17
|
+
: SetStatement(other), value(other.value->Copy()) {
|
18
18
|
}
|
19
19
|
|
20
20
|
unique_ptr<SQLStatement> SetVariableStatement::Copy() const {
|
@@ -445,10 +445,9 @@ unique_ptr<ParsedExpression> ExpressionBinder::QualifyColumnNameWithManyDots(Col
|
|
445
445
|
}
|
446
446
|
|
447
447
|
unique_ptr<ParsedExpression> ExpressionBinder::QualifyColumnName(ColumnRefExpression &col_ref, ErrorData &error) {
|
448
|
-
|
449
|
-
// try binding as a lambda parameter
|
450
448
|
if (!col_ref.IsQualified()) {
|
451
|
-
|
449
|
+
// Try binding as a lambda parameter.
|
450
|
+
auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName());
|
452
451
|
if (lambda_ref) {
|
453
452
|
return lambda_ref;
|
454
453
|
}
|
@@ -38,10 +38,9 @@ string AlterBinder::UnsupportedAggregateMessage() {
|
|
38
38
|
}
|
39
39
|
|
40
40
|
BindResult AlterBinder::BindColumnReference(ColumnRefExpression &col_ref, idx_t depth) {
|
41
|
-
|
42
|
-
// try binding as a lambda parameter
|
43
41
|
if (!col_ref.IsQualified()) {
|
44
|
-
|
42
|
+
// Try binding as a lambda parameter.
|
43
|
+
auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName());
|
45
44
|
if (lambda_ref) {
|
46
45
|
return BindLambdaReference(lambda_ref->Cast<LambdaRefExpression>(), depth);
|
47
46
|
}
|
@@ -44,9 +44,9 @@ BindResult HavingBinder::BindColumnRef(unique_ptr<ParsedExpression> &expr_ptr, i
|
|
44
44
|
auto col_ref = expr_ptr->Cast<ColumnRefExpression>();
|
45
45
|
const auto &column_name = col_ref.GetColumnName();
|
46
46
|
|
47
|
-
// Try binding as a lambda parameter
|
48
47
|
if (!col_ref.IsQualified()) {
|
49
|
-
|
48
|
+
// Try binding as a lambda parameter.
|
49
|
+
auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName());
|
50
50
|
if (lambda_ref) {
|
51
51
|
return BindLambdaReference(lambda_ref->Cast<LambdaRefExpression>(), depth);
|
52
52
|
}
|
@@ -18,18 +18,18 @@ BindResult TableFunctionBinder::BindLambdaReference(LambdaRefExpression &expr, i
|
|
18
18
|
|
19
19
|
BindResult TableFunctionBinder::BindColumnReference(unique_ptr<ParsedExpression> &expr_ptr, idx_t depth,
|
20
20
|
bool root_expression) {
|
21
|
-
// try binding as a lambda parameter
|
22
21
|
auto &col_ref = expr_ptr->Cast<ColumnRefExpression>();
|
23
22
|
if (!col_ref.IsQualified()) {
|
24
|
-
|
25
|
-
auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings,
|
23
|
+
// Try binding as a lambda parameter.
|
24
|
+
auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetColumnName());
|
26
25
|
if (lambda_ref) {
|
27
26
|
return BindLambdaReference(lambda_ref->Cast<LambdaRefExpression>(), depth);
|
28
27
|
}
|
29
|
-
if (binder.macro_binding && binder.macro_binding->HasMatchingBinding(
|
28
|
+
if (binder.macro_binding && binder.macro_binding->HasMatchingBinding(col_ref.GetName())) {
|
30
29
|
throw ParameterNotResolvedException();
|
31
30
|
}
|
32
31
|
}
|
32
|
+
|
33
33
|
auto query_location = col_ref.GetQueryLocation();
|
34
34
|
auto column_names = col_ref.column_names;
|
35
35
|
auto result_name = StringUtil::Join(column_names, ".");
|
@@ -36,13 +36,14 @@ string ExpressionFilter::ToString(const string &column_name) const {
|
|
36
36
|
return ToExpression(*name_expr)->ToString();
|
37
37
|
}
|
38
38
|
|
39
|
-
|
40
|
-
|
39
|
+
void ExpressionFilter::ReplaceExpressionRecursive(unique_ptr<Expression> &expr, const Expression &column,
|
40
|
+
ExpressionType replace_type) {
|
41
|
+
if (expr->type == replace_type) {
|
41
42
|
expr = column.Copy();
|
42
43
|
return;
|
43
44
|
}
|
44
45
|
ExpressionIterator::EnumerateChildren(
|
45
|
-
*expr, [&](unique_ptr<Expression> &child) { ReplaceExpressionRecursive(child, column); });
|
46
|
+
*expr, [&](unique_ptr<Expression> &child) { ReplaceExpressionRecursive(child, column, replace_type); });
|
46
47
|
}
|
47
48
|
|
48
49
|
unique_ptr<Expression> ExpressionFilter::ToExpression(const Expression &column) const {
|
@@ -58,7 +58,8 @@ bool DynamicTableFilterSet::HasFilters() const {
|
|
58
58
|
unique_ptr<TableFilterSet>
|
59
59
|
DynamicTableFilterSet::GetFinalTableFilters(const PhysicalTableScan &scan,
|
60
60
|
optional_ptr<TableFilterSet> existing_filters) const {
|
61
|
-
|
61
|
+
lock_guard<mutex> l(lock);
|
62
|
+
D_ASSERT(!filters.empty());
|
62
63
|
auto result = make_uniq<TableFilterSet>();
|
63
64
|
if (existing_filters) {
|
64
65
|
for (auto &entry : existing_filters->filters) {
|
@@ -211,7 +211,7 @@ void EvictionQueue::PurgeIteration(const idx_t purge_size) {
|
|
211
211
|
}
|
212
212
|
|
213
213
|
// bulk purge
|
214
|
-
idx_t actually_dequeued = q.try_dequeue_bulk(purge_nodes.begin(), purge_size);
|
214
|
+
const idx_t actually_dequeued = q.try_dequeue_bulk(purge_nodes.begin(), purge_size);
|
215
215
|
|
216
216
|
// retrieve all alive nodes that have been wrongly dequeued
|
217
217
|
idx_t alive_nodes = 0;
|
@@ -219,11 +219,13 @@ void EvictionQueue::PurgeIteration(const idx_t purge_size) {
|
|
219
219
|
auto &node = purge_nodes[i];
|
220
220
|
auto handle = node.TryGetBlockHandle();
|
221
221
|
if (handle) {
|
222
|
-
|
223
|
-
alive_nodes++;
|
222
|
+
purge_nodes[alive_nodes++] = std::move(node);
|
224
223
|
}
|
225
224
|
}
|
226
225
|
|
226
|
+
// bulk re-add (TODO order them by timestamp to better retain the LRU behavior)
|
227
|
+
q.enqueue_bulk(purge_nodes.begin(), alive_nodes);
|
228
|
+
|
227
229
|
total_dead_nodes -= actually_dequeued - alive_nodes;
|
228
230
|
}
|
229
231
|
|
@@ -64,7 +64,7 @@ struct FSSTStorage {
|
|
64
64
|
static char *FetchStringPointer(StringDictionaryContainer dict, data_ptr_t baseptr, int32_t dict_offset);
|
65
65
|
static bp_delta_offsets_t CalculateBpDeltaOffsets(int64_t last_known_row, idx_t start, idx_t scan_count);
|
66
66
|
static bool ParseFSSTSegmentHeader(data_ptr_t base_ptr, duckdb_fsst_decoder_t *decoder_out,
|
67
|
-
bitpacking_width_t *width_out);
|
67
|
+
bitpacking_width_t *width_out, const idx_t block_size);
|
68
68
|
static bp_delta_offsets_t StartScan(FSSTScanState &scan_state, data_ptr_t base_data, idx_t start,
|
69
69
|
idx_t vector_count);
|
70
70
|
static void EndScan(FSSTScanState &scan_state, bp_delta_offsets_t &offsets, idx_t start, idx_t scan_count);
|
@@ -335,14 +335,15 @@ public:
|
|
335
335
|
idx_t Finalize() {
|
336
336
|
auto &buffer_manager = BufferManager::GetBufferManager(current_segment->db);
|
337
337
|
auto handle = buffer_manager.Pin(current_segment->block);
|
338
|
-
|
338
|
+
if (current_dictionary.end != info.GetBlockSize()) {
|
339
|
+
throw InternalException("dictionary end does not match the block size in FSSTCompressionState::Finalize");
|
340
|
+
}
|
339
341
|
|
340
342
|
// calculate sizes
|
341
343
|
auto compressed_index_buffer_size =
|
342
344
|
BitpackingPrimitives::GetRequiredSize(current_segment->count, current_width);
|
343
345
|
auto total_size = sizeof(fsst_compression_header_t) + compressed_index_buffer_size + current_dictionary.size +
|
344
346
|
fsst_serialized_symbol_table_size;
|
345
|
-
|
346
347
|
if (total_size != last_fitting_size) {
|
347
348
|
throw InternalException("FSST string compression failed due to incorrect size calculation");
|
348
349
|
}
|
@@ -365,8 +366,12 @@ public:
|
|
365
366
|
memset(base_ptr + symbol_table_offset, 0, fsst_serialized_symbol_table_size);
|
366
367
|
}
|
367
368
|
|
368
|
-
|
369
|
-
|
369
|
+
auto cast_symbol_table_offset = NumericCast<uint32_t>(symbol_table_offset);
|
370
|
+
if (cast_symbol_table_offset > info.GetBlockSize()) {
|
371
|
+
throw InternalException("invalid fsst_symbol_table_offset in FSSTCompressionState::Finalize");
|
372
|
+
}
|
373
|
+
|
374
|
+
Store<uint32_t>(cast_symbol_table_offset, data_ptr_cast(&header_ptr->fsst_symbol_table_offset));
|
370
375
|
Store<uint32_t>((uint32_t)current_width, data_ptr_cast(&header_ptr->bitpacking_width));
|
371
376
|
|
372
377
|
if (total_size >= info.GetCompactionFlushLimit()) {
|
@@ -563,15 +568,16 @@ struct FSSTScanState : public StringScanState {
|
|
563
568
|
};
|
564
569
|
|
565
570
|
unique_ptr<SegmentScanState> FSSTStorage::StringInitScan(ColumnSegment &segment) {
|
566
|
-
auto
|
571
|
+
auto block_size = segment.GetBlockManager().GetBlockSize();
|
572
|
+
auto string_block_limit = StringUncompressed::GetStringBlockLimit(block_size);
|
567
573
|
auto state = make_uniq<FSSTScanState>(string_block_limit);
|
568
574
|
auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
|
569
575
|
state->handle = buffer_manager.Pin(segment.block);
|
570
576
|
auto base_ptr = state->handle.Ptr() + segment.GetBlockOffset();
|
571
577
|
|
572
578
|
state->duckdb_fsst_decoder = make_buffer<duckdb_fsst_decoder_t>();
|
573
|
-
auto
|
574
|
-
|
579
|
+
auto decoder = reinterpret_cast<duckdb_fsst_decoder_t *>(state->duckdb_fsst_decoder.get());
|
580
|
+
auto retval = ParseFSSTSegmentHeader(base_ptr, decoder, &state->current_width, block_size);
|
575
581
|
if (!retval) {
|
576
582
|
state->duckdb_fsst_decoder = nullptr;
|
577
583
|
}
|
@@ -736,7 +742,8 @@ void FSSTStorage::StringFetchRow(ColumnSegment &segment, ColumnFetchState &state
|
|
736
742
|
|
737
743
|
duckdb_fsst_decoder_t decoder;
|
738
744
|
bitpacking_width_t width;
|
739
|
-
auto
|
745
|
+
auto block_size = segment.GetBlockManager().GetBlockSize();
|
746
|
+
auto have_symbol_table = ParseFSSTSegmentHeader(base_ptr, &decoder, &width, block_size);
|
740
747
|
|
741
748
|
auto result_data = FlatVector::GetData<string_t>(result);
|
742
749
|
if (!have_symbol_table) {
|
@@ -814,9 +821,12 @@ char *FSSTStorage::FetchStringPointer(StringDictionaryContainer dict, data_ptr_t
|
|
814
821
|
|
815
822
|
// Returns false if no symbol table was found. This means all strings are either empty or null
|
816
823
|
bool FSSTStorage::ParseFSSTSegmentHeader(data_ptr_t base_ptr, duckdb_fsst_decoder_t *decoder_out,
|
817
|
-
bitpacking_width_t *width_out) {
|
824
|
+
bitpacking_width_t *width_out, const idx_t block_size) {
|
818
825
|
auto header_ptr = reinterpret_cast<fsst_compression_header_t *>(base_ptr);
|
819
826
|
auto fsst_symbol_table_offset = Load<uint32_t>(data_ptr_cast(&header_ptr->fsst_symbol_table_offset));
|
827
|
+
if (fsst_symbol_table_offset > block_size) {
|
828
|
+
throw InternalException("invalid fsst_symbol_table_offset in FSSTStorage::ParseFSSTSegmentHeader");
|
829
|
+
}
|
820
830
|
*width_out = (bitpacking_width_t)(Load<uint32_t>(data_ptr_cast(&header_ptr->bitpacking_width)));
|
821
831
|
return duckdb_fsst_import(decoder_out, base_ptr + fsst_symbol_table_offset);
|
822
832
|
}
|