duckdb 1.2.1-dev4.0 → 1.2.1-dev8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/connection.cpp +57 -35
- package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
- package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
- package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
- package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
- package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
- package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
- package/src/duckdb/extension/json/json_extension.cpp +8 -3
- package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
- package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
- package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
- package/src/duckdb/src/catalog/catalog.cpp +12 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
- package/src/duckdb/src/common/bind_helpers.cpp +3 -0
- package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +19 -6
- package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
- package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
- package/src/duckdb/src/include/duckdb.h +495 -480
- package/src/duckdb/src/main/attached_database.cpp +1 -1
- package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
- package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
- package/src/duckdb/src/main/config.cpp +7 -1
- package/src/duckdb/src/main/database.cpp +8 -8
- package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
- package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
- package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
- package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
- package/src/duckdb/src/parallel/executor_task.cpp +10 -6
- package/src/duckdb/src/parallel/task_executor.cpp +4 -1
- package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/storage_manager.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +5 -6
- package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
- package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
- package/src/duckdb/ub_src_parallel.cpp +2 -0
@@ -247,7 +247,7 @@ void AttachedDatabase::Close() {
|
|
247
247
|
}
|
248
248
|
CheckpointOptions options;
|
249
249
|
options.wal_action = CheckpointWALAction::DELETE_WAL;
|
250
|
-
storage->CreateCheckpoint(options);
|
250
|
+
storage->CreateCheckpoint(nullptr, options);
|
251
251
|
}
|
252
252
|
} catch (...) { // NOLINT
|
253
253
|
}
|
@@ -35,7 +35,11 @@ duckdb_state duckdb_open_internal(DBInstanceCacheWrapper *cache, const char *pat
|
|
35
35
|
}
|
36
36
|
|
37
37
|
if (cache) {
|
38
|
-
|
38
|
+
duckdb::string path_str;
|
39
|
+
if (path) {
|
40
|
+
path_str = path;
|
41
|
+
}
|
42
|
+
wrapper->database = cache->instance_cache->GetOrCreateInstance(path_str, *db_config, true);
|
39
43
|
} else {
|
40
44
|
wrapper->database = duckdb::make_shared_ptr<DuckDB>(path, db_config);
|
41
45
|
}
|
@@ -78,6 +78,10 @@ LogicalTypeId ConvertCTypeToCPP(duckdb_type c_type) {
|
|
78
78
|
return LogicalTypeId::VARINT;
|
79
79
|
case DUCKDB_TYPE_SQLNULL:
|
80
80
|
return LogicalTypeId::SQLNULL;
|
81
|
+
case DUCKDB_TYPE_STRING_LITERAL:
|
82
|
+
return LogicalTypeId::STRING_LITERAL;
|
83
|
+
case DUCKDB_TYPE_INTEGER_LITERAL:
|
84
|
+
return LogicalTypeId::INTEGER_LITERAL;
|
81
85
|
default: // LCOV_EXCL_START
|
82
86
|
D_ASSERT(0);
|
83
87
|
return LogicalTypeId::INVALID;
|
@@ -160,6 +164,10 @@ duckdb_type ConvertCPPTypeToC(const LogicalType &sql_type) {
|
|
160
164
|
return DUCKDB_TYPE_ANY;
|
161
165
|
case LogicalTypeId::SQLNULL:
|
162
166
|
return DUCKDB_TYPE_SQLNULL;
|
167
|
+
case LogicalTypeId::STRING_LITERAL:
|
168
|
+
return DUCKDB_TYPE_STRING_LITERAL;
|
169
|
+
case LogicalTypeId::INTEGER_LITERAL:
|
170
|
+
return DUCKDB_TYPE_INTEGER_LITERAL;
|
163
171
|
default: // LCOV_EXCL_START
|
164
172
|
D_ASSERT(0);
|
165
173
|
return DUCKDB_TYPE_INVALID;
|
@@ -380,7 +380,13 @@ void DBConfig::AddExtensionOption(const string &name, string description, Logica
|
|
380
380
|
const Value &default_value, set_option_callback_t function) {
|
381
381
|
extension_parameters.insert(
|
382
382
|
make_pair(name, ExtensionOption(std::move(description), std::move(parameter), function, default_value)));
|
383
|
-
if
|
383
|
+
// copy over unrecognized options, if they match the new extension option
|
384
|
+
auto iter = options.unrecognized_options.find(name);
|
385
|
+
if (iter != options.unrecognized_options.end()) {
|
386
|
+
options.set_variables[name] = iter->second;
|
387
|
+
options.unrecognized_options.erase(iter);
|
388
|
+
}
|
389
|
+
if (!default_value.IsNull() && options.set_variables.find(name) == options.set_variables.end()) {
|
384
390
|
// Default value is set, insert it into the 'set_variables' list
|
385
391
|
options.set_variables[name] = default_value;
|
386
392
|
}
|
@@ -225,10 +225,11 @@ static void ThrowExtensionSetUnrecognizedOptions(const case_insensitive_map_t<Va
|
|
225
225
|
}
|
226
226
|
|
227
227
|
void DatabaseInstance::LoadExtensionSettings() {
|
228
|
-
|
228
|
+
// copy the map, to protect against modifications during
|
229
|
+
auto unrecognized_options_copy = config.options.unrecognized_options;
|
229
230
|
|
230
231
|
if (config.options.autoload_known_extensions) {
|
231
|
-
if (
|
232
|
+
if (unrecognized_options_copy.empty()) {
|
232
233
|
// Nothing to do
|
233
234
|
return;
|
234
235
|
}
|
@@ -237,7 +238,7 @@ void DatabaseInstance::LoadExtensionSettings() {
|
|
237
238
|
con.BeginTransaction();
|
238
239
|
|
239
240
|
vector<string> extension_options;
|
240
|
-
for (auto &option :
|
241
|
+
for (auto &option : unrecognized_options_copy) {
|
241
242
|
auto &name = option.first;
|
242
243
|
auto &value = option.second;
|
243
244
|
|
@@ -254,18 +255,17 @@ void DatabaseInstance::LoadExtensionSettings() {
|
|
254
255
|
if (it == config.extension_parameters.end()) {
|
255
256
|
throw InternalException("Extension %s did not provide the '%s' config setting", extension_name, name);
|
256
257
|
}
|
258
|
+
// if the extension provided the option, it should no longer be unrecognized.
|
259
|
+
D_ASSERT(config.options.unrecognized_options.find(name) == config.options.unrecognized_options.end());
|
257
260
|
auto &context = *con.context;
|
258
261
|
PhysicalSet::SetExtensionVariable(context, it->second, name, SetScope::GLOBAL, value);
|
259
262
|
extension_options.push_back(name);
|
260
263
|
}
|
261
264
|
|
262
|
-
for (auto &option : extension_options) {
|
263
|
-
unrecognized_options.erase(option);
|
264
|
-
}
|
265
265
|
con.Commit();
|
266
266
|
}
|
267
|
-
if (!unrecognized_options.empty()) {
|
268
|
-
ThrowExtensionSetUnrecognizedOptions(unrecognized_options);
|
267
|
+
if (!config.options.unrecognized_options.empty()) {
|
268
|
+
ThrowExtensionSetUnrecognizedOptions(config.options.unrecognized_options);
|
269
269
|
}
|
270
270
|
}
|
271
271
|
|
@@ -121,6 +121,7 @@ static const DefaultExtension internal_extensions[] = {
|
|
121
121
|
{"vss", "Adds indexing support to accelerate Vector Similarity Search", false},
|
122
122
|
{"delta", "Adds support for Delta Lake", false},
|
123
123
|
{"fts", "Adds support for Full-Text Search Indexes", false},
|
124
|
+
{"ui", "Adds local UI for DuckDB", false},
|
124
125
|
{nullptr, nullptr, false}};
|
125
126
|
|
126
127
|
idx_t ExtensionHelper::DefaultExtensionCount() {
|
@@ -139,7 +140,8 @@ DefaultExtension ExtensionHelper::GetDefaultExtension(idx_t index) {
|
|
139
140
|
// Allow Auto-Install Extensions
|
140
141
|
//===--------------------------------------------------------------------===//
|
141
142
|
static const char *const auto_install[] = {"motherduck", "postgres_scanner", "mysql_scanner", "sqlite_scanner",
|
142
|
-
"delta", "iceberg", "uc_catalog",
|
143
|
+
"delta", "iceberg", "uc_catalog", "ui",
|
144
|
+
nullptr};
|
143
145
|
|
144
146
|
// TODO: unify with new autoload mechanism
|
145
147
|
bool ExtensionHelper::AllowAutoInstall(const string &extension) {
|
@@ -71,15 +71,11 @@ struct ExtensionAccess {
|
|
71
71
|
static void SetError(duckdb_extension_info info, const char *error) {
|
72
72
|
auto &load_state = DuckDBExtensionLoadState::Get(info);
|
73
73
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
load_state.error_data = ErrorData(
|
80
|
-
ExceptionType::UNKNOWN_TYPE,
|
81
|
-
"Extension has indicated an error occured during initialization, but did not set an error message.");
|
82
|
-
}
|
74
|
+
load_state.has_error = true;
|
75
|
+
load_state.error_data =
|
76
|
+
error ? ErrorData(error)
|
77
|
+
: ErrorData(ExceptionType::UNKNOWN_TYPE, "Extension has indicated an error occured during "
|
78
|
+
"initialization, but did not set an error message.");
|
83
79
|
}
|
84
80
|
|
85
81
|
//! Called by the extension get a pointer to the database that is loading it
|
@@ -92,9 +88,11 @@ struct ExtensionAccess {
|
|
92
88
|
load_state.database_data->database = make_shared_ptr<DuckDB>(load_state.db);
|
93
89
|
return reinterpret_cast<duckdb_database *>(load_state.database_data.get());
|
94
90
|
} catch (std::exception &ex) {
|
91
|
+
load_state.has_error = true;
|
95
92
|
load_state.error_data = ErrorData(ex);
|
96
93
|
return nullptr;
|
97
94
|
} catch (...) {
|
95
|
+
load_state.has_error = true;
|
98
96
|
load_state.error_data =
|
99
97
|
ErrorData(ExceptionType::UNKNOWN_TYPE, "Unknown error in GetDatabase when trying to load extension!");
|
100
98
|
return nullptr;
|
@@ -125,8 +123,9 @@ struct ExtensionAccess {
|
|
125
123
|
load_state.has_error = true;
|
126
124
|
load_state.error_data =
|
127
125
|
ErrorData(ExceptionType::UNKNOWN_TYPE,
|
128
|
-
StringUtil::Format("Unknown ABI Type '%
|
129
|
-
load_state.init_result.abi_type,
|
126
|
+
StringUtil::Format("Unknown ABI Type of value '%d' found when loading extension '%s'",
|
127
|
+
static_cast<uint8_t>(load_state.init_result.abi_type),
|
128
|
+
load_state.init_result.filename));
|
130
129
|
return nullptr;
|
131
130
|
}
|
132
131
|
|
@@ -590,7 +589,8 @@ void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileSystem &fs
|
|
590
589
|
return;
|
591
590
|
}
|
592
591
|
|
593
|
-
throw IOException("Unknown ABI type '%s' for extension '%s'",
|
592
|
+
throw IOException("Unknown ABI type of value '%s' for extension '%s'",
|
593
|
+
static_cast<uint8_t>(extension_init_result.abi_type), extension);
|
594
594
|
#endif
|
595
595
|
}
|
596
596
|
|
@@ -108,6 +108,7 @@ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
|
|
108
108
|
//! When RETURNING is used, a PROJECTION is the top level operator for INSERTS, UPDATES, and DELETES
|
109
109
|
//! We still need to project all values from these operators so the projection
|
110
110
|
//! on top of them can select from only the table values being inserted.
|
111
|
+
case LogicalOperatorType::LOGICAL_GET:
|
111
112
|
case LogicalOperatorType::LOGICAL_UNION:
|
112
113
|
case LogicalOperatorType::LOGICAL_EXCEPT:
|
113
114
|
case LogicalOperatorType::LOGICAL_INTERSECT:
|
@@ -265,7 +265,6 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vector<unique_ptr<LogicalO
|
|
265
265
|
break;
|
266
266
|
}
|
267
267
|
}
|
268
|
-
|
269
268
|
auto join = make_uniq<LogicalComparisonJoin>(chosen_filter->join_type);
|
270
269
|
// Here we optimize build side probe side. Our build side is the right side
|
271
270
|
// So the right plans should have lower cardinalities.
|
@@ -288,8 +287,9 @@ GenerateJoinRelation QueryGraphManager::GenerateJoins(vector<unique_ptr<LogicalO
|
|
288
287
|
bool invert = !JoinRelationSet::IsSubset(*left.set, *f->left_set);
|
289
288
|
// If the left and right set are inverted AND it is a semi or anti join
|
290
289
|
// swap left and right children back.
|
290
|
+
|
291
291
|
if (invert && (f->join_type == JoinType::SEMI || f->join_type == JoinType::ANTI)) {
|
292
|
-
std::swap(
|
292
|
+
std::swap(join->children[0], join->children[1]);
|
293
293
|
invert = false;
|
294
294
|
}
|
295
295
|
|
@@ -13,6 +13,7 @@
|
|
13
13
|
#include "duckdb/planner/expression_iterator.hpp"
|
14
14
|
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
15
15
|
#include "duckdb/main/client_config.hpp"
|
16
|
+
#include "duckdb/main/config.hpp"
|
16
17
|
|
17
18
|
namespace duckdb {
|
18
19
|
|
@@ -357,9 +358,22 @@ bool LateMaterialization::TryLateMaterialization(unique_ptr<LogicalOperator> &op
|
|
357
358
|
return true;
|
358
359
|
}
|
359
360
|
|
360
|
-
bool LateMaterialization::OptimizeLargeLimit(
|
361
|
-
|
362
|
-
|
361
|
+
bool LateMaterialization::OptimizeLargeLimit(LogicalLimit &limit, idx_t limit_val, bool has_offset) {
|
362
|
+
auto &config = DBConfig::GetConfig(optimizer.context);
|
363
|
+
if (!has_offset && !config.options.preserve_insertion_order) {
|
364
|
+
// we avoid optimizing large limits if preserve insertion order is false
|
365
|
+
// since the limit is executed in parallel anyway
|
366
|
+
return false;
|
367
|
+
}
|
368
|
+
// we only perform this optimization until a certain amount of maximum values to reduce memory constraints
|
369
|
+
// since we still materialize the set of row-ids in the hash table this optimization can increase memory pressure
|
370
|
+
// FIXME: make this configurable as well
|
371
|
+
static constexpr const idx_t LIMIT_MAX_VAL = 1000000;
|
372
|
+
if (limit_val > LIMIT_MAX_VAL) {
|
373
|
+
return false;
|
374
|
+
}
|
375
|
+
// we only support large limits if they are directly below the source
|
376
|
+
reference<LogicalOperator> current_op = *limit.children[0];
|
363
377
|
while (current_op.get().type != LogicalOperatorType::LOGICAL_GET) {
|
364
378
|
if (current_op.get().type != LogicalOperatorType::LOGICAL_PROJECTION) {
|
365
379
|
return false;
|
@@ -376,11 +390,18 @@ unique_ptr<LogicalOperator> LateMaterialization::Optimize(unique_ptr<LogicalOper
|
|
376
390
|
if (limit.limit_val.Type() != LimitNodeType::CONSTANT_VALUE) {
|
377
391
|
break;
|
378
392
|
}
|
379
|
-
|
393
|
+
auto limit_val = limit.limit_val.GetConstantValue();
|
394
|
+
bool has_offset = limit.offset_val.Type() != LimitNodeType::UNSET;
|
395
|
+
if (limit_val > max_row_count) {
|
380
396
|
// for large limits - we may still want to do this optimization if the limit is consecutive
|
381
397
|
// this is the case if there are only projections/get below the limit
|
382
398
|
// if the row-ids are not consecutive doing the join can worsen performance
|
383
|
-
if (!OptimizeLargeLimit(
|
399
|
+
if (!OptimizeLargeLimit(limit, limit_val, has_offset)) {
|
400
|
+
break;
|
401
|
+
}
|
402
|
+
} else {
|
403
|
+
// optimizing small limits really only makes sense if we have an offset
|
404
|
+
if (!has_offset) {
|
384
405
|
break;
|
385
406
|
}
|
386
407
|
}
|
@@ -272,12 +272,23 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
|
|
272
272
|
|
273
273
|
this->plan = std::move(plan_p);
|
274
274
|
|
275
|
+
for (auto &pre_optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) {
|
276
|
+
RunOptimizer(OptimizerType::EXTENSION, [&]() {
|
277
|
+
OptimizerExtensionInput input {GetContext(), *this, pre_optimizer_extension.optimizer_info.get()};
|
278
|
+
if (pre_optimizer_extension.pre_optimize_function) {
|
279
|
+
pre_optimizer_extension.pre_optimize_function(input, plan);
|
280
|
+
}
|
281
|
+
});
|
282
|
+
}
|
283
|
+
|
275
284
|
RunBuiltInOptimizers();
|
276
285
|
|
277
286
|
for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) {
|
278
287
|
RunOptimizer(OptimizerType::EXTENSION, [&]() {
|
279
288
|
OptimizerExtensionInput input {GetContext(), *this, optimizer_extension.optimizer_info.get()};
|
280
|
-
optimizer_extension.optimize_function
|
289
|
+
if (optimizer_extension.optimize_function) {
|
290
|
+
optimizer_extension.optimize_function(input, plan);
|
291
|
+
}
|
281
292
|
});
|
282
293
|
}
|
283
294
|
|
@@ -1,4 +1,5 @@
|
|
1
|
-
#include "duckdb/parallel/
|
1
|
+
#include "duckdb/parallel/executor_task.hpp"
|
2
|
+
#include "duckdb/parallel/task_notifier.hpp"
|
2
3
|
#include "duckdb/execution/executor.hpp"
|
3
4
|
#include "duckdb/main/client_context.hpp"
|
4
5
|
#include "duckdb/parallel/thread_context.hpp"
|
@@ -6,13 +7,13 @@
|
|
6
7
|
namespace duckdb {
|
7
8
|
|
8
9
|
ExecutorTask::ExecutorTask(Executor &executor_p, shared_ptr<Event> event_p)
|
9
|
-
: executor(executor_p), event(std::move(event_p)) {
|
10
|
+
: executor(executor_p), event(std::move(event_p)), context(executor_p.context) {
|
10
11
|
executor.RegisterTask();
|
11
12
|
}
|
12
13
|
|
13
|
-
ExecutorTask::ExecutorTask(ClientContext &
|
14
|
-
: executor(Executor::Get(
|
15
|
-
thread_context = make_uniq<ThreadContext>(
|
14
|
+
ExecutorTask::ExecutorTask(ClientContext &context_p, shared_ptr<Event> event_p, const PhysicalOperator &op_p)
|
15
|
+
: executor(Executor::Get(context_p)), event(std::move(event_p)), op(&op_p), context(context_p) {
|
16
|
+
thread_context = make_uniq<ThreadContext>(context_p);
|
16
17
|
executor.RegisterTask();
|
17
18
|
}
|
18
19
|
|
@@ -38,6 +39,7 @@ TaskExecutionResult ExecutorTask::Execute(TaskExecutionMode mode) {
|
|
38
39
|
if (thread_context) {
|
39
40
|
TaskExecutionResult result;
|
40
41
|
do {
|
42
|
+
TaskNotifier task_notifier {context};
|
41
43
|
thread_context->profiler.StartOperator(op);
|
42
44
|
// to allow continuous profiling, always execute in small steps
|
43
45
|
result = ExecuteTask(TaskExecutionMode::PROCESS_PARTIAL);
|
@@ -46,7 +48,9 @@ TaskExecutionResult ExecutorTask::Execute(TaskExecutionMode mode) {
|
|
46
48
|
} while (mode == TaskExecutionMode::PROCESS_ALL && result == TaskExecutionResult::TASK_NOT_FINISHED);
|
47
49
|
return result;
|
48
50
|
} else {
|
49
|
-
|
51
|
+
TaskNotifier task_notifier {context};
|
52
|
+
auto result = ExecuteTask(mode);
|
53
|
+
return result;
|
50
54
|
}
|
51
55
|
} catch (std::exception &ex) {
|
52
56
|
executor.PushError(ErrorData(ex));
|
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "duckdb/parallel/task_executor.hpp"
|
2
|
+
#include "duckdb/parallel/task_notifier.hpp"
|
2
3
|
#include "duckdb/parallel/task_scheduler.hpp"
|
3
4
|
|
4
5
|
namespace duckdb {
|
@@ -7,7 +8,8 @@ TaskExecutor::TaskExecutor(TaskScheduler &scheduler)
|
|
7
8
|
: scheduler(scheduler), token(scheduler.CreateProducer()), completed_tasks(0), total_tasks(0) {
|
8
9
|
}
|
9
10
|
|
10
|
-
TaskExecutor::TaskExecutor(ClientContext &
|
11
|
+
TaskExecutor::TaskExecutor(ClientContext &context_p) : TaskExecutor(TaskScheduler::GetScheduler(context_p)) {
|
12
|
+
context = context_p;
|
11
13
|
}
|
12
14
|
|
13
15
|
TaskExecutor::~TaskExecutor() {
|
@@ -69,6 +71,7 @@ TaskExecutionResult BaseExecutorTask::Execute(TaskExecutionMode mode) {
|
|
69
71
|
return TaskExecutionResult::TASK_FINISHED;
|
70
72
|
}
|
71
73
|
try {
|
74
|
+
TaskNotifier task_notifier {executor.context};
|
72
75
|
ExecuteTask();
|
73
76
|
executor.FinishTask();
|
74
77
|
return TaskExecutionResult::TASK_FINISHED;
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#include "duckdb/parallel/task_notifier.hpp"
|
2
|
+
#include "duckdb/main/client_context.hpp"
|
3
|
+
#include "duckdb/main/client_context_state.hpp"
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
TaskNotifier::TaskNotifier(optional_ptr<ClientContext> context_p) : context(context_p) {
|
8
|
+
if (context) {
|
9
|
+
for (auto &state : context->registered_state->States()) {
|
10
|
+
state->OnTaskStart(*context);
|
11
|
+
}
|
12
|
+
}
|
13
|
+
}
|
14
|
+
|
15
|
+
TaskNotifier::~TaskNotifier() {
|
16
|
+
if (context) {
|
17
|
+
for (auto &state : context->registered_state->States()) {
|
18
|
+
state->OnTaskStop(*context);
|
19
|
+
}
|
20
|
+
}
|
21
|
+
}
|
22
|
+
|
23
|
+
} // namespace duckdb
|
@@ -284,6 +284,39 @@ int32_t TaskScheduler::NumberOfThreads() {
|
|
284
284
|
return current_thread_count.load();
|
285
285
|
}
|
286
286
|
|
287
|
+
idx_t TaskScheduler::GetNumberOfTasks() const {
|
288
|
+
#ifndef DUCKDB_NO_THREADS
|
289
|
+
return queue->q.size_approx();
|
290
|
+
#else
|
291
|
+
idx_t task_count = 0;
|
292
|
+
for (auto &producer : queue->q) {
|
293
|
+
task_count += producer.second.size();
|
294
|
+
}
|
295
|
+
return task_count;
|
296
|
+
#endif
|
297
|
+
}
|
298
|
+
|
299
|
+
idx_t TaskScheduler::GetProducerCount() const {
|
300
|
+
#ifndef DUCKDB_NO_THREADS
|
301
|
+
return queue->q.size_producers_approx();
|
302
|
+
#else
|
303
|
+
return queue->q.size();
|
304
|
+
#endif
|
305
|
+
}
|
306
|
+
|
307
|
+
idx_t TaskScheduler::GetTaskCountForProducer(ProducerToken &token) const {
|
308
|
+
#ifndef DUCKDB_NO_THREADS
|
309
|
+
lock_guard<mutex> producer_lock(token.producer_lock);
|
310
|
+
return queue->q.size_producer_approx(token.token->queue_token);
|
311
|
+
#else
|
312
|
+
const auto it = queue->q.find(std::ref(*token.token));
|
313
|
+
if (it == queue->q.end()) {
|
314
|
+
return 0;
|
315
|
+
}
|
316
|
+
return it->second.size();
|
317
|
+
#endif
|
318
|
+
}
|
319
|
+
|
287
320
|
void TaskScheduler::SetThreads(idx_t total_threads, idx_t external_threads) {
|
288
321
|
if (total_threads == 0) {
|
289
322
|
throw SyntaxException("Number of threads must be positive!");
|
@@ -107,6 +107,7 @@ unique_ptr<ParsedExpression> Transformer::TransformSubquery(duckdb_libpgquery::P
|
|
107
107
|
}
|
108
108
|
}
|
109
109
|
// transform constants (e.g. ORDER BY 1) into positional references (ORDER BY #1)
|
110
|
+
idx_t array_idx = 0;
|
110
111
|
if (aggr->order_bys) {
|
111
112
|
for (auto &order : aggr->order_bys->orders) {
|
112
113
|
if (order.expression->GetExpressionType() == ExpressionType::VALUE_CONSTANT) {
|
@@ -120,8 +121,10 @@ unique_ptr<ParsedExpression> Transformer::TransformSubquery(duckdb_libpgquery::P
|
|
120
121
|
}
|
121
122
|
} else if (sub_select) {
|
122
123
|
// if we have a SELECT we can push the ORDER BY clause into the SELECT list and reference it
|
124
|
+
auto alias = "__array_internal_idx_" + to_string(++array_idx);
|
125
|
+
order.expression->alias = alias;
|
123
126
|
sub_select->select_list.push_back(std::move(order.expression));
|
124
|
-
order.expression = make_uniq<
|
127
|
+
order.expression = make_uniq<ColumnRefExpression>(alias);
|
125
128
|
} else {
|
126
129
|
// otherwise we remove order qualifications
|
127
130
|
RemoveOrderQualificationRecursive(order.expression);
|
@@ -153,7 +153,7 @@ BindResult ExpressionBinder::BindExpression(SubqueryExpression &expr, idx_t dept
|
|
153
153
|
}
|
154
154
|
child = BoundCastExpression::AddCastToType(context, std::move(child), compare_type);
|
155
155
|
result->child_types.push_back(subquery_type);
|
156
|
-
result->
|
156
|
+
result->child_targets.push_back(compare_type);
|
157
157
|
result->children.push_back(std::move(child));
|
158
158
|
}
|
159
159
|
}
|
@@ -168,7 +168,8 @@ static unique_ptr<Expression> PlanUncorrelatedSubquery(Binder &binder, BoundSubq
|
|
168
168
|
cond.left = std::move(expr.children[child_idx]);
|
169
169
|
auto &child_type = expr.child_types[child_idx];
|
170
170
|
cond.right = BoundCastExpression::AddDefaultCastToType(
|
171
|
-
make_uniq<BoundColumnRefExpression>(child_type, plan_columns[child_idx]),
|
171
|
+
make_uniq<BoundColumnRefExpression>(child_type, plan_columns[child_idx]),
|
172
|
+
expr.child_targets[child_idx]);
|
172
173
|
cond.comparison = expr.comparison_type;
|
173
174
|
join->conditions.push_back(std::move(cond));
|
174
175
|
}
|
@@ -371,7 +372,8 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
|
|
371
372
|
compare_cond.left = std::move(expr.children[child_idx]);
|
372
373
|
auto &child_type = expr.child_types[child_idx];
|
373
374
|
compare_cond.right = BoundCastExpression::AddDefaultCastToType(
|
374
|
-
make_uniq<BoundColumnRefExpression>(child_type, plan_columns[child_idx]),
|
375
|
+
make_uniq<BoundColumnRefExpression>(child_type, plan_columns[child_idx]),
|
376
|
+
expr.child_targets[child_idx]);
|
375
377
|
compare_cond.comparison = expr.comparison_type;
|
376
378
|
delim_join->conditions.push_back(std::move(compare_cond));
|
377
379
|
}
|
@@ -46,6 +46,7 @@
|
|
46
46
|
namespace duckdb {
|
47
47
|
|
48
48
|
void Binder::BindSchemaOrCatalog(ClientContext &context, string &catalog, string &schema) {
|
49
|
+
CatalogEntryRetriever retriever(context);
|
49
50
|
if (catalog.empty() && !schema.empty()) {
|
50
51
|
// schema is specified - but catalog is not
|
51
52
|
// try searching for the catalog instead
|
@@ -60,8 +61,12 @@ void Binder::BindSchemaOrCatalog(ClientContext &context, string &catalog, string
|
|
60
61
|
catalog_names.push_back(DatabaseManager::GetDefaultDatabase(context));
|
61
62
|
}
|
62
63
|
for (auto &catalog_name : catalog_names) {
|
63
|
-
auto
|
64
|
-
if (catalog
|
64
|
+
auto catalog = Catalog::GetCatalogEntry(retriever, catalog_name);
|
65
|
+
if (!catalog) {
|
66
|
+
continue;
|
67
|
+
}
|
68
|
+
if (catalog->CheckAmbiguousCatalogOrSchema(context, schema)) {
|
69
|
+
|
65
70
|
throw BinderException(
|
66
71
|
"Ambiguous reference to catalog or schema \"%s\" - use a fully qualified path like \"%s.%s\"",
|
67
72
|
schema, catalog_name, schema);
|
@@ -271,13 +271,14 @@ void Binder::BindDefaultValues(const ColumnList &columns, vector<unique_ptr<Expr
|
|
271
271
|
schema_name = DEFAULT_SCHEMA;
|
272
272
|
}
|
273
273
|
|
274
|
-
// FIXME: We might want to save the existing search path of the binder
|
275
274
|
vector<CatalogSearchEntry> defaults_search_path;
|
276
275
|
defaults_search_path.emplace_back(catalog_name, schema_name);
|
277
276
|
if (schema_name != DEFAULT_SCHEMA) {
|
278
277
|
defaults_search_path.emplace_back(catalog_name, DEFAULT_SCHEMA);
|
279
278
|
}
|
280
|
-
|
279
|
+
|
280
|
+
auto default_binder = Binder::CreateBinder(context, *this);
|
281
|
+
default_binder->entry_retriever.SetSearchPath(std::move(defaults_search_path));
|
281
282
|
|
282
283
|
for (auto &column : columns.Physical()) {
|
283
284
|
unique_ptr<Expression> bound_default;
|
@@ -288,9 +289,9 @@ void Binder::BindDefaultValues(const ColumnList &columns, vector<unique_ptr<Expr
|
|
288
289
|
if (default_copy->HasParameter()) {
|
289
290
|
throw BinderException("DEFAULT values cannot contain parameters");
|
290
291
|
}
|
291
|
-
ConstantBinder
|
292
|
-
|
293
|
-
bound_default =
|
292
|
+
ConstantBinder default_value_binder(*default_binder, context, "DEFAULT value");
|
293
|
+
default_value_binder.target_type = column.Type();
|
294
|
+
bound_default = default_value_binder.Bind(default_copy);
|
294
295
|
} else {
|
295
296
|
// no default value specified: push a default value of constant null
|
296
297
|
bound_default = make_uniq<BoundConstantExpression>(Value(column.Type()));
|
@@ -10,7 +10,8 @@
|
|
10
10
|
|
11
11
|
namespace duckdb {
|
12
12
|
|
13
|
-
TableDataWriter::TableDataWriter(TableCatalogEntry &table_p
|
13
|
+
TableDataWriter::TableDataWriter(TableCatalogEntry &table_p, optional_ptr<ClientContext> client_context_p)
|
14
|
+
: table(table_p.Cast<DuckTableEntry>()), client_context(client_context_p) {
|
14
15
|
D_ASSERT(table_p.IsDuckTable());
|
15
16
|
}
|
16
17
|
|
@@ -40,7 +41,8 @@ DatabaseInstance &TableDataWriter::GetDatabase() {
|
|
40
41
|
|
41
42
|
SingleFileTableDataWriter::SingleFileTableDataWriter(SingleFileCheckpointWriter &checkpoint_manager,
|
42
43
|
TableCatalogEntry &table, MetadataWriter &table_data_writer)
|
43
|
-
: TableDataWriter(table
|
44
|
+
: TableDataWriter(table, checkpoint_manager.GetClientContext()), checkpoint_manager(checkpoint_manager),
|
45
|
+
table_data_writer(table_data_writer) {
|
44
46
|
}
|
45
47
|
|
46
48
|
unique_ptr<RowGroupWriter> SingleFileTableDataWriter::GetRowGroupWriter(RowGroup &row_group) {
|
@@ -36,10 +36,11 @@ namespace duckdb {
|
|
36
36
|
|
37
37
|
void ReorderTableEntries(catalog_entry_vector_t &tables);
|
38
38
|
|
39
|
-
SingleFileCheckpointWriter::SingleFileCheckpointWriter(
|
39
|
+
SingleFileCheckpointWriter::SingleFileCheckpointWriter(optional_ptr<ClientContext> client_context_p,
|
40
|
+
AttachedDatabase &db, BlockManager &block_manager,
|
40
41
|
CheckpointType checkpoint_type)
|
41
|
-
: CheckpointWriter(db),
|
42
|
-
checkpoint_type(checkpoint_type) {
|
42
|
+
: CheckpointWriter(db), client_context(client_context_p),
|
43
|
+
partial_block_manager(block_manager, PartialBlockType::FULL_CHECKPOINT), checkpoint_type(checkpoint_type) {
|
43
44
|
}
|
44
45
|
|
45
46
|
BlockManager &SingleFileCheckpointWriter::GetBlockManager() {
|
@@ -385,12 +385,19 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
|
|
385
385
|
uint32_t remaining = length;
|
386
386
|
offset += sizeof(uint32_t);
|
387
387
|
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
388
|
+
BufferHandle target_handle;
|
389
|
+
string_t overflow_string;
|
390
|
+
data_ptr_t target_ptr;
|
391
|
+
bool allocate_block = length >= block_manager.GetBlockSize();
|
392
|
+
if (allocate_block) {
|
393
|
+
// overflow string is bigger than a block - allocate a temporary buffer for it
|
394
|
+
target_handle = buffer_manager.Allocate(MemoryTag::OVERFLOW_STRINGS, length);
|
395
|
+
target_ptr = target_handle.Ptr();
|
396
|
+
} else {
|
397
|
+
// overflow string is smaller than a block - add it to the vector directly
|
398
|
+
overflow_string = StringVector::EmptyString(result, length);
|
399
|
+
target_ptr = data_ptr_cast(overflow_string.GetDataWriteable());
|
400
|
+
}
|
394
401
|
|
395
402
|
// now append the string to the single buffer
|
396
403
|
while (remaining > 0) {
|
@@ -408,10 +415,14 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
|
|
408
415
|
offset = 0;
|
409
416
|
}
|
410
417
|
}
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
418
|
+
if (allocate_block) {
|
419
|
+
auto final_buffer = target_handle.Ptr();
|
420
|
+
StringVector::AddHandle(result, std::move(target_handle));
|
421
|
+
return ReadString(final_buffer, 0, length);
|
422
|
+
} else {
|
423
|
+
overflow_string.Finalize();
|
424
|
+
return overflow_string;
|
425
|
+
}
|
415
426
|
}
|
416
427
|
|
417
428
|
// read the overflow string from memory
|
@@ -77,6 +77,7 @@ static const StorageVersionInfo storage_version_info[] = {
|
|
77
77
|
{"v1.1.2", 64},
|
78
78
|
{"v1.1.3", 64},
|
79
79
|
{"v1.2.0", 65},
|
80
|
+
{"v1.2.1", 65},
|
80
81
|
{nullptr, 0}
|
81
82
|
};
|
82
83
|
// END OF STORAGE VERSION INFO
|
@@ -96,6 +97,7 @@ static const SerializationVersionInfo serialization_version_info[] = {
|
|
96
97
|
{"v1.1.2", 3},
|
97
98
|
{"v1.1.3", 3},
|
98
99
|
{"v1.2.0", 4},
|
100
|
+
{"v1.2.1", 4},
|
99
101
|
{"latest", 4},
|
100
102
|
{nullptr, 0}
|
101
103
|
};
|
@@ -355,7 +355,7 @@ bool SingleFileStorageManager::IsCheckpointClean(MetaBlockPointer checkpoint_id)
|
|
355
355
|
return block_manager->IsRootBlock(checkpoint_id);
|
356
356
|
}
|
357
357
|
|
358
|
-
void SingleFileStorageManager::CreateCheckpoint(CheckpointOptions options) {
|
358
|
+
void SingleFileStorageManager::CreateCheckpoint(optional_ptr<ClientContext> client_context, CheckpointOptions options) {
|
359
359
|
if (InMemory() || read_only || !load_complete) {
|
360
360
|
return;
|
361
361
|
}
|
@@ -366,7 +366,7 @@ void SingleFileStorageManager::CreateCheckpoint(CheckpointOptions options) {
|
|
366
366
|
if (GetWALSize() > 0 || config.options.force_checkpoint || options.action == CheckpointAction::ALWAYS_CHECKPOINT) {
|
367
367
|
// we only need to checkpoint if there is anything in the WAL
|
368
368
|
try {
|
369
|
-
SingleFileCheckpointWriter checkpointer(db, *block_manager, options.type);
|
369
|
+
SingleFileCheckpointWriter checkpointer(client_context, db, *block_manager, options.type);
|
370
370
|
checkpointer.CreateCheckpoint();
|
371
371
|
} catch (std::exception &ex) {
|
372
372
|
ErrorData error(ex);
|