duckdb 1.2.1-dev4.0 → 1.2.1-dev8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/connection.cpp +57 -35
- package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
- package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
- package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
- package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
- package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
- package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
- package/src/duckdb/extension/json/json_extension.cpp +8 -3
- package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
- package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
- package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
- package/src/duckdb/src/catalog/catalog.cpp +12 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
- package/src/duckdb/src/common/bind_helpers.cpp +3 -0
- package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +19 -6
- package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
- package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
- package/src/duckdb/src/include/duckdb.h +495 -480
- package/src/duckdb/src/main/attached_database.cpp +1 -1
- package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
- package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
- package/src/duckdb/src/main/config.cpp +7 -1
- package/src/duckdb/src/main/database.cpp +8 -8
- package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
- package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
- package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
- package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
- package/src/duckdb/src/parallel/executor_task.cpp +10 -6
- package/src/duckdb/src/parallel/task_executor.cpp +4 -1
- package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/storage_manager.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +5 -6
- package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
- package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
- package/src/duckdb/ub_src_parallel.cpp +2 -0
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp
@@ -397,7 +397,13 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
             }
         }
     }
-    if (max_columns_found == num_cols && ignored_rows > min_ignored_rows) {
+    if (max_columns_found == num_cols && (ignored_rows > min_ignored_rows)) {
+        return;
+    }
+    if (max_columns_found > 1 && num_cols > max_columns_found && consistent_rows < best_consistent_rows / 2 &&
+        options.null_padding) {
+        // When null_padding is true, we only give preference to a max number of columns if null padding is at least
+        // 50% as consistent as the best case scenario
         return;
     }
     if (quoted && num_cols < max_columns_found) {
@@ -436,28 +442,19 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
         !require_more_padding && !invalid_padding && num_cols == max_columns_found && comments_are_acceptable) {
         auto &sniffing_state_machine = scanner->GetStateMachine();
 
-
-
-
-
-
-        }
-    }
-    if (!same_quote_is_candidate) {
-        if (options.dialect_options.skip_rows.IsSetByUser()) {
-            // If skip rows is set by user, and we found dirty notes, we only accept it if either null_padding or
-            // ignore_errors is set
-            if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue()) {
-                return;
-            }
-            sniffing_state_machine.dialect_options.skip_rows = options.dialect_options.skip_rows.GetValue();
-        } else if (!options.null_padding) {
-            sniffing_state_machine.dialect_options.skip_rows = dirty_notes;
+        if (options.dialect_options.skip_rows.IsSetByUser()) {
+            // If skip rows is set by user, and we found dirty notes, we only accept it if either null_padding or
+            // ignore_errors is set
+            if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue()) {
+                return;
             }
-        sniffing_state_machine.dialect_options.
-
-
+            sniffing_state_machine.dialect_options.skip_rows = options.dialect_options.skip_rows.GetValue();
+        } else if (!options.null_padding) {
+            sniffing_state_machine.dialect_options.skip_rows = dirty_notes;
         }
+        sniffing_state_machine.dialect_options.num_cols = num_cols;
+        lines_sniffed = sniffed_column_counts.result_position;
+        candidates.emplace_back(std::move(scanner));
     }
 }
 
@@ -491,7 +488,7 @@ void CSVSniffer::RefineCandidates() {
 
     for (idx_t i = 1; i <= options.sample_size_chunks; i++) {
         vector<unique_ptr<ColumnCountScanner>> successful_candidates;
-        bool done =
+        bool done = candidates.empty();
         for (auto &cur_candidate : candidates) {
             const bool finished_file = cur_candidate->FinishedFile();
             if (successful_candidates.empty()) {
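The `bool done = candidates.empty();` initialization matters because a candidate set that was fully eliminated in an earlier round never executes the inner loop, so nothing else could mark the refinement as finished. A minimal sketch with stand-in types (not DuckDB's actual ColumnCountScanner):

#include <vector>

// Sketch only: seeding `done` with candidates.empty() lets the surrounding
// sampling loop exit instead of scanning the remaining sample chunks for
// nothing when every dialect candidate has already been discarded.
bool RefineRound(const std::vector<int> &candidates) {
    bool done = candidates.empty();
    for (const int candidate : candidates) {
        if (candidate < 0) { // stand-in for "this candidate finished the file"
            done = true;
        }
    }
    return done;
}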
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp
@@ -2,7 +2,7 @@
 #include "duckdb/execution/operator/csv_scanner/csv_casting.hpp"
 
 namespace duckdb {
-bool CSVSniffer::TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) {
+bool CSVSniffer::TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) const {
     auto &sniffing_state_machine = best_candidate->GetStateMachine();
     // try vector-cast from string to sql_type
     Vector dummy_result(sql_type, size);
package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp
@@ -303,6 +303,7 @@ CSVError CSVError::CastError(const CSVReaderOptions &options, const string &colu
                    "correctly parse this column."
                 << '\n';
     }
+    how_to_fix_it << "* Check whether the null string value is set correctly (e.g., nullstr = 'N/A')" << '\n';
 
     return CSVError(error.str(), CAST_ERROR, column_idx, csv_row, error_info, row_byte_position, byte_position, options,
                     how_to_fix_it.str(), current_path);
package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp
@@ -251,6 +251,10 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
             throw BinderException("Invalid value for MAX_LINE_SIZE parameter: it cannot be smaller than 0");
         }
         maximum_line_size.Set(NumericCast<idx_t>(line_size));
+        if (buffer_size_option.IsSetByUser() && maximum_line_size.GetValue() > buffer_size_option.GetValue()) {
+            throw InvalidInputException("Buffer Size of %d must be a higher value than the maximum line size %d",
+                                        buffer_size_option.GetValue(), maximum_line_size.GetValue());
+        }
     } else if (loption == "date_format" || loption == "dateformat") {
         string format = ParseString(value, loption);
         SetDateFormat(LogicalTypeId::DATE, format, true);
@@ -264,6 +268,12 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
         if (buffer_size_option == 0) {
             throw InvalidInputException("Buffer Size option must be higher than 0");
         }
+        if (maximum_line_size.IsSetByUser() && maximum_line_size.GetValue() > buffer_size_option.GetValue()) {
+            throw InvalidInputException("Buffer Size of %d must be a higher value than the maximum line size %d",
+                                        buffer_size_option.GetValue(), maximum_line_size.GetValue());
+        } else {
+            maximum_line_size.Set(buffer_size_option.GetValue(), false);
+        }
     } else if (loption == "decimal_separator") {
         decimal_separator = ParseString(value, loption);
         if (decimal_separator != "." && decimal_separator != ",") {
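The two hunks above enforce one invariant from both sides: a single CSV line must fit into one read buffer, so whichever of max_line_size and buffer_size is set last is validated against the other, and setting buffer_size also derives a line-size cap. A simplified sketch of the invariant, using plain integers rather than DuckDB's CSVOption wrapper:

#include <cstdint>
#include <stdexcept>
#include <string>

// Illustrative defaults; DuckDB's real defaults are not part of this diff.
struct CsvLimits {
    int64_t max_line_size = 2000000;
    int64_t buffer_size = 32000000;

    // Whichever option is assigned second, reject configurations where one
    // line could never fit into a single buffer.
    void Validate() const {
        if (max_line_size > buffer_size) {
            throw std::invalid_argument("Buffer Size of " + std::to_string(buffer_size) +
                                        " must be a higher value than the maximum line size " +
                                        std::to_string(max_line_size));
        }
    }
};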
@@ -298,6 +308,9 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
         if (table_name.empty()) {
             throw BinderException("REJECTS_TABLE option cannot be empty");
         }
+        if (KeywordHelper::RequiresQuotes(table_name)) {
+            throw BinderException("rejects_scan option: %s requires quotes to be used as an identifier", table_name);
+        }
         rejects_table_name.Set(table_name);
     } else if (loption == "rejects_scan") {
         // skip, handled in SetRejectsOptions
@@ -305,6 +318,9 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
         if (table_name.empty()) {
             throw BinderException("rejects_scan option cannot be empty");
         }
+        if (KeywordHelper::RequiresQuotes(table_name)) {
+            throw BinderException("rejects_scan option: %s requires quotes to be used as an identifier", table_name);
+        }
         rejects_scan_name.Set(table_name);
     } else if (loption == "rejects_limit") {
         auto limit = ParseInteger(value, loption);
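KeywordHelper::RequiresQuotes decides whether a name is usable as a bare SQL identifier; names that would need quoting are now rejected for both rejects_table and rejects_scan (the error text says "rejects_scan option" in both branches, as in the upstream source). A rough illustration of the kind of test such a helper performs - this is an assumption for illustration, and the real helper also consults the parser's keyword list:

#include <cctype>
#include <string>

// Hypothetical stand-in: a name that is not a plain lower-case identifier
// (letters, digits, underscores, not starting with a digit) would need quotes.
static bool LooksLikePlainIdentifier(const std::string &name) {
    if (name.empty() || std::isdigit(static_cast<unsigned char>(name[0]))) {
        return false;
    }
    for (const char c : name) {
        if (!std::islower(static_cast<unsigned char>(c)) &&
            !std::isdigit(static_cast<unsigned char>(c)) && c != '_') {
            return false;
        }
    }
    return true;
}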
package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp
@@ -5,10 +5,11 @@
 
 namespace duckdb {
 
-PhysicalStreamingSample::PhysicalStreamingSample(vector<LogicalType> types,
-
-    : PhysicalOperator(PhysicalOperatorType::STREAMING_SAMPLE, std::move(types), estimated_cardinality),
-
+PhysicalStreamingSample::PhysicalStreamingSample(vector<LogicalType> types, unique_ptr<SampleOptions> options,
+                                                 idx_t estimated_cardinality)
+    : PhysicalOperator(PhysicalOperatorType::STREAMING_SAMPLE, std::move(types), estimated_cardinality),
+      sample_options(std::move(options)) {
+    percentage = sample_options->sample_size.GetValue<double>() / 100;
 }
 
 //===--------------------------------------------------------------------===//
@@ -49,13 +50,21 @@ void PhysicalStreamingSample::BernoulliSample(DataChunk &input, DataChunk &resul
     }
 }
 
+bool PhysicalStreamingSample::ParallelOperator() const {
+    return !(sample_options->repeatable || sample_options->seed.IsValid());
+}
+
 unique_ptr<OperatorState> PhysicalStreamingSample::GetOperatorState(ExecutionContext &context) const {
-
+    if (!ParallelOperator()) {
+        return make_uniq<StreamingSampleOperatorState>(static_cast<int64_t>(sample_options->seed.GetIndex()));
+    }
+    RandomEngine random;
+    return make_uniq<StreamingSampleOperatorState>(static_cast<int64_t>(random.NextRandomInteger64()));
 }
 
 OperatorResultType PhysicalStreamingSample::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
                                                     GlobalOperatorState &gstate, OperatorState &state) const {
-    switch (method) {
+    switch (sample_options->method) {
     case SampleMethod::BERNOULLI_SAMPLE:
         BernoulliSample(input, chunk, state);
         break;
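The seeding logic above is what gates parallelism: a repeatable or explicitly seeded sample pins one deterministic seed, while the unseeded case draws a fresh random seed per operator state so states can run independently. A sketch of the policy, with std::mt19937_64 standing in for DuckDB's RandomEngine:

#include <cstdint>
#include <random>

struct SampleSeedPolicy {
    bool repeatable = false;
    bool has_seed = false;
    int64_t seed = 0;

    int64_t SeedForNewState() const {
        if (repeatable || has_seed) {
            return seed; // deterministic: every state replays the same stream
        }
        std::random_device rd;
        std::mt19937_64 gen(rd());
        return static_cast<int64_t>(gen()); // parallel: independent streams
    }
};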
@@ -70,7 +79,7 @@ OperatorResultType PhysicalStreamingSample::Execute(ExecutionContext &context, D
 
 InsertionOrderPreservingMap<string> PhysicalStreamingSample::ParamsToString() const {
     InsertionOrderPreservingMap<string> result;
-    result["Sample Method"] = EnumUtil::ToString(method) + ": " + to_string(100 * percentage) + "%";
+    result["Sample Method"] = EnumUtil::ToString(sample_options->method) + ": " + to_string(100 * percentage) + "%";
     return result;
 }
 
package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp
@@ -215,7 +215,9 @@ public:
         auto &gstate = gstate_p.Cast<BatchInsertGlobalState>();
         auto &lstate = lstate_p.Cast<BatchInsertLocalState>();
         // merge together the collections
-
+        if (!lstate.writer) {
+            lstate.writer = &gstate.table.GetStorage().CreateOptimisticWriter(context);
+        }
         auto final_collection = gstate.MergeCollections(context, std::move(merge_collections), *lstate.writer);
         // add the merged-together collection to the set of batch indexes
         lock_guard<mutex> l(gstate.lock);
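The batch-insert fix replaces an unconditional use of lstate.writer with lazy creation, covering merge paths that reach this code before any Sink call created the writer. The pattern, sketched with stand-in types:

#include <memory>

// Stand-in types; the real code stores a pointer to an OptimisticDataWriter
// owned by the table's storage.
struct Writer {};
struct Storage {
    Writer &CreateOptimisticWriter() {
        writer = std::make_unique<Writer>();
        return *writer;
    }
    std::unique_ptr<Writer> writer;
};
struct LocalState {
    Writer *writer = nullptr;
};

void MergeCollections(LocalState &lstate, Storage &storage) {
    if (!lstate.writer) { // create on first use instead of assuming Sink ran
        lstate.writer = &storage.CreateOptimisticWriter();
    }
    // ... merge row collections through *lstate.writer
}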
package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp
@@ -108,7 +108,17 @@ SourceResultType PhysicalTableScan::GetData(ExecutionContext &context, DataChunk
         if (g_state.in_out_final) {
             function.in_out_function_final(context, data, chunk);
         }
-        function.in_out_function(context, data, g_state.input_chunk, chunk)
+        switch (function.in_out_function(context, data, g_state.input_chunk, chunk)) {
+        case OperatorResultType::BLOCKED: {
+            auto guard = g_state.Lock();
+            return g_state.BlockSource(guard, input.interrupt_state);
+        }
+        default:
+            // FIXME: Handling for other cases (such as NEED_MORE_INPUT) breaks current functionality and extensions that
+            // might be relying on current behaviour. Needs a rework that is not in scope
+            break;
+        }
+
         if (chunk.size() == 0 && function.in_out_function_final) {
             function.in_out_function_final(context, data, chunk);
             g_state.in_out_final = true;
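With this change the return value of an in-out table function is no longer discarded: OperatorResultType::BLOCKED parks the source on the interrupt state so the scheduler can resume it later, while other result types deliberately keep the old fall-through behaviour (see the FIXME in the hunk). A condensed sketch of the dispatch with stand-in types:

// Stand-in enums; DuckDB's real BlockSource takes a lock guard and the
// pipeline's interrupt state.
enum class OperatorResultType { NEED_MORE_INPUT, HAVE_MORE_OUTPUT, FINISHED, BLOCKED };
enum class SourceResultType { HAVE_MORE_OUTPUT, BLOCKED };

SourceResultType Dispatch(OperatorResultType result) {
    switch (result) {
    case OperatorResultType::BLOCKED:
        // suspend this source; an async callback reschedules the task
        return SourceResultType::BLOCKED;
    default:
        // NEED_MORE_INPUT / HAVE_MORE_OUTPUT intentionally fall through to
        // the legacy behaviour so existing extensions keep working
        return SourceResultType::HAVE_MORE_OUTPUT;
    }
}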
package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp
@@ -34,6 +34,7 @@ PhysicalCreateARTIndex::PhysicalCreateARTIndex(LogicalOperator &op, TableCatalog
 
 class CreateARTIndexGlobalSinkState : public GlobalSinkState {
 public:
+    //! We merge the local indexes into one global index.
     unique_ptr<BoundIndex> global_index;
 };
 
@@ -53,8 +54,10 @@ public:
 };
 
 unique_ptr<GlobalSinkState> PhysicalCreateARTIndex::GetGlobalSinkState(ClientContext &context) const {
-    // Create the global sink state
+    // Create the global sink state.
     auto state = make_uniq<CreateARTIndexGlobalSinkState>();
+
+    // Create the global index.
     auto &storage = table.GetStorage();
     state->global_index = make_uniq<ART>(info->index_name, info->constraint_type, storage_ids,
                                          TableIOManager::Get(storage), unbound_expressions, storage.db);
@@ -123,7 +126,6 @@ SinkResultType PhysicalCreateARTIndex::SinkSorted(OperatorSinkInput &input) cons
 
 SinkResultType PhysicalCreateARTIndex::Sink(ExecutionContext &context, DataChunk &chunk,
                                             OperatorSinkInput &input) const {
-
     D_ASSERT(chunk.ColumnCount() >= 2);
     auto &l_state = input.local_state.Cast<CreateARTIndexLocalSinkState>();
     l_state.arena_allocator.Reset();
@@ -151,11 +153,10 @@
 
 SinkCombineResultType PhysicalCreateARTIndex::Combine(ExecutionContext &context,
                                                       OperatorSinkCombineInput &input) const {
-
     auto &g_state = input.global_state.Cast<CreateARTIndexGlobalSinkState>();
-    auto &l_state = input.local_state.Cast<CreateARTIndexLocalSinkState>();
 
     // Merge the local index into the global index.
+    auto &l_state = input.local_state.Cast<CreateARTIndexLocalSinkState>();
     if (!g_state.global_index->MergeIndexes(*l_state.local_index)) {
         throw ConstraintException("Data contains duplicates on indexed column(s)");
     }
@@ -165,8 +166,6 @@ SinkCombineResultType PhysicalCreateARTIndex::Combine(ExecutionContext &context,
 
 SinkFinalizeType PhysicalCreateARTIndex::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
                                                   OperatorSinkFinalizeInput &input) const {
-
-    // Here, we set the resulting global index as the newly created index of the table.
     auto &state = input.global_state.Cast<CreateARTIndexGlobalSinkState>();
 
     // Vacuum excess memory and verify.
@@ -182,7 +181,6 @@ SinkFinalizeType PhysicalCreateARTIndex::Finalize(Pipeline &pipeline, Event &eve
     auto &schema = table.schema;
     info->column_ids = storage_ids;
 
-    // FIXME: We should check for catalog exceptions prior to index creation, and later double-check.
     if (!alter_table_info) {
         // Ensure that the index does not yet exist in the catalog.
         auto entry = schema.GetEntry(schema.GetCatalogTransaction(context), CatalogType::INDEX_ENTRY, info->index_name);
package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp
@@ -6,10 +6,21 @@
 #include "duckdb/planner/expression/bound_reference_expression.hpp"
 #include "duckdb/planner/operator/logical_create_index.hpp"
 #include "duckdb/planner/operator/logical_get.hpp"
+#include "duckdb/execution/operator/scan/physical_dummy_scan.hpp"
 
 namespace duckdb {
 
 unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateIndex &op) {
+    // Early-out, if the index already exists.
+    auto &schema = op.table.schema;
+    auto entry = schema.GetEntry(schema.GetCatalogTransaction(context), CatalogType::INDEX_ENTRY, op.info->index_name);
+    if (entry) {
+        if (op.info->on_conflict != OnCreateConflict::IGNORE_ON_CONFLICT) {
+            throw CatalogException("Index with name \"%s\" already exists!", op.info->index_name);
+        }
+        return make_uniq<PhysicalDummyScan>(op.types, op.estimated_cardinality);
+    }
+
     // Ensure that all expressions contain valid scalar functions.
     // E.g., get_current_timestamp(), random(), and sequence values cannot be index keys.
     for (idx_t i = 0; i < op.unbound_expressions.size(); i++) {
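Resolving the "already exists" conflict at plan time means CREATE INDEX IF NOT EXISTS compiles to a no-op dummy scan instead of failing later during Finalize. A sketch of the early-out pattern with a toy catalog (stand-in types, not DuckDB's real catalog API):

#include <memory>
#include <set>
#include <stdexcept>
#include <string>

enum class OnCreateConflict { ERROR_ON_CONFLICT, IGNORE_ON_CONFLICT };

struct PhysicalOp { virtual ~PhysicalOp() = default; };
struct PhysicalDummyScan : PhysicalOp {};
struct PhysicalCreateIndex : PhysicalOp {};

static std::set<std::string> catalog = {"idx_existing"}; // toy catalog contents

std::unique_ptr<PhysicalOp> PlanCreateIndex(const std::string &name, OnCreateConflict on_conflict) {
    if (catalog.count(name)) {
        if (on_conflict != OnCreateConflict::IGNORE_ON_CONFLICT) {
            throw std::runtime_error("Index with name \"" + name + "\" already exists!");
        }
        return std::make_unique<PhysicalDummyScan>(); // IF NOT EXISTS: plan a no-op
    }
    return std::make_unique<PhysicalCreateIndex>();
}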
package/src/duckdb/src/execution/physical_plan/plan_sample.cpp
@@ -28,9 +28,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSample &op
                       "reservoir sampling or use a sample_size",
                       EnumUtil::ToString(op.sample_options->method));
         }
-        sample = make_uniq<PhysicalStreamingSample>(
-            op.types, op.sample_options->method, op.sample_options->sample_size.GetValue<double>(),
-            static_cast<int64_t>(op.sample_options->seed.GetIndex()), op.estimated_cardinality);
+        sample = make_uniq<PhysicalStreamingSample>(op.types, std::move(op.sample_options), op.estimated_cardinality);
         break;
     default:
         throw InternalException("Unimplemented sample method");
package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp
@@ -97,6 +97,7 @@ public:
     void SetRadixBits(const idx_t &radix_bits_p);
     bool SetRadixBitsToExternal();
     idx_t GetRadixBits() const;
+    idx_t GetMaximumSinkRadixBits() const;
     idx_t GetExternalRadixBits() const;
 
 private:
@@ -161,7 +162,7 @@ public:
     ClientContext &context;
     //! Temporary memory state for managing this hash table's memory usage
     unique_ptr<TemporaryMemoryState> temporary_memory_state;
-    idx_t minimum_reservation;
+    atomic<idx_t> minimum_reservation;
 
     //! Whether we've called Finalize
     bool finalized;
@@ -211,11 +212,11 @@ RadixHTGlobalSinkState::RadixHTGlobalSinkState(ClientContext &context_p, const R
     auto tuples_per_block = block_alloc_size / radix_ht.GetLayout().GetRowWidth();
     idx_t ht_count =
         LossyNumericCast<idx_t>(static_cast<double>(config.sink_capacity) / GroupedAggregateHashTable::LOAD_FACTOR);
-    auto num_partitions = RadixPartitioning::NumberOfPartitions(config.
+    auto num_partitions = RadixPartitioning::NumberOfPartitions(config.GetMaximumSinkRadixBits());
     auto count_per_partition = ht_count / num_partitions;
-    auto blocks_per_partition = (count_per_partition + tuples_per_block) / tuples_per_block
+    auto blocks_per_partition = (count_per_partition + tuples_per_block) / tuples_per_block;
     if (!radix_ht.GetLayout().AllConstant()) {
-        blocks_per_partition +=
+        blocks_per_partition += 1;
     }
     auto ht_size = num_partitions * blocks_per_partition * block_alloc_size + config.sink_capacity * sizeof(ht_entry_t);
 
@@ -281,6 +282,10 @@ idx_t RadixHTConfig::GetRadixBits() const {
     return sink_radix_bits;
 }
 
+idx_t RadixHTConfig::GetMaximumSinkRadixBits() const {
+    return maximum_sink_radix_bits;
+}
+
 idx_t RadixHTConfig::GetExternalRadixBits() const {
     return MAXIMUM_FINAL_SINK_RADIX_BITS;
 }
@@ -296,8 +301,12 @@ void RadixHTConfig::SetRadixBitsInternal(const idx_t radix_bits_p, bool external
     }
 
     if (external) {
+        const auto partition_multiplier = RadixPartitioning::NumberOfPartitions(radix_bits_p) /
+                                          RadixPartitioning::NumberOfPartitions(sink_radix_bits);
+        sink.minimum_reservation = sink.minimum_reservation * partition_multiplier;
         sink.external = true;
     }
+
     sink_radix_bits = radix_bits_p;
 }
 
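When the aggregate goes external, the minimum memory reservation now scales with the growth in partition count. NumberOfPartitions is a power of two of the radix bits in DuckDB's radix partitioning, so the multiplier is 2^(new_bits - old_bits). A worked example with illustrative numbers:

#include <cstdint>
#include <iostream>

static uint64_t NumberOfPartitions(uint64_t radix_bits) {
    return uint64_t(1) << radix_bits; // 2^radix_bits
}

int main() {
    uint64_t sink_radix_bits = 4;               // illustrative current setting
    uint64_t new_radix_bits = 8;                // external spill repartitions more finely
    uint64_t minimum_reservation = 64ULL << 20; // 64 MiB, illustrative

    const auto multiplier = NumberOfPartitions(new_radix_bits) / NumberOfPartitions(sink_radix_bits);
    minimum_reservation *= multiplier; // 16x -> 1 GiB in this example
    std::cout << "multiplier=" << multiplier << " reservation=" << minimum_reservation << "\n";
}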
@@ -590,7 +599,7 @@ idx_t RadixPartitionedHashTable::MaxThreads(GlobalSinkState &sink_p) const {
 
     // we cannot spill aggregate state memory
     const auto usable_memory = sink.temporary_memory_state->GetReservation() > sink.stored_allocators_size
-                                   ? sink.temporary_memory_state->GetReservation() - sink.
+                                   ? sink.temporary_memory_state->GetReservation() - sink.stored_allocators_size
                                    : 0;
     // This many partitions will fit given our reservation (at least 1))
     const auto partitions_fit = MaxValue<idx_t>(usable_memory / sink.max_partition_size, 1);
package/src/duckdb/src/execution/sample/reservoir_sample.cpp
@@ -166,8 +166,15 @@ unique_ptr<ReservoirChunk> ReservoirSample::CreateNewSampleChunk(vector<LogicalT
 
 void ReservoirSample::Vacuum() {
     Verify();
-
+    bool do_vacuum = false;
+    // when it's not a stats sample, sometimes we never collect more than FIXED_SAMPLE_SIZE tuples
+    // but we still need to vacuum, so the rules are a little bit different.
+    if (!stats_sample && GetActiveSampleCount() <= static_cast<idx_t>(GetReservoirChunkCapacity<double>() * 0.8)) {
+        do_vacuum = true;
+    }
+    if (!do_vacuum && (NumSamplesCollected() <= FIXED_SAMPLE_SIZE || !reservoir_chunk || destroyed)) {
         // sample is destroyed or too small to shrink
+        // sample does not need to be vacuumed.
         return;
     }
 
@@ -201,7 +208,7 @@ unique_ptr<BlockingSample> ReservoirSample::Copy() const {
     // how many values should be copied
     idx_t values_to_copy = MinValue<idx_t>(GetActiveSampleCount(), sample_count);
 
-    auto new_sample_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity());
+    auto new_sample_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity<idx_t>());
 
     SelectionVector sel_copy(sel);
 
@@ -295,7 +302,7 @@ void ReservoirSample::SimpleMerge(ReservoirSample &other) {
     idx_t size_after_merge = MinValue<idx_t>(keep_from_other + keep_from_this, FIXED_SAMPLE_SIZE);
 
     // Check if appending the other samples to this will go over the sample chunk size
-    if (reservoir_chunk->chunk.size() + keep_from_other > GetReservoirChunkCapacity()) {
+    if (reservoir_chunk->chunk.size() + keep_from_other > GetReservoirChunkCapacity<idx_t>()) {
         Vacuum();
     }
 
@@ -542,7 +549,7 @@ void ReservoirSample::ExpandSerializedSample() {
     }
 
     auto types = reservoir_chunk->chunk.GetTypes();
-    auto new_res_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity());
+    auto new_res_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity<idx_t>());
     auto copy_count = reservoir_chunk->chunk.size();
     SelectionVector tmp_sel = SelectionVector(0, copy_count);
     UpdateSampleAppend(new_res_chunk->chunk, reservoir_chunk->chunk, tmp_sel, copy_count);
@@ -550,8 +557,10 @@ void ReservoirSample::ExpandSerializedSample() {
     std::swap(reservoir_chunk, new_res_chunk);
 }
 
-
-
+template <typename T>
+T ReservoirSample::GetReservoirChunkCapacity() const {
+    return static_cast<T>(sample_count +
+                          (FIXED_SAMPLE_SIZE_MULTIPLIER * MinValue<idx_t>(sample_count, FIXED_SAMPLE_SIZE)));
 }
 
 idx_t ReservoirSample::FillReservoir(DataChunk &chunk) {
@@ -563,7 +572,7 @@ idx_t ReservoirSample::FillReservoir(DataChunk &chunk) {
     }
     auto types = chunk.GetTypes();
     // create a new sample chunk to store new samples
-    reservoir_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity());
+    reservoir_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity<idx_t>());
     }
 
     idx_t actual_sample_index_start = GetActiveSampleCount();
@@ -694,9 +703,6 @@ void ReservoirSample::UpdateSampleAppend(DataChunk &this_, DataChunk &other, Sel
         return;
     }
     D_ASSERT(this_.GetTypes() == other.GetTypes());
-
-    // UpdateSampleAppend(this_, other, other_sel, append_count);
-    D_ASSERT(this_.GetTypes() == other.GetTypes());
     auto types = reservoir_chunk->chunk.GetTypes();
 
     for (idx_t i = 0; i < reservoir_chunk->chunk.ColumnCount(); i++) {
@@ -714,6 +720,9 @@ void ReservoirSample::AddToReservoir(DataChunk &chunk) {
         return;
     }
 
+    if (!reservoir_chunk && GetReservoirChunkCapacity<idx_t>() == 0) {
+        return;
+    }
     idx_t tuples_consumed = FillReservoir(chunk);
     base_reservoir_sample->num_entries_seen_total += tuples_consumed;
     D_ASSERT(sample_count == 0 || reservoir_chunk->chunk.size() >= 1);
@@ -752,8 +761,10 @@
         base_reservoir_sample->num_entries_seen_total += chunk.size();
         return;
     }
+
     idx_t size = chunk_sel.size;
     D_ASSERT(size <= chunk.size());
+    D_ASSERT(reservoir_chunk->chunk.size() < GetReservoirChunkCapacity<idx_t>());
 
     UpdateSampleAppend(reservoir_chunk->chunk, chunk, chunk_sel.sel, size);
 
@@ -763,11 +774,12 @@
 
     Verify();
 
-    // if we are over the threshold, we need to
+    // if we are over the threshold, we need to switch to slow sampling.
     if (GetSamplingState() == SamplingState::RANDOM && GetTuplesSeen() >= FIXED_SAMPLE_SIZE * FAST_TO_SLOW_THRESHOLD) {
         ConvertToReservoirSample();
     }
-    if (reservoir_chunk->chunk.size() >=
+    if (static_cast<int64_t>(reservoir_chunk->chunk.size()) >=
+        GetReservoirChunkCapacity<int64_t>() - (static_cast<int64_t>(FIXED_SAMPLE_SIZE) * 3)) {
         Vacuum();
     }
 }
package/src/duckdb/src/function/scalar/generic/getvariable.cpp
@@ -24,12 +24,12 @@ struct GetVariableBindData : FunctionData {
 
 static unique_ptr<FunctionData> GetVariableBind(ClientContext &context, ScalarFunction &function,
                                                 vector<unique_ptr<Expression>> &arguments) {
+    if (arguments[0]->HasParameter() || arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN) {
+        throw ParameterNotResolvedException();
+    }
     if (!arguments[0]->IsFoldable()) {
         throw NotImplementedException("getvariable requires a constant input");
     }
-    if (arguments[0]->HasParameter()) {
-        throw ParameterNotResolvedException();
-    }
     Value value;
     auto variable_name = ExpressionExecutor::EvaluateScalar(context, *arguments[0]);
     if (!variable_name.IsNull()) {
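The reordering above matters for prepared statements: a bound parameter is not foldable, so testing IsFoldable() first reported "requires a constant input" instead of deferring binding until parameters are resolved. A sketch of the order-of-checks principle with a simplified expression stand-in:

#include <stdexcept>

struct Expr {
    bool has_parameter = false;
    bool type_known = true;
    bool foldable = false;
};

void BindGetVariable(const Expr &arg) {
    // check parameters (and unresolved types) first, so prepared statements
    // get "parameter not resolved" rather than a misleading constant error
    if (arg.has_parameter || !arg.type_known) {
        throw std::runtime_error("parameter not resolved - retry after binding parameters");
    }
    if (!arg.foldable) {
        throw std::runtime_error("getvariable requires a constant input");
    }
    // ... evaluate the constant and bind
}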
package/src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "
+#define DUCKDB_PATCH_VERSION "1"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 2
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.2.
+#define DUCKDB_VERSION "v1.2.1"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "8e52ec4395"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/function/window/window_aggregate_states.cpp
@@ -7,6 +7,9 @@ WindowAggregateStates::WindowAggregateStates(const AggregateObject &aggr)
 }
 
 void WindowAggregateStates::Initialize(idx_t count) {
+    // Don't leak - every Initialize must be matched with a Destroy
+    D_ASSERT(states.empty());
+
     states.resize(count * state_size);
     auto state_ptr = states.data();
 
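The assertion added above enforces an Initialize/Destroy pairing discipline so aggregate state buffers cannot leak across reuse. A sketch of the invariant with stand-in types (sizes illustrative):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct AggregateStates {
    std::vector<uint8_t> states;
    std::size_t state_size = 16; // illustrative per-state footprint

    void Initialize(std::size_t count) {
        // fires if a previous Initialize was not followed by Destroy
        assert(states.empty() && "every Initialize must be matched with a Destroy");
        states.resize(count * state_size);
    }
    void Destroy() {
        // ... run the aggregate's destructor over each state, then release
        states.clear();
    }
};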