duckdb 1.2.1-dev4.0 → 1.2.1-dev8.0

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (118)
  1. package/package.json +1 -1
  2. package/src/connection.cpp +57 -35
  3. package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
  4. package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
  5. package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
  6. package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
  8. package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
  9. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
  10. package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
  11. package/src/duckdb/extension/json/json_extension.cpp +8 -3
  12. package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
  13. package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
  14. package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
  15. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
  16. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
  17. package/src/duckdb/src/catalog/catalog.cpp +12 -0
  18. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  19. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
  20. package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
  21. package/src/duckdb/src/common/bind_helpers.cpp +3 -0
  22. package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
  23. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  24. package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
  25. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  26. package/src/duckdb/src/execution/index/art/art.cpp +19 -6
  27. package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
  28. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
  29. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  30. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
  31. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
  32. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
  33. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
  34. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
  35. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
  36. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
  37. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
  39. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
  40. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
  41. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
  42. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
  43. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
  44. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
  45. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
  46. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
  47. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
  48. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
  49. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  50. package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
  51. package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
  52. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
  53. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
  54. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
  56. package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
  58. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
  63. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
  65. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
  66. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
  67. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
  68. package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
  69. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
  71. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
  72. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
  74. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
  75. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
  76. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
  77. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
  78. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
  79. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
  80. package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
  81. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
  82. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
  84. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
  85. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
  86. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
  87. package/src/duckdb/src/include/duckdb.h +495 -480
  88. package/src/duckdb/src/main/attached_database.cpp +1 -1
  89. package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
  90. package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
  91. package/src/duckdb/src/main/config.cpp +7 -1
  92. package/src/duckdb/src/main/database.cpp +8 -8
  93. package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
  94. package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
  95. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  96. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
  97. package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
  98. package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
  99. package/src/duckdb/src/parallel/executor_task.cpp +10 -6
  100. package/src/duckdb/src/parallel/task_executor.cpp +4 -1
  101. package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
  102. package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
  103. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
  104. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
  105. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
  106. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
  107. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
  108. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
  109. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  110. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
  111. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  112. package/src/duckdb/src/storage/storage_manager.cpp +2 -2
  113. package/src/duckdb/src/storage/table/row_group.cpp +5 -6
  114. package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
  115. package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
  116. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
  117. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
  118. package/src/duckdb/ub_src_parallel.cpp +2 -0
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp
@@ -397,7 +397,13 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
             }
         }
     }
-    if (max_columns_found == num_cols && ignored_rows > min_ignored_rows) {
+    if (max_columns_found == num_cols && (ignored_rows > min_ignored_rows)) {
+        return;
+    }
+    if (max_columns_found > 1 && num_cols > max_columns_found && consistent_rows < best_consistent_rows / 2 &&
+        options.null_padding) {
+        // When null_padding is true, we only give preference to a max number of columns if null padding is at least
+        // 50% as consistent as the best case scenario
         return;
     }
     if (quoted && num_cols < max_columns_found) {
@@ -436,28 +442,19 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
         !require_more_padding && !invalid_padding && num_cols == max_columns_found && comments_are_acceptable) {
         auto &sniffing_state_machine = scanner->GetStateMachine();

-        bool same_quote_is_candidate = false;
-        for (const auto &candidate : candidates) {
-            if (sniffing_state_machine.dialect_options.state_machine_options.quote ==
-                candidate->GetStateMachine().dialect_options.state_machine_options.quote) {
-                same_quote_is_candidate = true;
-            }
-        }
-        if (!same_quote_is_candidate) {
-            if (options.dialect_options.skip_rows.IsSetByUser()) {
-                // If skip rows is set by user, and we found dirty notes, we only accept it if either null_padding or
-                // ignore_errors is set
-                if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue()) {
-                    return;
-                }
-                sniffing_state_machine.dialect_options.skip_rows = options.dialect_options.skip_rows.GetValue();
-            } else if (!options.null_padding) {
-                sniffing_state_machine.dialect_options.skip_rows = dirty_notes;
+        if (options.dialect_options.skip_rows.IsSetByUser()) {
+            // If skip rows is set by user, and we found dirty notes, we only accept it if either null_padding or
+            // ignore_errors is set
+            if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue()) {
+                return;
             }
-            sniffing_state_machine.dialect_options.num_cols = num_cols;
-            lines_sniffed = sniffed_column_counts.result_position;
-            candidates.emplace_back(std::move(scanner));
+            sniffing_state_machine.dialect_options.skip_rows = options.dialect_options.skip_rows.GetValue();
+        } else if (!options.null_padding) {
+            sniffing_state_machine.dialect_options.skip_rows = dirty_notes;
         }
+        sniffing_state_machine.dialect_options.num_cols = num_cols;
+        lines_sniffed = sniffed_column_counts.result_position;
+        candidates.emplace_back(std::move(scanner));
     }
 }

@@ -491,7 +488,7 @@ void CSVSniffer::RefineCandidates() {

     for (idx_t i = 1; i <= options.sample_size_chunks; i++) {
         vector<unique_ptr<ColumnCountScanner>> successful_candidates;
-        bool done = false;
+        bool done = candidates.empty();
         for (auto &cur_candidate : candidates) {
             const bool finished_file = cur_candidate->FinishedFile();
             if (successful_candidates.empty()) {
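The first dialect-detection hunk above adds a tie-breaking rule: a candidate dialect that parses more columns now wins only if it stays reasonably row-consistent. A standalone sketch of that rule (simplified names, not DuckDB's internal API):

#include <cstdint>

// Returns true when a wider candidate dialect should be rejected: with
// null_padding enabled, seeing *more* columns only wins if the candidate is
// at least half as row-consistent as the best candidate found so far.
bool RejectWiderCandidate(uint64_t num_cols, uint64_t max_columns_found,
                          uint64_t consistent_rows, uint64_t best_consistent_rows,
                          bool null_padding) {
    return null_padding && max_columns_found > 1 && num_cols > max_columns_found &&
           consistent_rows < best_consistent_rows / 2;
}

// Example: the best candidate parses 90 of 100 sampled rows consistently.
// A wider candidate that is consistent on only 40 rows is rejected (40 < 45),
// since its extra columns are likely an artifact of padding, not structure.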
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp
@@ -2,7 +2,7 @@
 #include "duckdb/execution/operator/csv_scanner/csv_casting.hpp"

 namespace duckdb {
-bool CSVSniffer::TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) {
+bool CSVSniffer::TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type) const {
     auto &sniffing_state_machine = best_candidate->GetStateMachine();
     // try vector-cast from string to sql_type
     Vector dummy_result(sql_type, size);
package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp
@@ -303,6 +303,7 @@ CSVError CSVError::CastError(const CSVReaderOptions &options, const string &colu
                          "correctly parse this column."
                       << '\n';
     }
+    how_to_fix_it << "* Check whether the null string value is set correctly (e.g., nullstr = 'N/A')" << '\n';

     return CSVError(error.str(), CAST_ERROR, column_idx, csv_row, error_info, row_byte_position, byte_position, options,
                     how_to_fix_it.str(), current_path);
package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp
@@ -251,6 +251,10 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
             throw BinderException("Invalid value for MAX_LINE_SIZE parameter: it cannot be smaller than 0");
         }
         maximum_line_size.Set(NumericCast<idx_t>(line_size));
+        if (buffer_size_option.IsSetByUser() && maximum_line_size.GetValue() > buffer_size_option.GetValue()) {
+            throw InvalidInputException("Buffer Size of %d must be a higher value than the maximum line size %d",
+                                        buffer_size_option.GetValue(), maximum_line_size.GetValue());
+        }
     } else if (loption == "date_format" || loption == "dateformat") {
         string format = ParseString(value, loption);
         SetDateFormat(LogicalTypeId::DATE, format, true);
@@ -264,6 +268,12 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
         if (buffer_size_option == 0) {
             throw InvalidInputException("Buffer Size option must be higher than 0");
         }
+        if (maximum_line_size.IsSetByUser() && maximum_line_size.GetValue() > buffer_size_option.GetValue()) {
+            throw InvalidInputException("Buffer Size of %d must be a higher value than the maximum line size %d",
+                                        buffer_size_option.GetValue(), maximum_line_size.GetValue());
+        } else {
+            maximum_line_size.Set(buffer_size_option.GetValue(), false);
+        }
     } else if (loption == "decimal_separator") {
         decimal_separator = ParseString(value, loption);
         if (decimal_separator != "." && decimal_separator != ",") {
@@ -298,6 +308,9 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
         if (table_name.empty()) {
             throw BinderException("REJECTS_TABLE option cannot be empty");
         }
+        if (KeywordHelper::RequiresQuotes(table_name)) {
+            throw BinderException("rejects_scan option: %s requires quotes to be used as an identifier", table_name);
+        }
         rejects_table_name.Set(table_name);
     } else if (loption == "rejects_scan") {
         // skip, handled in SetRejectsOptions
@@ -305,6 +318,9 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
         if (table_name.empty()) {
             throw BinderException("rejects_scan option cannot be empty");
         }
+        if (KeywordHelper::RequiresQuotes(table_name)) {
+            throw BinderException("rejects_scan option: %s requires quotes to be used as an identifier", table_name);
+        }
         rejects_scan_name.Set(table_name);
     } else if (loption == "rejects_limit") {
         auto limit = ParseInteger(value, loption);
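The two buffer-size hunks above are deliberately symmetric: max_line_size and buffer_size constrain each other, and a user may set them in either order, so each setter re-validates the pair. A standalone sketch of that pattern (simplified types, not DuckDB's option classes; the 2 MB default is a hypothetical stand-in):

#include <cstdint>
#include <stdexcept>

struct CsvOptions {
    uint64_t buffer_size = 0;
    uint64_t max_line_size = 2000000; // hypothetical default
    bool buffer_size_set = false;
    bool max_line_size_set = false;

    // A line must fit in a single buffer, so the buffer may never be smaller
    // than the longest permitted line; checked on *both* setter paths.
    void CheckPair() const {
        if (buffer_size_set && max_line_size_set && max_line_size > buffer_size) {
            throw std::invalid_argument("buffer_size must be >= max_line_size");
        }
    }
    void SetMaxLineSize(uint64_t v) {
        max_line_size = v;
        max_line_size_set = true;
        CheckPair();
    }
    void SetBufferSize(uint64_t v) {
        if (v == 0) {
            throw std::invalid_argument("buffer_size must be > 0");
        }
        buffer_size = v;
        buffer_size_set = true;
        CheckPair();
        // mirror the hunk: an explicit buffer size also caps the implicit line size
        if (!max_line_size_set) {
            max_line_size = v;
        }
    }
};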
package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp
@@ -86,6 +86,7 @@ SourceResultType PhysicalReservoirSample::GetData(ExecutionContext &context, Dat
         return SourceResultType::FINISHED;
     }
     auto sample_chunk = sink.sample->GetChunk();
+
     if (!sample_chunk) {
         return SourceResultType::FINISHED;
     }
package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp
@@ -5,10 +5,11 @@

 namespace duckdb {

-PhysicalStreamingSample::PhysicalStreamingSample(vector<LogicalType> types, SampleMethod method, double percentage,
-                                                 int64_t seed, idx_t estimated_cardinality)
-    : PhysicalOperator(PhysicalOperatorType::STREAMING_SAMPLE, std::move(types), estimated_cardinality), method(method),
-      percentage(percentage / 100), seed(seed) {
+PhysicalStreamingSample::PhysicalStreamingSample(vector<LogicalType> types, unique_ptr<SampleOptions> options,
+                                                 idx_t estimated_cardinality)
+    : PhysicalOperator(PhysicalOperatorType::STREAMING_SAMPLE, std::move(types), estimated_cardinality),
+      sample_options(std::move(options)) {
+    percentage = sample_options->sample_size.GetValue<double>() / 100;
 }

 //===--------------------------------------------------------------------===//
@@ -49,13 +50,21 @@ void PhysicalStreamingSample::BernoulliSample(DataChunk &input, DataChunk &resul
     }
 }

+bool PhysicalStreamingSample::ParallelOperator() const {
+    return !(sample_options->repeatable || sample_options->seed.IsValid());
+}
+
 unique_ptr<OperatorState> PhysicalStreamingSample::GetOperatorState(ExecutionContext &context) const {
-    return make_uniq<StreamingSampleOperatorState>(seed);
+    if (!ParallelOperator()) {
+        return make_uniq<StreamingSampleOperatorState>(static_cast<int64_t>(sample_options->seed.GetIndex()));
+    }
+    RandomEngine random;
+    return make_uniq<StreamingSampleOperatorState>(static_cast<int64_t>(random.NextRandomInteger64()));
 }

 OperatorResultType PhysicalStreamingSample::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
                                                     GlobalOperatorState &gstate, OperatorState &state) const {
-    switch (method) {
+    switch (sample_options->method) {
     case SampleMethod::BERNOULLI_SAMPLE:
         BernoulliSample(input, chunk, state);
         break;
@@ -70,7 +79,7 @@ OperatorResultType PhysicalStreamingSample::Execute(ExecutionContext &context, D

 InsertionOrderPreservingMap<string> PhysicalStreamingSample::ParamsToString() const {
     InsertionOrderPreservingMap<string> result;
-    result["Sample Method"] = EnumUtil::ToString(method) + ": " + to_string(100 * percentage) + "%";
+    result["Sample Method"] = EnumUtil::ToString(sample_options->method) + ": " + to_string(100 * percentage) + "%";
     return result;
 }

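Threading the whole SampleOptions through the operator lets GetOperatorState decide between reproducible and parallel execution: a user-pinned seed forces one deterministic stream, while an unpinned sample hands every operator state its own random seed. A standalone sketch of that policy (std types instead of DuckDB's RandomEngine; the default-seed fallback for a bare REPEATABLE is an assumption):

#include <cstdint>
#include <memory>
#include <optional>
#include <random>

struct SampleOptions {
    bool repeatable = false;
    std::optional<int64_t> seed; // set when the user requested a seed
};

struct StreamingSampleState {
    std::mt19937_64 rng;
    explicit StreamingSampleState(uint64_t seed) : rng(seed) {}
};

// Parallel execution interleaves chunks nondeterministically, so it is only
// allowed when the user did not ask for a reproducible sample.
bool ParallelOperator(const SampleOptions &opts) {
    return !(opts.repeatable || opts.seed.has_value());
}

std::unique_ptr<StreamingSampleState> GetOperatorState(const SampleOptions &opts) {
    if (!ParallelOperator(opts)) {
        // one deterministic stream; fall back to a fixed seed if only
        // REPEATABLE was given (assumption in this sketch)
        return std::make_unique<StreamingSampleState>(static_cast<uint64_t>(opts.seed.value_or(0)));
    }
    // parallel: every thread-local state draws an independent seed
    std::random_device rd;
    return std::make_unique<StreamingSampleState>((uint64_t(rd()) << 32) | rd());
}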
package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp
@@ -215,7 +215,9 @@ public:
         auto &gstate = gstate_p.Cast<BatchInsertGlobalState>();
         auto &lstate = lstate_p.Cast<BatchInsertLocalState>();
         // merge together the collections
-        D_ASSERT(lstate.writer);
+        if (!lstate.writer) {
+            lstate.writer = &gstate.table.GetStorage().CreateOptimisticWriter(context);
+        }
         auto final_collection = gstate.MergeCollections(context, std::move(merge_collections), *lstate.writer);
         // add the merged-together collection to the set of batch indexes
         lock_guard<mutex> l(gstate.lock);
package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp
@@ -108,7 +108,17 @@ SourceResultType PhysicalTableScan::GetData(ExecutionContext &context, DataChunk
         if (g_state.in_out_final) {
             function.in_out_function_final(context, data, chunk);
         }
-        function.in_out_function(context, data, g_state.input_chunk, chunk);
+        switch (function.in_out_function(context, data, g_state.input_chunk, chunk)) {
+        case OperatorResultType::BLOCKED: {
+            auto guard = g_state.Lock();
+            return g_state.BlockSource(guard, input.interrupt_state);
+        }
+        default:
+            // FIXME: Handling for other cases (such as NEED_MORE_INPUT) breaks current functionality and extensions that
+            // might be relying on current behaviour. Needs a rework that is not in scope
+            break;
+        }
+
         if (chunk.size() == 0 && function.in_out_function_final) {
             function.in_out_function_final(context, data, chunk);
             g_state.in_out_final = true;
package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp
@@ -34,6 +34,7 @@ PhysicalCreateARTIndex::PhysicalCreateARTIndex(LogicalOperator &op, TableCatalog

 class CreateARTIndexGlobalSinkState : public GlobalSinkState {
 public:
+    //! We merge the local indexes into one global index.
     unique_ptr<BoundIndex> global_index;
 };

@@ -53,8 +54,10 @@ public:
 };

 unique_ptr<GlobalSinkState> PhysicalCreateARTIndex::GetGlobalSinkState(ClientContext &context) const {
-    // Create the global sink state and add the global index.
+    // Create the global sink state.
     auto state = make_uniq<CreateARTIndexGlobalSinkState>();
+
+    // Create the global index.
     auto &storage = table.GetStorage();
     state->global_index = make_uniq<ART>(info->index_name, info->constraint_type, storage_ids,
                                          TableIOManager::Get(storage), unbound_expressions, storage.db);
@@ -123,7 +126,6 @@ SinkResultType PhysicalCreateARTIndex::SinkSorted(OperatorSinkInput &input) cons

 SinkResultType PhysicalCreateARTIndex::Sink(ExecutionContext &context, DataChunk &chunk,
                                             OperatorSinkInput &input) const {
-
     D_ASSERT(chunk.ColumnCount() >= 2);
     auto &l_state = input.local_state.Cast<CreateARTIndexLocalSinkState>();
     l_state.arena_allocator.Reset();
@@ -151,11 +153,10 @@ SinkResultType PhysicalCreateARTIndex::Sink(ExecutionContext &context, DataChunk

 SinkCombineResultType PhysicalCreateARTIndex::Combine(ExecutionContext &context,
                                                       OperatorSinkCombineInput &input) const {
-
     auto &g_state = input.global_state.Cast<CreateARTIndexGlobalSinkState>();
-    auto &l_state = input.local_state.Cast<CreateARTIndexLocalSinkState>();

     // Merge the local index into the global index.
+    auto &l_state = input.local_state.Cast<CreateARTIndexLocalSinkState>();
     if (!g_state.global_index->MergeIndexes(*l_state.local_index)) {
         throw ConstraintException("Data contains duplicates on indexed column(s)");
     }
@@ -165,8 +166,6 @@ SinkCombineResultType PhysicalCreateARTIndex::Combine(ExecutionContext &context,

 SinkFinalizeType PhysicalCreateARTIndex::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
                                                   OperatorSinkFinalizeInput &input) const {
-
-    // Here, we set the resulting global index as the newly created index of the table.
     auto &state = input.global_state.Cast<CreateARTIndexGlobalSinkState>();

     // Vacuum excess memory and verify.
@@ -182,7 +181,6 @@ SinkFinalizeType PhysicalCreateARTIndex::Finalize(Pipeline &pipeline, Event &eve
     auto &schema = table.schema;
     info->column_ids = storage_ids;

-    // FIXME: We should check for catalog exceptions prior to index creation, and later double-check.
     if (!alter_table_info) {
         // Ensure that the index does not yet exist in the catalog.
         auto entry = schema.GetEntry(schema.GetCatalogTransaction(context), CatalogType::INDEX_ENTRY, info->index_name);
package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp
@@ -6,10 +6,21 @@
 #include "duckdb/planner/expression/bound_reference_expression.hpp"
 #include "duckdb/planner/operator/logical_create_index.hpp"
 #include "duckdb/planner/operator/logical_get.hpp"
+#include "duckdb/execution/operator/scan/physical_dummy_scan.hpp"

 namespace duckdb {

 unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateIndex &op) {
+    // Early-out, if the index already exists.
+    auto &schema = op.table.schema;
+    auto entry = schema.GetEntry(schema.GetCatalogTransaction(context), CatalogType::INDEX_ENTRY, op.info->index_name);
+    if (entry) {
+        if (op.info->on_conflict != OnCreateConflict::IGNORE_ON_CONFLICT) {
+            throw CatalogException("Index with name \"%s\" already exists!", op.info->index_name);
+        }
+        return make_uniq<PhysicalDummyScan>(op.types, op.estimated_cardinality);
+    }
+
     // Ensure that all expressions contain valid scalar functions.
     // E.g., get_current_timestamp(), random(), and sequence values cannot be index keys.
     for (idx_t i = 0; i < op.unbound_expressions.size(); i++) {
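The planner early-out above resolves CREATE INDEX ... IF NOT EXISTS before any pipeline is built, replacing the whole index-build plan with a dummy scan. A standalone sketch of the shape of that check (toy catalog and operator types, not DuckDB's classes):

#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_set>

enum class OnCreateConflict { ERROR_ON_CONFLICT, IGNORE_ON_CONFLICT };

struct PhysicalOperator { virtual ~PhysicalOperator() = default; };
struct PhysicalDummyScan final : PhysicalOperator {};   // produces nothing: a no-op plan
struct PhysicalCreateIndex final : PhysicalOperator {}; // the real index-build pipeline

std::unique_ptr<PhysicalOperator> PlanCreateIndex(const std::unordered_set<std::string> &catalog,
                                                  const std::string &index_name,
                                                  OnCreateConflict on_conflict) {
    if (catalog.count(index_name)) {
        if (on_conflict != OnCreateConflict::IGNORE_ON_CONFLICT) {
            throw std::runtime_error("Index with name \"" + index_name + "\" already exists!");
        }
        // IF NOT EXISTS: plan a no-op instead of scanning and sorting the table
        return std::make_unique<PhysicalDummyScan>();
    }
    return std::make_unique<PhysicalCreateIndex>();
}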
package/src/duckdb/src/execution/physical_plan/plan_sample.cpp
@@ -28,9 +28,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSample &op
                             "reservoir sampling or use a sample_size",
                             EnumUtil::ToString(op.sample_options->method));
         }
-        sample = make_uniq<PhysicalStreamingSample>(
-            op.types, op.sample_options->method, op.sample_options->sample_size.GetValue<double>(),
-            static_cast<int64_t>(op.sample_options->seed.GetIndex()), op.estimated_cardinality);
+        sample = make_uniq<PhysicalStreamingSample>(op.types, std::move(op.sample_options), op.estimated_cardinality);
         break;
     default:
         throw InternalException("Unimplemented sample method");
package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp
@@ -97,6 +97,7 @@ public:
     void SetRadixBits(const idx_t &radix_bits_p);
     bool SetRadixBitsToExternal();
     idx_t GetRadixBits() const;
+    idx_t GetMaximumSinkRadixBits() const;
     idx_t GetExternalRadixBits() const;

 private:
@@ -161,7 +162,7 @@ public:
     ClientContext &context;
     //! Temporary memory state for managing this hash table's memory usage
     unique_ptr<TemporaryMemoryState> temporary_memory_state;
-    idx_t minimum_reservation;
+    atomic<idx_t> minimum_reservation;

     //! Whether we've called Finalize
     bool finalized;
@@ -211,11 +212,11 @@ RadixHTGlobalSinkState::RadixHTGlobalSinkState(ClientContext &context_p, const R
     auto tuples_per_block = block_alloc_size / radix_ht.GetLayout().GetRowWidth();
     idx_t ht_count =
         LossyNumericCast<idx_t>(static_cast<double>(config.sink_capacity) / GroupedAggregateHashTable::LOAD_FACTOR);
-    auto num_partitions = RadixPartitioning::NumberOfPartitions(config.GetExternalRadixBits());
+    auto num_partitions = RadixPartitioning::NumberOfPartitions(config.GetMaximumSinkRadixBits());
     auto count_per_partition = ht_count / num_partitions;
-    auto blocks_per_partition = (count_per_partition + tuples_per_block) / tuples_per_block + 1;
+    auto blocks_per_partition = (count_per_partition + tuples_per_block) / tuples_per_block;
     if (!radix_ht.GetLayout().AllConstant()) {
-        blocks_per_partition += 2;
+        blocks_per_partition += 1;
     }
     auto ht_size = num_partitions * blocks_per_partition * block_alloc_size + config.sink_capacity * sizeof(ht_entry_t);

@@ -281,6 +282,10 @@ idx_t RadixHTConfig::GetRadixBits() const {
     return sink_radix_bits;
 }

+idx_t RadixHTConfig::GetMaximumSinkRadixBits() const {
+    return maximum_sink_radix_bits;
+}
+
 idx_t RadixHTConfig::GetExternalRadixBits() const {
     return MAXIMUM_FINAL_SINK_RADIX_BITS;
 }
@@ -296,8 +301,12 @@ void RadixHTConfig::SetRadixBitsInternal(const idx_t radix_bits_p, bool external
     }

     if (external) {
+        const auto partition_multiplier = RadixPartitioning::NumberOfPartitions(radix_bits_p) /
+                                          RadixPartitioning::NumberOfPartitions(sink_radix_bits);
+        sink.minimum_reservation = sink.minimum_reservation * partition_multiplier;
         sink.external = true;
     }
+
     sink_radix_bits = radix_bits_p;
 }

@@ -590,7 +599,7 @@ idx_t RadixPartitionedHashTable::MaxThreads(GlobalSinkState &sink_p) const {

     // we cannot spill aggregate state memory
     const auto usable_memory = sink.temporary_memory_state->GetReservation() > sink.stored_allocators_size
-                                   ? sink.temporary_memory_state->GetReservation() - sink.max_partition_size
+                                   ? sink.temporary_memory_state->GetReservation() - sink.stored_allocators_size
                                   : 0;
     // This many partitions will fit given our reservation (at least 1))
     const auto partitions_fit = MaxValue<idx_t>(usable_memory / sink.max_partition_size, 1);
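The radix-HT hunks make the memory floor track the sink's actual partition count: minimum_reservation becomes atomic, and when SetRadixBitsInternal switches the sink to external (out-of-core) aggregation the reservation is scaled by the growth in partitions. A standalone sketch of that rescaling (simplified, assuming 2^bits partitions as in radix partitioning):

#include <atomic>
#include <cstdint>

struct Sink {
    std::atomic<uint64_t> minimum_reservation{0};
    bool external = false;
};

uint64_t NumberOfPartitions(uint64_t radix_bits) {
    return uint64_t(1) << radix_bits; // 2^bits partitions
}

void SetRadixBitsInternal(Sink &sink, uint64_t &sink_radix_bits, uint64_t radix_bits_p, bool external) {
    if (external) {
        // going external multiplies the partition count, and each partition
        // needs its own block budget, so the floor scales by the same factor
        const auto multiplier = NumberOfPartitions(radix_bits_p) / NumberOfPartitions(sink_radix_bits);
        sink.minimum_reservation = sink.minimum_reservation * multiplier;
        sink.external = true;
    }
    sink_radix_bits = radix_bits_p;
}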
package/src/duckdb/src/execution/sample/reservoir_sample.cpp
@@ -166,8 +166,15 @@ unique_ptr<ReservoirChunk> ReservoirSample::CreateNewSampleChunk(vector<LogicalT

 void ReservoirSample::Vacuum() {
     Verify();
-    if (NumSamplesCollected() <= FIXED_SAMPLE_SIZE || !reservoir_chunk || destroyed) {
+    bool do_vacuum = false;
+    // when it's not a stats sample, sometimes we neverr collect more than FIXED_SAMPLE_SIZE tuples
+    // but we still need to vacuum, so the rules are a little bit different.
+    if (!stats_sample && GetActiveSampleCount() <= static_cast<idx_t>(GetReservoirChunkCapacity<double>() * 0.8)) {
+        do_vacuum = true;
+    }
+    if (!do_vacuum && (NumSamplesCollected() <= FIXED_SAMPLE_SIZE || !reservoir_chunk || destroyed)) {
         // sample is destroyed or too small to shrink
+        // sample does not need to be vacuumed.
         return;
     }

@@ -201,7 +208,7 @@ unique_ptr<BlockingSample> ReservoirSample::Copy() const {
     // how many values should be copied
     idx_t values_to_copy = MinValue<idx_t>(GetActiveSampleCount(), sample_count);

-    auto new_sample_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity());
+    auto new_sample_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity<idx_t>());

     SelectionVector sel_copy(sel);

@@ -295,7 +302,7 @@ void ReservoirSample::SimpleMerge(ReservoirSample &other) {
     idx_t size_after_merge = MinValue<idx_t>(keep_from_other + keep_from_this, FIXED_SAMPLE_SIZE);

     // Check if appending the other samples to this will go over the sample chunk size
-    if (reservoir_chunk->chunk.size() + keep_from_other > GetReservoirChunkCapacity()) {
+    if (reservoir_chunk->chunk.size() + keep_from_other > GetReservoirChunkCapacity<idx_t>()) {
         Vacuum();
     }

@@ -542,7 +549,7 @@ void ReservoirSample::ExpandSerializedSample() {
     }

     auto types = reservoir_chunk->chunk.GetTypes();
-    auto new_res_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity());
+    auto new_res_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity<idx_t>());
     auto copy_count = reservoir_chunk->chunk.size();
     SelectionVector tmp_sel = SelectionVector(0, copy_count);
     UpdateSampleAppend(new_res_chunk->chunk, reservoir_chunk->chunk, tmp_sel, copy_count);
@@ -550,8 +557,10 @@
     std::swap(reservoir_chunk, new_res_chunk);
 }

-idx_t ReservoirSample::GetReservoirChunkCapacity() const {
-    return sample_count + (FIXED_SAMPLE_SIZE_MULTIPLIER * MinValue<idx_t>(sample_count, FIXED_SAMPLE_SIZE));
+template <typename T>
+T ReservoirSample::GetReservoirChunkCapacity() const {
+    return static_cast<T>(sample_count +
+                          (FIXED_SAMPLE_SIZE_MULTIPLIER * MinValue<idx_t>(sample_count, FIXED_SAMPLE_SIZE)));
 }

 idx_t ReservoirSample::FillReservoir(DataChunk &chunk) {
@@ -563,7 +572,7 @@ idx_t ReservoirSample::FillReservoir(DataChunk &chunk) {
         }
         auto types = chunk.GetTypes();
         // create a new sample chunk to store new samples
-        reservoir_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity());
+        reservoir_chunk = CreateNewSampleChunk(types, GetReservoirChunkCapacity<idx_t>());
     }

     idx_t actual_sample_index_start = GetActiveSampleCount();
@@ -694,9 +703,6 @@ void ReservoirSample::UpdateSampleAppend(DataChunk &this_, DataChunk &other, Sel
         return;
     }
     D_ASSERT(this_.GetTypes() == other.GetTypes());
-
-    // UpdateSampleAppend(this_, other, other_sel, append_count);
-    D_ASSERT(this_.GetTypes() == other.GetTypes());
     auto types = reservoir_chunk->chunk.GetTypes();

     for (idx_t i = 0; i < reservoir_chunk->chunk.ColumnCount(); i++) {
@@ -714,6 +720,9 @@ void ReservoirSample::AddToReservoir(DataChunk &chunk) {
         return;
     }

+    if (!reservoir_chunk && GetReservoirChunkCapacity<idx_t>() == 0) {
+        return;
+    }
     idx_t tuples_consumed = FillReservoir(chunk);
     base_reservoir_sample->num_entries_seen_total += tuples_consumed;
     D_ASSERT(sample_count == 0 || reservoir_chunk->chunk.size() >= 1);
@@ -752,8 +761,10 @@ void ReservoirSample::AddToReservoir(DataChunk &chunk) {
         base_reservoir_sample->num_entries_seen_total += chunk.size();
         return;
     }
+
     idx_t size = chunk_sel.size;
     D_ASSERT(size <= chunk.size());
+    D_ASSERT(reservoir_chunk->chunk.size() < GetReservoirChunkCapacity<idx_t>());

     UpdateSampleAppend(reservoir_chunk->chunk, chunk, chunk_sel.sel, size);

@@ -763,11 +774,12 @@ void ReservoirSample::AddToReservoir(DataChunk &chunk) {

     Verify();

-    // if we are over the threshold, we ned to swith to slow sampling.
+    // if we are over the threshold, we ned to switch to slow sampling.
     if (GetSamplingState() == SamplingState::RANDOM && GetTuplesSeen() >= FIXED_SAMPLE_SIZE * FAST_TO_SLOW_THRESHOLD) {
         ConvertToReservoirSample();
     }
-    if (reservoir_chunk->chunk.size() >= (GetReservoirChunkCapacity() - (static_cast<idx_t>(FIXED_SAMPLE_SIZE) * 3))) {
+    if (static_cast<int64_t>(reservoir_chunk->chunk.size()) >=
+        GetReservoirChunkCapacity<int64_t>() - (static_cast<int64_t>(FIXED_SAMPLE_SIZE) * 3)) {
         Vacuum();
     }
 }
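Most of the reservoir hunks replace GetReservoirChunkCapacity() with a templated GetReservoirChunkCapacity<T>() so the vacuum trigger in the last hunk can compare in signed arithmetic: with idx_t being unsigned, capacity minus 3 * FIXED_SAMPLE_SIZE wraps around once the capacity is small. A standalone sketch of the hazard and the fix (2048 as a stand-in sample size):

#include <cstdint>
#include <iostream>

int main() {
    const uint64_t FIXED_SAMPLE_SIZE = 2048; // stand-in value
    const uint64_t capacity = 100;           // a tiny sample: capacity < 3 * FIXED_SAMPLE_SIZE
    const uint64_t size = 50;

    // unsigned: 100 - 6144 wraps to a huge number, so the trigger never fires
    const bool buggy = size >= capacity - 3 * FIXED_SAMPLE_SIZE;

    // signed: 100 - 6144 = -6044, so any non-negative size correctly fires it
    const bool fixed = static_cast<int64_t>(size) >=
                       static_cast<int64_t>(capacity) - static_cast<int64_t>(3 * FIXED_SAMPLE_SIZE);

    std::cout << buggy << " vs " << fixed << "\n"; // prints "0 vs 1"
    return 0;
}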
package/src/duckdb/src/function/scalar/generic/getvariable.cpp
@@ -24,12 +24,12 @@ struct GetVariableBindData : FunctionData {

 static unique_ptr<FunctionData> GetVariableBind(ClientContext &context, ScalarFunction &function,
                                                 vector<unique_ptr<Expression>> &arguments) {
+    if (arguments[0]->HasParameter() || arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN) {
+        throw ParameterNotResolvedException();
+    }
     if (!arguments[0]->IsFoldable()) {
         throw NotImplementedException("getvariable requires a constant input");
     }
-    if (arguments[0]->HasParameter()) {
-        throw ParameterNotResolvedException();
-    }
     Value value;
     auto variable_name = ExpressionExecutor::EvaluateScalar(context, *arguments[0]);
     if (!variable_name.IsNull()) {
package/src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "0"
+#define DUCKDB_PATCH_VERSION "1"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 2
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.2.0"
+#define DUCKDB_VERSION "v1.2.1"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "5f5512b827"
+#define DUCKDB_SOURCE_ID "8e52ec4395"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/function/window/window_aggregate_states.cpp
@@ -7,6 +7,9 @@ WindowAggregateStates::WindowAggregateStates(const AggregateObject &aggr)
 }

 void WindowAggregateStates::Initialize(idx_t count) {
+    // Don't leak - every Initialize must be matched with a Destroy
+    D_ASSERT(states.empty());
+
     states.resize(count * state_size);
     auto state_ptr = states.data();
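The final hunk guards WindowAggregateStates::Initialize with a debug assert so a second Initialize without an intervening Destroy cannot silently leak the previous allocation's aggregate states. A standalone sketch of that pairing (simplified, fixed per-state footprint assumed):

#include <cassert>
#include <cstddef>
#include <vector>

struct AggregateStates {
    std::vector<unsigned char> states;
    static constexpr std::size_t state_size = 16; // hypothetical per-state footprint

    void Initialize(std::size_t count) {
        // every Initialize must be paired with a Destroy, or the old states leak
        assert(states.empty());
        states.resize(count * state_size);
    }
    void Destroy() {
        // a real implementation runs per-state destructors before releasing memory
        states.clear();
    }
};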