duckdb 1.2.1-dev6.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
  3. package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
  4. package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
  5. package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
  6. package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
  7. package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
  8. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
  9. package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
  10. package/src/duckdb/extension/json/json_extension.cpp +8 -3
  11. package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
  12. package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
  13. package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
  14. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
  15. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
  16. package/src/duckdb/src/catalog/catalog.cpp +12 -0
  17. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  18. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
  19. package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
  20. package/src/duckdb/src/common/bind_helpers.cpp +3 -0
  21. package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
  22. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  23. package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
  24. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  25. package/src/duckdb/src/execution/index/art/art.cpp +19 -6
  26. package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
  27. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
  28. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  29. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
  30. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
  31. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
  34. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
  35. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
  36. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
  38. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
  39. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
  40. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
  41. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
  42. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
  43. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
  44. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
  45. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
  46. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
  47. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
  48. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  49. package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
  50. package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
  51. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
  52. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
  53. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
  55. package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
  57. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
  58. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
  62. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
  66. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
  67. package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
  68. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
  69. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
  71. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
  72. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
  73. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
  74. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
  75. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
  76. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
  77. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
  78. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
  79. package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
  80. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
  81. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
  83. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
  84. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
  85. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
  86. package/src/duckdb/src/include/duckdb.h +495 -480
  87. package/src/duckdb/src/main/attached_database.cpp +1 -1
  88. package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
  89. package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
  90. package/src/duckdb/src/main/config.cpp +7 -1
  91. package/src/duckdb/src/main/database.cpp +8 -8
  92. package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
  93. package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
  94. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  95. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
  96. package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
  97. package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
  98. package/src/duckdb/src/parallel/executor_task.cpp +10 -6
  99. package/src/duckdb/src/parallel/task_executor.cpp +4 -1
  100. package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
  101. package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
  102. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
  103. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
  104. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
  105. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
  106. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
  107. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
  108. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  109. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
  110. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  111. package/src/duckdb/src/storage/storage_manager.cpp +2 -2
  112. package/src/duckdb/src/storage/table/row_group.cpp +5 -6
  113. package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
  114. package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
  115. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
  116. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
  117. package/src/duckdb/ub_src_parallel.cpp +2 -0
package/src/duckdb/src/storage/table/row_group.cpp
@@ -430,14 +430,13 @@ bool RowGroup::CheckZonemap(ScanFilterInfo &filters) {
430
430
  if (prune_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) {
431
431
  return false;
432
432
  }
433
- if (prune_result == FilterPropagateResult::FILTER_ALWAYS_TRUE) {
434
- // filter is always true - no need to check it
435
- // label the filter as always true so we don't need to check it anymore
436
- filters.SetFilterAlwaysTrue(i);
437
- }
438
433
  if (filter.filter_type == TableFilterType::OPTIONAL_FILTER) {
439
434
  // these are only for row group checking, set as always true so we don't check it
440
435
  filters.SetFilterAlwaysTrue(i);
436
+ } else if (prune_result == FilterPropagateResult::FILTER_ALWAYS_TRUE) {
437
+ // filter is always true - no need to check it
438
+ // label the filter as always true so we don't need to check it anymore
439
+ filters.SetFilterAlwaysTrue(i);
441
440
  }
442
441
  }
443
442
  return true;
@@ -619,7 +618,7 @@ void RowGroup::TemplatedScan(TransactionData transaction, CollectionScanState &s
619
618
  if (prune_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) {
620
619
  // We can just break out of the loop here.
621
620
  approved_tuple_count = 0;
622
- break;
621
+ continue;
623
622
  }
624
623
 
625
624
  // Generate row ids
package/src/duckdb/src/storage/table/scan_state.cpp
@@ -25,6 +25,9 @@ void TableScanState::Initialize(vector<StorageIndex> column_ids_p, optional_ptr<
25
25
  if (table_sampling) {
26
26
  sampling_info.do_system_sample = table_sampling->method == SampleMethod::SYSTEM_SAMPLE;
27
27
  sampling_info.sample_rate = table_sampling->sample_size.GetValue<double>() / 100.0;
28
+ if (table_sampling->seed.IsValid()) {
29
+ table_state.random.SetSeed(table_sampling->seed.GetIndex());
30
+ }
28
31
  }
29
32
  }
30
33
 
@@ -96,6 +99,9 @@ void ScanFilterInfo::CheckAllFilters() {
96
99
 
97
100
  void ScanFilterInfo::SetFilterAlwaysTrue(idx_t filter_idx) {
98
101
  auto &filter = filter_list[filter_idx];
102
+ if (filter.always_true) {
103
+ return;
104
+ }
99
105
  filter.always_true = true;
100
106
  column_has_filter[filter.scan_column_index] = false;
101
107
  always_true_filters++;
package/src/duckdb/src/transaction/duck_transaction.cpp
@@ -192,6 +192,7 @@ bool DuckTransaction::ShouldWriteToWAL(AttachedDatabase &db) {
192
192
  }
193
193
 
194
194
  ErrorData DuckTransaction::WriteToWAL(AttachedDatabase &db, unique_ptr<StorageCommitState> &commit_state) noexcept {
195
+ ErrorData error_data;
195
196
  try {
196
197
  D_ASSERT(ShouldWriteToWAL(db));
197
198
  auto &storage_manager = db.GetStorageManager();
@@ -206,13 +207,20 @@ ErrorData DuckTransaction::WriteToWAL(AttachedDatabase &db, unique_ptr<StorageCo
206
207
  storage_manager.GetBlockManager().FileSync();
207
208
  }
208
209
  } catch (std::exception &ex) {
209
- if (commit_state) {
210
+ // Call RevertCommit() outside this try-catch as it itself may throw
211
+ error_data = ErrorData(ex);
212
+ }
213
+
214
+ if (commit_state && error_data.HasError()) {
215
+ try {
210
216
  commit_state->RevertCommit();
211
217
  commit_state.reset();
218
+ } catch (std::exception &) {
219
+ // Ignore this error. If we fail to RevertCommit(), just return the original exception
212
220
  }
213
- return ErrorData(ex);
214
221
  }
215
- return ErrorData();
222
+
223
+ return error_data;
216
224
  }
217
225
 
218
226
  ErrorData DuckTransaction::Commit(AttachedDatabase &db, transaction_t new_commit_id,
package/src/duckdb/src/transaction/duck_transaction_manager.cpp
@@ -194,7 +194,7 @@ void DuckTransactionManager::Checkpoint(ClientContext &context, bool force) {
194
194
  // we cannot do a full checkpoint if any transaction needs to read old data
195
195
  options.type = CheckpointType::CONCURRENT_CHECKPOINT;
196
196
  }
197
- storage_manager.CreateCheckpoint(options);
197
+ storage_manager.CreateCheckpoint(context, options);
198
198
  }
199
199
 
200
200
  unique_ptr<StorageLockKey> DuckTransactionManager::SharedCheckpointLock() {
@@ -295,7 +295,7 @@ ErrorData DuckTransactionManager::CommitTransaction(ClientContext &context, Tran
295
295
  options.action = CheckpointAction::ALWAYS_CHECKPOINT;
296
296
  options.type = checkpoint_decision.type;
297
297
  auto &storage_manager = db.GetStorageManager();
298
- storage_manager.CreateCheckpoint(options);
298
+ storage_manager.CreateCheckpoint(context, options);
299
299
  }
300
300
  return error;
301
301
  }
package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h
@@ -1254,6 +1254,23 @@ public:
1254
1254
  return size;
1255
1255
  }
1256
1256
 
1257
+
1258
+ // Returns the number of producers currently associated with the queue.
1259
+ size_t size_producers_approx() const
1260
+ {
1261
+ size_t size = 0;
1262
+ for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
1263
+ size += 1;
1264
+ }
1265
+ return size;
1266
+ }
1267
+
1268
+ // Returns the number of elements currently in the queue for a specific producer.
1269
+ size_t size_producer_approx(producer_token_t const& producer) const
1270
+ {
1271
+ return static_cast<ExplicitProducer*>(producer.producer)->size_approx();
1272
+ }
1273
+
1257
1274
 
1258
1275
  // Returns true if the underlying atomic variables used by
1259
1276
  // the queue are lock-free (they should be on most platforms).
package/src/duckdb/ub_src_parallel.cpp
@@ -26,6 +26,8 @@
26
26
 
27
27
  #include "src/parallel/task_executor.cpp"
28
28
 
29
+ #include "src/parallel/task_notifier.cpp"
30
+
29
31
  #include "src/parallel/task_scheduler.cpp"
30
32
 
31
33
  #include "src/parallel/thread_context.cpp"