duckdb 1.1.0 → 1.1.1-dev3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. package/binding.gyp +2 -1
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  4. package/src/duckdb/extension/json/include/json_common.hpp +14 -4
  5. package/src/duckdb/extension/json/include/json_executors.hpp +11 -3
  6. package/src/duckdb/extension/json/json_extension.cpp +1 -1
  7. package/src/duckdb/extension/json/json_functions/json_extract.cpp +11 -3
  8. package/src/duckdb/extension/json/json_functions/json_value.cpp +4 -3
  9. package/src/duckdb/extension/json/json_functions.cpp +16 -7
  10. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  11. package/src/duckdb/extension/parquet/column_writer.cpp +54 -43
  12. package/src/duckdb/extension/parquet/geo_parquet.cpp +19 -0
  13. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +10 -6
  14. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +3 -3
  15. package/src/duckdb/extension/parquet/parquet_writer.cpp +2 -1
  16. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  17. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +1 -0
  18. package/src/duckdb/src/common/arrow/arrow_util.cpp +60 -0
  19. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +1 -53
  20. package/src/duckdb/src/common/cgroups.cpp +15 -24
  21. package/src/duckdb/src/common/constants.cpp +8 -0
  22. package/src/duckdb/src/common/enum_util.cpp +331 -326
  23. package/src/duckdb/src/common/http_util.cpp +5 -1
  24. package/src/duckdb/src/common/operator/cast_operators.cpp +6 -60
  25. package/src/duckdb/src/common/types/bit.cpp +1 -1
  26. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -1
  27. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +2 -1
  28. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +5 -0
  29. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +1 -1
  30. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -1
  31. package/src/duckdb/src/execution/index/art/iterator.cpp +17 -15
  32. package/src/duckdb/src/execution/index/art/prefix.cpp +9 -34
  33. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +4 -3
  34. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +1 -0
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -1
  36. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +2 -2
  37. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +23 -1
  38. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +33 -4
  39. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +23 -13
  40. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +23 -19
  41. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +12 -11
  42. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -14
  43. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +4 -4
  44. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +3 -1
  45. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -1
  46. package/src/duckdb/src/function/cast/decimal_cast.cpp +33 -3
  47. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +9 -0
  48. package/src/duckdb/src/function/table/arrow.cpp +34 -22
  49. package/src/duckdb/src/function/table/sniff_csv.cpp +4 -1
  50. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  51. package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +31 -0
  52. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +2 -16
  53. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +60 -0
  54. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -0
  55. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +0 -1
  56. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +2 -1
  57. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +9 -5
  58. package/src/duckdb/src/include/duckdb/execution/executor.hpp +1 -0
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +5 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +5 -1
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +5 -5
  62. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -0
  63. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -0
  64. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
  66. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +14 -5
  67. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/main/settings.hpp +4 -2
  69. package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +3 -0
  70. package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +7 -1
  72. package/src/duckdb/src/include/duckdb/planner/binder.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -1
  75. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -1
  76. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +7 -4
  77. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +2 -2
  78. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +2 -1
  79. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +4 -4
  80. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -4
  81. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +4 -2
  83. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +1 -0
  85. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -0
  86. package/src/duckdb/src/include/duckdb/transaction/transaction_manager.hpp +1 -1
  87. package/src/duckdb/src/include/duckdb.h +8 -8
  88. package/src/duckdb/src/main/appender.cpp +1 -1
  89. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +3 -3
  90. package/src/duckdb/src/main/capi/helper-c.cpp +4 -0
  91. package/src/duckdb/src/main/config.cpp +24 -11
  92. package/src/duckdb/src/main/database.cpp +6 -5
  93. package/src/duckdb/src/main/extension/extension_install.cpp +13 -8
  94. package/src/duckdb/src/main/extension/extension_load.cpp +10 -4
  95. package/src/duckdb/src/main/extension.cpp +1 -1
  96. package/src/duckdb/src/optimizer/filter_pushdown.cpp +10 -1
  97. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +9 -5
  98. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +14 -8
  99. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -0
  100. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -0
  101. package/src/duckdb/src/optimizer/optimizer.cpp +4 -1
  102. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -11
  103. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +1 -7
  104. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
  105. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +3 -0
  106. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
  107. package/src/duckdb/src/parser/keyword_helper.cpp +4 -0
  108. package/src/duckdb/src/parser/parser.cpp +20 -18
  109. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +8 -3
  110. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -0
  111. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +7 -1
  112. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +13 -0
  113. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -11
  114. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +27 -10
  115. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +24 -9
  116. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -3
  117. package/src/duckdb/src/planner/binder.cpp +5 -6
  118. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +1 -0
  119. package/src/duckdb/src/planner/expression_binder/select_binder.cpp +9 -0
  120. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +2 -2
  121. package/src/duckdb/src/planner/operator/logical_positional_join.cpp +1 -0
  122. package/src/duckdb/src/storage/buffer/block_handle.cpp +18 -21
  123. package/src/duckdb/src/storage/buffer/block_manager.cpp +12 -4
  124. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
  125. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +12 -2
  126. package/src/duckdb/src/storage/buffer_manager.cpp +3 -2
  127. package/src/duckdb/src/storage/compression/rle.cpp +5 -2
  128. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -1
  129. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +8 -7
  130. package/src/duckdb/src/storage/standard_buffer_manager.cpp +19 -20
  131. package/src/duckdb/src/storage/statistics/column_statistics.cpp +1 -2
  132. package/src/duckdb/src/storage/table/column_data.cpp +5 -2
  133. package/src/duckdb/src/storage/table/column_segment.cpp +2 -2
  134. package/src/duckdb/src/storage/table/row_group_collection.cpp +18 -14
  135. package/src/duckdb/src/storage/table/standard_column_data.cpp +3 -3
  136. package/src/duckdb/src/storage/wal_replay.cpp +2 -3
  137. package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -0
  138. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
  139. package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -2
  140. package/src/duckdb/third_party/libpg_query/include/pg_simplified_token.hpp +6 -4
  141. package/src/duckdb/third_party/libpg_query/include/postgres_parser.hpp +1 -1
  142. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +1 -1
  143. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +801 -799
  144. package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +6 -2
  145. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +0 -1
  146. package/src/duckdb/ub_src_common_arrow.cpp +2 -0
  147. package/vendor.py +1 -2
@@ -483,6 +483,66 @@ DUCKDB_API bool TryCast::Operation(double input, double &result, bool strict);
483
483
  //===--------------------------------------------------------------------===//
484
484
  // String -> Numeric Casts
485
485
  //===--------------------------------------------------------------------===//
486
+ static inline bool TryCastStringBool(const char *input_data, idx_t input_size, bool &result, bool strict) {
487
+ switch (input_size) {
488
+ case 1: {
489
+ unsigned char c = static_cast<uint8_t>(std::tolower(*input_data));
490
+ if (c == 't' || (!strict && c == 'y') || (!strict && c == '1')) {
491
+ result = true;
492
+ return true;
493
+ } else if (c == 'f' || (!strict && c == 'n') || (!strict && c == '0')) {
494
+ result = false;
495
+ return true;
496
+ }
497
+ return false;
498
+ }
499
+ case 2: {
500
+ unsigned char n = static_cast<uint8_t>(std::tolower(input_data[0]));
501
+ unsigned char o = static_cast<uint8_t>(std::tolower(input_data[1]));
502
+ if (n == 'n' && o == 'o') {
503
+ result = false;
504
+ return true;
505
+ }
506
+ return false;
507
+ }
508
+ case 3: {
509
+ unsigned char y = static_cast<uint8_t>(std::tolower(input_data[0]));
510
+ unsigned char e = static_cast<uint8_t>(std::tolower(input_data[1]));
511
+ unsigned char s = static_cast<uint8_t>(std::tolower(input_data[2]));
512
+ if (y == 'y' && e == 'e' && s == 's') {
513
+ result = true;
514
+ return true;
515
+ }
516
+ return false;
517
+ }
518
+ case 4: {
519
+ unsigned char t = static_cast<uint8_t>(std::tolower(input_data[0]));
520
+ unsigned char r = static_cast<uint8_t>(std::tolower(input_data[1]));
521
+ unsigned char u = static_cast<uint8_t>(std::tolower(input_data[2]));
522
+ unsigned char e = static_cast<uint8_t>(std::tolower(input_data[3]));
523
+ if (t == 't' && r == 'r' && u == 'u' && e == 'e') {
524
+ result = true;
525
+ return true;
526
+ }
527
+ return false;
528
+ }
529
+ case 5: {
530
+ unsigned char f = static_cast<uint8_t>(std::tolower(input_data[0]));
531
+ unsigned char a = static_cast<uint8_t>(std::tolower(input_data[1]));
532
+ unsigned char l = static_cast<uint8_t>(std::tolower(input_data[2]));
533
+ unsigned char s = static_cast<uint8_t>(std::tolower(input_data[3]));
534
+ unsigned char e = static_cast<uint8_t>(std::tolower(input_data[4]));
535
+ if (f == 'f' && a == 'a' && l == 'l' && s == 's' && e == 'e') {
536
+ result = false;
537
+ return true;
538
+ }
539
+ return false;
540
+ }
541
+ default:
542
+ return false;
543
+ }
544
+ }
545
+
486
546
  template <>
487
547
  DUCKDB_API bool TryCast::Operation(string_t input, bool &result, bool strict);
488
548
  template <>
@@ -32,6 +32,7 @@ public:
32
32
  explicit ColumnDataAllocator(BufferManager &buffer_manager);
33
33
  ColumnDataAllocator(ClientContext &context, ColumnDataAllocatorType allocator_type);
34
34
  ColumnDataAllocator(ColumnDataAllocator &allocator);
35
+ ~ColumnDataAllocator();
35
36
 
36
37
  //! Returns an allocator object to allocate with. This returns the allocator in IN_MEMORY_ALLOCATOR, and a buffer
37
38
  //! allocator in case of BUFFER_MANAGER_ALLOCATOR.
@@ -128,7 +128,6 @@ public:
128
128
 
129
129
  static int Sign(hugeint_t n);
130
130
  static hugeint_t Abs(hugeint_t n);
131
-
132
131
  // comparison operators
133
132
  // note that everywhere here we intentionally use bitwise ops
134
133
  // this is because they seem to be consistently much faster (benchmarked on a Macbook Pro)
@@ -20,7 +20,8 @@ public:
20
20
  RowDataBlock(MemoryTag tag, BufferManager &buffer_manager, idx_t capacity, idx_t entry_size)
21
21
  : capacity(capacity), entry_size(entry_size), count(0), byte_offset(0) {
22
22
  auto size = MaxValue<idx_t>(buffer_manager.GetBlockSize(), capacity * entry_size);
23
- buffer_manager.Allocate(tag, size, false, &block);
23
+ auto buffer_handle = buffer_manager.Allocate(tag, size, false);
24
+ block = buffer_handle.GetBlockHandle();
24
25
  D_ASSERT(BufferManager::GetAllocSize(size) == block->GetMemoryUsage());
25
26
  }
26
27
 
@@ -1,7 +1,14 @@
1
1
  #pragma once
2
2
 
3
- #include "duckdb/common/vector.hpp"
3
+ #include "duckdb/common/common.hpp"
4
+ #include "duckdb/storage/arena_allocator.hpp"
4
5
  #include "duckdb/common/algorithm.hpp"
6
+ #include "duckdb/common/pair.hpp"
7
+ #include "duckdb/common/types/string_type.hpp"
8
+ #include "duckdb/common/types/vector.hpp"
9
+ #include "duckdb/common/enums/order_type.hpp"
10
+ #include "duckdb/function/aggregate_function.hpp"
11
+ #include "duckdb/core_functions/create_sort_key.hpp"
5
12
 
6
13
  namespace duckdb {
7
14
 
@@ -294,11 +301,8 @@ struct MinMaxFallbackValue {
294
301
 
295
302
  static void PrepareData(Vector &input, const idx_t count, EXTRA_STATE &extra_state, UnifiedVectorFormat &format) {
296
303
  const OrderModifiers modifiers(OrderType::ASCENDING, OrderByNullType::NULLS_LAST);
297
- CreateSortKeyHelpers::CreateSortKey(input, count, modifiers, extra_state);
304
+ CreateSortKeyHelpers::CreateSortKeyWithValidity(input, extra_state, modifiers, count);
298
305
  input.Flatten(count);
299
- extra_state.Flatten(count);
300
- // Ensure the validity vectors match, because we want to ignore nulls
301
- FlatVector::Validity(extra_state).Initialize(FlatVector::Validity(input));
302
306
  extra_state.ToUnifiedFormat(count, format);
303
307
  }
304
308
  };
@@ -13,6 +13,7 @@
13
13
  #include "duckdb/common/mutex.hpp"
14
14
  #include "duckdb/common/pair.hpp"
15
15
  #include "duckdb/common/reference_map.hpp"
16
+ #include "duckdb/main/query_result.hpp"
16
17
  #include "duckdb/execution/task_error_manager.hpp"
17
18
  #include "duckdb/parallel/pipeline.hpp"
18
19
 
@@ -73,7 +73,7 @@ public:
73
73
  virtual ~BaseScanner() = default;
74
74
 
75
75
  //! Returns true if the scanner is finished
76
- bool FinishedFile();
76
+ bool FinishedFile() const;
77
77
 
78
78
  //! Parses data into a output_chunk
79
79
  virtual ScannerResult &ParseChunk();
@@ -97,7 +97,7 @@ public:
97
97
  return iterator.pos;
98
98
  }
99
99
 
100
- CSVStateMachine &GetStateMachine();
100
+ CSVStateMachine &GetStateMachine() const;
101
101
 
102
102
  shared_ptr<CSVFileScan> csv_file_scan;
103
103
 
@@ -313,6 +313,9 @@ protected:
313
313
  //! Internal function for parse chunk
314
314
  template <class T>
315
315
  void ParseChunkInternal(T &result) {
316
+ if (iterator.done) {
317
+ return;
318
+ }
316
319
  if (!initialized) {
317
320
  Initialize();
318
321
  initialized = true;
@@ -41,7 +41,8 @@ public:
41
41
  bool error = false;
42
42
  idx_t result_position = 0;
43
43
  bool cur_line_starts_as_comment = false;
44
-
44
+ //! How many rows fit a given column count
45
+ map<idx_t, idx_t> rows_per_column_count;
45
46
  //! Adds a Value to the result
46
47
  static inline void AddValue(ColumnCountResult &result, idx_t buffer_pos);
47
48
  //! Adds a Row to the result
@@ -57,6 +58,9 @@ public:
57
58
 
58
59
  static inline void SetComment(ColumnCountResult &result, idx_t buffer_pos);
59
60
 
61
+ //! Returns the column count
62
+ idx_t GetMostFrequentColumnCount() const;
63
+
60
64
  inline void InternalAddRow();
61
65
  };
62
66
 
@@ -56,7 +56,7 @@ struct DialectCandidates {
56
56
  //! Candidates for the comment
57
57
  vector<char> comment_candidates;
58
58
  //! Quote-Rule Candidates
59
- vector<QuoteRule> quoterule_candidates;
59
+ vector<QuoteRule> quote_rule_candidates;
60
60
  //! Candidates for the quote option
61
61
  unordered_map<uint8_t, vector<char>> quote_candidates_map;
62
62
  //! Candidates for the escape option
@@ -181,7 +181,7 @@ private:
181
181
  void RefineCandidates();
182
182
 
183
183
  //! Checks if candidate still produces good values for the next chunk
184
- bool RefineCandidateNextChunk(ColumnCountScanner &candidate);
184
+ bool RefineCandidateNextChunk(ColumnCountScanner &candidate) const;
185
185
 
186
186
  //! ------------------------------------------------------//
187
187
  //! ------------------- Type Detection ------------------ //
@@ -198,7 +198,7 @@ private:
198
198
  void InitializeDateAndTimeStampDetection(CSVStateMachine &candidate, const string &separator,
199
199
  const LogicalType &sql_type);
200
200
  //! Sets user defined date and time formats (if any)
201
- void SetUserDefinedDateTimeFormat(CSVStateMachine &candidate);
201
+ void SetUserDefinedDateTimeFormat(CSVStateMachine &candidate) const;
202
202
  //! Functions that performs detection for date and timestamp formats
203
203
  void DetectDateAndTimeStampFormats(CSVStateMachine &candidate, const LogicalType &sql_type, const string &separator,
204
204
  const string_t &dummy_val);
@@ -235,10 +235,10 @@ private:
235
235
  //! ------------------------------------------------------//
236
236
  void DetectHeader();
237
237
  static bool DetectHeaderWithSetColumn(ClientContext &context, vector<HeaderValue> &best_header_row,
238
- SetColumns &set_columns, CSVReaderOptions &options);
238
+ const SetColumns &set_columns, CSVReaderOptions &options);
239
239
  static vector<string>
240
240
  DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &best_header_row, CSVStateMachine &state_machine,
241
- SetColumns &set_columns,
241
+ const SetColumns &set_columns,
242
242
  unordered_map<idx_t, vector<LogicalType>> &best_sql_types_candidates_per_column_idx,
243
243
  CSVReaderOptions &options, CSVErrorHandler &error_handler);
244
244
  vector<string> names;
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/execution/physical_operator.hpp"
12
+ #include "duckdb/main/query_result.hpp"
12
13
  #include "duckdb/common/enums/statement_type.hpp"
13
14
 
14
15
  namespace duckdb {
@@ -8,6 +8,8 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include <utility>
12
+
11
13
  #include "duckdb/common/types.hpp"
12
14
  #include "duckdb/common/unordered_map.hpp"
13
15
  #include "duckdb/common/vector.hpp"
@@ -22,6 +24,10 @@ public:
22
24
  explicit ArrowType(LogicalType type_p, unique_ptr<ArrowTypeInfo> type_info = nullptr)
23
25
  : type(std::move(type_p)), type_info(std::move(type_info)) {
24
26
  }
27
+ explicit ArrowType(string error_message_p, bool not_implemented_p = false)
28
+ : type(LogicalTypeId::INVALID), type_info(nullptr), error_message(std::move(error_message_p)),
29
+ not_implemented(not_implemented_p) {
30
+ }
25
31
 
26
32
  public:
27
33
  LogicalType GetDuckType(bool use_dictionary = false) const;
@@ -37,6 +43,7 @@ public:
37
43
  const T &GetTypeInfo() const {
38
44
  return type_info->Cast<T>();
39
45
  }
46
+ void ThrowIfInvalid() const;
40
47
 
41
48
  private:
42
49
  LogicalType type;
@@ -45,6 +52,10 @@ private:
45
52
  //! Is run-end-encoded
46
53
  bool run_end_encoded = false;
47
54
  unique_ptr<ArrowTypeInfo> type_info;
55
+ //! Error message in case of an invalid type (i.e., from an unsupported extension)
56
+ string error_message;
57
+ //! In case of an error do we throw not implemented?
58
+ bool not_implemented = false;
48
59
  };
49
60
 
50
61
  using arrow_column_map_t = unordered_map<idx_t, unique_ptr<ArrowType>>;
@@ -245,7 +245,7 @@ struct DBConfigOptions {
245
245
  //! The peak allocation threshold at which to flush the allocator after completing a task (1 << 27, ~128MB)
246
246
  idx_t allocator_flush_threshold = 134217728ULL;
247
247
  //! If bulk deallocation larger than this occurs, flush outstanding allocations (1 << 30, ~1GB)
248
- idx_t allocator_bulk_deallocation_flush_threshold = 1073741824ULL;
248
+ idx_t allocator_bulk_deallocation_flush_threshold = 536870912ULL;
249
249
  //! Whether the allocator background thread is enabled
250
250
  bool allocator_background_threads = false;
251
251
  //! DuckDB API surface
@@ -365,7 +365,7 @@ public:
365
365
  DUCKDB_API IndexTypeSet &GetIndexTypes();
366
366
  static idx_t GetSystemMaxThreads(FileSystem &fs);
367
367
  static idx_t GetSystemAvailableMemory(FileSystem &fs);
368
- static idx_t ParseMemoryLimitSlurm(const string &arg);
368
+ static optional_idx ParseMemoryLimitSlurm(const string &arg);
369
369
  void SetDefaultMaxMemory();
370
370
  void SetDefaultTempDirectory();
371
371
 
@@ -50,6 +50,7 @@ struct ParsedExtensionMetaData {
50
50
  string duckdb_capi_version;
51
51
  string extension_version;
52
52
  string signature;
53
+ string extension_abi_metadata;
53
54
 
54
55
  bool AppearsValid() {
55
56
  return magic_value == EXPECTED_MAGIC_VALUE;
@@ -293,6 +293,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
293
293
  {"st_envelope_agg", "spatial", CatalogType::AGGREGATE_FUNCTION_ENTRY},
294
294
  {"st_equals", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
295
295
  {"st_extent", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
296
+ {"st_extent_agg", "spatial", CatalogType::AGGREGATE_FUNCTION_ENTRY},
296
297
  {"st_extent_approx", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
297
298
  {"st_exteriorring", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
298
299
  {"st_flipcoordinates", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
@@ -331,6 +332,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
331
332
  {"st_makevalid", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
332
333
  {"st_mmax", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
333
334
  {"st_mmin", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
335
+ {"st_multi", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
334
336
  {"st_ngeometries", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
335
337
  {"st_ninteriorrings", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
336
338
  {"st_normalize", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
@@ -511,11 +513,18 @@ static constexpr ExtensionEntry EXTENSION_SECRET_TYPES[] = {
511
513
  // Note: these are currently hardcoded in scripts/generate_extensions_function.py
512
514
  // TODO: automate by passing though to script via duckdb
513
515
  static constexpr ExtensionEntry EXTENSION_SECRET_PROVIDERS[] = {
514
- {"s3/config", "httpfs"}, {"gcs/config", "httpfs"},
515
- {"r2/config", "httpfs"}, {"s3/credential_chain", "aws"},
516
- {"gcs/credential_chain", "aws"}, {"r2/credential_chain", "aws"},
517
- {"azure/config", "azure"}, {"azure/credential_chain", "azure"},
518
- {"huggingface/config", "httfps"}, {"huggingface/credential_chain", "httpfs"},
516
+ {"s3/config", "httpfs"},
517
+ {"gcs/config", "httpfs"},
518
+ {"r2/config", "httpfs"},
519
+ {"s3/credential_chain", "aws"},
520
+ {"gcs/credential_chain", "aws"},
521
+ {"r2/credential_chain", "aws"},
522
+ {"azure/access_token", "azure"},
523
+ {"azure/config", "azure"},
524
+ {"azure/credential_chain", "azure"},
525
+ {"azure/service_principal", "azure"},
526
+ {"huggingface/config", "httfps"},
527
+ {"huggingface/credential_chain", "httpfs"},
519
528
  {"bearer/config", "httpfs"}}; // EXTENSION_SECRET_PROVIDERS
520
529
 
521
530
  static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = {
@@ -124,7 +124,7 @@ public:
124
124
 
125
125
  static bool CheckExtensionSignature(FileHandle &handle, ParsedExtensionMetaData &parsed_metadata,
126
126
  const bool allow_community_extensions);
127
- static ParsedExtensionMetaData ParseExtensionMetaData(const char *metadata);
127
+ static ParsedExtensionMetaData ParseExtensionMetaData(const char *metadata) noexcept;
128
128
  static ParsedExtensionMetaData ParseExtensionMetaData(FileHandle &handle);
129
129
 
130
130
  //! Get the extension url template, containing placeholders for version, platform and extension name
@@ -506,7 +506,7 @@ struct HTTPProxyUsername {
506
506
  };
507
507
 
508
508
  struct HTTPProxyPassword {
509
- static constexpr const char *Name = "http_proxy";
509
+ static constexpr const char *Name = "http_proxy_password";
510
510
  static constexpr const char *Description = "Password for HTTP proxy";
511
511
  static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
512
512
  static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
@@ -596,7 +596,9 @@ struct StreamingBufferSize {
596
596
  struct MaximumTempDirectorySize {
597
597
  static constexpr const char *Name = "max_temp_directory_size";
598
598
  static constexpr const char *Description =
599
- "The maximum amount of data stored inside the 'temp_directory' (when set) (e.g. 1GB)";
599
+ "The maximum amount of data stored inside the 'temp_directory' (when set). If the `temp_directory` is set to "
600
+ "an existing directory, this option defaults to the available disk space on "
601
+ "that drive. Otherwise, it defaults to 0 (implying that the temporary directory is not used).";
600
602
  static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
601
603
  static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
602
604
  static void ResetGlobal(DatabaseInstance *db, DBConfig &config);
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
+ #include "duckdb/parser/simplified_token.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
@@ -17,6 +18,8 @@ public:
17
18
  //! Returns true if the given text matches a keyword of the parser
18
19
  static bool IsKeyword(const string &text);
19
20
 
21
+ static KeywordCategory KeywordCategoryType(const string &text);
22
+
20
23
  static string EscapeQuotes(const string &text, char quote = '"');
21
24
 
22
25
  //! Returns true if the given string needs to be quoted when written as an identifier
@@ -45,7 +45,7 @@ public:
45
45
  static vector<SimplifiedToken> Tokenize(const string &query);
46
46
 
47
47
  //! Returns true if the given text matches a keyword of the parser
48
- static bool IsKeyword(const string &text);
48
+ static KeywordCategory IsKeyword(const string &text);
49
49
  //! Returns a list of all keywords in the parser
50
50
  static vector<ParserKeyword> KeywordList();
51
51
 
@@ -28,7 +28,13 @@ struct SimplifiedToken {
28
28
  idx_t start;
29
29
  };
30
30
 
31
- enum class KeywordCategory : uint8_t { KEYWORD_RESERVED, KEYWORD_UNRESERVED, KEYWORD_TYPE_FUNC, KEYWORD_COL_NAME };
31
+ enum class KeywordCategory : uint8_t {
32
+ KEYWORD_RESERVED,
33
+ KEYWORD_UNRESERVED,
34
+ KEYWORD_TYPE_FUNC,
35
+ KEYWORD_COL_NAME,
36
+ KEYWORD_NONE
37
+ };
32
38
 
33
39
  struct ParserKeyword {
34
40
  string name;
@@ -416,6 +416,8 @@ private:
416
416
  unique_ptr<BoundTableRef> BindShowTable(ShowRef &ref);
417
417
  unique_ptr<BoundTableRef> BindSummarize(ShowRef &ref);
418
418
 
419
+ unique_ptr<LogicalOperator> UnionOperators(vector<unique_ptr<LogicalOperator>> nodes);
420
+
419
421
  private:
420
422
  Binder(ClientContext &context, shared_ptr<Binder> parent, BinderType binder_type);
421
423
  };
@@ -18,10 +18,12 @@ public:
18
18
  SelectBinder(Binder &binder, ClientContext &context, BoundSelectNode &node, BoundGroupInformation &info);
19
19
 
20
20
  protected:
21
+ void ThrowIfUnnestInLambda(const ColumnBinding &column_binding) override;
21
22
  BindResult BindUnnest(FunctionExpression &function, idx_t depth, bool root_expression) override;
22
23
  BindResult BindColumnRef(unique_ptr<ParsedExpression> &expr_ptr, idx_t depth, bool root_expression) override;
23
24
 
24
25
  bool QualifyColumnAlias(const ColumnRefExpression &colref) override;
26
+ unique_ptr<ParsedExpression> GetSQLValueFunction(const string &column_name) override;
25
27
 
26
28
  protected:
27
29
  idx_t unnest_level = 0;
@@ -21,6 +21,7 @@
21
21
  #include "duckdb/catalog/catalog_entry_retriever.hpp"
22
22
  #include "duckdb/planner/expression/bound_lambda_expression.hpp"
23
23
  #include "duckdb/function/scalar_function.hpp"
24
+ #include "duckdb/planner/column_binding.hpp"
24
25
 
25
26
  namespace duckdb {
26
27
 
@@ -186,7 +187,7 @@ protected:
186
187
  const optional_ptr<bind_lambda_function_t> bind_lambda_function,
187
188
  const LogicalType &list_child_type);
188
189
 
189
- static unique_ptr<ParsedExpression> GetSQLValueFunction(const string &column_name);
190
+ virtual unique_ptr<ParsedExpression> GetSQLValueFunction(const string &column_name);
190
191
 
191
192
  LogicalType ResolveOperatorType(OperatorExpression &op, vector<unique_ptr<Expression>> &children);
192
193
  LogicalType ResolveCoalesceType(OperatorExpression &op, vector<unique_ptr<Expression>> &children);
@@ -219,6 +220,7 @@ protected:
219
220
  //! Returns true if the function name is an alias for the UNNEST function
220
221
  static bool IsUnnestFunction(const string &function_name);
221
222
  BindResult TryBindLambdaOrJson(FunctionExpression &function, idx_t depth, CatalogEntry &func);
223
+ virtual void ThrowIfUnnestInLambda(const ColumnBinding &column_binding);
222
224
  };
223
225
 
224
226
  } // namespace duckdb
@@ -88,7 +88,9 @@ public:
88
88
  //! Convert an existing in-memory buffer into a persistent disk-backed block
89
89
  shared_ptr<BlockHandle> ConvertToPersistent(block_id_t block_id, shared_ptr<BlockHandle> old_block);
90
90
 
91
- void UnregisterBlock(block_id_t block_id);
91
+ void UnregisterBlock(BlockHandle &block);
92
+ //! UnregisterBlock, only accepts non-temporary block ids
93
+ void UnregisterBlock(block_id_t id);
92
94
 
93
95
  //! Returns a reference to the metadata manager of this block manager.
94
96
  MetadataManager &GetMetadataManager();
@@ -54,7 +54,7 @@ struct TempBufferPoolReservation : BufferPoolReservation {
54
54
  }
55
55
  };
56
56
 
57
- class BlockHandle {
57
+ class BlockHandle : public enable_shared_from_this<BlockHandle> {
58
58
  friend class BlockManager;
59
59
  friend struct BufferEvictionNode;
60
60
  friend class BufferHandle;
@@ -96,6 +96,10 @@ public:
96
96
  unswizzled = unswizzler;
97
97
  }
98
98
 
99
+ MemoryTag GetMemoryTag() const {
100
+ return tag;
101
+ }
102
+
99
103
  inline void SetDestroyBufferUpon(DestroyBufferUpon destroy_buffer_upon_p) {
100
104
  lock_guard<mutex> guard(lock);
101
105
  destroy_buffer_upon = destroy_buffer_upon_p;
@@ -117,9 +121,8 @@ public:
117
121
  }
118
122
 
119
123
  private:
120
- static BufferHandle Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileBuffer> buffer = nullptr);
121
- static BufferHandle LoadFromBuffer(shared_ptr<BlockHandle> &handle, data_ptr_t data,
122
- unique_ptr<FileBuffer> reusable_buffer);
124
+ BufferHandle Load(unique_ptr<FileBuffer> buffer = nullptr);
125
+ BufferHandle LoadFromBuffer(data_ptr_t data, unique_ptr<FileBuffer> reusable_buffer);
123
126
  unique_ptr<FileBuffer> UnloadAndTakeBlock();
124
127
  void Unload();
125
128
  bool CanUnload();
@@ -18,7 +18,7 @@ class FileBuffer;
18
18
  class BufferHandle {
19
19
  public:
20
20
  DUCKDB_API BufferHandle();
21
- DUCKDB_API BufferHandle(shared_ptr<BlockHandle> handle, FileBuffer *node);
21
+ DUCKDB_API explicit BufferHandle(shared_ptr<BlockHandle> handle);
22
22
  DUCKDB_API ~BufferHandle();
23
23
  // disable copy constructors
24
24
  BufferHandle(const BufferHandle &other) = delete;
@@ -53,7 +53,7 @@ private:
53
53
  //! The block handle
54
54
  shared_ptr<BlockHandle> handle;
55
55
  //! The managed buffer node
56
- FileBuffer *node;
56
+ optional_ptr<FileBuffer> node;
57
57
  };
58
58
 
59
59
  } // namespace duckdb
@@ -41,7 +41,7 @@ class BufferPool {
41
41
  friend class StandardBufferManager;
42
42
 
43
43
  public:
44
- explicit BufferPool(idx_t maximum_memory, bool track_eviction_timestamps);
44
+ BufferPool(idx_t maximum_memory, bool track_eviction_timestamps, idx_t allocator_bulk_deallocation_flush_threshold);
45
45
  virtual ~BufferPool();
46
46
 
47
47
  //! Set a new memory limit to the buffer pool, throws an exception if the new limit is too low and not enough
@@ -50,6 +50,7 @@ public:
50
50
 
51
51
  //! If bulk deallocation larger than this occurs, flush outstanding allocations
52
52
  void SetAllocatorBulkDeallocationFlushThreshold(idx_t threshold);
53
+ idx_t GetAllocatorBulkDeallocationFlushThreshold();
53
54
 
54
55
  void UpdateUsedMemory(MemoryTag tag, int64_t size);
55
56
 
@@ -34,8 +34,7 @@ public:
34
34
  }
35
35
 
36
36
  public:
37
- virtual BufferHandle Allocate(MemoryTag tag, idx_t block_size, bool can_destroy = true,
38
- shared_ptr<BlockHandle> *block = nullptr) = 0;
37
+ virtual BufferHandle Allocate(MemoryTag tag, idx_t block_size, bool can_destroy = true) = 0;
39
38
  //! Reallocate an in-memory buffer that is pinned.
40
39
  virtual void ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size) = 0;
41
40
  virtual BufferHandle Pin(shared_ptr<BlockHandle> &handle) = 0;
@@ -102,8 +101,9 @@ protected:
102
101
  virtual void PurgeQueue(FileBufferType type) = 0;
103
102
  virtual void AddToEvictionQueue(shared_ptr<BlockHandle> &handle);
104
103
  virtual void WriteTemporaryBuffer(MemoryTag tag, block_id_t block_id, FileBuffer &buffer);
105
- virtual unique_ptr<FileBuffer> ReadTemporaryBuffer(MemoryTag tag, block_id_t id, unique_ptr<FileBuffer> buffer);
106
- virtual void DeleteTemporaryFile(block_id_t id);
104
+ virtual unique_ptr<FileBuffer> ReadTemporaryBuffer(MemoryTag tag, BlockHandle &block,
105
+ unique_ptr<FileBuffer> buffer);
106
+ virtual void DeleteTemporaryFile(BlockHandle &block);
107
107
  };
108
108
 
109
109
  } // namespace duckdb
@@ -62,8 +62,7 @@ public:
62
62
 
63
63
  //! Allocate an in-memory buffer with a single pin.
64
64
  //! The allocated memory is released when the buffer handle is destroyed.
65
- DUCKDB_API BufferHandle Allocate(MemoryTag tag, idx_t block_size, bool can_destroy = true,
66
- shared_ptr<BlockHandle> *block = nullptr) final;
65
+ DUCKDB_API BufferHandle Allocate(MemoryTag tag, idx_t block_size, bool can_destroy = true) final;
67
66
 
68
67
  //! Reallocate an in-memory buffer that is pinned.
69
68
  void ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size) final;
@@ -129,12 +128,12 @@ protected:
129
128
  //! Write a temporary buffer to disk
130
129
  void WriteTemporaryBuffer(MemoryTag tag, block_id_t block_id, FileBuffer &buffer) final;
131
130
  //! Read a temporary buffer from disk
132
- unique_ptr<FileBuffer> ReadTemporaryBuffer(MemoryTag tag, block_id_t id,
131
+ unique_ptr<FileBuffer> ReadTemporaryBuffer(MemoryTag tag, BlockHandle &block,
133
132
  unique_ptr<FileBuffer> buffer = nullptr) final;
134
133
  //! Get the path of the temporary buffer
135
134
  string GetTemporaryPath(block_id_t id);
136
135
 
137
- void DeleteTemporaryFile(block_id_t id) final;
136
+ void DeleteTemporaryFile(BlockHandle &block) final;
138
137
 
139
138
  void RequireTemporaryDirectory();
140
139
 
@@ -88,7 +88,7 @@ public:
88
88
  //! Whether or not the column has any updates
89
89
  bool HasUpdates() const;
90
90
  //! Whether or not we can scan an entire vector
91
- virtual ScanVectorType GetVectorScanType(ColumnScanState &state, idx_t scan_count);
91
+ virtual ScanVectorType GetVectorScanType(ColumnScanState &state, idx_t scan_count, Vector &result);
92
92
 
93
93
  //! Initialize prefetch state with required I/O data for the next N rows
94
94
  virtual void InitializePrefetch(PrefetchState &prefetch_state, ColumnScanState &scan_state, idx_t rows);
@@ -33,6 +33,7 @@ class MetadataManager;
33
33
  struct VacuumState;
34
34
  struct CollectionCheckpointState;
35
35
  struct PersistentCollectionData;
36
+ class CheckpointTask;
36
37
 
37
38
  class RowGroupCollection {
38
39
  public:
@@ -98,8 +99,9 @@ public:
98
99
 
99
100
  void InitializeVacuumState(CollectionCheckpointState &checkpoint_state, VacuumState &state,
100
101
  vector<SegmentNode<RowGroup>> &segments);
101
- bool ScheduleVacuumTasks(CollectionCheckpointState &checkpoint_state, VacuumState &state, idx_t segment_idx);
102
- void ScheduleCheckpointTask(CollectionCheckpointState &checkpoint_state, idx_t segment_idx);
102
+ bool ScheduleVacuumTasks(CollectionCheckpointState &checkpoint_state, VacuumState &state, idx_t segment_idx,
103
+ bool schedule_vacuum);
104
+ unique_ptr<CheckpointTask> GetCheckpointTask(CollectionCheckpointState &checkpoint_state, idx_t segment_idx);
103
105
 
104
106
  void CommitDropColumn(idx_t index);
105
107
  void CommitDropTable();
@@ -25,7 +25,7 @@ public:
25
25
  public:
26
26
  void SetStart(idx_t new_start) override;
27
27
 
28
- ScanVectorType GetVectorScanType(ColumnScanState &state, idx_t scan_count) override;
28
+ ScanVectorType GetVectorScanType(ColumnScanState &state, idx_t scan_count, Vector &result) override;
29
29
  void InitializePrefetch(PrefetchState &prefetch_state, ColumnScanState &scan_state, idx_t rows) override;
30
30
  void InitializeScan(ColumnScanState &state) override;
31
31
  void InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) override;
@@ -10,6 +10,7 @@
10
10
 
11
11
  #include "duckdb/transaction/transaction.hpp"
12
12
  #include "duckdb/common/reference_map.hpp"
13
+ #include "duckdb/common/error_data.hpp"
13
14
 
14
15
  namespace duckdb {
15
16
  class CheckpointLock;