duckdb 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/binding.gyp +2 -1
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  4. package/src/duckdb/extension/json/include/json_common.hpp +14 -4
  5. package/src/duckdb/extension/json/include/json_executors.hpp +11 -3
  6. package/src/duckdb/extension/json/json_extension.cpp +1 -1
  7. package/src/duckdb/extension/json/json_functions/json_extract.cpp +11 -3
  8. package/src/duckdb/extension/json/json_functions/json_value.cpp +4 -3
  9. package/src/duckdb/extension/json/json_functions.cpp +16 -7
  10. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  11. package/src/duckdb/extension/parquet/column_writer.cpp +54 -43
  12. package/src/duckdb/extension/parquet/geo_parquet.cpp +19 -0
  13. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +10 -6
  14. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +3 -3
  15. package/src/duckdb/extension/parquet/parquet_writer.cpp +2 -1
  16. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  17. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +1 -0
  18. package/src/duckdb/src/common/arrow/arrow_util.cpp +60 -0
  19. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +1 -53
  20. package/src/duckdb/src/common/cgroups.cpp +15 -24
  21. package/src/duckdb/src/common/constants.cpp +8 -0
  22. package/src/duckdb/src/common/enum_util.cpp +331 -326
  23. package/src/duckdb/src/common/http_util.cpp +5 -1
  24. package/src/duckdb/src/common/operator/cast_operators.cpp +6 -60
  25. package/src/duckdb/src/common/types/bit.cpp +1 -1
  26. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -1
  27. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +2 -1
  28. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +5 -0
  29. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +1 -1
  30. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -1
  31. package/src/duckdb/src/execution/index/art/iterator.cpp +17 -15
  32. package/src/duckdb/src/execution/index/art/prefix.cpp +9 -34
  33. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +4 -3
  34. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +1 -0
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -1
  36. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +2 -2
  37. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +23 -1
  38. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +33 -4
  39. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +23 -13
  40. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +23 -19
  41. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +12 -11
  42. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -14
  43. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +4 -4
  44. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +3 -1
  45. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -1
  46. package/src/duckdb/src/function/cast/decimal_cast.cpp +33 -3
  47. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +9 -0
  48. package/src/duckdb/src/function/table/arrow.cpp +34 -22
  49. package/src/duckdb/src/function/table/sniff_csv.cpp +4 -1
  50. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  51. package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +31 -0
  52. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +2 -16
  53. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +60 -0
  54. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -0
  55. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +0 -1
  56. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +2 -1
  57. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +9 -5
  58. package/src/duckdb/src/include/duckdb/execution/executor.hpp +1 -0
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +5 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +5 -1
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +5 -5
  62. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -0
  63. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -0
  64. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
  66. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +14 -5
  67. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/main/settings.hpp +4 -2
  69. package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +3 -0
  70. package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +7 -1
  72. package/src/duckdb/src/include/duckdb/planner/binder.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -1
  75. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -1
  76. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +7 -4
  77. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +2 -2
  78. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +2 -1
  79. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +4 -4
  80. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -4
  81. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +4 -2
  83. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +1 -0
  85. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -0
  86. package/src/duckdb/src/include/duckdb/transaction/transaction_manager.hpp +1 -1
  87. package/src/duckdb/src/include/duckdb.h +8 -8
  88. package/src/duckdb/src/main/appender.cpp +1 -1
  89. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +3 -3
  90. package/src/duckdb/src/main/capi/helper-c.cpp +4 -0
  91. package/src/duckdb/src/main/config.cpp +24 -11
  92. package/src/duckdb/src/main/database.cpp +6 -5
  93. package/src/duckdb/src/main/extension/extension_install.cpp +13 -8
  94. package/src/duckdb/src/main/extension/extension_load.cpp +10 -4
  95. package/src/duckdb/src/main/extension.cpp +1 -1
  96. package/src/duckdb/src/optimizer/filter_pushdown.cpp +10 -1
  97. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +9 -5
  98. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +14 -8
  99. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -0
  100. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -0
  101. package/src/duckdb/src/optimizer/optimizer.cpp +4 -1
  102. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -11
  103. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +1 -7
  104. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
  105. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +3 -0
  106. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
  107. package/src/duckdb/src/parser/keyword_helper.cpp +4 -0
  108. package/src/duckdb/src/parser/parser.cpp +20 -18
  109. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +8 -3
  110. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -0
  111. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +7 -1
  112. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +13 -0
  113. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -11
  114. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +27 -10
  115. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +24 -9
  116. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -3
  117. package/src/duckdb/src/planner/binder.cpp +5 -6
  118. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +1 -0
  119. package/src/duckdb/src/planner/expression_binder/select_binder.cpp +9 -0
  120. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +2 -2
  121. package/src/duckdb/src/planner/operator/logical_positional_join.cpp +1 -0
  122. package/src/duckdb/src/storage/buffer/block_handle.cpp +18 -21
  123. package/src/duckdb/src/storage/buffer/block_manager.cpp +12 -4
  124. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
  125. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +12 -2
  126. package/src/duckdb/src/storage/buffer_manager.cpp +3 -2
  127. package/src/duckdb/src/storage/compression/rle.cpp +5 -2
  128. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -1
  129. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +8 -7
  130. package/src/duckdb/src/storage/standard_buffer_manager.cpp +19 -20
  131. package/src/duckdb/src/storage/statistics/column_statistics.cpp +1 -2
  132. package/src/duckdb/src/storage/table/column_data.cpp +5 -2
  133. package/src/duckdb/src/storage/table/column_segment.cpp +2 -2
  134. package/src/duckdb/src/storage/table/row_group_collection.cpp +18 -14
  135. package/src/duckdb/src/storage/table/standard_column_data.cpp +3 -3
  136. package/src/duckdb/src/storage/wal_replay.cpp +2 -3
  137. package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -0
  138. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
  139. package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -2
  140. package/src/duckdb/third_party/libpg_query/include/pg_simplified_token.hpp +6 -4
  141. package/src/duckdb/third_party/libpg_query/include/postgres_parser.hpp +1 -1
  142. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +1 -1
  143. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +801 -799
  144. package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +6 -2
  145. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +0 -1
  146. package/src/duckdb/ub_src_common_arrow.cpp +2 -0
  147. package/vendor.py +1 -2
@@ -339,8 +339,25 @@ vector<SimplifiedToken> Parser::Tokenize(const string &query) {
339
339
  return result;
340
340
  }
341
341
 
342
- bool Parser::IsKeyword(const string &text) {
343
- return PostgresParser::IsKeyword(text);
342
+ KeywordCategory ToKeywordCategory(duckdb_libpgquery::PGKeywordCategory type) {
343
+ switch (type) {
344
+ case duckdb_libpgquery::PGKeywordCategory::PG_KEYWORD_RESERVED:
345
+ return KeywordCategory::KEYWORD_RESERVED;
346
+ case duckdb_libpgquery::PGKeywordCategory::PG_KEYWORD_UNRESERVED:
347
+ return KeywordCategory::KEYWORD_UNRESERVED;
348
+ case duckdb_libpgquery::PGKeywordCategory::PG_KEYWORD_TYPE_FUNC:
349
+ return KeywordCategory::KEYWORD_TYPE_FUNC;
350
+ case duckdb_libpgquery::PGKeywordCategory::PG_KEYWORD_COL_NAME:
351
+ return KeywordCategory::KEYWORD_COL_NAME;
352
+ case duckdb_libpgquery::PGKeywordCategory::PG_KEYWORD_NONE:
353
+ return KeywordCategory::KEYWORD_NONE;
354
+ default:
355
+ throw InternalException("Unrecognized keyword category");
356
+ }
357
+ }
358
+
359
+ KeywordCategory Parser::IsKeyword(const string &text) {
360
+ return ToKeywordCategory(PostgresParser::IsKeyword(text));
344
361
  }
345
362
 
346
363
  vector<ParserKeyword> Parser::KeywordList() {
@@ -349,22 +366,7 @@ vector<ParserKeyword> Parser::KeywordList() {
349
366
  for (auto &kw : keywords) {
350
367
  ParserKeyword res;
351
368
  res.name = kw.text;
352
- switch (kw.category) {
353
- case duckdb_libpgquery::PGKeywordCategory::PG_KEYWORD_RESERVED:
354
- res.category = KeywordCategory::KEYWORD_RESERVED;
355
- break;
356
- case duckdb_libpgquery::PGKeywordCategory::PG_KEYWORD_UNRESERVED:
357
- res.category = KeywordCategory::KEYWORD_UNRESERVED;
358
- break;
359
- case duckdb_libpgquery::PGKeywordCategory::PG_KEYWORD_TYPE_FUNC:
360
- res.category = KeywordCategory::KEYWORD_TYPE_FUNC;
361
- break;
362
- case duckdb_libpgquery::PGKeywordCategory::PG_KEYWORD_COL_NAME:
363
- res.category = KeywordCategory::KEYWORD_COL_NAME;
364
- break;
365
- default:
366
- throw InternalException("Unrecognized keyword category");
367
- }
369
+ res.category = ToKeywordCategory(kw.category);
368
370
  result.push_back(res);
369
371
  }
370
372
  return result;
@@ -91,9 +91,14 @@ unique_ptr<QueryNode> Transformer::TransformSelectInternal(duckdb_libpgquery::PG
91
91
  if (!stmt.targetList) {
92
92
  throw ParserException("SELECT clause without selection list");
93
93
  }
94
- // select list
95
- TransformExpressionList(*stmt.targetList, result.select_list);
96
- result.from_table = TransformFrom(stmt.fromClause);
94
+ // transform in the specified order to ensure positional parameters are correctly set
95
+ if (stmt.from_first) {
96
+ result.from_table = TransformFrom(stmt.fromClause);
97
+ TransformExpressionList(*stmt.targetList, result.select_list);
98
+ } else {
99
+ TransformExpressionList(*stmt.targetList, result.select_list);
100
+ result.from_table = TransformFrom(stmt.fromClause);
101
+ }
97
102
  }
98
103
 
99
104
  // where
@@ -270,6 +270,9 @@ BindResult ExpressionBinder::BindUnnest(FunctionExpression &expr, idx_t depth, b
270
270
  return BindUnsupportedExpression(expr, depth, UnsupportedUnnestMessage());
271
271
  }
272
272
 
273
+ void ExpressionBinder::ThrowIfUnnestInLambda(const ColumnBinding &column_binding) {
274
+ }
275
+
273
276
  string ExpressionBinder::UnsupportedAggregateMessage() {
274
277
  return "Aggregate functions are not supported here";
275
278
  }
@@ -178,7 +178,7 @@ void ExpressionBinder::CaptureLambdaColumns(BoundLambdaExpression &bound_lambda_
178
178
  const LogicalType &list_child_type) {
179
179
 
180
180
  if (expr->expression_class == ExpressionClass::BOUND_SUBQUERY) {
181
- throw InvalidInputException("Subqueries are not supported in lambda expressions!");
181
+ throw BinderException("subqueries in lambda expressions are not supported");
182
182
  }
183
183
 
184
184
  // these are bound depth-first
@@ -195,6 +195,12 @@ void ExpressionBinder::CaptureLambdaColumns(BoundLambdaExpression &bound_lambda_
195
195
  expr->expression_class == ExpressionClass::BOUND_PARAMETER ||
196
196
  expr->expression_class == ExpressionClass::BOUND_LAMBDA_REF) {
197
197
 
198
+ if (expr->expression_class == ExpressionClass::BOUND_COLUMN_REF) {
199
+ // Search for UNNEST.
200
+ auto &column_binding = expr->Cast<BoundColumnRefExpression>().binding;
201
+ ThrowIfUnnestInLambda(column_binding);
202
+ }
203
+
198
204
  // move the expr because we are going to replace it
199
205
  auto original = std::move(expr);
200
206
  unique_ptr<Expression> replacement;
@@ -43,6 +43,19 @@ unique_ptr<Expression> CreateBoundStructExtractIndex(ClientContext &context, uni
43
43
  return std::move(result);
44
44
  }
45
45
 
46
+ void SelectBinder::ThrowIfUnnestInLambda(const ColumnBinding &column_binding) {
47
+ // Extract the unnests and check if any match the column index.
48
+ for (auto &node_pair : node.unnests) {
49
+ auto &unnest_node = node_pair.second;
50
+
51
+ if (unnest_node.index == column_binding.table_index) {
52
+ if (column_binding.column_index < unnest_node.expressions.size()) {
53
+ throw BinderException("UNNEST in lambda expressions is not supported");
54
+ }
55
+ }
56
+ }
57
+ }
58
+
46
59
  BindResult SelectBinder::BindUnnest(FunctionExpression &function, idx_t depth, bool root_expression) {
47
60
  // bind the children of the function expression
48
61
  if (depth > 0) {
@@ -54,7 +54,7 @@ unique_ptr<LogicalOperator> Binder::BindCopyDatabaseData(Catalog &source_catalog
54
54
  ExportEntries entries;
55
55
  PhysicalExport::ExtractEntries(context, source_schemas, entries);
56
56
 
57
- unique_ptr<LogicalOperator> result;
57
+ vector<unique_ptr<LogicalOperator>> insert_nodes;
58
58
  for (auto &table_ref : entries.tables) {
59
59
  auto &table = table_ref.get().Cast<TableCatalogEntry>();
60
60
  // generate the insert statement
@@ -82,17 +82,10 @@ unique_ptr<LogicalOperator> Binder::BindCopyDatabaseData(Catalog &source_catalog
82
82
  insert_stmt.select_statement = std::move(select_stmt);
83
83
  auto bound_insert = Bind(insert_stmt);
84
84
  auto insert_plan = std::move(bound_insert.plan);
85
- if (result) {
86
- // use UNION ALL to combine the individual copy statements into a single node
87
- auto copy_union =
88
- make_uniq<LogicalSetOperation>(GenerateTableIndex(), 1U, std::move(insert_plan), std::move(result),
89
- LogicalOperatorType::LOGICAL_UNION, true, false);
90
- result = std::move(copy_union);
91
- } else {
92
- result = std::move(insert_plan);
93
- }
85
+ insert_nodes.push_back(std::move(insert_plan));
94
86
  }
95
- if (!result) {
87
+ unique_ptr<LogicalOperator> result;
88
+ if (insert_nodes.empty()) {
96
89
  vector<LogicalType> result_types;
97
90
  result_types.push_back(LogicalType::BIGINT);
98
91
  vector<unique_ptr<Expression>> expression_list;
@@ -101,6 +94,9 @@ unique_ptr<LogicalOperator> Binder::BindCopyDatabaseData(Catalog &source_catalog
101
94
  expressions.push_back(std::move(expression_list));
102
95
  result = make_uniq<LogicalExpressionGet>(GenerateTableIndex(), std::move(result_types), std::move(expressions));
103
96
  result->children.push_back(make_uniq<LogicalDummyScan>(GenerateTableIndex()));
97
+ } else {
98
+ // use UNION ALL to combine the individual copy statements into a single node
99
+ result = UnionOperators(std::move(insert_nodes));
104
100
  }
105
101
  return result;
106
102
  }
@@ -249,6 +249,13 @@ SchemaCatalogEntry &Binder::BindCreateFunctionInfo(CreateInfo &info) {
249
249
  return BindCreateSchema(info);
250
250
  }
251
251
 
252
+ static bool IsValidUserType(optional_ptr<CatalogEntry> entry) {
253
+ if (!entry) {
254
+ return false;
255
+ }
256
+ return entry->Cast<TypeCatalogEntry>().user_type.id() != LogicalTypeId::INVALID;
257
+ }
258
+
252
259
  void Binder::BindLogicalType(LogicalType &type, optional_ptr<Catalog> catalog, const string &schema) {
253
260
  if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::MAP) {
254
261
  auto child_type = ListType::GetChildType(type);
@@ -297,24 +304,34 @@ void Binder::BindLogicalType(LogicalType &type, optional_ptr<Catalog> catalog, c
297
304
  type.SetModifiers(modifiers);
298
305
  } else if (type.id() == LogicalTypeId::USER) {
299
306
  auto user_type_name = UserType::GetTypeName(type);
307
+ auto user_type_schema = UserType::GetSchema(type);
300
308
  auto user_type_mods = UserType::GetTypeModifiers(type);
301
309
 
302
310
  bind_type_modifiers_function_t user_bind_modifiers_func = nullptr;
303
311
 
304
312
  if (catalog) {
305
313
  // The search order is:
306
- // 1) In the same schema as the table
307
- // 2) In the same catalog
308
- // 3) System catalog
309
- auto entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, *catalog, schema, user_type_name,
310
- OnEntryNotFound::RETURN_NULL);
311
- if (!entry || entry->Cast<TypeCatalogEntry>().user_type.id() == LogicalTypeId::INVALID) {
314
+ // 1) In the explicitly set schema (my_schema.my_type)
315
+ // 2) In the same schema as the table
316
+ // 3) In the same catalog
317
+ // 4) System catalog
318
+
319
+ optional_ptr<CatalogEntry> entry = nullptr;
320
+ if (!user_type_schema.empty()) {
321
+ entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, *catalog, user_type_schema, user_type_name,
322
+ OnEntryNotFound::RETURN_NULL);
323
+ }
324
+ if (!IsValidUserType(entry)) {
325
+ entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, *catalog, schema, user_type_name,
326
+ OnEntryNotFound::RETURN_NULL);
327
+ }
328
+ if (!IsValidUserType(entry)) {
312
329
  entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, *catalog, INVALID_SCHEMA, user_type_name,
313
330
  OnEntryNotFound::RETURN_NULL);
314
- if (!entry || entry->Cast<TypeCatalogEntry>().user_type.id() == LogicalTypeId::INVALID) {
315
- entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, INVALID_CATALOG, INVALID_SCHEMA,
316
- user_type_name, OnEntryNotFound::THROW_EXCEPTION);
317
- }
331
+ }
332
+ if (!IsValidUserType(entry)) {
333
+ entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, INVALID_CATALOG, INVALID_SCHEMA,
334
+ user_type_name, OnEntryNotFound::THROW_EXCEPTION);
318
335
  }
319
336
  auto &type_entry = entry->Cast<TypeCatalogEntry>();
320
337
  type = type_entry.user_type;
@@ -136,6 +136,27 @@ static unique_ptr<QueryNode> CreateSelectStatement(CopyStatement &stmt, child_li
136
136
  return std::move(statement);
137
137
  }
138
138
 
139
+ unique_ptr<LogicalOperator> Binder::UnionOperators(vector<unique_ptr<LogicalOperator>> nodes) {
140
+ if (nodes.empty()) {
141
+ return nullptr;
142
+ }
143
+ while (nodes.size() > 1) {
144
+ vector<unique_ptr<LogicalOperator>> new_nodes;
145
+ for (idx_t i = 0; i < nodes.size(); i += 2) {
146
+ if (i + 1 == nodes.size()) {
147
+ new_nodes.push_back(std::move(nodes[i]));
148
+ } else {
149
+ auto copy_union = make_uniq<LogicalSetOperation>(GenerateTableIndex(), 1U, std::move(nodes[i]),
150
+ std::move(nodes[i + 1]),
151
+ LogicalOperatorType::LOGICAL_UNION, true, false);
152
+ new_nodes.push_back(std::move(copy_union));
153
+ }
154
+ }
155
+ nodes = std::move(new_nodes);
156
+ }
157
+ return std::move(nodes[0]);
158
+ }
159
+
139
160
  BoundStatement Binder::Bind(ExportStatement &stmt) {
140
161
  // COPY TO a file
141
162
  auto &config = DBConfig::GetConfig(context);
@@ -170,11 +191,11 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
170
191
 
171
192
  // now generate the COPY statements for each of the tables
172
193
  auto &fs = FileSystem::GetFileSystem(context);
173
- unique_ptr<LogicalOperator> child_operator;
174
194
 
175
195
  BoundExportData exported_tables;
176
196
 
177
197
  unordered_set<string> table_name_index;
198
+ vector<unique_ptr<LogicalOperator>> export_nodes;
178
199
  for (auto &t : tables) {
179
200
  auto &table = t.get().Cast<TableCatalogEntry>();
180
201
  auto info = make_uniq<CopyInfo>();
@@ -237,15 +258,9 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
237
258
 
238
259
  auto plan = std::move(bound_statement.plan);
239
260
 
240
- if (child_operator) {
241
- // use UNION ALL to combine the individual copy statements into a single node
242
- auto copy_union = make_uniq<LogicalSetOperation>(GenerateTableIndex(), 1U, std::move(child_operator),
243
- std::move(plan), LogicalOperatorType::LOGICAL_UNION, true);
244
- child_operator = std::move(copy_union);
245
- } else {
246
- child_operator = std::move(plan);
247
- }
261
+ export_nodes.push_back(std::move(plan));
248
262
  }
263
+ auto child_operator = UnionOperators(std::move(export_nodes));
249
264
 
250
265
  // try to create the directory, if it doesn't exist yet
251
266
  // a bit hacky to do it here, but we need to create the directory BEFORE the copy statements run
@@ -76,9 +76,7 @@ void LogicalComparisonJoin::ExtractJoinConditions(
76
76
  auto total_side = JoinSide::GetJoinSide(*expr, left_bindings, right_bindings);
77
77
  if (total_side != JoinSide::BOTH) {
78
78
  // join condition does not reference both sides, add it as filter under the join
79
- // BUT don't push right side filters into AsOf because it is really a table lookup
80
- // and we shouldn't remove anything from the table.
81
- if (type == JoinType::LEFT && total_side == JoinSide::RIGHT && ref_type != JoinRefType::ASOF) {
79
+ if ((type == JoinType::LEFT || ref_type == JoinRefType::ASOF) && total_side == JoinSide::RIGHT) {
82
80
  // filter is on RHS and the join is a LEFT OUTER join, we can push it in the right child
83
81
  if (right_child->type != LogicalOperatorType::LOGICAL_FILTER) {
84
82
  // not a filter yet, push a new empty filter
@@ -6,6 +6,7 @@
6
6
  #include "duckdb/common/enum_util.hpp"
7
7
  #include "duckdb/common/helper.hpp"
8
8
  #include "duckdb/main/config.hpp"
9
+ #include "duckdb/main/database.hpp"
9
10
  #include "duckdb/parser/expression/function_expression.hpp"
10
11
  #include "duckdb/parser/expression/subquery_expression.hpp"
11
12
  #include "duckdb/parser/parsed_expression_iterator.hpp"
@@ -21,7 +22,6 @@
21
22
  #include "duckdb/planner/operator/logical_sample.hpp"
22
23
  #include "duckdb/planner/query_node/list.hpp"
23
24
  #include "duckdb/planner/tableref/list.hpp"
24
- #include "duckdb/main/database.hpp"
25
25
 
26
26
  #include <algorithm>
27
27
 
@@ -134,10 +134,8 @@ BoundStatement Binder::BindWithCTE(T &statement) {
134
134
  }
135
135
  MoveCorrelatedExpressions(*tail.child_binder);
136
136
 
137
- // extract operator below root operation
138
- auto plan = std::move(bound_statement.plan->children[0]);
139
- bound_statement.plan->children.clear();
140
- bound_statement.plan->children.push_back(CreatePlan(*bound_cte, std::move(plan)));
137
+ auto plan = std::move(bound_statement.plan);
138
+ bound_statement.plan = CreatePlan(*bound_cte, std::move(plan));
141
139
  } else {
142
140
  bound_statement = Bind(statement.template Cast<T>());
143
141
  }
@@ -344,7 +342,8 @@ unique_ptr<BoundQueryNode> Binder::BindNode(QueryNode &node) {
344
342
 
345
343
  BoundStatement Binder::Bind(QueryNode &node) {
346
344
  BoundStatement result;
347
- if (context.db->config.options.disabled_optimizers.find(OptimizerType::MATERIALIZED_CTE) ==
345
+ if (node.type != QueryNodeType::CTE_NODE && // Issue #13850 - Don't auto-materialize if users materialize (for now)
346
+ context.db->config.options.disabled_optimizers.find(OptimizerType::MATERIALIZED_CTE) ==
348
347
  context.db->config.options.disabled_optimizers.end() &&
349
348
  context.config.enable_optimizer && OptimizeCTEs(node)) {
350
349
  switch (node.type) {
@@ -170,6 +170,7 @@ bool BoundCastExpression::CastIsInvertible(const LogicalType &source_type, const
170
170
  break;
171
171
  case LogicalTypeId::VARCHAR:
172
172
  case LogicalTypeId::BIT:
173
+ case LogicalTypeId::TIME_TZ:
173
174
  return false;
174
175
  default:
175
176
  break;
@@ -8,6 +8,15 @@ SelectBinder::SelectBinder(Binder &binder, ClientContext &context, BoundSelectNo
8
8
  : BaseSelectBinder(binder, context, node, info) {
9
9
  }
10
10
 
11
+ unique_ptr<ParsedExpression> SelectBinder::GetSQLValueFunction(const string &column_name) {
12
+ auto alias_entry = node.bind_state.alias_map.find(column_name);
13
+ if (alias_entry != node.bind_state.alias_map.end()) {
14
+ // don't replace SQL value functions if they are in the alias map
15
+ return nullptr;
16
+ }
17
+ return ExpressionBinder::GetSQLValueFunction(column_name);
18
+ }
19
+
11
20
  BindResult SelectBinder::BindColumnRef(unique_ptr<ParsedExpression> &expr_ptr, idx_t depth, bool root_expression) {
12
21
  // first try to bind the column reference regularly
13
22
  auto result = BaseSelectBinder::BindColumnRef(expr_ptr, depth, root_expression);
@@ -65,7 +65,7 @@ void LogicalCopyToFile::Serialize(Serializer &serializer) const {
65
65
  serializer.WriteProperty(213, "file_extension", file_extension);
66
66
  serializer.WriteProperty(214, "rotate", rotate);
67
67
  serializer.WriteProperty(215, "return_type", return_type);
68
- serializer.WriteProperty(216, "write_partition_columns", write_partition_columns);
68
+ serializer.WritePropertyWithDefault(216, "write_partition_columns", write_partition_columns, true);
69
69
  }
70
70
 
71
71
  unique_ptr<LogicalOperator> LogicalCopyToFile::Deserialize(Deserializer &deserializer) {
@@ -109,7 +109,7 @@ unique_ptr<LogicalOperator> LogicalCopyToFile::Deserialize(Deserializer &deseria
109
109
  auto rotate = deserializer.ReadPropertyWithExplicitDefault(214, "rotate", false);
110
110
  auto return_type =
111
111
  deserializer.ReadPropertyWithExplicitDefault(215, "return_type", CopyFunctionReturnType::CHANGED_ROWS);
112
- auto write_partition_columns = deserializer.ReadProperty<bool>(216, "write_partition_columns");
112
+ auto write_partition_columns = deserializer.ReadPropertyWithExplicitDefault(216, "write_partition_columns", true);
113
113
 
114
114
  if (!has_serialize) {
115
115
  // If not serialized, re-bind with the copy info
@@ -4,6 +4,7 @@ namespace duckdb {
4
4
 
5
5
  LogicalPositionalJoin::LogicalPositionalJoin(unique_ptr<LogicalOperator> left, unique_ptr<LogicalOperator> right)
6
6
  : LogicalUnconditionalJoin(LogicalOperatorType::LOGICAL_POSITIONAL_JOIN, std::move(left), std::move(right)) {
7
+ SetEstimatedCardinality(MaxValue(children[0]->estimated_cardinality, children[1]->estimated_cardinality));
7
8
  }
8
9
 
9
10
  unique_ptr<LogicalOperator> LogicalPositionalJoin::Create(unique_ptr<LogicalOperator> left,
@@ -49,7 +49,7 @@ BlockHandle::~BlockHandle() { // NOLINT: allow internal exceptions
49
49
  D_ASSERT(memory_charge.size == 0);
50
50
  }
51
51
 
52
- block_manager.UnregisterBlock(block_id);
52
+ block_manager.UnregisterBlock(*this);
53
53
  }
54
54
 
55
55
  unique_ptr<Block> AllocateBlock(BlockManager &block_manager, unique_ptr<FileBuffer> reusable_buffer,
@@ -71,39 +71,36 @@ unique_ptr<Block> AllocateBlock(BlockManager &block_manager, unique_ptr<FileBuff
71
71
  }
72
72
  }
73
73
 
74
- BufferHandle BlockHandle::LoadFromBuffer(shared_ptr<BlockHandle> &handle, data_ptr_t data,
75
- unique_ptr<FileBuffer> reusable_buffer) {
76
- D_ASSERT(handle->state != BlockState::BLOCK_LOADED);
74
+ BufferHandle BlockHandle::LoadFromBuffer(data_ptr_t data, unique_ptr<FileBuffer> reusable_buffer) {
75
+ D_ASSERT(state != BlockState::BLOCK_LOADED);
77
76
  // copy over the data into the block from the file buffer
78
- auto block = AllocateBlock(handle->block_manager, std::move(reusable_buffer), handle->block_id);
77
+ auto block = AllocateBlock(block_manager, std::move(reusable_buffer), block_id);
79
78
  memcpy(block->InternalBuffer(), data, block->AllocSize());
80
- handle->buffer = std::move(block);
81
- handle->state = BlockState::BLOCK_LOADED;
82
- return BufferHandle(handle, handle->buffer.get());
79
+ buffer = std::move(block);
80
+ state = BlockState::BLOCK_LOADED;
81
+ return BufferHandle(shared_from_this());
83
82
  }
84
83
 
85
- BufferHandle BlockHandle::Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileBuffer> reusable_buffer) {
86
- if (handle->state == BlockState::BLOCK_LOADED) {
84
+ BufferHandle BlockHandle::Load(unique_ptr<FileBuffer> reusable_buffer) {
85
+ if (state == BlockState::BLOCK_LOADED) {
87
86
  // already loaded
88
- D_ASSERT(handle->buffer);
89
- return BufferHandle(handle, handle->buffer.get());
87
+ D_ASSERT(buffer);
88
+ return BufferHandle(shared_from_this());
90
89
  }
91
90
 
92
- auto &block_manager = handle->block_manager;
93
- if (handle->block_id < MAXIMUM_BLOCK) {
94
- auto block = AllocateBlock(block_manager, std::move(reusable_buffer), handle->block_id);
91
+ if (block_id < MAXIMUM_BLOCK) {
92
+ auto block = AllocateBlock(block_manager, std::move(reusable_buffer), block_id);
95
93
  block_manager.Read(*block);
96
- handle->buffer = std::move(block);
94
+ buffer = std::move(block);
97
95
  } else {
98
- if (handle->MustWriteToTemporaryFile()) {
99
- handle->buffer = block_manager.buffer_manager.ReadTemporaryBuffer(handle->tag, handle->block_id,
100
- std::move(reusable_buffer));
96
+ if (MustWriteToTemporaryFile()) {
97
+ buffer = block_manager.buffer_manager.ReadTemporaryBuffer(tag, *this, std::move(reusable_buffer));
101
98
  } else {
102
99
  return BufferHandle(); // Destroyed upon unpin/evict, so there is no temp buffer to read
103
100
  }
104
101
  }
105
- handle->state = BlockState::BLOCK_LOADED;
106
- return BufferHandle(handle, handle->buffer.get());
102
+ state = BlockState::BLOCK_LOADED;
103
+ return BufferHandle(shared_from_this());
107
104
  }
108
105
 
109
106
  unique_ptr<FileBuffer> BlockHandle::UnloadAndTakeBlock() {
@@ -70,14 +70,22 @@ shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(block_id_t block_id, s
70
70
  return new_block;
71
71
  }
72
72
 
73
- void BlockManager::UnregisterBlock(block_id_t block_id) {
74
- if (block_id >= MAXIMUM_BLOCK) {
73
+ void BlockManager::UnregisterBlock(block_id_t id) {
74
+ D_ASSERT(id < MAXIMUM_BLOCK);
75
+ lock_guard<mutex> lock(blocks_lock);
76
+ // on-disk block: erase from list of blocks in manager
77
+ blocks.erase(id);
78
+ }
79
+
80
+ void BlockManager::UnregisterBlock(BlockHandle &block) {
81
+ auto id = block.BlockId();
82
+ if (id >= MAXIMUM_BLOCK) {
75
83
  // in-memory buffer: buffer could have been offloaded to disk: remove the file
76
- buffer_manager.DeleteTemporaryFile(block_id);
84
+ buffer_manager.DeleteTemporaryFile(block);
77
85
  } else {
78
86
  lock_guard<mutex> lock(blocks_lock);
79
87
  // on-disk block: erase from list of blocks in manager
80
- blocks.erase(block_id);
88
+ blocks.erase(id);
81
89
  }
82
90
  }
83
91
 
@@ -7,8 +7,8 @@ namespace duckdb {
7
7
  BufferHandle::BufferHandle() : handle(nullptr), node(nullptr) {
8
8
  }
9
9
 
10
- BufferHandle::BufferHandle(shared_ptr<BlockHandle> handle_p, FileBuffer *node_p)
11
- : handle(std::move(handle_p)), node(node_p) {
10
+ BufferHandle::BufferHandle(shared_ptr<BlockHandle> handle_p)
11
+ : handle(std::move(handle_p)), node(handle ? handle->buffer.get() : nullptr) {
12
12
  }
13
13
 
14
14
  BufferHandle::BufferHandle(BufferHandle &&other) noexcept : node(nullptr) {
@@ -194,8 +194,11 @@ void EvictionQueue::PurgeIteration(const idx_t purge_size) {
194
194
  total_dead_nodes -= actually_dequeued - alive_nodes;
195
195
  }
196
196
 
197
- BufferPool::BufferPool(idx_t maximum_memory, bool track_eviction_timestamps)
198
- : maximum_memory(maximum_memory), track_eviction_timestamps(track_eviction_timestamps),
197
+ BufferPool::BufferPool(idx_t maximum_memory, bool track_eviction_timestamps,
198
+ idx_t allocator_bulk_deallocation_flush_threshold)
199
+ : maximum_memory(maximum_memory),
200
+ allocator_bulk_deallocation_flush_threshold(allocator_bulk_deallocation_flush_threshold),
201
+ track_eviction_timestamps(track_eviction_timestamps),
199
202
  temporary_memory_manager(make_uniq<TemporaryMemoryManager>()) {
200
203
  queues.reserve(FILE_BUFFER_TYPE_COUNT);
201
204
  for (idx_t i = 0; i < FILE_BUFFER_TYPE_COUNT; i++) {
@@ -283,6 +286,9 @@ BufferPool::EvictionResult BufferPool::EvictBlocksInternal(EvictionQueue &queue,
283
286
  bool found = false;
284
287
 
285
288
  if (memory_usage.GetUsedMemory(MemoryUsageCaches::NO_FLUSH) <= memory_limit) {
289
+ if (Allocator::SupportsFlush() && extra_memory > allocator_bulk_deallocation_flush_threshold) {
290
+ Allocator::FlushAll();
291
+ }
286
292
  return {true, std::move(r)};
287
293
  }
288
294
 
@@ -407,6 +413,10 @@ void BufferPool::SetAllocatorBulkDeallocationFlushThreshold(idx_t threshold) {
407
413
  allocator_bulk_deallocation_flush_threshold = threshold;
408
414
  }
409
415
 
416
+ idx_t BufferPool::GetAllocatorBulkDeallocationFlushThreshold() {
417
+ return allocator_bulk_deallocation_flush_threshold;
418
+ }
419
+
410
420
  BufferPool::MemoryUsage::MemoryUsage() {
411
421
  for (auto &v : memory_usage) {
412
422
  v = 0;
@@ -79,11 +79,12 @@ void BufferManager::WriteTemporaryBuffer(MemoryTag tag, block_id_t block_id, Fil
79
79
  throw NotImplementedException("This type of BufferManager does not support 'WriteTemporaryBuffer");
80
80
  }
81
81
 
82
- unique_ptr<FileBuffer> BufferManager::ReadTemporaryBuffer(MemoryTag tag, block_id_t id, unique_ptr<FileBuffer> buffer) {
82
+ unique_ptr<FileBuffer> BufferManager::ReadTemporaryBuffer(MemoryTag tag, BlockHandle &block,
83
+ unique_ptr<FileBuffer> buffer) {
83
84
  throw NotImplementedException("This type of BufferManager does not support 'ReadTemporaryBuffer");
84
85
  }
85
86
 
86
- void BufferManager::DeleteTemporaryFile(block_id_t id) {
87
+ void BufferManager::DeleteTemporaryFile(BlockHandle &block) {
87
88
  throw NotImplementedException("This type of BufferManager does not support 'DeleteTemporaryFile");
88
89
  }
89
90
 
@@ -57,11 +57,14 @@ public:
57
57
  } else {
58
58
  // the values are different
59
59
  // issue the callback on the last value
60
- Flush<OP>();
60
+ // edge case: if a value has exactly 2^16 repeated values, we can end up here with last_seen_count = 0
61
+ if (last_seen_count > 0) {
62
+ Flush<OP>();
63
+ seen_count++;
64
+ }
61
65
 
62
66
  // increment the seen_count and put the new value into the RLE slot
63
67
  last_value = data[idx];
64
- seen_count++;
65
68
  last_seen_count = 1;
66
69
  }
67
70
  } else {
@@ -315,7 +315,8 @@ void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string
315
315
  new_block->offset = 0;
316
316
  new_block->size = alloc_size;
317
317
  // allocate an in-memory buffer for it
318
- handle = buffer_manager.Allocate(MemoryTag::OVERFLOW_STRINGS, alloc_size, false, &block);
318
+ handle = buffer_manager.Allocate(MemoryTag::OVERFLOW_STRINGS, alloc_size, false);
319
+ block = handle.GetBlockHandle();
319
320
  state.overflow_blocks.insert(make_pair(block->BlockId(), reference<StringBlock>(*new_block)));
320
321
  new_block->block = std::move(block);
321
322
  new_block->next = std::move(state.head);
@@ -61,27 +61,28 @@ MetadataHandle MetadataManager::Pin(MetadataPointer pointer) {
61
61
  return handle;
62
62
  }
63
63
 
64
- void MetadataManager::ConvertToTransient(MetadataBlock &block) {
64
+ void MetadataManager::ConvertToTransient(MetadataBlock &metadata_block) {
65
65
  // pin the old block
66
- auto old_buffer = buffer_manager.Pin(block.block);
66
+ auto old_buffer = buffer_manager.Pin(metadata_block.block);
67
67
 
68
68
  // allocate a new transient block to replace it
69
- shared_ptr<BlockHandle> new_block;
70
- auto new_buffer = buffer_manager.Allocate(MemoryTag::METADATA, block_manager.GetBlockSize(), false, &new_block);
69
+ auto new_buffer = buffer_manager.Allocate(MemoryTag::METADATA, block_manager.GetBlockSize(), false);
70
+ auto new_block = new_buffer.GetBlockHandle();
71
71
 
72
72
  // copy the data to the transient block
73
73
  memcpy(new_buffer.Ptr(), old_buffer.Ptr(), block_manager.GetBlockSize());
74
- block.block = std::move(new_block);
74
+ metadata_block.block = std::move(new_block);
75
75
 
76
76
  // unregister the old block
77
- block_manager.UnregisterBlock(block.block_id);
77
+ block_manager.UnregisterBlock(metadata_block.block_id);
78
78
  }
79
79
 
80
80
  block_id_t MetadataManager::AllocateNewBlock() {
81
81
  auto new_block_id = GetNextBlockId();
82
82
 
83
83
  MetadataBlock new_block;
84
- auto handle = buffer_manager.Allocate(MemoryTag::METADATA, block_manager.GetBlockSize(), false, &new_block.block);
84
+ auto handle = buffer_manager.Allocate(MemoryTag::METADATA, block_manager.GetBlockSize(), false);
85
+ new_block.block = handle.GetBlockHandle();
85
86
  new_block.block_id = new_block_id;
86
87
  for (idx_t i = 0; i < METADATA_BLOCK_COUNT; i++) {
87
88
  new_block.free_blocks.push_back(NumericCast<uint8_t>(METADATA_BLOCK_COUNT - i - 1));