duckdb 1.4.1 → 1.4.3-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/.github/workflows/NodeJS.yml +9 -6
  2. package/package.json +2 -2
  3. package/scripts/node_version.sh +1 -1
  4. package/src/duckdb/extension/icu/icu_extension.cpp +67 -6
  5. package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +9 -3
  6. package/src/duckdb/extension/json/include/json_serializer.hpp +12 -0
  7. package/src/duckdb/extension/json/json_functions/json_create.cpp +10 -10
  8. package/src/duckdb/extension/parquet/decoder/delta_length_byte_array_decoder.cpp +19 -5
  9. package/src/duckdb/extension/parquet/include/decoder/delta_length_byte_array_decoder.hpp +1 -1
  10. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +11 -2
  11. package/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp +2 -1
  12. package/src/duckdb/extension/parquet/parquet_reader.cpp +3 -1
  13. package/src/duckdb/extension/parquet/parquet_writer.cpp +16 -1
  14. package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
  15. package/src/duckdb/extension/parquet/writer/primitive_column_writer.cpp +1 -1
  16. package/src/duckdb/src/catalog/default/default_table_functions.cpp +1 -1
  17. package/src/duckdb/src/common/adbc/adbc.cpp +8 -6
  18. package/src/duckdb/src/common/csv_writer.cpp +1 -13
  19. package/src/duckdb/src/common/encryption_key_manager.cpp +10 -9
  20. package/src/duckdb/src/common/enum_util.cpp +19 -0
  21. package/src/duckdb/src/common/enums/compression_type.cpp +51 -16
  22. package/src/duckdb/src/common/exception/binder_exception.cpp +7 -2
  23. package/src/duckdb/src/common/progress_bar/unscented_kalman_filter.cpp +2 -2
  24. package/src/duckdb/src/common/random_engine.cpp +10 -0
  25. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +13 -2
  26. package/src/duckdb/src/execution/index/art/art.cpp +6 -3
  27. package/src/duckdb/src/execution/index/bound_index.cpp +32 -21
  28. package/src/duckdb/src/execution/index/unbound_index.cpp +20 -9
  29. package/src/duckdb/src/execution/join_hashtable.cpp +9 -3
  30. package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +1 -1
  31. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
  32. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +5 -0
  33. package/src/duckdb/src/function/cast/cast_function_set.cpp +3 -1
  34. package/src/duckdb/src/function/macro_function.cpp +1 -1
  35. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -1
  36. package/src/duckdb/src/function/scalar/create_sort_key.cpp +5 -3
  37. package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +1 -1
  38. package/src/duckdb/src/function/scalar/system/parse_log_message.cpp +4 -2
  39. package/src/duckdb/src/function/table/copy_csv.cpp +28 -4
  40. package/src/duckdb/src/function/table/direct_file_reader.cpp +10 -0
  41. package/src/duckdb/src/function/table/read_file.cpp +65 -1
  42. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  43. package/src/duckdb/src/include/duckdb/common/csv_writer.hpp +0 -3
  44. package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  47. package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +42 -2
  48. package/src/duckdb/src/include/duckdb/common/http_util.hpp +7 -0
  49. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +0 -11
  51. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +2 -0
  52. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +1 -0
  53. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +6 -6
  54. package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +115 -97
  55. package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +54 -0
  56. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -2
  57. package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +26 -8
  58. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/function/table/read_file.hpp +0 -49
  60. package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/logging/log_type.hpp +14 -0
  62. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +2 -1
  63. package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +1 -1
  65. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +1 -1
  66. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  67. package/src/duckdb/src/include/duckdb/main/database.hpp +2 -2
  68. package/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +10 -6
  69. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +1 -0
  71. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +1 -0
  72. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -0
  73. package/src/duckdb/src/include/duckdb/main/relation/insert_relation.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +2 -0
  75. package/src/duckdb/src/include/duckdb/main/relation.hpp +10 -2
  76. package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
  77. package/src/duckdb/src/include/duckdb/optimizer/filter_pullup.hpp +10 -14
  78. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +5 -1
  79. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +3 -0
  80. package/src/duckdb/src/include/duckdb/planner/bound_statement.hpp +1 -0
  81. package/src/duckdb/src/include/duckdb/storage/block.hpp +9 -0
  82. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +9 -2
  83. package/src/duckdb/src/include/duckdb/storage/index.hpp +8 -2
  84. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  85. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +0 -7
  87. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -2
  88. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +6 -0
  89. package/src/duckdb/src/logging/log_manager.cpp +2 -1
  90. package/src/duckdb/src/logging/log_types.cpp +30 -1
  91. package/src/duckdb/src/main/attached_database.cpp +4 -7
  92. package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +2 -3
  93. package/src/duckdb/src/main/buffered_data/buffered_data.cpp +2 -3
  94. package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +1 -2
  95. package/src/duckdb/src/main/capi/prepared-c.cpp +9 -2
  96. package/src/duckdb/src/main/config.cpp +6 -5
  97. package/src/duckdb/src/main/database.cpp +9 -3
  98. package/src/duckdb/src/main/database_file_path_manager.cpp +43 -14
  99. package/src/duckdb/src/main/database_manager.cpp +1 -1
  100. package/src/duckdb/src/main/http/http_util.cpp +19 -1
  101. package/src/duckdb/src/main/profiling_info.cpp +11 -0
  102. package/src/duckdb/src/main/query_profiler.cpp +16 -0
  103. package/src/duckdb/src/main/relation/create_table_relation.cpp +9 -0
  104. package/src/duckdb/src/main/relation/insert_relation.cpp +7 -0
  105. package/src/duckdb/src/main/relation/table_relation.cpp +14 -0
  106. package/src/duckdb/src/main/relation.cpp +28 -12
  107. package/src/duckdb/src/main/settings/custom_settings.cpp +9 -3
  108. package/src/duckdb/src/optimizer/filter_pullup.cpp +14 -0
  109. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +29 -10
  110. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +7 -0
  111. package/src/duckdb/src/parallel/task_executor.cpp +4 -2
  112. package/src/duckdb/src/parser/query_node/cte_node.cpp +79 -0
  113. package/src/duckdb/src/parser/transform/expression/transform_cast.cpp +3 -1
  114. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +1 -0
  115. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +12 -4
  116. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +16 -12
  117. package/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp +42 -5
  118. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +0 -24
  119. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +1 -1
  120. package/src/duckdb/src/planner/binder.cpp +0 -1
  121. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +1 -2
  122. package/src/duckdb/src/storage/buffer/block_manager.cpp +20 -6
  123. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +8 -6
  124. package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -22
  125. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +7 -0
  126. package/src/duckdb/src/storage/compression/zstd.cpp +34 -12
  127. package/src/duckdb/src/storage/data_table.cpp +1 -1
  128. package/src/duckdb/src/storage/local_storage.cpp +15 -2
  129. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -6
  130. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +11 -15
  131. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +1 -1
  132. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +1 -19
  133. package/src/duckdb/src/storage/single_file_block_manager.cpp +33 -3
  134. package/src/duckdb/src/storage/standard_buffer_manager.cpp +3 -1
  135. package/src/duckdb/src/storage/storage_info.cpp +4 -0
  136. package/src/duckdb/src/storage/storage_manager.cpp +8 -0
  137. package/src/duckdb/src/storage/table/array_column_data.cpp +1 -1
  138. package/src/duckdb/src/storage/table/column_data.cpp +3 -2
  139. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -2
  140. package/src/duckdb/src/storage/table/row_group.cpp +41 -24
  141. package/src/duckdb/src/storage/table/row_group_collection.cpp +114 -11
  142. package/src/duckdb/src/storage/table_index_list.cpp +18 -5
  143. package/src/duckdb/src/transaction/cleanup_state.cpp +7 -2
  144. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +5 -0
  145. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +8 -21
  146. package/src/duckdb/third_party/parquet/parquet_types.cpp +57 -35
  147. package/src/duckdb/third_party/parquet/parquet_types.h +9 -2
  148. package/src/duckdb/ub_src_common_types_row.cpp +0 -2
@@ -54,6 +54,13 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr<LogicalOpera
54
54
  auto relation_id = relations.size();
55
55
 
56
56
  auto table_indexes = op.GetTableIndex();
57
+ bool is_unnest_or_get_with_unnest = op.type == LogicalOperatorType::LOGICAL_UNNEST;
58
+ if (op.type == LogicalOperatorType::LOGICAL_GET) {
59
+ auto &get = op.Cast<LogicalGet>();
60
+ if (get.function.name == "unnest") {
61
+ is_unnest_or_get_with_unnest = true;
62
+ }
63
+ }
57
64
  if (table_indexes.empty()) {
58
65
  // relation represents a non-reorderable relation, most likely a join relation
59
66
  // Get the tables referenced in the non-reorderable relation and add them to the relation mapping
@@ -65,7 +72,7 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr<LogicalOpera
65
72
  D_ASSERT(relation_mapping.find(reference) == relation_mapping.end());
66
73
  relation_mapping[reference] = relation_id;
67
74
  }
68
- } else if (op.type == LogicalOperatorType::LOGICAL_UNNEST) {
75
+ } else if (is_unnest_or_get_with_unnest) {
69
76
  // logical unnest has a logical_unnest index, but other bindings can refer to
70
77
  // columns that are not unnested.
71
78
  auto bindings = op.GetColumnBindings();
@@ -182,6 +189,21 @@ static void ModifyStatsIfLimit(optional_ptr<LogicalOperator> limit_op, RelationS
182
189
  }
183
190
  }
184
191
 
192
+ void RelationManager::AddUnnestRelation(JoinOrderOptimizer &optimizer, LogicalOperator &op, LogicalOperator &input_op,
193
+ optional_ptr<LogicalOperator> parent, RelationStats &child_stats,
194
+ optional_ptr<LogicalOperator> limit_op,
195
+ vector<reference<LogicalOperator>> &datasource_filters) {
196
+ D_ASSERT(!op.children.empty());
197
+ auto child_optimizer = optimizer.CreateChildOptimizer();
198
+ op.children[0] = child_optimizer.Optimize(std::move(op.children[0]), &child_stats);
199
+ if (!datasource_filters.empty()) {
200
+ child_stats.cardinality = LossyNumericCast<idx_t>(static_cast<double>(child_stats.cardinality) *
201
+ RelationStatisticsHelper::DEFAULT_SELECTIVITY);
202
+ }
203
+ ModifyStatsIfLimit(limit_op.get(), child_stats);
204
+ AddRelation(input_op, parent, child_stats);
205
+ }
206
+
185
207
  bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, LogicalOperator &input_op,
186
208
  vector<reference<LogicalOperator>> &filter_operators,
187
209
  optional_ptr<LogicalOperator> parent) {
@@ -279,15 +301,7 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica
279
301
  case LogicalOperatorType::LOGICAL_UNNEST: {
280
302
  // optimize children of unnest
281
303
  RelationStats child_stats;
282
- auto child_optimizer = optimizer.CreateChildOptimizer();
283
- op->children[0] = child_optimizer.Optimize(std::move(op->children[0]), &child_stats);
284
- // the extracted cardinality should be set for window
285
- if (!datasource_filters.empty()) {
286
- child_stats.cardinality = LossyNumericCast<idx_t>(static_cast<double>(child_stats.cardinality) *
287
- RelationStatisticsHelper::DEFAULT_SELECTIVITY);
288
- }
289
- ModifyStatsIfLimit(limit_op.get(), child_stats);
290
- AddRelation(input_op, parent, child_stats);
304
+ AddUnnestRelation(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
291
305
  return true;
292
306
  }
293
307
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
@@ -345,6 +359,11 @@ bool RelationManager::ExtractJoinRelations(JoinOrderOptimizer &optimizer, Logica
345
359
  case LogicalOperatorType::LOGICAL_GET: {
346
360
  // TODO: Get stats from a logical GET
347
361
  auto &get = op->Cast<LogicalGet>();
362
+ if (get.function.name == "unnest" && !op->children.empty()) {
363
+ RelationStats child_stats;
364
+ AddUnnestRelation(optimizer, *op, input_op, parent, child_stats, limit_op, datasource_filters);
365
+ return true;
366
+ }
348
367
  auto stats = RelationStatisticsHelper::ExtractGetStats(get, context);
349
368
  // if there is another logical filter that could not be pushed down into the
350
369
  // table scan, apply another selectivity.
@@ -184,6 +184,13 @@ unique_ptr<Expression> RegexOptimizationRule::Apply(LogicalOperator &op, vector<
184
184
  if (!escaped_like_string.exists) {
185
185
  return nullptr;
186
186
  }
187
+
188
+ // if regexp had options, remove them so the new Contains Expression can be matched for other optimizers.
189
+ if (root.children.size() == 3) {
190
+ root.children.pop_back();
191
+ D_ASSERT(root.children.size() == 2);
192
+ }
193
+
187
194
  auto parameter = make_uniq<BoundConstantExpression>(Value(std::move(escaped_like_string.like_string)));
188
195
  auto contains = make_uniq<BoundFunctionExpression>(root.return_type, GetStringContains(),
189
196
  std::move(root.children), nullptr);
@@ -69,8 +69,10 @@ TaskExecutionResult BaseExecutorTask::Execute(TaskExecutionMode mode) {
69
69
  return TaskExecutionResult::TASK_FINISHED;
70
70
  }
71
71
  try {
72
- TaskNotifier task_notifier {executor.context};
73
- ExecuteTask();
72
+ {
73
+ TaskNotifier task_notifier {executor.context};
74
+ ExecuteTask();
75
+ }
74
76
  executor.FinishTask();
75
77
  return TaskExecutionResult::TASK_FINISHED;
76
78
  } catch (std::exception &ex) {
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/parser/query_node/cte_node.hpp"
2
2
  #include "duckdb/common/serializer/serializer.hpp"
3
3
  #include "duckdb/common/serializer/deserializer.hpp"
4
+ #include "duckdb/parser/statement/select_statement.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
@@ -39,4 +40,82 @@ unique_ptr<QueryNode> CTENode::Copy() const {
39
40
  return std::move(result);
40
41
  }
41
42
 
43
+ // TEMPORARY BUGFIX WARNING - none of this code should make it into main - this is a temporary work-around for v1.4
44
+ // TEMPORARY BUGFIX START
45
+ // the below code fixes backwards and forwards compatibility of CTEs with the somewhat broken version of CTEs in v1.4
46
+ // all of this code has been made obsolete with the CTE binding rework
47
+ void QueryNode::ExtractCTENodes(unique_ptr<QueryNode> &query_node) {
48
+ if (query_node->cte_map.map.empty()) {
49
+ return;
50
+ }
51
+ vector<unique_ptr<CTENode>> materialized_ctes;
52
+ for (auto &cte : query_node->cte_map.map) {
53
+ auto &cte_entry = cte.second;
54
+ auto mat_cte = make_uniq<CTENode>();
55
+ mat_cte->ctename = cte.first;
56
+ mat_cte->query = cte_entry->query->node->Copy();
57
+ mat_cte->aliases = cte_entry->aliases;
58
+ mat_cte->materialized = cte_entry->materialized;
59
+ materialized_ctes.push_back(std::move(mat_cte));
60
+ }
61
+
62
+ auto root = std::move(query_node);
63
+ while (!materialized_ctes.empty()) {
64
+ unique_ptr<CTENode> node_result;
65
+ node_result = std::move(materialized_ctes.back());
66
+ node_result->cte_map = root->cte_map.Copy();
67
+ node_result->child = std::move(root);
68
+ root = std::move(node_result);
69
+ materialized_ctes.pop_back();
70
+ }
71
+ query_node = std::move(root);
72
+ }
73
+
74
+ void EraseDuplicateCTE(unique_ptr<QueryNode> &node, const string &ctename) {
75
+ if (node->type != QueryNodeType::CTE_NODE) {
76
+ // not a CTE
77
+ return;
78
+ }
79
+ auto &cte_node = node->Cast<CTENode>();
80
+ if (cte_node.ctename == ctename) {
81
+ // duplicate CTE - erase this CTE node
82
+ node = std::move(cte_node.child);
83
+ EraseDuplicateCTE(node, ctename);
84
+ } else {
85
+ // not a duplicate - recurse into child
86
+ EraseDuplicateCTE(cte_node.child, ctename);
87
+ }
88
+ }
89
+
90
+ void CTENode::Serialize(Serializer &serializer) const {
91
+ if (materialized != CTEMaterialize::CTE_MATERIALIZE_ALWAYS) {
92
+ // for non-materialized CTEs - don't serialize CTENode
93
+ // older DuckDB versions only expect a CTENode to be there for materialized CTEs
94
+ child->Serialize(serializer);
95
+ return;
96
+ }
97
+ auto child_copy = child->Copy();
98
+ EraseDuplicateCTE(child_copy, ctename);
99
+
100
+ QueryNode::Serialize(serializer);
101
+ serializer.WritePropertyWithDefault<string>(200, "cte_name", ctename);
102
+ serializer.WritePropertyWithDefault<unique_ptr<QueryNode>>(201, "query", query);
103
+ serializer.WritePropertyWithDefault<unique_ptr<QueryNode>>(202, "child", child_copy);
104
+ serializer.WritePropertyWithDefault<vector<string>>(203, "aliases", aliases);
105
+ }
106
+
107
+ unique_ptr<QueryNode> CTENode::Deserialize(Deserializer &deserializer) {
108
+ auto result = duckdb::unique_ptr<CTENode>(new CTENode());
109
+ deserializer.ReadPropertyWithDefault<string>(200, "cte_name", result->ctename);
110
+ deserializer.ReadPropertyWithDefault<unique_ptr<QueryNode>>(201, "query", result->query);
111
+ deserializer.ReadPropertyWithDefault<unique_ptr<QueryNode>>(202, "child", result->child);
112
+ deserializer.ReadPropertyWithDefault<vector<string>>(203, "aliases", result->aliases);
113
+ // v1.4.0 and v1.4.1 wrote this property - deserialize it for BC with these versions
114
+ deserializer.ReadPropertyWithExplicitDefault<CTEMaterialize>(204, "materialized", result->materialized,
115
+ CTEMaterialize::CTE_MATERIALIZE_DEFAULT);
116
+ return std::move(result->child);
117
+ }
118
+ // TEMPORARY BUGFIX WARNING - none of this code should make it into main - this is a temporary work-around for v1.4
119
+ // TEMPORARY BUGFIX END
120
+
42
121
  } // namespace duckdb
@@ -21,7 +21,9 @@ unique_ptr<ParsedExpression> Transformer::TransformTypeCast(duckdb_libpgquery::P
21
21
  parameters.query_location = NumericCast<idx_t>(root.location);
22
22
  }
23
23
  auto blob_data = Blob::ToBlob(string(c->val.val.str), parameters);
24
- return make_uniq<ConstantExpression>(Value::BLOB_RAW(blob_data));
24
+ auto result = make_uniq<ConstantExpression>(Value::BLOB_RAW(blob_data));
25
+ SetQueryLocation(*result, root.location);
26
+ return std::move(result);
25
27
  }
26
28
  }
27
29
  // transform the expression node
@@ -98,6 +98,7 @@ void ExpressionBinder::UnfoldMacroExpression(FunctionExpression &function, Scala
98
98
  // validate the arguments and separate positional and default arguments
99
99
  vector<unique_ptr<ParsedExpression>> positional_arguments;
100
100
  InsertionOrderPreservingMap<unique_ptr<ParsedExpression>> named_arguments;
101
+ binder.lambda_bindings = lambda_bindings;
101
102
  auto bind_result = MacroFunction::BindMacroFunction(binder, macro_func.macros, macro_func.name, function,
102
103
  positional_arguments, named_arguments, depth);
103
104
  if (!bind_result.error.empty()) {
@@ -40,10 +40,18 @@ static void VerifyCompressionType(ClientContext &context, optional_ptr<StorageMa
40
40
  auto &base = info.base->Cast<CreateTableInfo>();
41
41
  for (auto &col : base.columns.Logical()) {
42
42
  auto compression_type = col.CompressionType();
43
- if (CompressionTypeIsDeprecated(compression_type, storage_manager)) {
44
- throw BinderException("Can't compress using user-provided compression type '%s', that type is deprecated "
45
- "and only has decompress support",
46
- CompressionTypeToString(compression_type));
43
+ auto compression_availability_result = CompressionTypeIsAvailable(compression_type, storage_manager);
44
+ if (!compression_availability_result.IsAvailable()) {
45
+ if (compression_availability_result.IsDeprecated()) {
46
+ throw BinderException(
47
+ "Can't compress using user-provided compression type '%s', that type is deprecated "
48
+ "and only has decompress support",
49
+ CompressionTypeToString(compression_type));
50
+ } else {
51
+ throw BinderException(
52
+ "Can't compress using user-provided compression type '%s', that type is not available yet",
53
+ CompressionTypeToString(compression_type));
54
+ }
47
55
  }
48
56
  auto logical_type = col.GetType();
49
57
  if (logical_type.id() == LogicalTypeId::USER && logical_type.HasAlias()) {
@@ -277,7 +277,7 @@ unique_ptr<MergeIntoStatement> Binder::GenerateMergeInto(InsertStatement &stmt,
277
277
  auto storage_info = table.GetStorageInfo(context);
278
278
  auto &columns = table.GetColumns();
279
279
  // set up the columns on which to join
280
- vector<string> distinct_on_columns;
280
+ vector<vector<string>> all_distinct_on_columns;
281
281
  if (on_conflict_info.indexed_columns.empty()) {
282
282
  // When omitting the conflict target, we derive the join columns from the primary key/unique constraints
283
283
  // traverse the primary key/unique constraints
@@ -292,6 +292,7 @@ unique_ptr<MergeIntoStatement> Binder::GenerateMergeInto(InsertStatement &stmt,
292
292
 
293
293
  vector<unique_ptr<ParsedExpression>> and_children;
294
294
  auto &indexed_columns = index.column_set;
295
+ vector<string> distinct_on_columns;
295
296
  for (auto &column : columns.Physical()) {
296
297
  if (!indexed_columns.count(column.Physical().index)) {
297
298
  continue;
@@ -303,6 +304,7 @@ unique_ptr<MergeIntoStatement> Binder::GenerateMergeInto(InsertStatement &stmt,
303
304
  and_children.push_back(std::move(new_condition));
304
305
  distinct_on_columns.push_back(column.Name());
305
306
  }
307
+ all_distinct_on_columns.push_back(std::move(distinct_on_columns));
306
308
  if (and_children.empty()) {
307
309
  continue;
308
310
  }
@@ -377,7 +379,7 @@ unique_ptr<MergeIntoStatement> Binder::GenerateMergeInto(InsertStatement &stmt,
377
379
  throw BinderException("The specified columns as conflict target are not referenced by a UNIQUE/PRIMARY KEY "
378
380
  "CONSTRAINT or INDEX");
379
381
  }
380
- distinct_on_columns = on_conflict_info.indexed_columns;
382
+ all_distinct_on_columns.push_back(on_conflict_info.indexed_columns);
381
383
  merge_into->using_columns = std::move(on_conflict_info.indexed_columns);
382
384
  }
383
385
 
@@ -445,17 +447,19 @@ unique_ptr<MergeIntoStatement> Binder::GenerateMergeInto(InsertStatement &stmt,
445
447
  }
446
448
  }
447
449
  // push DISTINCT ON(unique_columns)
448
- auto distinct_stmt = make_uniq<SelectStatement>();
449
- auto select_node = make_uniq<SelectNode>();
450
- auto distinct = make_uniq<DistinctModifier>();
451
- for (auto &col : distinct_on_columns) {
452
- distinct->distinct_on_targets.push_back(make_uniq<ColumnRefExpression>(col));
450
+ for (auto &distinct_on_columns : all_distinct_on_columns) {
451
+ auto distinct_stmt = make_uniq<SelectStatement>();
452
+ auto select_node = make_uniq<SelectNode>();
453
+ auto distinct = make_uniq<DistinctModifier>();
454
+ for (auto &col : distinct_on_columns) {
455
+ distinct->distinct_on_targets.push_back(make_uniq<ColumnRefExpression>(col));
456
+ }
457
+ select_node->modifiers.push_back(std::move(distinct));
458
+ select_node->select_list.push_back(make_uniq<StarExpression>());
459
+ select_node->from_table = std::move(source);
460
+ distinct_stmt->node = std::move(select_node);
461
+ source = make_uniq<SubqueryRef>(std::move(distinct_stmt), "excluded");
453
462
  }
454
- select_node->modifiers.push_back(std::move(distinct));
455
- select_node->select_list.push_back(make_uniq<StarExpression>());
456
- select_node->from_table = std::move(source);
457
- distinct_stmt->node = std::move(select_node);
458
- source = make_uniq<SubqueryRef>(std::move(distinct_stmt), "excluded");
459
463
 
460
464
  merge_into->source = std::move(source);
461
465
 
@@ -41,10 +41,20 @@ unique_ptr<BoundMergeIntoAction> Binder::BindMergeAction(LogicalMergeInto &merge
41
41
  auto result = make_uniq<BoundMergeIntoAction>();
42
42
  result->action_type = action.action_type;
43
43
  if (action.condition) {
44
- ProjectionBinder proj_binder(*this, context, proj_index, expressions, "WHERE clause");
45
- proj_binder.target_type = LogicalType::BOOLEAN;
46
- auto cond = proj_binder.Bind(action.condition);
47
- result->condition = std::move(cond);
44
+ if (action.condition->HasSubquery()) {
45
+ // if we have a subquery we need to execute the condition outside of the MERGE INTO statement
46
+ WhereBinder where_binder(*this, context);
47
+ auto cond = where_binder.Bind(action.condition);
48
+ PlanSubqueries(cond, root);
49
+ result->condition =
50
+ make_uniq<BoundColumnRefExpression>(cond->return_type, ColumnBinding(proj_index, expressions.size()));
51
+ expressions.push_back(std::move(cond));
52
+ } else {
53
+ ProjectionBinder proj_binder(*this, context, proj_index, expressions, "WHERE clause");
54
+ proj_binder.target_type = LogicalType::BOOLEAN;
55
+ auto cond = proj_binder.Bind(action.condition);
56
+ result->condition = std::move(cond);
57
+ }
48
58
  }
49
59
  switch (action.action_type) {
50
60
  case MergeActionType::MERGE_UPDATE: {
@@ -173,6 +183,32 @@ void RewriteMergeBindings(LogicalOperator &op, const vector<ColumnBinding> &sour
173
183
  op, [&](unique_ptr<Expression> *child) { RewriteMergeBindings(*child, source_bindings, new_table_index); });
174
184
  }
175
185
 
186
+ LogicalGet &ExtractLogicalGet(LogicalOperator &op) {
187
+ reference<LogicalOperator> current_op(op);
188
+ while (current_op.get().type == LogicalOperatorType::LOGICAL_FILTER) {
189
+ current_op = *current_op.get().children[0];
190
+ }
191
+ if (current_op.get().type != LogicalOperatorType::LOGICAL_GET) {
192
+ throw InvalidInputException("BindMerge - expected to find an operator of type LOGICAL_GET but got %s",
193
+ op.ToString());
194
+ }
195
+ return current_op.get().Cast<LogicalGet>();
196
+ }
197
+
198
+ void CheckMergeAction(MergeActionCondition condition, MergeActionType action_type) {
199
+ if (condition == MergeActionCondition::WHEN_NOT_MATCHED_BY_TARGET) {
200
+ switch (action_type) {
201
+ case MergeActionType::MERGE_UPDATE:
202
+ case MergeActionType::MERGE_DELETE:
203
+ throw ParserException("WHEN NOT MATCHED (BY TARGET) cannot be combined with UPDATE or DELETE actions - as "
204
+ "there is no corresponding row in the target to update or delete.\nDid you mean to "
205
+ "use WHEN MATCHED or WHEN NOT MATCHED BY SOURCE?");
206
+ default:
207
+ break;
208
+ }
209
+ }
210
+ }
211
+
176
212
  BoundStatement Binder::Bind(MergeIntoStatement &stmt) {
177
213
  // bind the target table
178
214
  auto target_binder = Binder::CreateBinder(context, this);
@@ -243,7 +279,7 @@ BoundStatement Binder::Bind(MergeIntoStatement &stmt) {
243
279
  // kind of hacky, CreatePlan turns a RIGHT join into a LEFT join so the children get reversed from what we need
244
280
  bool inverted = join.type == JoinType::RIGHT;
245
281
  auto &source = join_ref.get().children[inverted ? 1 : 0];
246
- auto &get = join_ref.get().children[inverted ? 0 : 1]->Cast<LogicalGet>();
282
+ auto &get = ExtractLogicalGet(*join_ref.get().children[inverted ? 0 : 1]);
247
283
 
248
284
  auto merge_into = make_uniq<LogicalMergeInto>(table);
249
285
  merge_into->table_index = GenerateTableIndex();
@@ -265,6 +301,7 @@ BoundStatement Binder::Bind(MergeIntoStatement &stmt) {
265
301
  for (auto &entry : stmt.actions) {
266
302
  vector<unique_ptr<BoundMergeIntoAction>> bound_actions;
267
303
  for (auto &action : entry.second) {
304
+ CheckMergeAction(entry.first, action->action_type);
268
305
  bound_actions.push_back(BindMergeAction(*merge_into, table, get, proj_index, projection_expressions, root,
269
306
  *action, source_aliases, source_names));
270
307
  }
@@ -326,30 +326,6 @@ unique_ptr<BoundTableRef> Binder::Bind(BaseTableRef &ref) {
326
326
 
327
327
  // The view may contain CTEs, but maybe only in the cte_map, so we need create CTE nodes for them
328
328
  auto query = view_catalog_entry.GetQuery().Copy();
329
- auto &select_stmt = query->Cast<SelectStatement>();
330
-
331
- vector<unique_ptr<CTENode>> materialized_ctes;
332
- for (auto &cte : select_stmt.node->cte_map.map) {
333
- auto &cte_entry = cte.second;
334
- auto mat_cte = make_uniq<CTENode>();
335
- mat_cte->ctename = cte.first;
336
- mat_cte->query = cte_entry->query->node->Copy();
337
- mat_cte->aliases = cte_entry->aliases;
338
- mat_cte->materialized = cte_entry->materialized;
339
- materialized_ctes.push_back(std::move(mat_cte));
340
- }
341
-
342
- auto root = std::move(select_stmt.node);
343
- while (!materialized_ctes.empty()) {
344
- unique_ptr<CTENode> node_result;
345
- node_result = std::move(materialized_ctes.back());
346
- node_result->cte_map = root->cte_map.Copy();
347
- node_result->child = std::move(root);
348
- root = std::move(node_result);
349
- materialized_ctes.pop_back();
350
- }
351
- select_stmt.node = std::move(root);
352
-
353
329
  SubqueryRef subquery(unique_ptr_cast<SQLStatement, SelectStatement>(std::move(query)));
354
330
 
355
331
  subquery.alias = ref.alias;
@@ -147,7 +147,7 @@ bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_functi
147
147
  MoveCorrelatedExpressions(*subquery->binder);
148
148
  seen_subquery = true;
149
149
  arguments.emplace_back(LogicalTypeId::TABLE);
150
- parameters.emplace_back(Value());
150
+ parameters.emplace_back();
151
151
  continue;
152
152
  }
153
153
 
@@ -469,7 +469,6 @@ optional_ptr<Binding> Binder::GetMatchingBinding(const string &catalog_name, con
469
469
  const string &table_name, const string &column_name,
470
470
  ErrorData &error) {
471
471
  optional_ptr<Binding> binding;
472
- D_ASSERT(!lambda_bindings);
473
472
  if (macro_binding && table_name == macro_binding->GetAlias()) {
474
473
  binding = optional_ptr<Binding>(macro_binding.get());
475
474
  } else {
@@ -3,7 +3,6 @@
3
3
  #include "duckdb/parser/expression/columnref_expression.hpp"
4
4
  #include "duckdb/parser/expression/window_expression.hpp"
5
5
  #include "duckdb/planner/binder.hpp"
6
- #include "duckdb/planner/expression_binder/aggregate_binder.hpp"
7
6
  #include "duckdb/common/string_util.hpp"
8
7
  #include "duckdb/planner/query_node/bound_select_node.hpp"
9
8
 
@@ -91,7 +90,7 @@ BindResult HavingBinder::BindColumnRef(unique_ptr<ParsedExpression> &expr_ptr, i
91
90
  }
92
91
 
93
92
  BindResult HavingBinder::BindWindow(WindowExpression &expr, idx_t depth) {
94
- return BindResult(BinderException::Unsupported(expr, "HAVING clause cannot contain window functions!"));
93
+ throw BinderException::Unsupported(expr, "HAVING clause cannot contain window functions!");
95
94
  }
96
95
 
97
96
  } // namespace duckdb
@@ -34,19 +34,32 @@ shared_ptr<BlockHandle> BlockManager::RegisterBlock(block_id_t block_id) {
34
34
  }
35
35
 
36
36
  shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(QueryContext context, block_id_t block_id,
37
- shared_ptr<BlockHandle> old_block, BufferHandle old_handle) {
37
+ shared_ptr<BlockHandle> old_block, BufferHandle old_handle,
38
+ ConvertToPersistentMode mode) {
38
39
  // register a block with the new block id
39
40
  auto new_block = RegisterBlock(block_id);
40
41
  D_ASSERT(new_block->GetState() == BlockState::BLOCK_UNLOADED);
41
42
  D_ASSERT(new_block->Readers() == 0);
42
43
 
44
+ if (mode == ConvertToPersistentMode::THREAD_SAFE) {
45
+ // safe mode - create a copy of the old block and operate on that
46
+ // this ensures we don't modify the old block - which allows other concurrent operations on the old block to
47
+ // continue
48
+ auto old_block_copy = buffer_manager.AllocateMemory(old_block->GetMemoryTag(), this, false);
49
+ auto copy_pin = buffer_manager.Pin(old_block_copy);
50
+ memcpy(copy_pin.Ptr(), old_handle.Ptr(), GetBlockSize());
51
+ old_block = std::move(old_block_copy);
52
+ old_handle = std::move(copy_pin);
53
+ }
54
+
43
55
  auto lock = old_block->GetLock();
44
56
  D_ASSERT(old_block->GetState() == BlockState::BLOCK_LOADED);
45
57
  D_ASSERT(old_block->GetBuffer(lock));
46
58
  if (old_block->Readers() > 1) {
47
- throw InternalException("BlockManager::ConvertToPersistent - cannot be called for block %d as old_block has "
48
- "multiple readers active",
49
- block_id);
59
+ throw InternalException(
60
+ "BlockManager::ConvertToPersistent in destructive mode - cannot be called for block %d as old_block has "
61
+ "multiple readers active",
62
+ block_id);
50
63
  }
51
64
 
52
65
  // Temp buffers can be larger than the storage block size.
@@ -76,10 +89,11 @@ shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(QueryContext context,
76
89
  }
77
90
 
78
91
  shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(QueryContext context, block_id_t block_id,
79
- shared_ptr<BlockHandle> old_block) {
92
+ shared_ptr<BlockHandle> old_block,
93
+ ConvertToPersistentMode mode) {
80
94
  // pin the old block to ensure we have it loaded in memory
81
95
  auto handle = buffer_manager.Pin(old_block);
82
- return ConvertToPersistent(context, block_id, std::move(old_block), std::move(handle));
96
+ return ConvertToPersistent(context, block_id, std::move(old_block), std::move(handle), mode);
83
97
  }
84
98
 
85
99
  void BlockManager::UnregisterBlock(block_id_t id) {
@@ -4,6 +4,7 @@
4
4
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
5
5
  #include "duckdb/common/serializer/binary_serializer.hpp"
6
6
  #include "duckdb/main/database.hpp"
7
+ #include "duckdb/main/settings.hpp"
7
8
  #include "duckdb/parallel/task_scheduler.hpp"
8
9
  #include "duckdb/storage/table/column_checkpoint_state.hpp"
9
10
  #include "duckdb/storage/table/table_statistics.hpp"
@@ -119,15 +120,16 @@ void SingleFileTableDataWriter::FinalizeTable(const TableStatistics &global_stat
119
120
  }
120
121
  auto index_storage_infos = info.GetIndexes().SerializeToDisk(context, options);
121
122
 
122
- #ifdef DUCKDB_BLOCK_VERIFICATION
123
- for (auto &entry : index_storage_infos) {
124
- for (auto &allocator : entry.allocator_infos) {
125
- for (auto &block : allocator.block_pointers) {
126
- checkpoint_manager.verify_block_usage_count[block.block_id]++;
123
+ auto debug_verify_blocks = DBConfig::GetSetting<DebugVerifyBlocksSetting>(GetDatabase());
124
+ if (debug_verify_blocks) {
125
+ for (auto &entry : index_storage_infos) {
126
+ for (auto &allocator : entry.allocator_infos) {
127
+ for (auto &block : allocator.block_pointers) {
128
+ checkpoint_manager.verify_block_usage_count[block.block_id]++;
129
+ }
127
130
  }
128
131
  }
129
132
  }
130
- #endif
131
133
 
132
134
  // write empty block pointers for forwards compatibility
133
135
  vector<BlockPointer> compat_block_pointers;
@@ -214,33 +214,35 @@ void SingleFileCheckpointWriter::CreateCheckpoint() {
214
214
  header.vector_size = STANDARD_VECTOR_SIZE;
215
215
  block_manager.WriteHeader(context, header);
216
216
 
217
- #ifdef DUCKDB_BLOCK_VERIFICATION
218
- // extend verify_block_usage_count
219
- auto metadata_info = storage_manager.GetMetadataInfo();
220
- for (auto &info : metadata_info) {
221
- verify_block_usage_count[info.block_id]++;
222
- }
223
- for (auto &entry_ref : catalog_entries) {
224
- auto &entry = entry_ref.get();
225
- if (entry.type == CatalogType::TABLE_ENTRY) {
226
- auto &table = entry.Cast<DuckTableEntry>();
227
- auto &storage = table.GetStorage();
228
- auto segment_info = storage.GetColumnSegmentInfo();
229
- for (auto &segment : segment_info) {
230
- verify_block_usage_count[segment.block_id]++;
231
- if (StringUtil::Contains(segment.segment_info, "Overflow String Block Ids: ")) {
232
- auto overflow_blocks = StringUtil::Replace(segment.segment_info, "Overflow String Block Ids: ", "");
233
- auto splits = StringUtil::Split(overflow_blocks, ", ");
234
- for (auto &split : splits) {
235
- auto overflow_block_id = std::stoll(split);
236
- verify_block_usage_count[overflow_block_id]++;
217
+ auto debug_verify_blocks = DBConfig::GetSetting<DebugVerifyBlocksSetting>(db.GetDatabase());
218
+ if (debug_verify_blocks) {
219
+ // extend verify_block_usage_count
220
+ auto metadata_info = storage_manager.GetMetadataInfo();
221
+ for (auto &info : metadata_info) {
222
+ verify_block_usage_count[info.block_id]++;
223
+ }
224
+ for (auto &entry_ref : catalog_entries) {
225
+ auto &entry = entry_ref.get();
226
+ if (entry.type == CatalogType::TABLE_ENTRY) {
227
+ auto &table = entry.Cast<DuckTableEntry>();
228
+ auto &storage = table.GetStorage();
229
+ auto segment_info = storage.GetColumnSegmentInfo();
230
+ for (auto &segment : segment_info) {
231
+ verify_block_usage_count[segment.block_id]++;
232
+ if (StringUtil::Contains(segment.segment_info, "Overflow String Block Ids: ")) {
233
+ auto overflow_blocks =
234
+ StringUtil::Replace(segment.segment_info, "Overflow String Block Ids: ", "");
235
+ auto splits = StringUtil::Split(overflow_blocks, ", ");
236
+ for (auto &split : splits) {
237
+ auto overflow_block_id = std::stoll(split);
238
+ verify_block_usage_count[overflow_block_id]++;
239
+ }
237
240
  }
238
241
  }
239
242
  }
240
243
  }
244
+ block_manager.VerifyBlocks(verify_block_usage_count);
241
245
  }
242
- block_manager.VerifyBlocks(verify_block_usage_count);
243
- #endif
244
246
 
245
247
  if (debug_checkpoint_abort == CheckpointAbort::DEBUG_ABORT_BEFORE_TRUNCATE) {
246
248
  throw FatalException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
@@ -287,6 +287,13 @@ void ValidityUncompressed::UnalignedScan(data_ptr_t input, idx_t input_size, idx
287
287
  // otherwise the subsequent bitwise & will modify values outside of the range of values we want to alter
288
288
  input_mask |= ValidityUncompressed::UPPER_MASKS[shift_amount];
289
289
 
290
+ if (pos == 0) {
291
+ // We also need to set the lower bits, which are to the left of the relevant bits (x), to 1
292
+ // These are the bits that are "behind" this scan window, and should not affect this scan
293
+ auto non_relevant_mask = ValidityUncompressed::LOWER_MASKS[result_idx];
294
+ input_mask |= non_relevant_mask;
295
+ }
296
+
290
297
  // after this, we move to the next input_entry
291
298
  offset = ValidityMask::BITS_PER_VALUE - input_idx;
292
299
  input_entry++;