duckdb 0.7.2-dev1803.0 → 0.7.2-dev1898.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/src/catalog/catalog.cpp +27 -27
  3. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +6 -6
  4. package/src/duckdb/src/catalog/catalog_set.cpp +27 -25
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +6 -6
  6. package/src/duckdb/src/catalog/default/default_types.cpp +4 -4
  7. package/src/duckdb/src/catalog/default/default_views.cpp +4 -4
  8. package/src/duckdb/src/catalog/dependency_list.cpp +7 -6
  9. package/src/duckdb/src/catalog/dependency_manager.cpp +44 -38
  10. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +11 -6
  11. package/src/duckdb/src/common/sort/sorted_block.cpp +9 -4
  12. package/src/duckdb/src/common/types/batched_data_collection.cpp +2 -1
  13. package/src/duckdb/src/common/types/column_data_allocator.cpp +1 -0
  14. package/src/duckdb/src/common/types/vector.cpp +2 -2
  15. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +14 -11
  16. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  17. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +51 -50
  18. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +4 -0
  19. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +14 -13
  20. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +20 -20
  21. package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +2 -2
  22. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +1 -1
  23. package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +3 -3
  24. package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
  25. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
  26. package/src/duckdb/src/execution/physical_plan/plan_update.cpp +1 -1
  27. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +172 -63
  28. package/src/duckdb/src/function/cast/cast_function_set.cpp +2 -1
  29. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +15 -9
  30. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +29 -29
  31. package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +106 -0
  32. package/src/duckdb/src/function/scalar/string/regexp.cpp +145 -28
  33. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  34. package/src/duckdb/src/function/table/checkpoint.cpp +4 -4
  35. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +24 -24
  36. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +7 -6
  37. package/src/duckdb/src/function/table/system/duckdb_databases.cpp +1 -1
  38. package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +11 -11
  39. package/src/duckdb/src/function/table/system/pragma_database_size.cpp +1 -1
  40. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +17 -18
  41. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  42. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  43. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +9 -9
  44. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_map.hpp +38 -0
  45. package/src/duckdb/src/include/duckdb/catalog/catalog_transaction.hpp +4 -3
  46. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +2 -2
  47. package/src/duckdb/src/include/duckdb/catalog/default/default_types.hpp +2 -2
  48. package/src/duckdb/src/include/duckdb/catalog/default/default_views.hpp +2 -2
  49. package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +4 -5
  50. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +4 -5
  51. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +10 -9
  52. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -1
  53. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -0
  55. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +29 -6
  56. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +6 -5
  57. package/src/duckdb/src/include/duckdb/common/serializer.hpp +1 -1
  58. package/src/duckdb/src/include/duckdb/common/types/row_data_collection.hpp +1 -0
  59. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +5 -5
  61. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +2 -2
  62. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +3 -2
  63. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  64. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  65. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -3
  66. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -2
  67. package/src/duckdb/src/include/duckdb/main/settings.hpp +19 -0
  68. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +7 -7
  69. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +11 -11
  70. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +8 -8
  71. package/src/duckdb/src/include/duckdb/optimizer/rule/arithmetic_simplification.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/optimizer/rule/case_simplification.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/optimizer/rule/comparison_simplification.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/optimizer/rule/conjunction_simplification.hpp +2 -2
  75. package/src/duckdb/src/include/duckdb/optimizer/rule/constant_folding.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/optimizer/rule/date_part_simplification.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/optimizer/rule/distributivity.hpp +1 -1
  78. package/src/duckdb/src/include/duckdb/optimizer/rule/empty_needle_removal.hpp +1 -1
  79. package/src/duckdb/src/include/duckdb/optimizer/rule/enum_comparison.hpp +1 -1
  80. package/src/duckdb/src/include/duckdb/optimizer/rule/equal_or_null_simplification.hpp +1 -1
  81. package/src/duckdb/src/include/duckdb/optimizer/rule/in_clause_simplification.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/optimizer/rule/like_optimizations.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/optimizer/rule/move_constants.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +1 -1
  85. package/src/duckdb/src/include/duckdb/optimizer/rule/regex_optimizations.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +1 -1
  88. package/src/duckdb/src/include/duckdb/parser/expression_map.hpp +19 -6
  89. package/src/duckdb/src/include/duckdb/parser/expression_util.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -7
  91. package/src/duckdb/src/include/duckdb/parser/parser_options.hpp +23 -0
  92. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -3
  93. package/src/duckdb/src/include/duckdb/planner/expression.hpp +5 -2
  94. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -1
  95. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +3 -3
  96. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +10 -2
  97. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +1 -0
  98. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +49 -126
  99. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +5 -5
  100. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +159 -0
  101. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -0
  102. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -5
  103. package/src/duckdb/src/main/client_context.cpp +5 -3
  104. package/src/duckdb/src/main/config.cpp +2 -0
  105. package/src/duckdb/src/main/database.cpp +2 -1
  106. package/src/duckdb/src/main/database_manager.cpp +4 -4
  107. package/src/duckdb/src/main/settings/settings.cpp +36 -0
  108. package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +2 -2
  109. package/src/duckdb/src/optimizer/cse_optimizer.cpp +4 -4
  110. package/src/duckdb/src/optimizer/deliminator.cpp +13 -11
  111. package/src/duckdb/src/optimizer/expression_rewriter.cpp +2 -2
  112. package/src/duckdb/src/optimizer/filter_combiner.cpp +67 -65
  113. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -0
  114. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +26 -25
  115. package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +23 -21
  116. package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +7 -6
  117. package/src/duckdb/src/optimizer/rule/case_simplification.cpp +2 -2
  118. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +6 -7
  119. package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +9 -8
  120. package/src/duckdb/src/optimizer/rule/constant_folding.cpp +7 -7
  121. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +3 -3
  122. package/src/duckdb/src/optimizer/rule/distributivity.cpp +5 -5
  123. package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +6 -6
  124. package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +4 -4
  125. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +23 -26
  126. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +2 -3
  127. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -3
  128. package/src/duckdb/src/optimizer/rule/move_constants.cpp +6 -6
  129. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +2 -2
  130. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +3 -3
  131. package/src/duckdb/src/parser/expression_util.cpp +6 -6
  132. package/src/duckdb/src/parser/parser.cpp +1 -1
  133. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +7 -3
  134. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +3 -3
  135. package/src/duckdb/src/parser/transformer.cpp +6 -5
  136. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -2
  137. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +3 -3
  138. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +5 -5
  139. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +2 -2
  140. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
  141. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +3 -3
  142. package/src/duckdb/src/storage/buffer/block_handle.cpp +3 -2
  143. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -1
  144. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -0
  145. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +3 -0
  146. package/src/duckdb/src/storage/buffer_manager.cpp +35 -726
  147. package/src/duckdb/src/storage/checkpoint_manager.cpp +2 -2
  148. package/src/duckdb/src/storage/meta_block_reader.cpp +6 -5
  149. package/src/duckdb/src/storage/standard_buffer_manager.cpp +801 -0
  150. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  151. package/src/duckdb/src/transaction/meta_transaction.cpp +13 -13
  152. package/src/duckdb/src/transaction/transaction.cpp +1 -1
  153. package/src/duckdb/src/transaction/transaction_context.cpp +1 -1
  154. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +949 -947
  155. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +16431 -16385
  156. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +503 -493
  157. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  158. package/src/duckdb/ub_src_storage.cpp +2 -0
@@ -0,0 +1,106 @@
1
+ #include "duckdb/function/scalar/string_functions.hpp"
2
+ #include "duckdb/common/map.hpp"
3
+ #include "duckdb/common/vector.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ // Using Lowrance-Wagner (LW) algorithm: https://doi.org/10.1145%2F321879.321880
8
+ // Can't be calculated as a trivial modification of the Levenshtein algorithm,
9
+ // as a transposition may involve characters that occurred earlier in the strings
10
+ static idx_t DamerauLevenshteinDistance(const string_t &source, const string_t &target) {
11
+ // costs associated with each type of edit, to aid readability
12
+ constexpr uint8_t COST_SUBSTITUTION = 1;
13
+ constexpr uint8_t COST_INSERTION = 1;
14
+ constexpr uint8_t COST_DELETION = 1;
15
+ constexpr uint8_t COST_TRANSPOSITION = 1;
16
+ const auto source_len = source.GetSize();
17
+ const auto target_len = target.GetSize();
18
+
19
+ // If one string is empty, the distance equals the length of the other string
20
+ // either through target_len insertions
21
+ // or source_len deletions
22
+ if (source_len == 0) {
23
+ return target_len * COST_INSERTION;
24
+ } else if (target_len == 0) {
25
+ return source_len * COST_DELETION;
26
+ }
27
+
28
+ const auto source_str = source.GetDataUnsafe();
29
+ const auto target_str = target.GetDataUnsafe();
30
+
31
+ // larger than the largest possible value:
32
+ const auto inf = source_len * COST_DELETION + target_len * COST_INSERTION + 1;
33
+ // minimum edit distance from prefix of source string to prefix of target string
34
+ // same object as H in LW paper (with indices offset by 1)
35
+ vector<vector<idx_t>> distance(source_len + 2, vector<idx_t>(target_len + 2, inf));
36
+ // for each character, the largest 1-based index in the source string (among rows already processed) holding it
37
+ // same as DA in LW paper
38
+ map<char, idx_t> largest_source_chr_matching;
39
+
40
+ // initialise row/column corresponding to zero-length strings
41
+ // partial string -> empty requires a deletion for each character
42
+ for (idx_t source_idx = 0; source_idx <= source_len; source_idx++) {
43
+ distance[source_idx + 1][1] = source_idx * COST_DELETION;
44
+ }
45
+ // and empty -> partial string means simply inserting characters
46
+ for (idx_t target_idx = 1; target_idx <= target_len; target_idx++) {
47
+ distance[1][target_idx + 1] = target_idx * COST_INSERTION;
48
+ }
49
+ // loop through string indices - these are offset by 2 from distance indices
50
+ for (idx_t source_idx = 0; source_idx < source_len; source_idx++) {
51
+ // largest 1-based index of the target string (within this row so far) matching the current source character
52
+ // same as DB in LW paper
53
+ idx_t largest_target_chr_matching;
54
+ largest_target_chr_matching = 0;
55
+ for (idx_t target_idx = 0; target_idx < target_len; target_idx++) {
56
+ // correspond to i1 and j1 in LW paper respectively
57
+ idx_t largest_source_chr_matching_target;
58
+ idx_t largest_target_chr_matching_source;
59
+ // cost associated with a diagonal shift in the distance matrix
60
+ // corresponds to d in LW paper
61
+ uint8_t cost_diagonal_shift;
62
+ largest_source_chr_matching_target = largest_source_chr_matching[target_str[target_idx]];
63
+ largest_target_chr_matching_source = largest_target_chr_matching;
64
+ // if characters match, diagonal move costs nothing and we update our largest target index
65
+ // otherwise move is substitution and costs as such
66
+ if (source_str[source_idx] == target_str[target_idx]) {
67
+ cost_diagonal_shift = 0;
68
+ largest_target_chr_matching = target_idx + 1;
69
+ } else {
70
+ cost_diagonal_shift = COST_SUBSTITUTION;
71
+ }
72
+ distance[source_idx + 2][target_idx + 2] = MinValue(
73
+ distance[source_idx + 1][target_idx + 1] + cost_diagonal_shift,
74
+ MinValue(distance[source_idx + 2][target_idx + 1] + COST_INSERTION,
75
+ MinValue(distance[source_idx + 1][target_idx + 2] + COST_DELETION,
76
+ distance[largest_source_chr_matching_target][largest_target_chr_matching_source] + // transposition candidate
77
+ (source_idx - largest_source_chr_matching_target) * COST_DELETION +
78
+ COST_TRANSPOSITION +
79
+ (target_idx - largest_target_chr_matching_source) * COST_INSERTION)));
80
+ }
81
+ largest_source_chr_matching[source_str[source_idx]] = source_idx + 1;
82
+ }
83
+ return distance[source_len + 1][target_len + 1]; // entry for the full source and full target
84
+ }
85
+
86
// Per-row wrapper: returns DamerauLevenshteinDistance(source, target) cast to int64_t.
// `result` is accepted but not used in this body.
+ static int64_t DamerauLevenshteinScalarFunction(Vector &result, const string_t source, const string_t target) {
87
+ return (int64_t)DamerauLevenshteinDistance(source, target);
88
+ }
89
+
90
// Chunk-level entry point: runs the scalar function over each pair taken from
// args.data[0] (source) and args.data[1] (target) and writes int64_t values into `result`.
// `state` is not used in this body.
+ static void DamerauLevenshteinFunction(DataChunk &args, ExpressionState &state, Vector &result) {
91
+ auto &source_vec = args.data[0];
92
+ auto &target_vec = args.data[1];
93
+
94
+ BinaryExecutor::Execute<string_t, string_t, int64_t>(
95
+ source_vec, target_vec, result, args.size(),
96
+ [&](string_t source, string_t target) { return DamerauLevenshteinScalarFunction(result, source, target); });
97
+ }
98
+
99
// Registers the "damerau_levenshtein" function set with a single overload:
// (VARCHAR, VARCHAR) -> BIGINT, implemented by DamerauLevenshteinFunction.
+ void DamerauLevenshteinFun::RegisterFunction(BuiltinFunctions &set) {
100
+ ScalarFunctionSet damerau_levenshtein("damerau_levenshtein");
101
+ damerau_levenshtein.AddFunction(ScalarFunction("damerau_levenshtein", {LogicalType::VARCHAR, LogicalType::VARCHAR},
102
+ LogicalType::BIGINT, DamerauLevenshteinFunction));
103
+ set.AddFunction(damerau_levenshtein);
104
+ }
105
+
106
+ } // namespace duckdb
@@ -218,6 +218,103 @@ bool RegexpExtractBindData::Equals(const FunctionData &other_p) const {
218
218
  return RegexpBaseBindData::Equals(other) && group_string == other.group_string;
219
219
  }
220
220
 
221
// Executes regexp_extract for a chunk: args.data[0] holds the input strings, args.data[1] the patterns.
// The bind data (RegexpExtractBindData) supplies constant_pattern, options and rewrite.
+ static void RegexExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {
222
+ auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
223
+ const auto &info = func_expr.bind_info->Cast<RegexpExtractBindData>();
224
+
225
+ auto &strings = args.data[0];
226
+ auto &patterns = args.data[1];
227
+ if (info.constant_pattern) {
// constant pattern: use the RE2 object held in the local function state for every row
228
+ auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>();
229
+ UnaryExecutor::Execute<string_t, string_t>(strings, result, args.size(), [&](string_t input) {
230
+ return Extract(input, result, lstate.constant_pattern, info.rewrite);
231
+ });
232
+ } else {
// non-constant pattern: an RE2 object is constructed from the pattern for every row
233
+ BinaryExecutor::Execute<string_t, string_t, string_t>(strings, patterns, result, args.size(),
234
+ [&](string_t input, string_t pattern) {
235
+ RE2 re(CreateStringPiece(pattern), info.options);
236
+ return Extract(input, result, re, info.rewrite);
237
+ });
238
+ }
239
+ }
240
+
241
+ //===--------------------------------------------------------------------===//
242
+ // Regexp Extract Struct
243
+ //===--------------------------------------------------------------------===//
244
// Executes the struct-returning regexp_extract: each child vector of the struct `result`
// receives the text of one capture group. Only args.data[0] (the input strings) is read here;
// matching always uses lstate.constant_pattern.
+ static void RegexExtractStructFunction(DataChunk &args, ExpressionState &state, Vector &result) {
245
+ auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>();
246
+
247
+ const auto count = args.size();
248
+ auto &input = args.data[0];
249
+
250
+ auto &child_entries = StructVector::GetEntries(result);
251
+ const auto groupSize = child_entries.size();
252
+ // Reference the 'input' StringBuffer, because we won't need to allocate new data
253
+ // for the result, all returned strings are substrings of the originals
254
+ for (auto &child_entry : child_entries) {
255
+ child_entry->SetAuxiliary(input.GetAuxiliary());
256
+ }
257
+
// One RE2::Arg per struct child, each bound to its own StringPiece in `ws`;
// `groups` holds the pointers handed to PartialMatchN below.
258
+ vector<RE2::Arg> argv(groupSize);
259
+ vector<RE2::Arg *> groups(groupSize);
260
+ vector<duckdb_re2::StringPiece> ws(groupSize);
261
+ for (size_t i = 0; i < groupSize; ++i) {
262
+ groups[i] = &argv[i];
263
+ argv[i] = &ws[i];
264
+ }
265
+
266
+ if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) {
// constant input: match once and emit a constant result
267
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
268
+
269
+ if (ConstantVector::IsNull(input)) {
270
+ ConstantVector::SetNull(result, true);
271
+ } else {
272
+ ConstantVector::SetNull(result, false);
273
+ auto idata = ConstantVector::GetData<string_t>(input);
274
+ auto str = CreateStringPiece(idata[0]);
275
+ auto match = duckdb_re2::RE2::PartialMatchN(str, lstate.constant_pattern, groups.data(), groups.size());
276
+ for (size_t col = 0; col < child_entries.size(); ++col) {
277
+ auto &child_entry = child_entries[col];
278
+ ConstantVector::SetNull(*child_entry, false);
279
+ auto &extracted = ws[col];
280
+ auto cdata = ConstantVector::GetData<string_t>(*child_entry);
281
+ cdata[0] = string_t(extracted.data(), match ? extracted.size() : 0); // length 0 when there is no match
282
+ }
283
+ }
284
+ } else {
285
+ UnifiedVectorFormat iunified;
286
+ input.ToUnifiedFormat(count, iunified);
287
+
288
+ const auto &ivalidity = iunified.validity;
289
+ auto idata = (const string_t *)iunified.data;
290
+
291
+ // Start with a valid flat vector
292
+ result.SetVectorType(VectorType::FLAT_VECTOR);
293
+
294
+ // Start with valid children
295
+ for (size_t col = 0; col < child_entries.size(); ++col) {
296
+ auto &child_entry = child_entries[col];
297
+ child_entry->SetVectorType(VectorType::FLAT_VECTOR);
298
+ }
299
+
300
+ for (idx_t i = 0; i < count; ++i) {
// input is read at the selection-mapped position `idx`; output is written at row `i`
301
+ const auto idx = iunified.sel->get_index(i);
302
+ if (ivalidity.RowIsValid(idx)) {
303
+ auto str = CreateStringPiece(idata[idx]);
304
+ auto match = duckdb_re2::RE2::PartialMatchN(str, lstate.constant_pattern, groups.data(), groups.size());
305
+ for (size_t col = 0; col < child_entries.size(); ++col) {
306
+ auto &child_entry = child_entries[col];
307
+ auto cdata = FlatVector::GetData<string_t>(*child_entry);
308
+ auto &extracted = ws[col];
309
+ cdata[i] = string_t(extracted.data(), match ? extracted.size() : 0); // length 0 when there is no match
310
+ }
311
+ } else {
312
+ FlatVector::SetNull(result, i, true); // invalid input row: mark the struct row NULL
313
+ }
314
+ }
315
+ }
316
+ }
317
+
221
318
  static unique_ptr<FunctionData> RegexExtractBind(ClientContext &context, ScalarFunction &bound_function,
222
319
  vector<unique_ptr<Expression>> &arguments) {
223
320
  D_ASSERT(arguments.size() >= 2);
@@ -227,52 +324,62 @@ static unique_ptr<FunctionData> RegexExtractBind(ClientContext &context, ScalarF
227
324
  string constant_string;
228
325
  bool constant_pattern = TryParseConstantPattern(context, *arguments[1], constant_string);
229
326
 
230
- string group_string = "";
327
+ if (arguments.size() >= 4) {
328
+ ParseRegexOptions(context, *arguments[3], options);
329
+ }
330
+
331
+ string group_string = "\\0";
231
332
  if (arguments.size() >= 3) {
232
333
  if (arguments[2]->HasParameter()) {
233
334
  throw ParameterNotResolvedException();
234
335
  }
235
336
  if (!arguments[2]->IsFoldable()) {
236
- throw InvalidInputException("Group index field field must be a constant!");
337
+ throw InvalidInputException("Group specification field must be a constant!");
237
338
  }
238
339
  Value group = ExpressionExecutor::EvaluateScalar(context, *arguments[2]);
239
- if (!group.IsNull()) {
340
+ if (group.IsNull()) {
341
+ group_string = "";
342
+ } else if (group.type().id() == LogicalTypeId::LIST) {
343
+ if (!constant_pattern) {
344
+ throw BinderException("%s with LIST requires a constant pattern", bound_function.name);
345
+ }
346
+ auto &list_children = ListValue::GetChildren(group);
347
+ if (list_children.empty()) {
348
+ throw BinderException("%s requires non-empty lists of capture names", bound_function.name);
349
+ }
350
+ case_insensitive_set_t name_collision_set;
351
+ child_list_t<LogicalType> struct_children;
352
+ for (const auto &child : list_children) {
353
+ if (child.IsNull()) {
354
+ throw BinderException("NULL group name in %s", bound_function.name);
355
+ }
356
+ const auto group_name = child.ToString();
357
+ if (name_collision_set.find(group_name) != name_collision_set.end()) {
358
+ throw BinderException("Duplicate group name \"%s\" in %s", group_name, bound_function.name);
359
+ }
360
+ name_collision_set.insert(group_name);
361
+ struct_children.emplace_back(make_pair(group_name, LogicalType::VARCHAR));
362
+ }
363
+ bound_function.return_type = LogicalType::STRUCT(struct_children);
364
+
365
+ duckdb_re2::StringPiece constant_piece(constant_string.c_str(), constant_string.size());
366
+ RE2 constant_pattern(constant_piece, options);
367
+ if (size_t(constant_pattern.NumberOfCapturingGroups()) < list_children.size()) {
368
+ throw BinderException("Not enough group names in %s", bound_function.name);
369
+ }
370
+ } else {
240
371
  auto group_idx = group.GetValue<int32_t>();
241
372
  if (group_idx < 0 || group_idx > 9) {
242
373
  throw InvalidInputException("Group index must be between 0 and 9!");
243
374
  }
244
375
  group_string = "\\" + to_string(group_idx);
245
376
  }
246
- } else {
247
- group_string = "\\0";
248
- }
249
- if (arguments.size() >= 4) {
250
- ParseRegexOptions(context, *arguments[3], options);
251
377
  }
378
+
252
379
  return make_uniq<RegexpExtractBindData>(options, std::move(constant_string), constant_pattern,
253
380
  std::move(group_string));
254
381
  }
255
382
 
256
- static void RegexExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {
257
- auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
258
- const auto &info = func_expr.bind_info->Cast<RegexpExtractBindData>();
259
-
260
- auto &strings = args.data[0];
261
- auto &patterns = args.data[1];
262
- if (info.constant_pattern) {
263
- auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>();
264
- UnaryExecutor::Execute<string_t, string_t>(strings, result, args.size(), [&](string_t input) {
265
- return Extract(input, result, lstate.constant_pattern, info.rewrite);
266
- });
267
- } else {
268
- BinaryExecutor::Execute<string_t, string_t, string_t>(strings, patterns, result, args.size(),
269
- [&](string_t input, string_t pattern) {
270
- RE2 re(CreateStringPiece(pattern), info.options);
271
- return Extract(input, result, re, info.rewrite);
272
- });
273
- }
274
- }
275
-
276
383
  void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
277
384
  ScalarFunctionSet regexp_full_match("regexp_full_match");
278
385
  regexp_full_match.AddFunction(ScalarFunction(
@@ -315,6 +422,16 @@ void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
315
422
  {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER, LogicalType::VARCHAR}, LogicalType::VARCHAR,
316
423
  RegexExtractFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
317
424
  FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
425
+ // REGEXP_EXTRACT(<string>, <pattern>, [<group 1 name>[, <group n name>]...])
426
+ regexp_extract.AddFunction(ScalarFunction(
427
+ {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::LIST(LogicalType::VARCHAR)}, LogicalType::VARCHAR,
428
+ RegexExtractStructFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
429
+ FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
430
+ // REGEXP_EXTRACT(<string>, <pattern>, [<group 1 name>[, <group n name>]...], <options>)
431
+ regexp_extract.AddFunction(ScalarFunction(
432
+ {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::LIST(LogicalType::VARCHAR), LogicalType::VARCHAR},
433
+ LogicalType::VARCHAR, RegexExtractStructFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState,
434
+ LogicalType::INVALID, FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
318
435
 
319
436
  ScalarFunctionSet regexp_extract_all("regexp_extract_all");
320
437
  regexp_extract_all.AddFunction(ScalarFunction(
@@ -39,6 +39,7 @@ void BuiltinFunctions::RegisterStringFunctions() {
39
39
  Register<CHR>();
40
40
  Register<MismatchesFun>();
41
41
  Register<LevenshteinFun>();
42
+ Register<DamerauLevenshteinFun>();
42
43
  Register<JaccardFun>();
43
44
  Register<JaroWinklerFun>();
44
45
 
@@ -8,10 +8,10 @@
8
8
  namespace duckdb {
9
9
 
10
10
  struct CheckpointBindData : public FunctionData {
11
- explicit CheckpointBindData(AttachedDatabase *db) : db(db) {
11
+ explicit CheckpointBindData(optional_ptr<AttachedDatabase> db) : db(db) {
12
12
  }
13
13
 
14
- AttachedDatabase *db;
14
+ optional_ptr<AttachedDatabase> db;
15
15
 
16
16
  public:
17
17
  unique_ptr<FunctionData> Copy() const override {
@@ -29,7 +29,7 @@ static unique_ptr<FunctionData> CheckpointBind(ClientContext &context, TableFunc
29
29
  return_types.emplace_back(LogicalType::BOOLEAN);
30
30
  names.emplace_back("Success");
31
31
 
32
- AttachedDatabase *db;
32
+ optional_ptr<AttachedDatabase> db;
33
33
  auto &db_manager = DatabaseManager::Get(context);
34
34
  if (!input.inputs.empty()) {
35
35
  auto &db_name = StringValue::Get(input.inputs[0]);
@@ -46,7 +46,7 @@ static unique_ptr<FunctionData> CheckpointBind(ClientContext &context, TableFunc
46
46
  template <bool FORCE>
47
47
  static void TemplatedCheckpointFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
48
48
  auto &bind_data = data_p.bind_data->Cast<CheckpointBindData>();
49
- auto &transaction_manager = TransactionManager::Get(*bind_data.db);
49
+ auto &transaction_manager = TransactionManager::Get(*bind_data.db.get_mutable());
50
50
  transaction_manager.Checkpoint(context, FORCE);
51
51
  }
52
52
 
@@ -16,7 +16,7 @@ struct DuckDBColumnsData : public GlobalTableFunctionState {
16
16
  DuckDBColumnsData() : offset(0), column_offset(0) {
17
17
  }
18
18
 
19
- vector<CatalogEntry *> entries;
19
+ vector<optional_ptr<CatalogEntry>> entries;
20
20
  idx_t offset;
21
21
  idx_t column_offset;
22
22
  };
@@ -90,12 +90,12 @@ unique_ptr<GlobalTableFunctionState> DuckDBColumnsInit(ClientContext &context, T
90
90
 
91
91
  class ColumnHelper {
92
92
  public:
93
- static unique_ptr<ColumnHelper> Create(CatalogEntry *entry);
93
+ static unique_ptr<ColumnHelper> Create(CatalogEntry &entry);
94
94
 
95
95
  virtual ~ColumnHelper() {
96
96
  }
97
97
 
98
- virtual StandardEntry *Entry() = 0;
98
+ virtual StandardEntry &Entry() = 0;
99
99
  virtual idx_t NumColumns() = 0;
100
100
  virtual const string &ColumnName(idx_t col) = 0;
101
101
  virtual const LogicalType &ColumnType(idx_t col) = 0;
@@ -107,8 +107,8 @@ public:
107
107
 
108
108
  class TableColumnHelper : public ColumnHelper {
109
109
  public:
110
- explicit TableColumnHelper(TableCatalogEntry *entry) : entry(entry) {
111
- for (auto &constraint : entry->GetConstraints()) {
110
+ explicit TableColumnHelper(TableCatalogEntry &entry) : entry(entry) {
111
+ for (auto &constraint : entry.GetConstraints()) {
112
112
  if (constraint->type == ConstraintType::NOT_NULL) {
113
113
  auto &not_null = *reinterpret_cast<NotNullConstraint *>(constraint.get());
114
114
  not_null_cols.insert(not_null.index.index);
@@ -116,20 +116,20 @@ public:
116
116
  }
117
117
  }
118
118
 
119
- StandardEntry *Entry() override {
119
+ StandardEntry &Entry() override {
120
120
  return entry;
121
121
  }
122
122
  idx_t NumColumns() override {
123
- return entry->GetColumns().LogicalColumnCount();
123
+ return entry.GetColumns().LogicalColumnCount();
124
124
  }
125
125
  const string &ColumnName(idx_t col) override {
126
- return entry->GetColumn(LogicalIndex(col)).Name();
126
+ return entry.GetColumn(LogicalIndex(col)).Name();
127
127
  }
128
128
  const LogicalType &ColumnType(idx_t col) override {
129
- return entry->GetColumn(LogicalIndex(col)).Type();
129
+ return entry.GetColumn(LogicalIndex(col)).Type();
130
130
  }
131
131
  const Value ColumnDefault(idx_t col) override {
132
- auto &column = entry->GetColumn(LogicalIndex(col));
132
+ auto &column = entry.GetColumn(LogicalIndex(col));
133
133
  if (column.DefaultValue()) {
134
134
  return Value(column.DefaultValue()->ToString());
135
135
  }
@@ -140,26 +140,26 @@ public:
140
140
  }
141
141
 
142
142
  private:
143
- TableCatalogEntry *entry;
143
+ TableCatalogEntry &entry;
144
144
  std::set<idx_t> not_null_cols;
145
145
  };
146
146
 
147
147
  class ViewColumnHelper : public ColumnHelper {
148
148
  public:
149
- explicit ViewColumnHelper(ViewCatalogEntry *entry) : entry(entry) {
149
+ explicit ViewColumnHelper(ViewCatalogEntry &entry) : entry(entry) {
150
150
  }
151
151
 
152
- StandardEntry *Entry() override {
152
+ StandardEntry &Entry() override {
153
153
  return entry;
154
154
  }
155
155
  idx_t NumColumns() override {
156
- return entry->types.size();
156
+ return entry.types.size();
157
157
  }
158
158
  const string &ColumnName(idx_t col) override {
159
- return entry->aliases[col];
159
+ return entry.aliases[col];
160
160
  }
161
161
  const LogicalType &ColumnType(idx_t col) override {
162
- return entry->types[col];
162
+ return entry.types[col];
163
163
  }
164
164
  const Value ColumnDefault(idx_t col) override {
165
165
  return Value();
@@ -169,15 +169,15 @@ public:
169
169
  }
170
170
 
171
171
  private:
172
- ViewCatalogEntry *entry;
172
+ ViewCatalogEntry &entry;
173
173
  };
174
174
 
175
- unique_ptr<ColumnHelper> ColumnHelper::Create(CatalogEntry *entry) {
176
- switch (entry->type) {
175
+ unique_ptr<ColumnHelper> ColumnHelper::Create(CatalogEntry &entry) {
176
+ switch (entry.type) {
177
177
  case CatalogType::TABLE_ENTRY:
178
- return make_uniq<TableColumnHelper>((TableCatalogEntry *)entry);
178
+ return make_uniq<TableColumnHelper>(entry.Cast<TableCatalogEntry>());
179
179
  case CatalogType::VIEW_ENTRY:
180
- return make_uniq<ViewColumnHelper>((ViewCatalogEntry *)entry);
180
+ return make_uniq<ViewColumnHelper>(entry.Cast<ViewCatalogEntry>());
181
181
  default:
182
182
  throw NotImplementedException("Unsupported catalog type for duckdb_columns");
183
183
  }
@@ -186,7 +186,7 @@ unique_ptr<ColumnHelper> ColumnHelper::Create(CatalogEntry *entry) {
186
186
  void ColumnHelper::WriteColumns(idx_t start_index, idx_t start_col, idx_t end_col, DataChunk &output) {
187
187
  for (idx_t i = start_col; i < end_col; i++) {
188
188
  auto index = start_index + (i - start_col);
189
- auto &entry = *Entry();
189
+ auto &entry = Entry();
190
190
 
191
191
  idx_t col = 0;
192
192
  // database_name, VARCHAR
@@ -284,7 +284,7 @@ void ColumnHelper::WriteColumns(idx_t start_index, idx_t start_col, idx_t end_co
284
284
  }
285
285
 
286
286
  void DuckDBColumnsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
287
- auto &data = (DuckDBColumnsData &)*data_p.global_state;
287
+ auto &data = data_p.global_state->Cast<DuckDBColumnsData>();
288
288
  if (data.offset >= data.entries.size()) {
289
289
  // finished returning values
290
290
  return;
@@ -298,7 +298,7 @@ void DuckDBColumnsFunction(ClientContext &context, TableFunctionInput &data_p, D
298
298
  idx_t column_offset = data.column_offset;
299
299
  idx_t index = 0;
300
300
  while (next < data.entries.size() && index < STANDARD_VECTOR_SIZE) {
301
- auto column_helper = ColumnHelper::Create(data.entries[next]);
301
+ auto column_helper = ColumnHelper::Create(*data.entries[next]);
302
302
  idx_t columns = column_helper->NumColumns();
303
303
 
304
304
  // Check to see if we are going to exceed the maximum index for a DataChunk
@@ -19,7 +19,8 @@
19
19
  namespace duckdb {
20
20
 
21
21
  struct UniqueKeyInfo {
22
- string schema, table;
22
+ string schema;
23
+ string table;
23
24
  vector<LogicalIndex> columns;
24
25
 
25
26
  bool operator==(const UniqueKeyInfo &other) const {
@@ -52,7 +53,7 @@ struct DuckDBConstraintsData : public GlobalTableFunctionState {
52
53
  DuckDBConstraintsData() : offset(0), constraint_offset(0), unique_constraint_offset(0) {
53
54
  }
54
55
 
55
- vector<CatalogEntry *> entries;
56
+ vector<optional_ptr<CatalogEntry>> entries;
56
57
  idx_t offset;
57
58
  idx_t constraint_offset;
58
59
  idx_t unique_constraint_offset;
@@ -125,7 +126,7 @@ unique_ptr<GlobalTableFunctionState> DuckDBConstraintsInit(ClientContext &contex
125
126
  }
126
127
 
127
128
  void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
128
- auto &data = (DuckDBConstraintsData &)*data_p.global_state;
129
+ auto &data = data_p.global_state->Cast<DuckDBConstraintsData>();
129
130
  if (data.offset >= data.entries.size()) {
130
131
  // finished returning values
131
132
  return;
@@ -134,10 +135,10 @@ void DuckDBConstraintsFunction(ClientContext &context, TableFunctionInput &data_
134
135
  // either fill up the chunk or return all the remaining columns
135
136
  idx_t count = 0;
136
137
  while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) {
137
- auto &entry = data.entries[data.offset];
138
- D_ASSERT(entry->type == CatalogType::TABLE_ENTRY);
138
+ auto &entry = *data.entries[data.offset];
139
+ D_ASSERT(entry.type == CatalogType::TABLE_ENTRY);
139
140
 
140
- auto &table = entry->Cast<TableCatalogEntry>();
141
+ auto &table = entry.Cast<TableCatalogEntry>();
141
142
  auto &constraints = table.GetConstraints();
142
143
  bool is_duck_table = table.IsDuckTable();
143
144
  for (; data.constraint_offset < constraints.size() && count < STANDARD_VECTOR_SIZE; data.constraint_offset++) {
@@ -8,7 +8,7 @@ struct DuckDBDatabasesData : public GlobalTableFunctionState {
8
8
  DuckDBDatabasesData() : offset(0) {
9
9
  }
10
10
 
11
- vector<AttachedDatabase *> entries;
11
+ vector<optional_ptr<AttachedDatabase>> entries;
12
12
  idx_t offset;
13
13
  };
14
14
 
@@ -9,8 +9,12 @@
9
9
  namespace duckdb {
10
10
 
11
11
  struct DependencyInformation {
12
- CatalogEntry *object;
13
- CatalogEntry *dependent;
12
+ DependencyInformation(CatalogEntry &object, CatalogEntry &dependent, DependencyType type)
13
+ : object(object), dependent(dependent), type(type) {
14
+ }
15
+
16
+ CatalogEntry &object;
17
+ CatalogEntry &dependent;
14
18
  DependencyType type;
15
19
  };
16
20
 
@@ -54,14 +58,10 @@ unique_ptr<GlobalTableFunctionState> DuckDBDependenciesInit(ClientContext &conte
54
58
  // scan all the schemas and collect them
55
59
  auto &catalog = Catalog::GetCatalog(context, INVALID_CATALOG);
56
60
  if (catalog.IsDuckCatalog()) {
57
- auto &duck_catalog = (DuckCatalog &)catalog;
61
+ auto &duck_catalog = catalog.Cast<DuckCatalog>();
58
62
  auto &dependency_manager = duck_catalog.GetDependencyManager();
59
- dependency_manager.Scan([&](CatalogEntry *obj, CatalogEntry *dependent, DependencyType type) {
60
- DependencyInformation info;
61
- info.object = obj;
62
- info.dependent = dependent;
63
- info.type = type;
64
- result->entries.push_back(info);
63
+ dependency_manager.Scan([&](CatalogEntry &obj, CatalogEntry &dependent, DependencyType type) {
64
+ result->entries.emplace_back(obj, dependent, type);
65
65
  });
66
66
  }
67
67
 
@@ -84,13 +84,13 @@ void DuckDBDependenciesFunction(ClientContext &context, TableFunctionInput &data
84
84
  // classid, LogicalType::BIGINT
85
85
  output.SetValue(0, count, Value::BIGINT(0));
86
86
  // objid, LogicalType::BIGINT
87
- output.SetValue(1, count, Value::BIGINT(entry.object->oid));
87
+ output.SetValue(1, count, Value::BIGINT(entry.object.oid));
88
88
  // objsubid, LogicalType::INTEGER
89
89
  output.SetValue(2, count, Value::INTEGER(0));
90
90
  // refclassid, LogicalType::BIGINT
91
91
  output.SetValue(3, count, Value::BIGINT(0));
92
92
  // refobjid, LogicalType::BIGINT
93
- output.SetValue(4, count, Value::BIGINT(entry.dependent->oid));
93
+ output.SetValue(4, count, Value::BIGINT(entry.dependent.oid));
94
94
  // refobjsubid, LogicalType::INTEGER
95
95
  output.SetValue(5, count, Value::INTEGER(0));
96
96
  // deptype, LogicalType::VARCHAR
@@ -15,7 +15,7 @@ struct PragmaDatabaseSizeData : public GlobalTableFunctionState {
15
15
  }
16
16
 
17
17
  idx_t index;
18
- vector<AttachedDatabase *> databases;
18
+ vector<optional_ptr<AttachedDatabase>> databases;
19
19
  Value memory_usage;
20
20
  Value memory_limit;
21
21
  };