duckdb 0.7.2-dev654.0 → 0.7.2-dev832.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. package/binding.gyp +2 -0
  2. package/lib/duckdb.d.ts +12 -1
  3. package/lib/duckdb.js +19 -0
  4. package/package.json +1 -1
  5. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  6. package/src/duckdb/extension/json/include/json_functions.hpp +2 -0
  7. package/src/duckdb/extension/json/include/json_serializer.hpp +77 -0
  8. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +147 -0
  9. package/src/duckdb/extension/json/json_functions.cpp +12 -4
  10. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  11. package/src/duckdb/extension/json/json_serializer.cpp +217 -0
  12. package/src/duckdb/src/common/enums/expression_type.cpp +8 -222
  13. package/src/duckdb/src/common/enums/join_type.cpp +3 -22
  14. package/src/duckdb/src/common/exception.cpp +2 -2
  15. package/src/duckdb/src/common/serializer/enum_serializer.cpp +1172 -0
  16. package/src/duckdb/src/common/types/column_data_collection_segment.cpp +11 -6
  17. package/src/duckdb/src/common/types/value.cpp +117 -0
  18. package/src/duckdb/src/common/types/vector.cpp +140 -1
  19. package/src/duckdb/src/common/types.cpp +166 -89
  20. package/src/duckdb/src/common/vector_operations/vector_cast.cpp +2 -1
  21. package/src/duckdb/src/execution/aggregate_hashtable.cpp +10 -5
  22. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
  23. package/src/duckdb/src/execution/index/art/art.cpp +5 -5
  24. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
  25. package/src/duckdb/src/execution/partitionable_hashtable.cpp +14 -2
  26. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -1
  27. package/src/duckdb/src/function/cast/enum_casts.cpp +25 -3
  28. package/src/duckdb/src/function/cast/list_casts.cpp +17 -4
  29. package/src/duckdb/src/function/cast/map_cast.cpp +5 -2
  30. package/src/duckdb/src/function/cast/string_cast.cpp +36 -10
  31. package/src/duckdb/src/function/cast/struct_cast.cpp +23 -3
  32. package/src/duckdb/src/function/cast/union_casts.cpp +33 -7
  33. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +243 -0
  34. package/src/duckdb/src/function/scalar/string/regexp/regexp_util.cpp +79 -0
  35. package/src/duckdb/src/function/scalar/string/regexp.cpp +21 -80
  36. package/src/duckdb/src/function/table/arrow_conversion.cpp +7 -1
  37. package/src/duckdb/src/function/table/table_scan.cpp +1 -1
  38. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  39. package/src/duckdb/src/include/duckdb/common/enums/aggregate_handling.hpp +2 -0
  40. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -3
  41. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -0
  42. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +2 -0
  43. package/src/duckdb/src/include/duckdb/common/enums/set_operation_type.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/common/exception.hpp +40 -9
  45. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +45 -0
  46. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +3 -0
  47. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +113 -0
  48. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +336 -0
  49. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +268 -0
  50. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +126 -0
  51. package/src/duckdb/src/include/duckdb/common/string_util.hpp +12 -0
  52. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -0
  53. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -0
  54. package/src/duckdb/src/include/duckdb/common/types.hpp +8 -2
  55. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +2 -2
  57. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -0
  58. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +84 -0
  59. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +28 -64
  61. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +81 -1
  62. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  63. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  64. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  65. package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +2 -0
  66. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +5 -0
  67. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +2 -0
  68. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +2 -0
  69. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +2 -0
  70. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -0
  71. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +2 -0
  72. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  73. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  74. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -0
  75. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +2 -0
  76. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -0
  77. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +2 -0
  78. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +2 -0
  79. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +2 -0
  80. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +2 -0
  81. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +5 -0
  82. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +2 -0
  83. package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +5 -0
  84. package/src/duckdb/src/include/duckdb/parser/query_node/recursive_cte_node.hpp +3 -0
  85. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +5 -0
  86. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +3 -0
  87. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +11 -1
  88. package/src/duckdb/src/include/duckdb/parser/result_modifier.hpp +24 -1
  89. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -1
  90. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +6 -1
  91. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +4 -0
  92. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -0
  93. package/src/duckdb/src/include/duckdb/parser/tableref/expressionlistref.hpp +3 -0
  94. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +3 -0
  95. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +9 -0
  96. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  97. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +3 -0
  98. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  99. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +2 -2
  100. package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -3
  101. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -2
  102. package/src/duckdb/src/main/extension/extension_install.cpp +7 -2
  103. package/src/duckdb/src/optimizer/deliminator.cpp +1 -1
  104. package/src/duckdb/src/optimizer/filter_combiner.cpp +1 -1
  105. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +3 -3
  106. package/src/duckdb/src/optimizer/rule/move_constants.cpp +2 -2
  107. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +1 -1
  108. package/src/duckdb/src/parser/common_table_expression_info.cpp +19 -0
  109. package/src/duckdb/src/parser/expression/between_expression.cpp +17 -0
  110. package/src/duckdb/src/parser/expression/case_expression.cpp +28 -0
  111. package/src/duckdb/src/parser/expression/cast_expression.cpp +17 -0
  112. package/src/duckdb/src/parser/expression/collate_expression.cpp +16 -0
  113. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -0
  114. package/src/duckdb/src/parser/expression/comparison_expression.cpp +16 -0
  115. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +15 -0
  116. package/src/duckdb/src/parser/expression/constant_expression.cpp +14 -0
  117. package/src/duckdb/src/parser/expression/default_expression.cpp +7 -0
  118. package/src/duckdb/src/parser/expression/function_expression.cpp +35 -0
  119. package/src/duckdb/src/parser/expression/lambda_expression.cpp +16 -0
  120. package/src/duckdb/src/parser/expression/operator_expression.cpp +15 -0
  121. package/src/duckdb/src/parser/expression/parameter_expression.cpp +15 -0
  122. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +14 -0
  123. package/src/duckdb/src/parser/expression/star_expression.cpp +20 -0
  124. package/src/duckdb/src/parser/expression/subquery_expression.cpp +20 -0
  125. package/src/duckdb/src/parser/expression/window_expression.cpp +43 -0
  126. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +22 -10
  127. package/src/duckdb/src/parser/parsed_expression.cpp +72 -0
  128. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +21 -0
  129. package/src/duckdb/src/parser/query_node/select_node.cpp +31 -0
  130. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +17 -0
  131. package/src/duckdb/src/parser/query_node.cpp +50 -0
  132. package/src/duckdb/src/parser/result_modifier.cpp +78 -0
  133. package/src/duckdb/src/parser/statement/select_statement.cpp +12 -0
  134. package/src/duckdb/src/parser/tableref/basetableref.cpp +21 -0
  135. package/src/duckdb/src/parser/tableref/emptytableref.cpp +4 -0
  136. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +17 -0
  137. package/src/duckdb/src/parser/tableref/joinref.cpp +25 -0
  138. package/src/duckdb/src/parser/tableref/pivotref.cpp +53 -0
  139. package/src/duckdb/src/parser/tableref/subqueryref.cpp +15 -0
  140. package/src/duckdb/src/parser/tableref/table_function.cpp +17 -0
  141. package/src/duckdb/src/parser/tableref.cpp +46 -0
  142. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -1
  143. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +1 -1
  144. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  145. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  146. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +4 -0
  147. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +3 -1
  148. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -1
  149. package/src/duckdb/src/planner/expression/bound_expression.cpp +4 -0
  150. package/src/duckdb/src/storage/data_table.cpp +15 -13
  151. package/src/duckdb/src/storage/index.cpp +12 -1
  152. package/src/duckdb/src/storage/local_storage.cpp +20 -23
  153. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +0 -1
  154. package/src/duckdb/third_party/re2/re2/re2.cc +9 -0
  155. package/src/duckdb/third_party/re2/re2/re2.h +2 -0
  156. package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
  157. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  158. package/src/duckdb/ub_src_function_scalar_string_regexp.cpp +4 -0
  159. package/src/duckdb/ub_src_parser.cpp +2 -0
  160. package/src/utils.cpp +12 -0
  161. package/test/extension.test.ts +44 -26
@@ -0,0 +1,126 @@
1
+ #pragma once
2
+ #include <type_traits>
3
+ #include "duckdb/common/vector.hpp"
4
+ #include "duckdb/common/unordered_map.hpp"
5
+ #include "duckdb/common/unordered_set.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ class FormatSerializer; // Forward declare
10
+ class FormatDeserializer; // Forward declare
11
+
12
+ // Backport to c++11
13
+ template <class...>
14
+ using void_t = void;
15
+
16
+ // Check for anything implementing a `void FormatSerialize(FormatSerializer &FormatSerializer)` method
17
+ template <typename T, typename = void_t<>>
18
+ struct has_serialize : std::false_type {};
19
+
20
+ template <typename T>
21
+ struct has_serialize<T, void_t<decltype(std::declval<T>().FormatSerialize(std::declval<FormatSerializer &>()))>>
22
+ : std::true_type {};
23
+
24
+ template <typename T>
25
+ constexpr bool has_serialize_v() {
26
+ return has_serialize<T>::value;
27
+ }
28
+
29
+ // Check for anything implementing a static `T FormatDeserialize(FormatDeserializer&)` method
30
+ /*
31
+ template <typename T, typename = void_t<>>
32
+ struct has_deserialize : std::false_type {};
33
+
34
+ template <typename T>
35
+ struct has_deserialize<T, void_t<decltype(T::FormatDeserialize)>> : std::true_type {};
36
+
37
+ template <typename T>
38
+ constexpr bool has_deserialize_v() {
39
+ return has_deserialize<T>::value;
40
+ }*/
41
+
42
+ template <typename T, typename = T>
43
+ struct has_deserialize : std::false_type {};
44
+
45
+ // Accept `static unique_ptr<T> FormatDeserialize(FormatDeserializer& deserializer)`
46
+ template <typename T>
47
+ struct has_deserialize<
48
+ T, typename std::enable_if<std::is_same<decltype(T::FormatDeserialize), unique_ptr<T>(FormatDeserializer &)>::value,
49
+ T>::type> : std::true_type {};
50
+
51
+ // Accept `static shared_ptr<T> FormatDeserialize(FormatDeserializer& deserializer)`
52
+ template <typename T>
53
+ struct has_deserialize<
54
+ T, typename std::enable_if<std::is_same<decltype(T::FormatDeserialize), shared_ptr<T>(FormatDeserializer &)>::value,
55
+ T>::type> : std::true_type {};
56
+
57
+ // Accept `static T FormatDeserialize(FormatDeserializer& deserializer)`
58
+ template <typename T>
59
+ struct has_deserialize<
60
+ T, typename std::enable_if<std::is_same<decltype(T::FormatDeserialize), T(FormatDeserializer &)>::value, T>::type>
61
+ : std::true_type {};
62
+
63
+ // Check if T is a vector, and provide access to the inner type
64
+ template <typename T>
65
+ struct is_vector : std::false_type {};
66
+ template <typename T>
67
+ struct is_vector<typename std::vector<T>> : std::true_type {
68
+ typedef T ELEMENT_TYPE;
69
+ };
70
+
71
+ // Check if T is a unordered map, and provide access to the inner type
72
+ template <typename T>
73
+ struct is_unordered_map : std::false_type {};
74
+ template <typename... Args>
75
+ struct is_unordered_map<typename std::unordered_map<Args...>> : std::true_type {
76
+ typedef typename std::tuple_element<0, std::tuple<Args...>>::type KEY_TYPE;
77
+ typedef typename std::tuple_element<1, std::tuple<Args...>>::type VALUE_TYPE;
78
+ typedef typename std::tuple_element<2, std::tuple<Args...>>::type HASH_TYPE;
79
+ typedef typename std::tuple_element<3, std::tuple<Args...>>::type EQUAL_TYPE;
80
+ };
81
+
82
+ template <typename T>
83
+ struct is_unique_ptr : std::false_type {};
84
+
85
+ template <typename T, typename D>
86
+ struct is_unique_ptr<unique_ptr<T, D>> : std::true_type {
87
+ typedef T ELEMENT_TYPE;
88
+ typedef D DELETER_TYPE;
89
+ };
90
+
91
+ template <typename T>
92
+ struct is_shared_ptr : std::false_type {};
93
+
94
+ template <typename T>
95
+ struct is_shared_ptr<shared_ptr<T>> : std::true_type {
96
+ typedef T ELEMENT_TYPE;
97
+ };
98
+
99
+ template <typename T>
100
+ struct is_pair : std::false_type {};
101
+
102
+ template <typename T, typename U>
103
+ struct is_pair<std::pair<T, U>> : std::true_type {
104
+ typedef T FIRST_TYPE;
105
+ typedef U SECOND_TYPE;
106
+ };
107
+
108
+ template <typename T>
109
+ struct is_unordered_set : std::false_type {};
110
+ template <typename... Args>
111
+ struct is_unordered_set<std::unordered_set<Args...>> : std::true_type {
112
+ typedef typename std::tuple_element<0, std::tuple<Args...>>::type ELEMENT_TYPE;
113
+ typedef typename std::tuple_element<1, std::tuple<Args...>>::type HASH_TYPE;
114
+ typedef typename std::tuple_element<2, std::tuple<Args...>>::type EQUAL_TYPE;
115
+ };
116
+
117
+ template <typename T>
118
+ struct is_set : std::false_type {};
119
+ template <typename... Args>
120
+ struct is_set<std::set<Args...>> : std::true_type {
121
+ typedef typename std::tuple_element<0, std::tuple<Args...>>::type ELEMENT_TYPE;
122
+ typedef typename std::tuple_element<1, std::tuple<Args...>>::type HASH_TYPE;
123
+ typedef typename std::tuple_element<2, std::tuple<Args...>>::type EQUAL_TYPE;
124
+ };
125
+
126
+ } // namespace duckdb
@@ -170,6 +170,18 @@ public:
170
170
  //! Equivalent to calling TopNLevenshtein followed by CandidatesMessage
171
171
  DUCKDB_API static string CandidatesErrorMessage(const vector<string> &strings, const string &target,
172
172
  const string &message_prefix, idx_t n = 5);
173
+
174
+ //! Returns true if two null-terminated strings are equal or point to the same address.
175
+ //! Returns false if only one of the strings is nullptr
176
+ DUCKDB_API static bool Equals(const char *s1, const char *s2) {
177
+ if (s1 == s2) {
178
+ return true;
179
+ }
180
+ if (s1 == nullptr || s2 == nullptr) {
181
+ return false;
182
+ }
183
+ return strcmp(s1, s2) == 0;
184
+ }
173
185
  };
174
186
 
175
187
  } // namespace duckdb
@@ -226,6 +226,8 @@ public:
226
226
 
227
227
  //! Serializes a Value to a stand-alone binary blob
228
228
  DUCKDB_API void Serialize(Serializer &serializer) const;
229
+ DUCKDB_API void FormatSerialize(FormatSerializer &serializer) const;
230
+ DUCKDB_API static Value FormatDeserialize(FormatDeserializer &deserializer);
229
231
  //! Deserializes a Value from a blob
230
232
  DUCKDB_API static Value Deserialize(Deserializer &source);
231
233
 
@@ -162,6 +162,9 @@ public:
162
162
  //! Deserializes a blob back into a Vector
163
163
  DUCKDB_API void Deserialize(idx_t count, Deserializer &source);
164
164
 
165
+ DUCKDB_API void FormatSerialize(FormatSerializer &serializer, idx_t count);
166
+ DUCKDB_API void FormatDeserialize(FormatDeserializer &deserializer, idx_t count);
167
+
165
168
  // Getters
166
169
  inline VectorType GetVectorType() const {
167
170
  return vector_type;
@@ -17,6 +17,8 @@
17
17
 
18
18
  namespace duckdb {
19
19
 
20
+ class FormatSerializer;
21
+ class FormatDeserializer;
20
22
  class Serializer;
21
23
  class Deserializer;
22
24
  class Value;
@@ -46,7 +48,7 @@ public:
46
48
  public:
47
49
  DUCKDB_API hugeint_t() = default;
48
50
  DUCKDB_API hugeint_t(int64_t value); // NOLINT: Allow implicit conversion from `int64_t`
49
- DUCKDB_API constexpr hugeint_t(int64_t upper, uint64_t lower): lower(lower), upper(upper) {};
51
+ DUCKDB_API constexpr hugeint_t(int64_t upper, uint64_t lower): lower(lower), upper(upper) {}
50
52
  DUCKDB_API constexpr hugeint_t(const hugeint_t &rhs) = default;
51
53
  DUCKDB_API constexpr hugeint_t(hugeint_t &&rhs) = default;
52
54
  DUCKDB_API hugeint_t &operator=(const hugeint_t &rhs) = default;
@@ -288,8 +290,8 @@ enum class LogicalTypeId : uint8_t {
288
290
  UNION = 107
289
291
  };
290
292
 
291
- struct ExtraTypeInfo;
292
293
 
294
+ struct ExtraTypeInfo;
293
295
 
294
296
  struct aggregate_state_t;
295
297
 
@@ -349,6 +351,10 @@ struct LogicalType {
349
351
  //! Deserializes a blob back into an LogicalType
350
352
  DUCKDB_API static LogicalType Deserialize(Deserializer &source);
351
353
 
354
+ DUCKDB_API void FormatSerialize(FormatSerializer &serializer) const;
355
+ DUCKDB_API static LogicalType FormatDeserialize(FormatDeserializer &deserializer);
356
+
357
+
352
358
  DUCKDB_API static bool TypeIsTimestamp(LogicalTypeId id) {
353
359
  return (id == LogicalTypeId::TIMESTAMP ||
354
360
  id == LogicalTypeId::TIMESTAMP_MS ||
@@ -113,6 +113,7 @@ public:
113
113
  }
114
114
 
115
115
  idx_t MaxCapacity();
116
+ static idx_t GetMaxCapacity(HtEntryType entry_type, idx_t tuple_size);
116
117
 
117
118
  void Partition(vector<GroupedAggregateHashTable *> &partition_hts, hash_t mask, idx_t shift);
118
119
 
@@ -72,7 +72,7 @@ public:
72
72
  vector<row_t> &result_ids) override;
73
73
 
74
74
  //! Called when data is appended to the index. The lock obtained from InitializeLock must be held
75
- bool Append(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
75
+ PreservedError Append(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
76
76
  //! Verify that data can be appended to the index without a constraint violation
77
77
  void VerifyAppend(DataChunk &chunk) override;
78
78
  //! Verify that data can be appended to the index without a constraint violation using the conflict manager
@@ -80,7 +80,7 @@ public:
80
80
  //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held
81
81
  void Delete(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
82
82
  //! Insert a chunk of entries into the index
83
- bool Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override;
83
+ PreservedError Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override;
84
84
 
85
85
  //! Construct an ART from a vector of sorted keys
86
86
  bool ConstructFromSorted(idx_t count, vector<Key> &keys, Vector &row_identifiers);
@@ -57,9 +57,12 @@ private:
57
57
 
58
58
  HashTableList unpartitioned_hts;
59
59
  unordered_map<hash_t, HashTableList> radix_partitioned_hts;
60
+ idx_t tuple_size;
60
61
 
61
62
  private:
62
63
  idx_t ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes, DataChunk &payload,
63
64
  const vector<idx_t> &filter);
65
+ //! Returns the HT entry size used for intermediate hash tables
66
+ HtEntryType GetHTEntrySize();
64
67
  };
65
68
  } // namespace duckdb
@@ -0,0 +1,84 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/function/cast/bound_cast_data.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/function/cast/default_casts.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ struct ListBoundCastData : public BoundCastData {
16
+ explicit ListBoundCastData(BoundCastInfo child_cast) : child_cast_info(std::move(child_cast)) {
17
+ }
18
+
19
+ BoundCastInfo child_cast_info;
20
+ static unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
21
+ const LogicalType &target);
22
+ static unique_ptr<FunctionLocalState> InitListLocalState(CastLocalStateParameters &parameters);
23
+
24
+ public:
25
+ unique_ptr<BoundCastData> Copy() const override {
26
+ return make_unique<ListBoundCastData>(child_cast_info.Copy());
27
+ }
28
+ };
29
+
30
+ struct ListCast {
31
+ static bool ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
32
+ };
33
+
34
+ struct StructBoundCastData : public BoundCastData {
35
+ StructBoundCastData(vector<BoundCastInfo> child_casts, LogicalType target_p)
36
+ : child_cast_info(std::move(child_casts)), target(std::move(target_p)) {
37
+ }
38
+
39
+ vector<BoundCastInfo> child_cast_info;
40
+ LogicalType target;
41
+
42
+ static unique_ptr<BoundCastData> BindStructToStructCast(BindCastInput &input, const LogicalType &source,
43
+ const LogicalType &target);
44
+ static unique_ptr<FunctionLocalState> InitStructCastLocalState(CastLocalStateParameters &parameters);
45
+
46
+ public:
47
+ unique_ptr<BoundCastData> Copy() const override {
48
+ vector<BoundCastInfo> copy_info;
49
+ for (auto &info : child_cast_info) {
50
+ copy_info.push_back(info.Copy());
51
+ }
52
+ return make_unique<StructBoundCastData>(std::move(copy_info), target);
53
+ }
54
+ };
55
+
56
+ struct StructCastLocalState : public FunctionLocalState {
57
+ public:
58
+ vector<unique_ptr<FunctionLocalState>> local_states;
59
+ };
60
+
61
+ struct MapBoundCastData : public BoundCastData {
62
+ MapBoundCastData(BoundCastInfo key_cast, BoundCastInfo value_cast)
63
+ : key_cast(std::move(key_cast)), value_cast(std::move(value_cast)) {
64
+ }
65
+
66
+ BoundCastInfo key_cast;
67
+ BoundCastInfo value_cast;
68
+
69
+ static unique_ptr<BoundCastData> BindMapToMapCast(BindCastInput &input, const LogicalType &source,
70
+ const LogicalType &target);
71
+
72
+ public:
73
+ unique_ptr<BoundCastData> Copy() const override {
74
+ return make_unique<MapBoundCastData>(key_cast.Copy(), value_cast.Copy());
75
+ }
76
+ };
77
+
78
+ struct MapCastLocalState : public FunctionLocalState {
79
+ public:
80
+ unique_ptr<FunctionLocalState> key_state;
81
+ unique_ptr<FunctionLocalState> value_state;
82
+ };
83
+
84
+ } // namespace duckdb
@@ -19,12 +19,12 @@ typedef BoundCastInfo (*bind_cast_function_t)(BindCastInput &input, const Logica
19
19
  typedef int64_t (*implicit_cast_cost_t)(const LogicalType &from, const LogicalType &to);
20
20
 
21
21
  struct GetCastFunctionInput {
22
- GetCastFunctionInput(ClientContext *context = nullptr) : context(context) {
22
+ GetCastFunctionInput(optional_ptr<ClientContext> context = nullptr) : context(context) {
23
23
  }
24
24
  GetCastFunctionInput(ClientContext &context) : context(&context) {
25
25
  }
26
26
 
27
- ClientContext *context;
27
+ optional_ptr<ClientContext> context;
28
28
  };
29
29
 
30
30
  struct BindCastFunction {
@@ -10,6 +10,8 @@
10
10
 
11
11
  #include "duckdb/common/types.hpp"
12
12
  #include "duckdb/common/types/vector.hpp"
13
+ #include "duckdb/common/optional_ptr.hpp"
14
+ #include "duckdb/function/scalar_function.hpp"
13
15
 
14
16
  namespace duckdb {
15
17
 
@@ -31,25 +33,43 @@ struct BoundCastData {
31
33
  struct CastParameters {
32
34
  CastParameters() {
33
35
  }
34
- CastParameters(BoundCastData *cast_data, bool strict, string *error_message, FunctionLocalState *local_state)
36
+ CastParameters(BoundCastData *cast_data, bool strict, string *error_message,
37
+ optional_ptr<FunctionLocalState> local_state)
35
38
  : cast_data(cast_data), strict(strict), error_message(error_message), local_state(local_state) {
36
39
  }
37
- CastParameters(CastParameters &parent, BoundCastData *cast_data = nullptr)
38
- : cast_data(cast_data), strict(parent.strict), error_message(parent.error_message) {
40
+ CastParameters(CastParameters &parent, optional_ptr<BoundCastData> cast_data,
41
+ optional_ptr<FunctionLocalState> local_state)
42
+ : cast_data(cast_data), strict(parent.strict), error_message(parent.error_message), local_state(local_state) {
39
43
  }
40
44
 
41
45
  //! The bound cast data (if any)
42
- BoundCastData *cast_data = nullptr;
46
+ optional_ptr<BoundCastData> cast_data;
43
47
  //! whether or not to enable strict casting
44
48
  bool strict = false;
45
49
  // out: error message in case cast has failed
46
50
  string *error_message = nullptr;
47
51
  //! Local state
48
- FunctionLocalState *local_state = nullptr;
52
+ optional_ptr<FunctionLocalState> local_state;
53
+ };
54
+
55
+ struct CastLocalStateParameters {
56
+ CastLocalStateParameters(optional_ptr<ClientContext> context_p, optional_ptr<BoundCastData> cast_data_p)
57
+ : context(context_p), cast_data(cast_data_p) {
58
+ }
59
+ CastLocalStateParameters(ClientContext &context_p, optional_ptr<BoundCastData> cast_data_p)
60
+ : context(&context_p), cast_data(cast_data_p) {
61
+ }
62
+ CastLocalStateParameters(CastLocalStateParameters &parent, optional_ptr<BoundCastData> cast_data_p)
63
+ : context(parent.context), cast_data(cast_data_p) {
64
+ }
65
+
66
+ optional_ptr<ClientContext> context;
67
+ //! The bound cast data (if any)
68
+ optional_ptr<BoundCastData> cast_data;
49
69
  };
50
70
 
51
71
  typedef bool (*cast_function_t)(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
52
- typedef unique_ptr<FunctionLocalState> (*init_cast_local_state_t)(ClientContext &context);
72
+ typedef unique_ptr<FunctionLocalState> (*init_cast_local_state_t)(CastLocalStateParameters &parameters);
53
73
 
54
74
  struct BoundCastInfo {
55
75
  DUCKDB_API
@@ -65,72 +85,16 @@ public:
65
85
  };
66
86
 
67
87
  struct BindCastInput {
68
- DUCKDB_API BindCastInput(CastFunctionSet &function_set, BindCastInfo *info, ClientContext *context);
88
+ DUCKDB_API BindCastInput(CastFunctionSet &function_set, BindCastInfo *info, optional_ptr<ClientContext> context);
69
89
 
70
90
  CastFunctionSet &function_set;
71
91
  BindCastInfo *info;
72
- ClientContext *context;
92
+ optional_ptr<ClientContext> context;
73
93
 
74
94
  public:
75
95
  DUCKDB_API BoundCastInfo GetCastFunction(const LogicalType &source, const LogicalType &target);
76
96
  };
77
97
 
78
- struct ListBoundCastData : public BoundCastData {
79
- explicit ListBoundCastData(BoundCastInfo child_cast) : child_cast_info(std::move(child_cast)) {
80
- }
81
-
82
- BoundCastInfo child_cast_info;
83
- static unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
84
- const LogicalType &target);
85
-
86
- public:
87
- unique_ptr<BoundCastData> Copy() const override {
88
- return make_unique<ListBoundCastData>(child_cast_info.Copy());
89
- }
90
- };
91
-
92
- struct ListCast {
93
- static bool ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
94
- };
95
-
96
- struct StructBoundCastData : public BoundCastData {
97
- StructBoundCastData(vector<BoundCastInfo> child_casts, LogicalType target_p)
98
- : child_cast_info(std::move(child_casts)), target(std::move(target_p)) {
99
- }
100
-
101
- vector<BoundCastInfo> child_cast_info;
102
- LogicalType target;
103
-
104
- static unique_ptr<BoundCastData> BindStructToStructCast(BindCastInput &input, const LogicalType &source,
105
- const LogicalType &target);
106
-
107
- public:
108
- unique_ptr<BoundCastData> Copy() const override {
109
- vector<BoundCastInfo> copy_info;
110
- for (auto &info : child_cast_info) {
111
- copy_info.push_back(info.Copy());
112
- }
113
- return make_unique<StructBoundCastData>(std::move(copy_info), target);
114
- }
115
- };
116
-
117
- struct MapBoundCastData : public BoundCastData {
118
- MapBoundCastData(BoundCastInfo key_cast, BoundCastInfo value_cast)
119
- : key_cast(std::move(key_cast)), value_cast(std::move(value_cast)) {
120
- }
121
-
122
- BoundCastInfo key_cast;
123
- BoundCastInfo value_cast;
124
-
125
- static unique_ptr<BoundCastData> BindMapToMapCast(BindCastInput &input, const LogicalType &source,
126
- const LogicalType &target);
127
-
128
- public:
129
- unique_ptr<BoundCastData> Copy() const override {
130
- return make_unique<MapBoundCastData>(key_cast.Copy(), value_cast.Copy());
131
- }
132
- };
133
-
134
98
  struct DefaultCasts {
135
99
  DUCKDB_API static BoundCastInfo GetDefaultCastFunction(BindCastInput &input, const LogicalType &source,
136
100
  const LogicalType &target);
@@ -11,9 +11,36 @@
11
11
  #include "duckdb/function/function_set.hpp"
12
12
  #include "re2/re2.h"
13
13
  #include "duckdb/function/built_in_functions.hpp"
14
+ #include "re2/stringpiece.h"
14
15
 
15
16
  namespace duckdb {
16
17
 
18
+ namespace regexp_util {
19
+
20
+ bool TryParseConstantPattern(ClientContext &context, Expression &expr, string &constant_string);
21
+ void ParseRegexOptions(const string &options, duckdb_re2::RE2::Options &result, bool *global_replace = nullptr);
22
+ void ParseRegexOptions(ClientContext &context, Expression &expr, RE2::Options &target, bool *global_replace = nullptr);
23
+
24
+ inline duckdb_re2::StringPiece CreateStringPiece(const string_t &input) {
25
+ return duckdb_re2::StringPiece(input.GetDataUnsafe(), input.GetSize());
26
+ }
27
+
28
+ inline string_t Extract(const string_t &input, Vector &result, const RE2 &re, const duckdb_re2::StringPiece &rewrite) {
29
+ string extracted;
30
+ RE2::Extract(input.GetString(), re, rewrite, &extracted);
31
+ return StringVector::AddString(result, extracted.c_str(), extracted.size());
32
+ }
33
+
34
+ } // namespace regexp_util
35
+
36
+ struct RegexpExtractAll {
37
+ static void Execute(DataChunk &args, ExpressionState &state, Vector &result);
38
+ static unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
39
+ vector<unique_ptr<Expression>> &arguments);
40
+ static unique_ptr<FunctionLocalState> InitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
41
+ FunctionData *bind_data);
42
+ };
43
+
17
44
  struct RegexpBaseBindData : public FunctionData {
18
45
  RegexpBaseBindData();
19
46
  RegexpBaseBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern = true);
@@ -61,14 +88,67 @@ struct RegexpExtractBindData : public RegexpBaseBindData {
61
88
  bool Equals(const FunctionData &other_p) const override;
62
89
  };
63
90
 
91
+ struct RegexStringPieceArgs {
92
+ RegexStringPieceArgs() : size(0), capacity(0), group_buffer(nullptr) {
93
+ }
94
+ void Init(idx_t size) {
95
+ this->size = size;
96
+ // Allocate for one extra, for the all-encompassing match group
97
+ this->capacity = size + 1;
98
+ group_buffer = AllocateArray<duckdb_re2::StringPiece>(capacity);
99
+ }
100
+ void SetSize(idx_t size) {
101
+ this->size = size;
102
+ if (size + 1 > capacity) {
103
+ Clear();
104
+ Init(size);
105
+ }
106
+ }
107
+
108
+ RegexStringPieceArgs &operator=(RegexStringPieceArgs &&other) {
109
+ std::swap(this->size, other.size);
110
+ std::swap(this->capacity, other.capacity);
111
+ std::swap(this->group_buffer, other.group_buffer);
112
+ return *this;
113
+ }
114
+
115
+ ~RegexStringPieceArgs() {
116
+ Clear();
117
+ }
118
+
119
+ private:
120
+ void Clear() {
121
+ DeleteArray<duckdb_re2::StringPiece>(group_buffer, capacity);
122
+ group_buffer = nullptr;
123
+
124
+ size = 0;
125
+ capacity = 0;
126
+ }
127
+
128
+ public:
129
+ idx_t size;
130
+ //! The currently allocated capacity for the groups
131
+ idx_t capacity;
132
+ //! Used by ExtractAll to pre-allocate the storage for the groups
133
+ duckdb_re2::StringPiece *group_buffer;
134
+ };
135
+
64
136
  struct RegexLocalState : public FunctionLocalState {
65
- explicit RegexLocalState(RegexpBaseBindData &info)
137
+ explicit RegexLocalState(RegexpBaseBindData &info, bool extract_all = false)
66
138
  : constant_pattern(duckdb_re2::StringPiece(info.constant_string.c_str(), info.constant_string.size()),
67
139
  info.options) {
140
+ if (extract_all) {
141
+ auto group_count_p = constant_pattern.NumberOfCapturingGroups();
142
+ if (group_count_p != -1) {
143
+ group_buffer.Init(group_count_p);
144
+ }
145
+ }
68
146
  D_ASSERT(info.constant_pattern);
69
147
  }
70
148
 
71
149
  RE2 constant_pattern;
150
+ //! Used by regexp_extract_all to pre-allocate the args
151
+ RegexStringPieceArgs group_buffer;
72
152
  };
73
153
 
74
154
  unique_ptr<FunctionLocalState> RegexInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
@@ -55,6 +55,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
55
55
  {"json_transform_strict", "json"},
56
56
  {"json_type", "json"},
57
57
  {"json_valid", "json"},
58
+ {"json_serialize_sql", "json"},
58
59
  {"make_timestamptz", "icu"},
59
60
  {"parquet_metadata", "parquet"},
60
61
  {"parquet_scan", "parquet"},
@@ -18,6 +18,8 @@ struct CommonTableExpressionInfo {
18
18
  vector<string> aliases;
19
19
  unique_ptr<SelectStatement> query;
20
20
 
21
+ void FormatSerialize(FormatSerializer &serializer) const;
22
+ static unique_ptr<CommonTableExpressionInfo> FormatDeserialize(FormatDeserializer &deserializer);
21
23
  unique_ptr<CommonTableExpressionInfo> Copy();
22
24
  };
23
25
 
@@ -31,6 +31,9 @@ public:
31
31
  void Serialize(FieldWriter &writer) const override;
32
32
  static unique_ptr<ParsedExpression> Deserialize(ExpressionType type, FieldReader &source);
33
33
 
34
+ void FormatSerialize(FormatSerializer &serializer) const override;
35
+ static unique_ptr<ParsedExpression> FormatDeserialize(ExpressionType type, FormatDeserializer &deserializer);
36
+
34
37
  public:
35
38
  template <class T, class BASE>
36
39
  static string ToString(const T &entry) {
@@ -33,6 +33,8 @@ public:
33
33
  unique_ptr<ParsedExpression> Copy() const override;
34
34
 
35
35
  void Serialize(FieldWriter &writer) const override;
36
+
37
+ void FormatSerialize(FormatSerializer &serializer) const override;
36
38
  };
37
39
 
38
40
  } // namespace duckdb
@@ -16,6 +16,9 @@ namespace duckdb {
16
16
  struct CaseCheck {
17
17
  unique_ptr<ParsedExpression> when_expr;
18
18
  unique_ptr<ParsedExpression> then_expr;
19
+
20
+ void FormatSerialize(FormatSerializer &serializer) const;
21
+ static CaseCheck FormatDeserialize(FormatDeserializer &deserializer);
19
22
  };
20
23
 
21
24
  //! The CaseExpression represents a CASE expression in the query
@@ -35,6 +38,8 @@ public:
35
38
 
36
39
  void Serialize(FieldWriter &writer) const override;
37
40
  static unique_ptr<ParsedExpression> Deserialize(ExpressionType type, FieldReader &source);
41
+ void FormatSerialize(FormatSerializer &serializer) const override;
42
+ static unique_ptr<ParsedExpression> FormatDeserialize(ExpressionType type, FormatDeserializer &deserializer);
38
43
 
39
44
  public:
40
45
  template <class T, class BASE>
@@ -34,6 +34,8 @@ public:
34
34
 
35
35
  void Serialize(FieldWriter &writer) const override;
36
36
  static unique_ptr<ParsedExpression> Deserialize(ExpressionType type, FieldReader &source);
37
+ void FormatSerialize(FormatSerializer &serializer) const override;
38
+ static unique_ptr<ParsedExpression> FormatDeserialize(ExpressionType type, FormatDeserializer &deserializer);
37
39
 
38
40
  public:
39
41
  template <class T, class BASE>
@@ -31,5 +31,7 @@ public:
31
31
 
32
32
  void Serialize(FieldWriter &writer) const override;
33
33
  static unique_ptr<ParsedExpression> Deserialize(ExpressionType type, FieldReader &source);
34
+ void FormatSerialize(FormatSerializer &serializer) const override;
35
+ static unique_ptr<ParsedExpression> FormatDeserialize(ExpressionType type, FormatDeserializer &deserializer);
34
36
  };
35
37
  } // namespace duckdb
@@ -45,5 +45,7 @@ public:
45
45
 
46
46
  void Serialize(FieldWriter &writer) const override;
47
47
  static unique_ptr<ParsedExpression> Deserialize(ExpressionType type, FieldReader &source);
48
+ void FormatSerialize(FormatSerializer &serializer) const override;
49
+ static unique_ptr<ParsedExpression> FormatDeserialize(ExpressionType type, FormatDeserializer &deserializer);
48
50
  };
49
51
  } // namespace duckdb