duckdb 0.7.2-dev1734.0 → 0.7.2-dev1867.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/package.json +1 -1
  2. package/src/duckdb/src/catalog/catalog.cpp +27 -27
  3. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +6 -6
  4. package/src/duckdb/src/catalog/catalog_set.cpp +27 -25
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +4 -4
  6. package/src/duckdb/src/catalog/default/default_types.cpp +4 -4
  7. package/src/duckdb/src/catalog/default/default_views.cpp +4 -4
  8. package/src/duckdb/src/catalog/dependency_list.cpp +7 -6
  9. package/src/duckdb/src/catalog/dependency_manager.cpp +44 -38
  10. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +11 -6
  11. package/src/duckdb/src/common/types/batched_data_collection.cpp +2 -1
  12. package/src/duckdb/src/common/types/column_data_allocator.cpp +1 -0
  13. package/src/duckdb/src/common/types/vector.cpp +2 -2
  14. package/src/duckdb/src/common/types.cpp +2 -2
  15. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +14 -11
  16. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  17. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +51 -50
  18. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +14 -13
  19. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +20 -20
  20. package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +2 -2
  21. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +1 -1
  22. package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +3 -3
  23. package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
  24. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
  25. package/src/duckdb/src/execution/physical_plan/plan_update.cpp +1 -1
  26. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +3 -3
  27. package/src/duckdb/src/function/cast/cast_function_set.cpp +2 -1
  28. package/src/duckdb/src/function/scalar/math/numeric.cpp +57 -0
  29. package/src/duckdb/src/function/scalar/math_functions.cpp +1 -0
  30. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +29 -29
  31. package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +106 -0
  32. package/src/duckdb/src/function/scalar/string/hex.cpp +261 -78
  33. package/src/duckdb/src/function/scalar/string/regexp.cpp +145 -28
  34. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  35. package/src/duckdb/src/function/table/checkpoint.cpp +4 -4
  36. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +24 -24
  37. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +7 -6
  38. package/src/duckdb/src/function/table/system/duckdb_databases.cpp +1 -1
  39. package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +11 -11
  40. package/src/duckdb/src/function/table/system/pragma_database_size.cpp +1 -1
  41. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +17 -18
  42. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  43. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  44. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +9 -9
  45. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_map.hpp +38 -0
  46. package/src/duckdb/src/include/duckdb/catalog/catalog_transaction.hpp +4 -3
  47. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +2 -2
  48. package/src/duckdb/src/include/duckdb/catalog/default/default_types.hpp +2 -2
  49. package/src/duckdb/src/include/duckdb/catalog/default/default_views.hpp +2 -2
  50. package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +4 -5
  51. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +4 -5
  52. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +10 -9
  53. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -1
  54. package/src/duckdb/src/include/duckdb/common/bit_utils.hpp +147 -0
  55. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -0
  57. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +1 -0
  58. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +29 -6
  59. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +6 -5
  60. package/src/duckdb/src/include/duckdb/common/serializer.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/common/string_util.hpp +7 -0
  62. package/src/duckdb/src/include/duckdb/common/types/row_data_collection.hpp +1 -0
  63. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +5 -5
  66. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +2 -2
  67. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +3 -2
  68. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +4 -0
  69. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -3
  71. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -2
  72. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +7 -7
  73. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +11 -11
  74. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +8 -8
  75. package/src/duckdb/src/include/duckdb/optimizer/rule/arithmetic_simplification.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/optimizer/rule/case_simplification.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/optimizer/rule/comparison_simplification.hpp +1 -1
  78. package/src/duckdb/src/include/duckdb/optimizer/rule/conjunction_simplification.hpp +2 -2
  79. package/src/duckdb/src/include/duckdb/optimizer/rule/constant_folding.hpp +1 -1
  80. package/src/duckdb/src/include/duckdb/optimizer/rule/date_part_simplification.hpp +1 -1
  81. package/src/duckdb/src/include/duckdb/optimizer/rule/distributivity.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/optimizer/rule/empty_needle_removal.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/optimizer/rule/enum_comparison.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/optimizer/rule/equal_or_null_simplification.hpp +1 -1
  85. package/src/duckdb/src/include/duckdb/optimizer/rule/in_clause_simplification.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/optimizer/rule/like_optimizations.hpp +1 -1
  87. package/src/duckdb/src/include/duckdb/optimizer/rule/move_constants.hpp +1 -1
  88. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +1 -1
  89. package/src/duckdb/src/include/duckdb/optimizer/rule/regex_optimizations.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +2 -2
  91. package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/parser/expression_map.hpp +19 -6
  93. package/src/duckdb/src/include/duckdb/parser/expression_util.hpp +1 -1
  94. package/src/duckdb/src/include/duckdb/planner/expression.hpp +5 -2
  95. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +3 -3
  97. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +15 -7
  98. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +3 -0
  99. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +49 -126
  100. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/chimp128.hpp +1 -0
  101. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/chimp_utils.hpp +0 -97
  102. package/src/duckdb/src/include/duckdb/storage/compression/patas/algorithm/patas.hpp +1 -0
  103. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +5 -5
  104. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +159 -0
  105. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -0
  106. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -5
  107. package/src/duckdb/src/main/client_context.cpp +1 -1
  108. package/src/duckdb/src/main/database.cpp +2 -1
  109. package/src/duckdb/src/main/database_manager.cpp +4 -4
  110. package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +2 -2
  111. package/src/duckdb/src/optimizer/cse_optimizer.cpp +4 -4
  112. package/src/duckdb/src/optimizer/deliminator.cpp +13 -11
  113. package/src/duckdb/src/optimizer/expression_rewriter.cpp +2 -2
  114. package/src/duckdb/src/optimizer/filter_combiner.cpp +67 -65
  115. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -0
  116. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +26 -25
  117. package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +23 -21
  118. package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +3 -3
  119. package/src/duckdb/src/optimizer/rule/case_simplification.cpp +2 -2
  120. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +6 -7
  121. package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +9 -8
  122. package/src/duckdb/src/optimizer/rule/constant_folding.cpp +7 -7
  123. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +3 -3
  124. package/src/duckdb/src/optimizer/rule/distributivity.cpp +5 -5
  125. package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +6 -6
  126. package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +4 -4
  127. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +23 -26
  128. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +2 -3
  129. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -3
  130. package/src/duckdb/src/optimizer/rule/move_constants.cpp +6 -6
  131. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +2 -2
  132. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +11 -10
  133. package/src/duckdb/src/parser/expression_util.cpp +6 -6
  134. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +3 -3
  135. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -2
  136. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +3 -3
  137. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +5 -5
  138. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +2 -2
  139. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
  140. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +3 -3
  141. package/src/duckdb/src/storage/buffer/block_handle.cpp +7 -6
  142. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -1
  143. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -0
  144. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +6 -2
  145. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +7 -4
  146. package/src/duckdb/src/storage/buffer_manager.cpp +35 -726
  147. package/src/duckdb/src/storage/checkpoint_manager.cpp +2 -2
  148. package/src/duckdb/src/storage/meta_block_reader.cpp +6 -5
  149. package/src/duckdb/src/storage/standard_buffer_manager.cpp +801 -0
  150. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  151. package/src/duckdb/src/transaction/meta_transaction.cpp +13 -13
  152. package/src/duckdb/src/transaction/transaction.cpp +1 -1
  153. package/src/duckdb/src/transaction/transaction_context.cpp +1 -1
  154. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  155. package/src/duckdb/ub_src_storage.cpp +2 -0
@@ -1,3 +1,4 @@
1
+ #include "duckdb/common/bit_utils.hpp"
1
2
  #include "duckdb/common/exception.hpp"
2
3
  #include "duckdb/common/string_util.hpp"
3
4
  #include "duckdb/common/types/blob.hpp"
@@ -7,6 +8,58 @@
7
8
 
8
9
  namespace duckdb {
9
10
 
// WriteHexBytes: emits buffer_size hexadecimal digits of x into output, most
// significant nibble first. output is a reference to the write cursor and is
// advanced by one character per digit; no terminator is written here.
11
+ static void WriteHexBytes(uint64_t x, char *&output, idx_t buffer_size) {
// offset is the bit position just above the next nibble to emit (4 bits per digit).
12
+ idx_t offset = buffer_size * 4;
13
+
// Walk from the highest requested nibble down to bit 0.
14
+ for (; offset >= 4; offset -= 4) {
15
+ uint8_t byte = (x >> (offset - 4)) & 0x0F;
// The 4-bit value indexes Blob::HEX_TABLE (declared outside this hunk) to get the digit character.
16
+ *output = Blob::HEX_TABLE[byte];
17
+ output++;
18
+ }
19
+ }
20
+
// WriteHugeIntHexBytes: hugeint_t counterpart of WriteHexBytes. Emits buffer_size
// hexadecimal digits of x, most significant first, reading digits above bit 64
// from x.upper and the remaining digits from x.lower. output is advanced past
// every character written.
21
+ static void WriteHugeIntHexBytes(hugeint_t x, char *&output, idx_t buffer_size) {
22
+ idx_t offset = buffer_size * 4;
23
+ auto upper = x.upper;
24
+ auto lower = x.lower;
25
+
// While offset is at least 68 the next nibble lies in the upper word:
// offset - 68 equals (offset - 64) - 4, i.e. the nibble position within upper.
26
+ for (; offset >= 68; offset -= 4) {
27
+ uint8_t byte = (upper >> (offset - 68)) & 0x0F;
28
+ *output = Blob::HEX_TABLE[byte];
29
+ output++;
30
+ }
31
+
// offset is now at most 64; the remaining digits come from the lower word.
32
+ for (; offset >= 4; offset -= 4) {
33
+ uint8_t byte = (lower >> (offset - 4)) & 0x0F;
34
+ *output = Blob::HEX_TABLE[byte];
35
+ output++;
36
+ }
37
+ }
38
+
// WriteBinBytes: emits the low buffer_size bits of x into output as ASCII
// 0/1 characters, most significant bit first. output is advanced by one
// character per bit.
39
+ static void WriteBinBytes(uint64_t x, char *&output, idx_t buffer_size) {
40
+ idx_t offset = buffer_size;
41
+ for (; offset >= 1; offset -= 1) {
// Isolate bit (offset - 1) and turn it into the character 0 or 1.
42
+ *output = ((x >> (offset - 1)) & 0x01) + '0';
43
+ output++;
44
+ }
45
+ }
46
+
// WriteHugeIntBinBytes: hugeint_t counterpart of WriteBinBytes. Emits buffer_size
// bits of x as ASCII 0/1 characters, most significant first; bits above
// position 64 are read from x.upper, the rest from x.lower.
47
+ static void WriteHugeIntBinBytes(hugeint_t x, char *&output, idx_t buffer_size) {
48
+ auto upper = x.upper;
49
+ auto lower = x.lower;
50
+ idx_t offset = buffer_size;
51
+
// offset >= 65 means the next bit lives in the upper word at index offset - 65.
52
+ for (; offset >= 65; offset -= 1) {
53
+ *output = ((upper >> (offset - 65)) & 0x01) + '0';
54
+ output++;
55
+ }
56
+
// Remaining bits (at most 64) come from the lower word.
57
+ for (; offset >= 1; offset -= 1) {
58
+ *output = ((lower >> (offset - 1)) & 0x01) + '0';
59
+ output++;
60
+ }
61
+ }
62
+
10
63
  struct HexStrOperator {
11
64
  template <class INPUT_TYPE, class RESULT_TYPE>
12
65
  static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
@@ -29,6 +82,149 @@ struct HexStrOperator {
29
82
  }
30
83
  };
31
84
 
// HexIntegralOperator: unary string operator that renders an integral input as
// hexadecimal text without leading zero digits; an all-zero input produces the
// single character 0. The result string is allocated in the result vector.
85
+ struct HexIntegralOperator {
86
+ template <class INPUT_TYPE, class RESULT_TYPE>
87
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
88
+
// Number of significant bits = 64 minus the leading zero count.
// NOTE(review): CountZeros is not defined in this hunk (presumably bit_utils.hpp);
// input is presumably converted to uint64_t for the count - confirm for negative int64_t.
89
+ idx_t num_leading_zero = CountZeros<uint64_t>::Leading(input);
90
+ idx_t num_bits_to_check = 64 - num_leading_zero;
// Sanity check: the significant bits must fit in the width of INPUT_TYPE.
91
+ D_ASSERT(num_bits_to_check <= sizeof(INPUT_TYPE) * 8);
92
+
// One hex digit per 4 bits, rounded up.
93
+ idx_t buffer_size = (num_bits_to_check + 3) / 4;
94
+
95
+ // Special case: All bits are zero
96
+ if (buffer_size == 0) {
97
+ auto target = StringVector::EmptyString(result, 1);
98
+ auto output = target.GetDataWriteable();
99
+ *output = '0';
100
+ target.Finalize();
101
+ return target;
102
+ }
103
+
104
+ D_ASSERT(buffer_size > 0);
// Allocate exactly buffer_size characters and fill them in place.
105
+ auto target = StringVector::EmptyString(result, buffer_size);
106
+ auto output = target.GetDataWriteable();
107
+
108
+ WriteHexBytes(input, output, buffer_size);
109
+
110
+ target.Finalize();
111
+ return target;
112
+ }
113
+ };
114
+
// HexHugeIntOperator: unary string operator that renders a hugeint input as
// hexadecimal text without leading zero digits; an all-zero input produces the
// single character 0.
115
+ struct HexHugeIntOperator {
116
+ template <class INPUT_TYPE, class RESULT_TYPE>
117
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
118
+
// Two hex digits per byte of INPUT_TYPE, minus one digit for every four
// leading zero bits reported by CountZeros (defined outside this hunk).
119
+ idx_t num_leading_zero = CountZeros<hugeint_t>::Leading(input);
120
+ idx_t buffer_size = sizeof(INPUT_TYPE) * 2 - (num_leading_zero / 4);
121
+
122
+ // Special case: All bits are zero
123
+ if (buffer_size == 0) {
124
+ auto target = StringVector::EmptyString(result, 1);
125
+ auto output = target.GetDataWriteable();
126
+ *output = '0';
127
+ target.Finalize();
128
+ return target;
129
+ }
130
+
131
+ D_ASSERT(buffer_size > 0);
// Allocate exactly buffer_size characters and fill them in place.
132
+ auto target = StringVector::EmptyString(result, buffer_size);
133
+ auto output = target.GetDataWriteable();
134
+
135
+ WriteHugeIntHexBytes(input, output, buffer_size);
136
+
137
+ target.Finalize();
138
+ return target;
139
+ }
140
+ };
141
+
// ToHexFunction: scalar function entry point, parameterised on the physical
// input type (INPUT) and the operator struct (OP) that converts one value.
// Expects exactly one argument column and applies OP to each of its rows,
// writing string_t results into result.
142
+ template <class INPUT, class OP>
143
+ static void ToHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
144
+ D_ASSERT(args.ColumnCount() == 1);
145
+ auto &input = args.data[0];
146
+ idx_t count = args.size();
147
+ UnaryExecutor::ExecuteString<INPUT, string_t, OP>(input, result, count);
148
+ }
149
+
// BinaryStrOperator: unary string operator that expands every byte of the input
// string into eight ASCII 0/1 characters, most significant bit first, so the
// result is size * 8 characters long.
150
+ struct BinaryStrOperator {
151
+ template <class INPUT_TYPE, class RESULT_TYPE>
152
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
153
+ auto data = input.GetDataUnsafe();
154
+ auto size = input.GetSize();
155
+
156
+ // Allocate empty space
157
+ auto target = StringVector::EmptyString(result, size * 8);
158
+ auto output = target.GetDataWriteable();
159
+
160
+ for (idx_t i = 0; i < size; ++i) {
161
+ uint8_t byte = data[i];
// NOTE(review): this inner index is also named i and shadows the outer byte
// index. It is a separate variable, so the outer loop is unaffected, but a
// distinct name would be clearer.
162
+ for (idx_t i = 8; i >= 1; --i) {
163
+ *output = ((byte >> (i - 1)) & 0x01) + '0';
164
+ output++;
165
+ }
166
+ }
167
+
168
+ target.Finalize();
169
+ return target;
170
+ }
171
+ };
172
+
// BinaryIntegralOperator: unary string operator that renders an integral input
// as binary text without leading zero bits; an all-zero input produces the
// single character 0.
173
+ struct BinaryIntegralOperator {
174
+ template <class INPUT_TYPE, class RESULT_TYPE>
175
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
176
+
// Number of significant bits = 64 minus the leading zero count.
// NOTE(review): CountZeros is defined outside this hunk; input is presumably
// converted to uint64_t for the count - confirm for negative int64_t.
177
+ idx_t num_leading_zero = CountZeros<uint64_t>::Leading(input);
178
+ idx_t num_bits_to_check = 64 - num_leading_zero;
// Sanity check: the significant bits must fit in the width of INPUT_TYPE.
179
+ D_ASSERT(num_bits_to_check <= sizeof(INPUT_TYPE) * 8);
180
+
// One output character per significant bit.
181
+ idx_t buffer_size = num_bits_to_check;
182
+
183
+ // Special case: All bits are zero
184
+ if (buffer_size == 0) {
185
+ auto target = StringVector::EmptyString(result, 1);
186
+ auto output = target.GetDataWriteable();
187
+ *output = '0';
188
+ target.Finalize();
189
+ return target;
190
+ }
191
+
192
+ D_ASSERT(buffer_size > 0);
193
+ auto target = StringVector::EmptyString(result, buffer_size);
194
+ auto output = target.GetDataWriteable();
195
+
196
+ WriteBinBytes(input, output, buffer_size);
197
+
198
+ target.Finalize();
199
+ return target;
200
+ }
201
+ };
202
+
// BinaryHugeIntOperator: unary string operator that renders a hugeint input as
// binary text without leading zero bits; an all-zero input produces the single
// character 0.
203
+ struct BinaryHugeIntOperator {
204
+ template <class INPUT_TYPE, class RESULT_TYPE>
205
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
// Total bit width of INPUT_TYPE minus the leading zero bits gives the number
// of characters to emit. CountZeros is defined outside this hunk.
206
+ idx_t num_leading_zero = CountZeros<hugeint_t>::Leading(input);
207
+ idx_t buffer_size = sizeof(INPUT_TYPE) * 8 - num_leading_zero;
208
+
209
+ // Special case: All bits are zero
210
+ if (buffer_size == 0) {
211
+ auto target = StringVector::EmptyString(result, 1);
212
+ auto output = target.GetDataWriteable();
213
+ *output = '0';
214
+ target.Finalize();
215
+ return target;
216
+ }
217
+
218
+ auto target = StringVector::EmptyString(result, buffer_size);
219
+ auto output = target.GetDataWriteable();
220
+
221
+ WriteHugeIntBinBytes(input, output, buffer_size);
222
+
223
+ target.Finalize();
224
+ return target;
225
+ }
226
+ };
227
+
32
228
  struct FromHexOperator {
33
229
  template <class INPUT_TYPE, class RESULT_TYPE>
34
230
  static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
@@ -66,103 +262,65 @@ struct FromHexOperator {
66
262
  }
67
263
  };
68
264
 
// Diff region: the removed (-) lines are the previous HexIntegralOperator and
// HexHugeIntOperator, which built the digits in a stack buffer and copied them
// into the result. The added (+) lines, together with the shared context
// lines, form the new FromBinaryOperator.
//
// FromBinaryOperator: unary string operator that packs a string of binary
// digits into bytes. The output has (size + 7) / 8 bytes. When the digit count
// is not a multiple of 8, the first size % 8 digits fill the low bits of the
// first output byte, so every later byte is built from a full group of 8.
// Throws InvalidInputException when the input is longer than the uint32_t maximum.
69
- struct HexIntegralOperator {
265
+ struct FromBinaryOperator {
70
266
  template <class INPUT_TYPE, class RESULT_TYPE>
71
267
  static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
72
- // Sufficient space for maximum length
73
- char buffer[sizeof(INPUT_TYPE) * 2];
74
- char *ptr = buffer;
75
- idx_t buffer_size = 0;
76
-
77
- bool seen_non_zero = false;
78
- for (idx_t offset = sizeof(INPUT_TYPE) * 8; offset >= 4; offset -= 4) {
79
- uint8_t byte = (input >> (offset - 4)) & 0x0F;
80
- if (byte == 0 && !seen_non_zero && offset > 4) {
81
- continue;
82
- }
83
- seen_non_zero = true;
84
- *ptr = Blob::HEX_TABLE[byte];
85
- ptr++;
86
- buffer_size++;
268
+ auto data = input.GetDataUnsafe();
269
+ auto size = input.GetSize();
270
+
// Reject inputs whose digit count exceeds the uint32_t maximum.
271
+ if (size > NumericLimits<uint32_t>::Maximum()) {
272
+ throw InvalidInputException("Binary input length larger than 2^32 are not supported");
87
273
  }
88
274
 
// Restates the limit enforced by the throw directly above.
275
+ D_ASSERT(size <= NumericLimits<uint32_t>::Maximum());
// Eight binary digits per output byte, rounded up.
276
+ auto buffer_size = (size + 7) / 8;
277
+
89
278
  // Allocate empty space
90
279
  auto target = StringVector::EmptyString(result, buffer_size);
91
280
  auto output = target.GetDataWriteable();
92
- memcpy(output, buffer, buffer_size);
93
-
94
- target.Finalize();
95
- return target;
96
- }
97
- };
98
281
 
99
- struct HexHugeIntOperator {
100
- template <class INPUT_TYPE, class RESULT_TYPE>
101
- static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
102
- char buffer[sizeof(INPUT_TYPE) * 2];
103
- char *ptr = buffer;
104
- idx_t buffer_size = 0;
105
-
106
- uint64_t lower = input.lower;
107
- int64_t upper = input.upper;
108
-
109
- bool seen_non_zero = false;
110
- for (idx_t offset = 64; offset >= 4; offset -= 4) {
111
- uint8_t byte = (upper >> (offset - 4)) & 0x0F;
112
-
113
- if (byte == 0 && !seen_non_zero) {
114
- continue;
// Leading partial group: the first size % 8 digits become the low bits of
// the first output byte. i indexes the input digits from here on.
// NOTE(review): StringUtil::GetBinaryValue is defined outside this hunk;
// presumably it maps a digit character to 0 or 1 - confirm how it treats
// other characters.
282
+ // Treated as a single byte
283
+ idx_t i = 0;
284
+ if (size % 8 != 0) {
285
+ uint8_t byte = 0;
286
+ for (idx_t j = size % 8; j > 0; --j) {
287
+ byte |= StringUtil::GetBinaryValue(data[i]) << (j - 1);
288
+ i++;
115
289
  }
116
- seen_non_zero = true;
117
- *ptr = Blob::HEX_TABLE[byte];
118
- ptr++;
119
- buffer_size++;
290
+ *output = byte;
291
+ output++;
120
292
  }
121
293
 
122
- for (idx_t offset = 64; offset >= 4; offset -= 4) {
123
- uint8_t byte = (lower >> (offset - 4)) & 0x0F;
124
-
125
- // at least one byte space
126
- if (byte == 0 && !seen_non_zero && offset > 4) {
127
- continue;
// The remaining digit count is a multiple of 8: consume full groups of 8,
// first digit of each group in the most significant bit.
294
+ while (i < size) {
295
+ uint8_t byte = 0;
296
+ for (idx_t j = 8; j > 0; --j) {
297
+ byte |= StringUtil::GetBinaryValue(data[i]) << (j - 1);
298
+ i++;
128
299
  }
129
- seen_non_zero = true;
130
- *ptr = Blob::HEX_TABLE[byte];
131
- ptr++;
132
- buffer_size++;
300
+ *output = byte;
301
+ output++;
133
302
  }
134
303
 
135
- // Allocate empty space
136
- auto target = StringVector::EmptyString(result, buffer_size);
137
- auto output = target.GetDataWriteable();
138
- memcpy(output, buffer, buffer_size);
139
-
140
304
  target.Finalize();
141
305
  return target;
142
306
  }
143
307
  };
144
308
 
// Diff region: the removed (-) lines are the previous non-template
// ToHexFunction, which picked an operator by switching on the physical type
// of the input at run time. The added (+) lines and shared context lines
// define two functions in its place.
//
// ToBinaryFunction: scalar function entry point parameterised on the physical
// input type (INPUT) and the per-value operator (OP). Expects exactly one
// argument column and applies OP to each row, writing string_t results.
145
- static void ToHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
309
+ template <class INPUT, class OP>
310
+ static void ToBinaryFunction(DataChunk &args, ExpressionState &state, Vector &result) {
146
311
  D_ASSERT(args.ColumnCount() == 1);
147
312
  auto &input = args.data[0];
148
313
  idx_t count = args.size();
314
+ UnaryExecutor::ExecuteString<INPUT, string_t, OP>(input, result, count);
315
+ }
149
316
 
150
- switch (input.GetType().InternalType()) {
151
- case PhysicalType::VARCHAR:
152
- UnaryExecutor::ExecuteString<string_t, string_t, HexStrOperator>(input, result, count);
153
- break;
154
- case PhysicalType::INT64:
155
- UnaryExecutor::ExecuteString<int64_t, string_t, HexIntegralOperator>(input, result, count);
156
- break;
157
- case PhysicalType::INT128:
158
- UnaryExecutor::ExecuteString<hugeint_t, string_t, HexHugeIntOperator>(input, result, count);
159
- break;
160
- case PhysicalType::UINT64:
161
- UnaryExecutor::ExecuteString<uint64_t, string_t, HexIntegralOperator>(input, result, count);
162
- break;
163
- default:
164
- throw NotImplementedException("Specifier type not implemented");
165
- }
// FromBinaryFunction: scalar function entry point that applies
// FromBinaryOperator to each row of its single argument column. The argument
// is asserted to have the VARCHAR physical type.
317
+ static void FromBinaryFunction(DataChunk &args, ExpressionState &state, Vector &result) {
318
+ D_ASSERT(args.ColumnCount() == 1);
319
+ D_ASSERT(args.data[0].GetType().InternalType() == PhysicalType::VARCHAR);
320
+ auto &input = args.data[0];
321
+ idx_t count = args.size();
322
+
323
+ UnaryExecutor::ExecuteString<string_t, string_t, FromBinaryOperator>(input, result, count);
166
324
  }
167
325
 
168
326
  static void FromHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
@@ -178,13 +336,17 @@ void HexFun::RegisterFunction(BuiltinFunctions &set) {
178
336
  ScalarFunctionSet to_hex("to_hex");
179
337
  ScalarFunctionSet from_hex("from_hex");
180
338
 
181
- to_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToHexFunction));
339
+ to_hex.AddFunction(
340
+ ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToHexFunction<string_t, HexStrOperator>));
182
341
 
183
- to_hex.AddFunction(ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, ToHexFunction));
342
+ to_hex.AddFunction(
343
+ ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, ToHexFunction<int64_t, HexIntegralOperator>));
184
344
 
185
- to_hex.AddFunction(ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR, ToHexFunction));
345
+ to_hex.AddFunction(
346
+ ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR, ToHexFunction<uint64_t, HexIntegralOperator>));
186
347
 
187
- to_hex.AddFunction(ScalarFunction({LogicalType::HUGEINT}, LogicalType::VARCHAR, ToHexFunction));
348
+ to_hex.AddFunction(
349
+ ScalarFunction({LogicalType::HUGEINT}, LogicalType::VARCHAR, ToHexFunction<hugeint_t, HexHugeIntOperator>));
188
350
 
189
351
  from_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::BLOB, FromHexFunction));
190
352
 
@@ -196,6 +358,27 @@ void HexFun::RegisterFunction(BuiltinFunctions &set) {
196
358
  from_hex.name = "unhex";
197
359
  set.AddFunction(to_hex);
198
360
  set.AddFunction(from_hex);
361
+
362
+ ScalarFunctionSet to_binary("to_binary");
363
+ ScalarFunctionSet from_binary("from_binary");
364
+ to_binary.AddFunction(
365
+ ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToBinaryFunction<string_t, BinaryStrOperator>));
366
+ to_binary.AddFunction(ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR,
367
+ ToBinaryFunction<uint64_t, BinaryIntegralOperator>));
368
+ to_binary.AddFunction(
369
+ ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, ToBinaryFunction<int64_t, BinaryIntegralOperator>));
370
+ to_binary.AddFunction(ScalarFunction({LogicalType::HUGEINT}, LogicalType::VARCHAR,
371
+ ToBinaryFunction<hugeint_t, BinaryHugeIntOperator>));
372
+
373
+ from_binary.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::BLOB, FromBinaryFunction));
374
+
375
+ set.AddFunction(to_binary);
376
+ set.AddFunction(from_binary);
377
+
378
+ to_binary.name = "bin";
379
+ from_binary.name = "unbin";
380
+ set.AddFunction(to_binary);
381
+ set.AddFunction(from_binary);
199
382
  }
200
383
 
201
384
  } // namespace duckdb
@@ -218,6 +218,103 @@ bool RegexpExtractBindData::Equals(const FunctionData &other_p) const {
218
218
  return RegexpBaseBindData::Equals(other) && group_string == other.group_string;
219
219
  }
220
220
 
// RegexExtractFunction: scalar function body for the string-returning regexp
// extract. args.data[0] holds the input strings and args.data[1] the patterns.
// The bind data (RegexpExtractBindData) supplies constant_pattern, options and
// rewrite; Extract is a helper defined outside this hunk.
221
+ static void RegexExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {
222
+ auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
223
+ const auto &info = func_expr.bind_info->Cast<RegexpExtractBindData>();
224
+
225
+ auto &strings = args.data[0];
226
+ auto &patterns = args.data[1];
// Constant pattern: use the regex held in the function local state for every row.
227
+ if (info.constant_pattern) {
228
+ auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>();
229
+ UnaryExecutor::Execute<string_t, string_t>(strings, result, args.size(), [&](string_t input) {
230
+ return Extract(input, result, lstate.constant_pattern, info.rewrite);
231
+ });
232
+ } else {
// Non-constant pattern: construct an RE2 object from the pattern of each row.
233
+ BinaryExecutor::Execute<string_t, string_t, string_t>(strings, patterns, result, args.size(),
234
+ [&](string_t input, string_t pattern) {
235
+ RE2 re(CreateStringPiece(pattern), info.options);
236
+ return Extract(input, result, re, info.rewrite);
237
+ });
238
+ }
239
+ }
240
+
241
+ //===--------------------------------------------------------------------===//
242
+ // Regexp Extract Struct
243
+ //===--------------------------------------------------------------------===//
// RegexExtractStructFunction: scalar function body that matches each input
// string against the constant pattern held in the local state and writes one
// capture per child vector of the STRUCT result. When a row does not match,
// every child receives a zero-length string. A NULL input row marks the struct
// row as NULL.
244
+ static void RegexExtractStructFunction(DataChunk &args, ExpressionState &state, Vector &result) {
245
+ auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>();
246
+
247
+ const auto count = args.size();
248
+ auto &input = args.data[0];
249
+
// One child vector per requested capture.
250
+ auto &child_entries = StructVector::GetEntries(result);
251
+ const auto groupSize = child_entries.size();
252
+ // Reference the 'input' StringBuffer, because we won't need to allocate new data
253
+ // for the result, all returned strings are substrings of the originals
254
+ for (auto &child_entry : child_entries) {
255
+ child_entry->SetAuxiliary(input.GetAuxiliary());
256
+ }
257
+
// Build the argument array for PartialMatchN: groups[i] points at argv[i],
// which in turn stores its capture into the StringPiece slot ws[i].
258
+ vector<RE2::Arg> argv(groupSize);
259
+ vector<RE2::Arg *> groups(groupSize);
260
+ vector<duckdb_re2::StringPiece> ws(groupSize);
261
+ for (size_t i = 0; i < groupSize; ++i) {
262
+ groups[i] = &argv[i];
263
+ argv[i] = &ws[i];
264
+ }
265
+
// Constant input: compute a single row and keep the result a constant vector.
266
+ if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) {
267
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
268
+
269
+ if (ConstantVector::IsNull(input)) {
270
+ ConstantVector::SetNull(result, true);
271
+ } else {
272
+ ConstantVector::SetNull(result, false);
273
+ auto idata = ConstantVector::GetData<string_t>(input);
274
+ auto str = CreateStringPiece(idata[0]);
275
+ auto match = duckdb_re2::RE2::PartialMatchN(str, lstate.constant_pattern, groups.data(), groups.size());
276
+ for (size_t col = 0; col < child_entries.size(); ++col) {
277
+ auto &child_entry = child_entries[col];
278
+ ConstantVector::SetNull(*child_entry, false);
279
+ auto &extracted = ws[col];
280
+ auto cdata = ConstantVector::GetData<string_t>(*child_entry);
// On a failed match the length is forced to 0, yielding an empty string.
281
+ cdata[0] = string_t(extracted.data(), match ? extracted.size() : 0);
282
+ }
283
+ }
284
+ } else {
// General path: read the input through its unified format (selection
// vector plus validity mask) and produce flat output vectors.
285
+ UnifiedVectorFormat iunified;
286
+ input.ToUnifiedFormat(count, iunified);
287
+
288
+ const auto &ivalidity = iunified.validity;
289
+ auto idata = (const string_t *)iunified.data;
290
+
291
+ // Start with a valid flat vector
292
+ result.SetVectorType(VectorType::FLAT_VECTOR);
293
+
294
+ // Start with valid children
295
+ for (size_t col = 0; col < child_entries.size(); ++col) {
296
+ auto &child_entry = child_entries[col];
297
+ child_entry->SetVectorType(VectorType::FLAT_VECTOR);
298
+ }
299
+
300
+ for (idx_t i = 0; i < count; ++i) {
// idx is the position in the input data; results are written at row i.
301
+ const auto idx = iunified.sel->get_index(i);
302
+ if (ivalidity.RowIsValid(idx)) {
303
+ auto str = CreateStringPiece(idata[idx]);
304
+ auto match = duckdb_re2::RE2::PartialMatchN(str, lstate.constant_pattern, groups.data(), groups.size());
305
+ for (size_t col = 0; col < child_entries.size(); ++col) {
306
+ auto &child_entry = child_entries[col];
307
+ auto cdata = FlatVector::GetData<string_t>(*child_entry);
308
+ auto &extracted = ws[col];
// NOTE(review): ws is reused across rows and not reset here, so on a
// failed match extracted.data() may still hold a value from an earlier
// row; only the zero length makes the result empty - confirm this is
// the intent.
309
+ cdata[i] = string_t(extracted.data(), match ? extracted.size() : 0);
310
+ }
311
+ } else {
// NULL input: only the struct row is marked NULL; the child entries
// for this row are not written in this branch.
312
+ FlatVector::SetNull(result, i, true);
313
+ }
314
+ }
315
+ }
316
+ }
317
+
221
318
  static unique_ptr<FunctionData> RegexExtractBind(ClientContext &context, ScalarFunction &bound_function,
222
319
  vector<unique_ptr<Expression>> &arguments) {
223
320
  D_ASSERT(arguments.size() >= 2);
@@ -227,52 +324,62 @@ static unique_ptr<FunctionData> RegexExtractBind(ClientContext &context, ScalarF
227
324
  string constant_string;
228
325
  bool constant_pattern = TryParseConstantPattern(context, *arguments[1], constant_string);
229
326
 
230
- string group_string = "";
327
+ if (arguments.size() >= 4) {
328
+ ParseRegexOptions(context, *arguments[3], options);
329
+ }
330
+
331
+ string group_string = "\\0";
231
332
  if (arguments.size() >= 3) {
232
333
  if (arguments[2]->HasParameter()) {
233
334
  throw ParameterNotResolvedException();
234
335
  }
235
336
  if (!arguments[2]->IsFoldable()) {
236
- throw InvalidInputException("Group index field field must be a constant!");
337
+ throw InvalidInputException("Group specification field must be a constant!");
237
338
  }
238
339
  Value group = ExpressionExecutor::EvaluateScalar(context, *arguments[2]);
239
- if (!group.IsNull()) {
340
+ if (group.IsNull()) {
341
+ group_string = "";
342
+ } else if (group.type().id() == LogicalTypeId::LIST) {
343
+ if (!constant_pattern) {
344
+ throw BinderException("%s with LIST requires a constant pattern", bound_function.name);
345
+ }
346
+ auto &list_children = ListValue::GetChildren(group);
347
+ if (list_children.empty()) {
348
+ throw BinderException("%s requires non-empty lists of capture names", bound_function.name);
349
+ }
350
+ case_insensitive_set_t name_collision_set;
351
+ child_list_t<LogicalType> struct_children;
352
+ for (const auto &child : list_children) {
353
+ if (child.IsNull()) {
354
+ throw BinderException("NULL group name in %s", bound_function.name);
355
+ }
356
+ const auto group_name = child.ToString();
357
+ if (name_collision_set.find(group_name) != name_collision_set.end()) {
358
+ throw BinderException("Duplicate group name \"%s\" in %s", group_name, bound_function.name);
359
+ }
360
+ name_collision_set.insert(group_name);
361
+ struct_children.emplace_back(make_pair(group_name, LogicalType::VARCHAR));
362
+ }
363
+ bound_function.return_type = LogicalType::STRUCT(struct_children);
364
+
365
+ duckdb_re2::StringPiece constant_piece(constant_string.c_str(), constant_string.size());
366
+ RE2 constant_pattern(constant_piece, options);
367
+ if (size_t(constant_pattern.NumberOfCapturingGroups()) < list_children.size()) {
368
+ throw BinderException("Not enough group names in %s", bound_function.name);
369
+ }
370
+ } else {
240
371
  auto group_idx = group.GetValue<int32_t>();
241
372
  if (group_idx < 0 || group_idx > 9) {
242
373
  throw InvalidInputException("Group index must be between 0 and 9!");
243
374
  }
244
375
  group_string = "\\" + to_string(group_idx);
245
376
  }
246
- } else {
247
- group_string = "\\0";
248
- }
249
- if (arguments.size() >= 4) {
250
- ParseRegexOptions(context, *arguments[3], options);
251
377
  }
378
+
252
379
  return make_uniq<RegexpExtractBindData>(options, std::move(constant_string), constant_pattern,
253
380
  std::move(group_string));
254
381
  }
255
382
 
256
- static void RegexExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {
257
- auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
258
- const auto &info = func_expr.bind_info->Cast<RegexpExtractBindData>();
259
-
260
- auto &strings = args.data[0];
261
- auto &patterns = args.data[1];
262
- if (info.constant_pattern) {
263
- auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>();
264
- UnaryExecutor::Execute<string_t, string_t>(strings, result, args.size(), [&](string_t input) {
265
- return Extract(input, result, lstate.constant_pattern, info.rewrite);
266
- });
267
- } else {
268
- BinaryExecutor::Execute<string_t, string_t, string_t>(strings, patterns, result, args.size(),
269
- [&](string_t input, string_t pattern) {
270
- RE2 re(CreateStringPiece(pattern), info.options);
271
- return Extract(input, result, re, info.rewrite);
272
- });
273
- }
274
- }
275
-
276
383
  void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
277
384
  ScalarFunctionSet regexp_full_match("regexp_full_match");
278
385
  regexp_full_match.AddFunction(ScalarFunction(
@@ -315,6 +422,16 @@ void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
315
422
  {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER, LogicalType::VARCHAR}, LogicalType::VARCHAR,
316
423
  RegexExtractFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
317
424
  FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
425
+ // REGEXP_EXTRACT(<string>, <pattern>, [<group 1 name>[, <group n name>]...])
426
+ regexp_extract.AddFunction(ScalarFunction(
427
+ {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::LIST(LogicalType::VARCHAR)}, LogicalType::VARCHAR,
428
+ RegexExtractStructFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
429
+ FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
430
+ // REGEXP_EXTRACT(<string>, <pattern>, [<group 1 name>[, <group n name>]...], <options>)
431
+ regexp_extract.AddFunction(ScalarFunction(
432
+ {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::LIST(LogicalType::VARCHAR), LogicalType::VARCHAR},
433
+ LogicalType::VARCHAR, RegexExtractStructFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState,
434
+ LogicalType::INVALID, FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
318
435
 
319
436
  ScalarFunctionSet regexp_extract_all("regexp_extract_all");
320
437
  regexp_extract_all.AddFunction(ScalarFunction(
@@ -39,6 +39,7 @@ void BuiltinFunctions::RegisterStringFunctions() {
39
39
  Register<CHR>();
40
40
  Register<MismatchesFun>();
41
41
  Register<LevenshteinFun>();
42
+ Register<DamerauLevenshteinFun>();
42
43
  Register<JaccardFun>();
43
44
  Register<JaroWinklerFun>();
44
45
 
@@ -8,10 +8,10 @@
8
8
  namespace duckdb {
9
9
 
10
10
  struct CheckpointBindData : public FunctionData {
11
- explicit CheckpointBindData(AttachedDatabase *db) : db(db) {
11
+ explicit CheckpointBindData(optional_ptr<AttachedDatabase> db) : db(db) {
12
12
  }
13
13
 
14
- AttachedDatabase *db;
14
+ optional_ptr<AttachedDatabase> db;
15
15
 
16
16
  public:
17
17
  unique_ptr<FunctionData> Copy() const override {
@@ -29,7 +29,7 @@ static unique_ptr<FunctionData> CheckpointBind(ClientContext &context, TableFunc
29
29
  return_types.emplace_back(LogicalType::BOOLEAN);
30
30
  names.emplace_back("Success");
31
31
 
32
- AttachedDatabase *db;
32
+ optional_ptr<AttachedDatabase> db;
33
33
  auto &db_manager = DatabaseManager::Get(context);
34
34
  if (!input.inputs.empty()) {
35
35
  auto &db_name = StringValue::Get(input.inputs[0]);
@@ -46,7 +46,7 @@ static unique_ptr<FunctionData> CheckpointBind(ClientContext &context, TableFunc
46
46
  template <bool FORCE>
47
47
  static void TemplatedCheckpointFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
48
48
  auto &bind_data = data_p.bind_data->Cast<CheckpointBindData>();
49
- auto &transaction_manager = TransactionManager::Get(*bind_data.db);
49
+ auto &transaction_manager = TransactionManager::Get(*bind_data.db.get_mutable());
50
50
  transaction_manager.Checkpoint(context, FORCE);
51
51
  }
52
52