duckdb 0.7.2-dev717.0 → 0.7.2-dev865.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179)
  1. package/binding.gyp +2 -0
  2. package/lib/duckdb.d.ts +12 -1
  3. package/lib/duckdb.js +19 -0
  4. package/package.json +1 -1
  5. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  6. package/src/duckdb/extension/json/include/json_functions.hpp +1 -0
  7. package/src/duckdb/extension/json/include/json_serializer.hpp +77 -0
  8. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +147 -0
  9. package/src/duckdb/extension/json/json_functions.cpp +1 -0
  10. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  11. package/src/duckdb/extension/json/json_serializer.cpp +217 -0
  12. package/src/duckdb/src/catalog/catalog.cpp +21 -5
  13. package/src/duckdb/src/common/enums/expression_type.cpp +8 -222
  14. package/src/duckdb/src/common/enums/join_type.cpp +3 -22
  15. package/src/duckdb/src/common/exception.cpp +2 -2
  16. package/src/duckdb/src/common/serializer/enum_serializer.cpp +1172 -0
  17. package/src/duckdb/src/common/types/value.cpp +117 -93
  18. package/src/duckdb/src/common/types/vector.cpp +140 -1
  19. package/src/duckdb/src/common/types.cpp +166 -89
  20. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +3 -0
  21. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +5 -8
  22. package/src/duckdb/src/function/scalar/date/date_part.cpp +2 -2
  23. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +2 -2
  24. package/src/duckdb/src/function/scalar/list/list_aggregates.cpp +1 -1
  25. package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +4 -0
  26. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +8 -8
  27. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +243 -0
  28. package/src/duckdb/src/function/scalar/string/regexp/regexp_util.cpp +79 -0
  29. package/src/duckdb/src/function/scalar/string/regexp.cpp +21 -80
  30. package/src/duckdb/src/function/table/arrow_conversion.cpp +7 -1
  31. package/src/duckdb/src/function/table/table_scan.cpp +1 -1
  32. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  33. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +3 -0
  34. package/src/duckdb/src/include/duckdb/common/enums/aggregate_handling.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -3
  36. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -0
  37. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +2 -0
  38. package/src/duckdb/src/include/duckdb/common/enums/set_operation_type.hpp +2 -1
  39. package/src/duckdb/src/include/duckdb/common/exception.hpp +40 -9
  40. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +3 -0
  41. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +113 -0
  42. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +336 -0
  43. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +268 -0
  44. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +126 -0
  45. package/src/duckdb/src/include/duckdb/common/string_util.hpp +12 -0
  46. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -31
  47. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -0
  48. package/src/duckdb/src/include/duckdb/common/types.hpp +8 -2
  49. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +81 -1
  50. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  51. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  52. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  53. package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +2 -0
  54. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +5 -0
  55. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +2 -0
  56. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +2 -0
  57. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +2 -0
  58. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +2 -0
  60. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  61. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  62. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -0
  63. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +2 -0
  64. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -0
  65. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +2 -0
  66. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +2 -0
  67. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +2 -0
  68. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +2 -0
  69. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +5 -0
  70. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +2 -0
  71. package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +5 -0
  72. package/src/duckdb/src/include/duckdb/parser/query_node/recursive_cte_node.hpp +3 -0
  73. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +5 -0
  74. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +3 -0
  75. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +11 -1
  76. package/src/duckdb/src/include/duckdb/parser/result_modifier.hpp +24 -1
  77. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -1
  78. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +6 -1
  79. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +4 -0
  80. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -0
  81. package/src/duckdb/src/include/duckdb/parser/tableref/expressionlistref.hpp +3 -0
  82. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +3 -0
  83. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +9 -0
  84. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  85. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +3 -0
  86. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  87. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +9 -52
  88. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +62 -0
  89. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  90. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +6 -3
  91. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +3 -2
  92. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +5 -3
  93. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +4 -1
  94. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -3
  95. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +5 -3
  96. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +37 -0
  97. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +8 -1
  98. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +4 -3
  99. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +271 -26
  100. package/src/duckdb/src/main/extension/extension_install.cpp +7 -2
  101. package/src/duckdb/src/optimizer/deliminator.cpp +1 -1
  102. package/src/duckdb/src/optimizer/filter_combiner.cpp +1 -1
  103. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +3 -3
  104. package/src/duckdb/src/optimizer/rule/move_constants.cpp +2 -2
  105. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +1 -1
  106. package/src/duckdb/src/parser/common_table_expression_info.cpp +19 -0
  107. package/src/duckdb/src/parser/expression/between_expression.cpp +17 -0
  108. package/src/duckdb/src/parser/expression/case_expression.cpp +28 -0
  109. package/src/duckdb/src/parser/expression/cast_expression.cpp +17 -0
  110. package/src/duckdb/src/parser/expression/collate_expression.cpp +16 -0
  111. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -0
  112. package/src/duckdb/src/parser/expression/comparison_expression.cpp +16 -0
  113. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +15 -0
  114. package/src/duckdb/src/parser/expression/constant_expression.cpp +14 -0
  115. package/src/duckdb/src/parser/expression/default_expression.cpp +7 -0
  116. package/src/duckdb/src/parser/expression/function_expression.cpp +35 -0
  117. package/src/duckdb/src/parser/expression/lambda_expression.cpp +16 -0
  118. package/src/duckdb/src/parser/expression/operator_expression.cpp +15 -0
  119. package/src/duckdb/src/parser/expression/parameter_expression.cpp +15 -0
  120. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +14 -0
  121. package/src/duckdb/src/parser/expression/star_expression.cpp +20 -0
  122. package/src/duckdb/src/parser/expression/subquery_expression.cpp +20 -0
  123. package/src/duckdb/src/parser/expression/window_expression.cpp +43 -0
  124. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +22 -10
  125. package/src/duckdb/src/parser/parsed_expression.cpp +72 -0
  126. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +21 -0
  127. package/src/duckdb/src/parser/query_node/select_node.cpp +31 -0
  128. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +17 -0
  129. package/src/duckdb/src/parser/query_node.cpp +50 -0
  130. package/src/duckdb/src/parser/result_modifier.cpp +78 -0
  131. package/src/duckdb/src/parser/statement/select_statement.cpp +12 -0
  132. package/src/duckdb/src/parser/tableref/basetableref.cpp +21 -0
  133. package/src/duckdb/src/parser/tableref/emptytableref.cpp +4 -0
  134. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +17 -0
  135. package/src/duckdb/src/parser/tableref/joinref.cpp +25 -0
  136. package/src/duckdb/src/parser/tableref/pivotref.cpp +53 -0
  137. package/src/duckdb/src/parser/tableref/subqueryref.cpp +15 -0
  138. package/src/duckdb/src/parser/tableref/table_function.cpp +17 -0
  139. package/src/duckdb/src/parser/tableref.cpp +46 -0
  140. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +11 -0
  141. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -1
  142. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  143. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  144. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +22 -4
  145. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +4 -0
  146. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -1
  147. package/src/duckdb/src/planner/expression/bound_expression.cpp +4 -0
  148. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -11
  149. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +6 -0
  150. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -0
  151. package/src/duckdb/src/storage/compression/numeric_constant.cpp +2 -2
  152. package/src/duckdb/src/storage/data_table.cpp +1 -1
  153. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +145 -83
  154. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +65 -0
  155. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  156. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +1 -6
  157. package/src/duckdb/src/storage/table/column_data.cpp +29 -35
  158. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +5 -5
  159. package/src/duckdb/src/storage/table/column_segment.cpp +8 -7
  160. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -1
  161. package/src/duckdb/src/storage/table/persistent_table_data.cpp +2 -1
  162. package/src/duckdb/src/storage/table/row_group.cpp +9 -9
  163. package/src/duckdb/src/storage/table/row_group_collection.cpp +82 -66
  164. package/src/duckdb/src/storage/table/scan_state.cpp +22 -3
  165. package/src/duckdb/src/storage/table/standard_column_data.cpp +1 -0
  166. package/src/duckdb/src/storage/table/struct_column_data.cpp +1 -0
  167. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +0 -1
  168. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11780 -11512
  169. package/src/duckdb/third_party/re2/re2/re2.cc +9 -0
  170. package/src/duckdb/third_party/re2/re2/re2.h +2 -0
  171. package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
  172. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  173. package/src/duckdb/ub_src_function_scalar_string_regexp.cpp +4 -0
  174. package/src/duckdb/ub_src_parser.cpp +2 -0
  175. package/src/duckdb/ub_src_storage_statistics.cpp +2 -0
  176. package/src/duckdb/ub_src_storage_table.cpp +0 -2
  177. package/src/utils.cpp +12 -0
  178. package/test/extension.test.ts +44 -26
  179. package/src/duckdb/src/storage/table/segment_tree.cpp +0 -179
@@ -0,0 +1,243 @@
1
+ #include "duckdb/function/scalar/regexp.hpp"
2
+ #include "duckdb/execution/expression_executor.hpp"
3
+ #include "duckdb/planner/expression/bound_function_expression.hpp"
4
+ #include "duckdb/function/scalar/string_functions.hpp"
5
+ #include "re2/re2.h"
6
+
7
+ namespace duckdb {
8
+
9
+ using regexp_util::CreateStringPiece;
10
+ using regexp_util::Extract;
11
+ using regexp_util::ParseRegexOptions;
12
+ using regexp_util::TryParseConstantPattern;
13
+
14
+ unique_ptr<FunctionLocalState>
15
+ RegexpExtractAll::InitLocalState(ExpressionState &state, const BoundFunctionExpression &expr, FunctionData *bind_data) {
16
+ auto &info = (RegexpBaseBindData &)*bind_data;
17
+ if (info.constant_pattern) {
18
+ return make_unique<RegexLocalState>(info, true);
19
+ }
20
+ return nullptr;
21
+ }
22
+
23
+ // Forwards startpos automatically
24
+ bool ExtractAll(duckdb_re2::StringPiece &input, duckdb_re2::RE2 &pattern, idx_t *startpos,
25
+ duckdb_re2::StringPiece *groups, int ngroups) {
26
+
27
+ D_ASSERT(pattern.ok());
28
+ D_ASSERT(pattern.NumberOfCapturingGroups() == ngroups);
29
+
30
+ if (!pattern.Match(input, *startpos, input.size(), pattern.Anchored(), groups, ngroups + 1)) {
31
+ return false;
32
+ }
33
+ idx_t consumed = static_cast<size_t>(groups[0].end() - (input.begin() + *startpos));
34
+ if (!consumed) {
35
+ // Empty match found, have to manually forward the input
36
+ // to avoid an infinite loop
37
+ // FIXME: support unicode characters
38
+ consumed++;
39
+ while (*startpos + consumed < input.length() && !LengthFun::IsCharacter(input[*startpos + consumed])) {
40
+ consumed++;
41
+ }
42
+ }
43
+ *startpos += consumed;
44
+ return true;
45
+ }
46
+
47
+ void ExtractSingleTuple(const string_t &string, duckdb_re2::RE2 &pattern, int32_t group, RegexStringPieceArgs &args,
48
+ Vector &result, idx_t row) {
49
+ auto input = CreateStringPiece(string);
50
+
51
+ auto &child_vector = ListVector::GetEntry(result);
52
+ auto list_content = FlatVector::GetData<string_t>(child_vector);
53
+ auto &child_validity = FlatVector::Validity(child_vector);
54
+
55
+ auto current_list_size = ListVector::GetListSize(result);
56
+ auto current_list_capacity = ListVector::GetListCapacity(result);
57
+
58
+ auto result_data = FlatVector::GetData<list_entry_t>(result);
59
+ auto &list_entry = result_data[row];
60
+ list_entry.offset = current_list_size;
61
+
62
+ if (group < 0) {
63
+ list_entry.length = 0;
64
+ return;
65
+ }
66
+ // If the requested group index is out of bounds
67
+ // we want to throw only if there is a match
68
+ bool throw_on_group_found = (idx_t)group > args.size;
69
+
70
+ idx_t startpos = 0;
71
+ for (idx_t iteration = 0; ExtractAll(input, pattern, &startpos, args.group_buffer, args.size); iteration++) {
72
+ if (!iteration && throw_on_group_found) {
73
+ throw InvalidInputException("Pattern has %d groups. Cannot access group %d", args.size, group);
74
+ }
75
+
76
+ // Make sure we have enough room for the new entries
77
+ if (current_list_size + 1 >= current_list_capacity) {
78
+ ListVector::Reserve(result, current_list_capacity * 2);
79
+ current_list_capacity = ListVector::GetListCapacity(result);
80
+ list_content = FlatVector::GetData<string_t>(child_vector);
81
+ }
82
+
83
+ // Write the captured groups into the list-child vector
84
+ auto &match_group = args.group_buffer[group];
85
+
86
+ idx_t child_idx = current_list_size;
87
+ if (match_group.empty()) {
88
+ // This group was not matched
89
+ list_content[child_idx] = string_t(string.GetDataUnsafe(), 0);
90
+ if (match_group.begin() == nullptr) {
91
+ // This group is optional
92
+ child_validity.SetInvalid(child_idx);
93
+ }
94
+ } else {
95
+ // Every group is a substring of the original, we can find out the offset using the pointer
96
+ // the 'match_group' address is guaranteed to be bigger than that of the source
97
+ D_ASSERT((const char *)match_group.begin() >= string.GetDataUnsafe());
98
+ idx_t offset = match_group.begin() - string.GetDataUnsafe();
99
+ list_content[child_idx] = string_t(string.GetDataUnsafe() + offset, match_group.size());
100
+ }
101
+ current_list_size++;
102
+ if (startpos > input.size()) {
103
+ // Empty match found at the end of the string
104
+ break;
105
+ }
106
+ }
107
+ list_entry.length = current_list_size - list_entry.offset;
108
+ ListVector::SetListSize(result, current_list_size);
109
+ }
110
+
111
+ int32_t GetGroupIndex(DataChunk &args, idx_t row, int32_t &result) {
112
+ if (args.ColumnCount() < 3) {
113
+ result = 0;
114
+ return true;
115
+ }
116
+ UnifiedVectorFormat format;
117
+ args.data[2].ToUnifiedFormat(args.size(), format);
118
+ idx_t index = format.sel->get_index(row);
119
+ if (!format.validity.RowIsValid(index)) {
120
+ return false;
121
+ }
122
+ result = ((int32_t *)format.data)[index];
123
+ return true;
124
+ }
125
+
126
+ duckdb_re2::RE2 &GetPattern(const RegexpBaseBindData &info, ExpressionState &state,
127
+ unique_ptr<duckdb_re2::RE2> &pattern_p) {
128
+ if (info.constant_pattern) {
129
+ auto &lstate = (RegexLocalState &)*ExecuteFunctionState::GetFunctionState(state);
130
+ return lstate.constant_pattern;
131
+ }
132
+ D_ASSERT(pattern_p);
133
+ return *pattern_p;
134
+ }
135
+
136
+ RegexStringPieceArgs &GetGroupsBuffer(const RegexpBaseBindData &info, ExpressionState &state,
137
+ unique_ptr<RegexStringPieceArgs> &groups_p) {
138
+ if (info.constant_pattern) {
139
+ auto &lstate = (RegexLocalState &)*ExecuteFunctionState::GetFunctionState(state);
140
+ return lstate.group_buffer;
141
+ }
142
+ D_ASSERT(groups_p);
143
+ return *groups_p;
144
+ }
145
+
146
+ void RegexpExtractAll::Execute(DataChunk &args, ExpressionState &state, Vector &result) {
147
+ auto &func_expr = (BoundFunctionExpression &)state.expr;
148
+ const auto &info = (RegexpBaseBindData &)*func_expr.bind_info;
149
+
150
+ auto &strings = args.data[0];
151
+ auto &patterns = args.data[1];
152
+ D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
153
+ auto &output_child = ListVector::GetEntry(result);
154
+
155
+ UnifiedVectorFormat strings_data;
156
+ strings.ToUnifiedFormat(args.size(), strings_data);
157
+
158
+ UnifiedVectorFormat pattern_data;
159
+ patterns.ToUnifiedFormat(args.size(), pattern_data);
160
+
161
+ ListVector::Reserve(result, STANDARD_VECTOR_SIZE);
162
+ // Reference the 'strings' StringBuffer, because we won't need to allocate new data
163
+ // for the result, all returned strings are substrings of the originals
164
+ output_child.SetAuxiliary(strings.GetAuxiliary());
165
+
166
+ // Avoid doing extra work if all the inputs are constant
167
+ idx_t tuple_count = args.AllConstant() ? 1 : args.size();
168
+
169
+ unique_ptr<RegexStringPieceArgs> non_const_args;
170
+ unique_ptr<duckdb_re2::RE2> stored_re;
171
+ if (!info.constant_pattern) {
172
+ non_const_args = make_unique<RegexStringPieceArgs>();
173
+ } else {
174
+ // Verify that the constant pattern is valid
175
+ auto &re = GetPattern(info, state, stored_re);
176
+ auto group_count_p = re.NumberOfCapturingGroups();
177
+ if (group_count_p == -1) {
178
+ throw InvalidInputException("Pattern failed to parse, error: '%s'", re.error());
179
+ }
180
+ }
181
+
182
+ for (idx_t row = 0; row < tuple_count; row++) {
183
+ bool pattern_valid = true;
184
+ if (!info.constant_pattern) {
185
+ // Check if the pattern is NULL or not,
186
+ // and compile the pattern if it's not constant
187
+ auto pattern_idx = pattern_data.sel->get_index(row);
188
+ if (!pattern_data.validity.RowIsValid(pattern_idx)) {
189
+ pattern_valid = false;
190
+ } else {
191
+ auto &pattern_p = ((string_t *)pattern_data.data)[pattern_idx];
192
+ auto pattern_strpiece = CreateStringPiece(pattern_p);
193
+ stored_re = make_unique<duckdb_re2::RE2>(pattern_strpiece, info.options);
194
+
195
+ // Increase the size of the args buffer if needed
196
+ auto group_count_p = stored_re->NumberOfCapturingGroups();
197
+ if (group_count_p == -1) {
198
+ throw InvalidInputException("Pattern failed to parse, error: '%s'", stored_re->error());
199
+ }
200
+ non_const_args->SetSize(group_count_p);
201
+ }
202
+ }
203
+
204
+ auto string_idx = strings_data.sel->get_index(row);
205
+ int32_t group_index;
206
+ if (!pattern_valid || !strings_data.validity.RowIsValid(string_idx) || !GetGroupIndex(args, row, group_index)) {
207
+ // If something is NULL, the result is NULL
208
+ // FIXME: do we even need 'SPECIAL_HANDLING'?
209
+ auto result_data = FlatVector::GetData<list_entry_t>(result);
210
+ auto &result_validity = FlatVector::Validity(result);
211
+ result_data[row].length = 0;
212
+ result_data[row].offset = ListVector::GetListSize(result);
213
+ result_validity.SetInvalid(row);
214
+ continue;
215
+ }
216
+
217
+ auto &re = GetPattern(info, state, stored_re);
218
+ auto &groups = GetGroupsBuffer(info, state, non_const_args);
219
+ auto &string = ((string_t *)strings_data.data)[string_idx];
220
+ ExtractSingleTuple(string, re, group_index, groups, result, row);
221
+ }
222
+
223
+ if (args.AllConstant()) {
224
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
225
+ }
226
+ }
227
+
228
+ unique_ptr<FunctionData> RegexpExtractAll::Bind(ClientContext &context, ScalarFunction &bound_function,
229
+ vector<unique_ptr<Expression>> &arguments) {
230
+ D_ASSERT(arguments.size() >= 2);
231
+
232
+ duckdb_re2::RE2::Options options;
233
+
234
+ string constant_string;
235
+ bool constant_pattern = TryParseConstantPattern(context, *arguments[1], constant_string);
236
+
237
+ if (arguments.size() >= 4) {
238
+ ParseRegexOptions(context, *arguments[3], options);
239
+ }
240
+ return make_unique<RegexpExtractBindData>(options, std::move(constant_string), constant_pattern, "");
241
+ }
242
+
243
+ } // namespace duckdb
@@ -0,0 +1,79 @@
1
+ #include "duckdb/function/scalar/regexp.hpp"
2
+ #include "duckdb/execution/expression_executor.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ namespace regexp_util {
7
+
8
+ bool TryParseConstantPattern(ClientContext &context, Expression &expr, string &constant_string) {
9
+ if (!expr.IsFoldable()) {
10
+ return false;
11
+ }
12
+ Value pattern_str = ExpressionExecutor::EvaluateScalar(context, expr);
13
+ if (!pattern_str.IsNull() && pattern_str.type().id() == LogicalTypeId::VARCHAR) {
14
+ constant_string = StringValue::Get(pattern_str);
15
+ return true;
16
+ }
17
+ return false;
18
+ }
19
+
20
+ void ParseRegexOptions(const string &options, duckdb_re2::RE2::Options &result, bool *global_replace) {
21
+ for (idx_t i = 0; i < options.size(); i++) {
22
+ switch (options[i]) {
23
+ case 'c':
24
+ // case-sensitive matching
25
+ result.set_case_sensitive(true);
26
+ break;
27
+ case 'i':
28
+ // case-insensitive matching
29
+ result.set_case_sensitive(false);
30
+ break;
31
+ case 'l':
32
+ // literal matching
33
+ result.set_literal(true);
34
+ break;
35
+ case 'm':
36
+ case 'n':
37
+ case 'p':
38
+ // newline-sensitive matching
39
+ result.set_dot_nl(false);
40
+ break;
41
+ case 's':
42
+ // non-newline-sensitive matching
43
+ result.set_dot_nl(true);
44
+ break;
45
+ case 'g':
46
+ // global replace, only available for regexp_replace
47
+ if (global_replace) {
48
+ *global_replace = true;
49
+ } else {
50
+ throw InvalidInputException("Option 'g' (global replace) is only valid for regexp_replace");
51
+ }
52
+ break;
53
+ case ' ':
54
+ case '\t':
55
+ case '\n':
56
+ // ignore whitespace
57
+ break;
58
+ default:
59
+ throw InvalidInputException("Unrecognized Regex option %c", options[i]);
60
+ }
61
+ }
62
+ }
63
+
64
+ void ParseRegexOptions(ClientContext &context, Expression &expr, RE2::Options &target, bool *global_replace) {
65
+ if (expr.HasParameter()) {
66
+ throw ParameterNotResolvedException();
67
+ }
68
+ if (!expr.IsFoldable()) {
69
+ throw InvalidInputException("Regex options field must be a constant");
70
+ }
71
+ Value options_str = ExpressionExecutor::EvaluateScalar(context, expr);
72
+ if (!options_str.IsNull() && options_str.type().id() == LogicalTypeId::VARCHAR) {
73
+ ParseRegexOptions(StringValue::Get(options_str), target, global_replace);
74
+ }
75
+ }
76
+
77
+ } // namespace regexp_util
78
+
79
+ } // namespace duckdb
@@ -12,6 +12,11 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
+ using regexp_util::CreateStringPiece;
16
+ using regexp_util::Extract;
17
+ using regexp_util::ParseRegexOptions;
18
+ using regexp_util::TryParseConstantPattern;
19
+
15
20
  static bool RegexOptionsEquals(const duckdb_re2::RE2::Options &opt_a, const duckdb_re2::RE2::Options &opt_b) {
16
21
  return opt_a.case_sensitive() == opt_b.case_sensitive();
17
22
  }
@@ -32,10 +37,6 @@ bool RegexpBaseBindData::Equals(const FunctionData &other_p) const {
32
37
  RegexOptionsEquals(options, other.options);
33
38
  }
34
39
 
35
- static inline duckdb_re2::StringPiece CreateStringPiece(string_t &input) {
36
- return duckdb_re2::StringPiece(input.GetDataUnsafe(), input.GetSize());
37
- }
38
-
39
40
  unique_ptr<FunctionLocalState> RegexInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
40
41
  FunctionData *bind_data) {
41
42
  auto &info = (RegexpBaseBindData &)*bind_data;
@@ -45,75 +46,6 @@ unique_ptr<FunctionLocalState> RegexInitLocalState(ExpressionState &state, const
45
46
  return nullptr;
46
47
  }
47
48
 
48
- static void ParseRegexOptions(const string &options, duckdb_re2::RE2::Options &result, bool *global_replace = nullptr) {
49
- for (idx_t i = 0; i < options.size(); i++) {
50
- switch (options[i]) {
51
- case 'c':
52
- // case-sensitive matching
53
- result.set_case_sensitive(true);
54
- break;
55
- case 'i':
56
- // case-insensitive matching
57
- result.set_case_sensitive(false);
58
- break;
59
- case 'l':
60
- // literal matching
61
- result.set_literal(true);
62
- break;
63
- case 'm':
64
- case 'n':
65
- case 'p':
66
- // newline-sensitive matching
67
- result.set_dot_nl(false);
68
- break;
69
- case 's':
70
- // non-newline-sensitive matching
71
- result.set_dot_nl(true);
72
- break;
73
- case 'g':
74
- // global replace, only available for regexp_replace
75
- if (global_replace) {
76
- *global_replace = true;
77
- } else {
78
- throw InvalidInputException("Option 'g' (global replace) is only valid for regexp_replace");
79
- }
80
- break;
81
- case ' ':
82
- case '\t':
83
- case '\n':
84
- // ignore whitespace
85
- break;
86
- default:
87
- throw InvalidInputException("Unrecognized Regex option %c", options[i]);
88
- }
89
- }
90
- }
91
-
92
- void ParseRegexOptions(ClientContext &context, Expression &expr, RE2::Options &target, bool *global_replace = nullptr) {
93
- if (expr.HasParameter()) {
94
- throw ParameterNotResolvedException();
95
- }
96
- if (!expr.IsFoldable()) {
97
- throw InvalidInputException("Regex options field must be a constant");
98
- }
99
- Value options_str = ExpressionExecutor::EvaluateScalar(context, expr);
100
- if (!options_str.IsNull() && options_str.type().id() == LogicalTypeId::VARCHAR) {
101
- ParseRegexOptions(StringValue::Get(options_str), target, global_replace);
102
- }
103
- }
104
-
105
- static bool TryParseConstantPattern(ClientContext &context, Expression &expr, string &constant_string) {
106
- if (!expr.IsFoldable()) {
107
- return false;
108
- }
109
- Value pattern_str = ExpressionExecutor::EvaluateScalar(context, expr);
110
- if (!pattern_str.IsNull() && pattern_str.type().id() == LogicalTypeId::VARCHAR) {
111
- constant_string = StringValue::Get(pattern_str);
112
- return true;
113
- }
114
- return false;
115
- }
116
-
117
49
  //===--------------------------------------------------------------------===//
118
50
  // Regexp Matches
119
51
  //===--------------------------------------------------------------------===//
@@ -321,13 +253,6 @@ static unique_ptr<FunctionData> RegexExtractBind(ClientContext &context, ScalarF
321
253
  std::move(group_string));
322
254
  }
323
255
 
324
- inline static string_t Extract(const string_t &input, Vector &result, const RE2 &re,
325
- const duckdb_re2::StringPiece &rewrite) {
326
- std::string extracted;
327
- RE2::Extract(input.GetString(), re, rewrite, &extracted);
328
- return StringVector::AddString(result, extracted.c_str(), extracted.size());
329
- }
330
-
331
256
  static void RegexExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {
332
257
  auto &func_expr = (BoundFunctionExpression &)state.expr;
333
258
  const auto &info = (RegexpExtractBindData &)*func_expr.bind_info;
@@ -391,10 +316,26 @@ void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
391
316
  RegexExtractFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
392
317
  FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
393
318
 
319
+ ScalarFunctionSet regexp_extract_all("regexp_extract_all");
320
+ regexp_extract_all.AddFunction(ScalarFunction(
321
+ {LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::LIST(LogicalType::VARCHAR),
322
+ RegexpExtractAll::Execute, RegexpExtractAll::Bind, nullptr, nullptr, RegexpExtractAll::InitLocalState,
323
+ LogicalType::INVALID, FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
324
+ regexp_extract_all.AddFunction(ScalarFunction(
325
+ {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER}, LogicalType::LIST(LogicalType::VARCHAR),
326
+ RegexpExtractAll::Execute, RegexpExtractAll::Bind, nullptr, nullptr, RegexpExtractAll::InitLocalState,
327
+ LogicalType::INVALID, FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
328
+ regexp_extract_all.AddFunction(
329
+ ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER, LogicalType::VARCHAR},
330
+ LogicalType::LIST(LogicalType::VARCHAR), RegexpExtractAll::Execute, RegexpExtractAll::Bind,
331
+ nullptr, nullptr, RegexpExtractAll::InitLocalState, LogicalType::INVALID,
332
+ FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
333
+
394
334
  set.AddFunction(regexp_full_match);
395
335
  set.AddFunction(regexp_partial_match);
396
336
  set.AddFunction(regexp_replace);
397
337
  set.AddFunction(regexp_extract);
338
+ set.AddFunction(regexp_extract_all);
398
339
  }
399
340
 
400
341
  } // namespace duckdb
@@ -20,7 +20,13 @@ void ShiftRight(unsigned char *ar, int size, int shift) {
20
20
 
21
21
  void GetValidityMask(ValidityMask &mask, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
22
22
  int64_t nested_offset = -1, bool add_null = false) {
23
- if (array.null_count != 0 && array.buffers[0]) {
23
+ // In certain cases we don't need to or cannot copy arrow's validity mask to duckdb.
24
+ //
25
+ // The conditions where we do want to copy arrow's mask to duckdb are:
26
+ // 1. nulls exist
27
+ // 2. n_buffers > 0, meaning the array's arrow type is not `null`
28
+ // 3. the validity buffer (the first buffer) is not a nullptr
29
+ if (array.null_count != 0 && array.n_buffers > 0 && array.buffers[0]) {
24
30
  auto bit_offset = scan_state.chunk_offset + array.offset;
25
31
  if (nested_offset != -1) {
26
32
  bit_offset = nested_offset;
@@ -332,7 +332,7 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun
332
332
  auto comparison_type = comparison->type;
333
333
  if (comparison->left->type == ExpressionType::VALUE_CONSTANT) {
334
334
  // the expression is on the right side, we flip them around
335
- comparison_type = FlipComparisionExpression(comparison_type);
335
+ comparison_type = FlipComparisonExpression(comparison_type);
336
336
  }
337
337
  if (comparison_type == ExpressionType::COMPARE_EQUAL) {
338
338
  // equality value
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev717"
2
+ #define DUCKDB_VERSION "0.7.2-dev865"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "cd47ad8e2d"
5
+ #define DUCKDB_SOURCE_ID "61325fdd83"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -14,6 +14,7 @@
14
14
  #include "duckdb/catalog/catalog_transaction.hpp"
15
15
  #include "duckdb/common/unordered_set.hpp"
16
16
  #include "duckdb/common/atomic.hpp"
17
+ #include "duckdb/common/optional_ptr.hpp"
17
18
 
18
19
  #include <functional>
19
20
 
@@ -80,6 +81,8 @@ public:
80
81
  DUCKDB_API static Catalog &GetCatalog(ClientContext &context, const string &catalog_name);
81
82
  //! Get the specified Catalog from the DatabaseInstance
82
83
  DUCKDB_API static Catalog &GetCatalog(DatabaseInstance &db, const string &catalog_name);
84
+ //! Gets the specified Catalog from the database if it exists
85
+ DUCKDB_API static optional_ptr<Catalog> GetCatalogEntry(ClientContext &context, const string &catalog_name);
83
86
  //! Get the specific Catalog from the AttachedDatabase
84
87
  DUCKDB_API static Catalog &GetCatalog(AttachedDatabase &db);
85
88
 
@@ -19,4 +19,6 @@ enum class AggregateHandling : uint8_t {
19
19
  FORCE_AGGREGATES // force aggregates: any non-aggregate select list entry will become a GROUP
20
20
  };
21
21
 
22
+ const char *ToString(AggregateHandling value);
23
+
22
24
  } // namespace duckdb
@@ -205,11 +205,10 @@ string ExpressionTypeToOperator(ExpressionType type);
205
205
 
206
206
  // Operator String to ExpressionType (e.g. + => OPERATOR_ADD)
207
207
  ExpressionType OperatorToExpressionType(const string &op);
208
-
209
208
  //! Negate a comparison expression, turning e.g. = into !=, or < into >=
210
- ExpressionType NegateComparisionExpression(ExpressionType type);
209
+ ExpressionType NegateComparisonExpression(ExpressionType type);
211
210
  //! Flip a comparison expression, turning e.g. < into >, or = into =
212
- ExpressionType FlipComparisionExpression(ExpressionType type);
211
+ ExpressionType FlipComparisonExpression(ExpressionType type);
213
212
 
214
213
  DUCKDB_API string ExpressionClassToString(ExpressionClass type);
215
214
 
@@ -22,4 +22,6 @@ enum class JoinRefType : uint8_t {
22
22
  POSITIONAL // Positional condition
23
23
  };
24
24
 
25
+ const char *ToString(JoinRefType value);
26
+
25
27
  } // namespace duckdb
@@ -9,10 +9,12 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/constants.hpp"
12
+ #include "duckdb/common/exception.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
15
16
  enum class OrderType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, ASCENDING = 2, DESCENDING = 3 };
17
+
16
18
  enum class OrderByNullType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, NULLS_FIRST = 2, NULLS_LAST = 3 };
17
19
 
18
20
  } // namespace duckdb
@@ -13,4 +13,5 @@
13
13
  namespace duckdb {
14
14
 
15
15
  enum class SetOperationType : uint8_t { NONE = 0, UNION = 1, EXCEPT = 2, INTERSECT = 3, UNION_BY_NAME = 4 };
16
- }
16
+
17
+ } // namespace duckdb
@@ -13,6 +13,7 @@
13
13
  #include "duckdb/common/exception_format_value.hpp"
14
14
  #include "duckdb/common/vector.hpp"
15
15
 
16
+ #include <map>
16
17
  #include <stdexcept>
17
18
 
18
19
  namespace duckdb {
@@ -283,29 +284,59 @@ public:
283
284
 
284
285
  class HTTPException : public IOException {
285
286
  public:
286
- DUCKDB_API explicit HTTPException(int status_code, string response, const string &msg)
287
- : IOException(ExceptionType::HTTP, msg), status_code(status_code), response(std::move(response)) {
287
+ template <typename>
288
+ struct ResponseShape {
289
+ typedef int status;
290
+ };
291
+
292
+ template <class RESPONSE, typename ResponseShape<decltype(RESPONSE::status)>::status = 0, typename... ARGS>
293
+ explicit HTTPException(RESPONSE &response, const string &msg, ARGS... params)
294
+ : HTTPException(response.status, response.body, response.headers, response.reason, msg, params...) {
295
+ }
296
+
297
+ template <typename>
298
+ struct ResponseWrapperShape {
299
+ typedef int code;
300
+ };
301
+ template <class RESPONSE, typename ResponseWrapperShape<decltype(RESPONSE::code)>::code = 0, typename... ARGS>
302
+ explicit HTTPException(RESPONSE &response, const string &msg, ARGS... params)
303
+ : HTTPException(response.code, response.body, response.headers, response.error, msg, params...) {
288
304
  }
289
305
 
290
- template <typename... ARGS>
291
- explicit HTTPException(int status_code, string response, const string &msg, ARGS... params)
292
- : HTTPException(status_code, std::move(response), ConstructMessage(msg, params...)) {
306
+ template <typename HEADERS, typename... ARGS>
307
+ explicit HTTPException(int status_code, string response_body, HEADERS headers, const string &reason,
308
+ const string &msg, ARGS... params)
309
+ : IOException(ExceptionType::HTTP, ConstructMessage(msg, params...)), status_code(status_code), reason(reason),
310
+ response_body(std::move(response_body)) {
311
+ this->headers.insert(headers.begin(), headers.end());
312
+ D_ASSERT(this->headers.size() > 0);
293
313
  }
294
314
 
295
315
  std::shared_ptr<Exception> Copy() const {
296
- return make_shared<HTTPException>(status_code, response, RawMessage());
316
+ return make_shared<HTTPException>(status_code, response_body, headers, reason, RawMessage());
297
317
  }
298
318
 
319
+ const std::multimap<std::string, std::string> GetHeaders() const {
320
+ return headers;
321
+ }
299
322
  int GetStatusCode() const {
300
323
  return status_code;
301
324
  }
302
- const string &GetResponse() const {
303
- return response;
325
+ const string &GetResponseBody() const {
326
+ return response_body;
327
+ }
328
+ const string &GetReason() const {
329
+ return reason;
330
+ }
331
+ [[noreturn]] void Throw() const {
332
+ throw HTTPException(status_code, response_body, headers, reason, RawMessage());
304
333
  }
305
334
 
306
335
  private:
307
336
  int status_code;
308
- string response; // we can keep a copy for the user
337
+ string reason;
338
+ string response_body;
339
+ std::multimap<string, string> headers;
309
340
  };
310
341
 
311
342
  class SerializationException : public Exception {
@@ -39,6 +39,9 @@ public:
39
39
  //! Lets us do things like 'if (error)'
40
40
  DUCKDB_API operator bool() const;
41
41
  DUCKDB_API bool operator==(const PreservedError &other) const;
42
+ DUCKDB_API const shared_ptr<Exception> &GetError() {
43
+ return exception_instance;
44
+ }
42
45
 
43
46
  private:
44
47
  //! Whether this PreservedError contains an exception or not