duckdb 0.8.2-dev2356.0 → 0.8.2-dev2509.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +7 -7
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -0
- package/src/duckdb/extension/icu/icu-datepart.cpp +7 -5
- package/src/duckdb/extension/icu/icu-strptime.cpp +1 -20
- package/src/duckdb/src/common/http_state.cpp +78 -0
- package/src/duckdb/src/common/types/list_segment.cpp +42 -134
- package/src/duckdb/src/common/types/vector.cpp +21 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +17 -19
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +80 -61
- package/src/duckdb/src/core_functions/function_list.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +308 -82
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +6 -0
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +11 -5
- package/src/duckdb/src/execution/window_executor.cpp +18 -20
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +9 -11
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -2
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +20 -3
- package/src/duckdb/src/main/extension/extension_helper.cpp +2 -1
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +2 -4
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11077 -10674
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
@@ -1,14 +1,62 @@
|
|
1
1
|
#include "duckdb/core_functions/scalar/list_functions.hpp"
|
2
|
-
#include "duckdb/common/pair.hpp"
|
3
2
|
#include "duckdb/common/string_util.hpp"
|
3
|
+
#include "duckdb/common/swap.hpp"
|
4
4
|
#include "duckdb/common/types/data_chunk.hpp"
|
5
5
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
6
6
|
#include "duckdb/function/scalar/string_functions.hpp"
|
7
|
-
#include "duckdb/parser/expression/bound_expression.hpp"
|
8
7
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
8
|
+
#include "duckdb/planner/expression/bound_constant_expression.hpp"
|
9
9
|
|
10
10
|
namespace duckdb {
|
11
11
|
|
12
|
+
struct ListSliceBindData : public FunctionData {
|
13
|
+
ListSliceBindData(const LogicalType &return_type_p, bool begin_is_empty_p, bool end_is_empty_p)
|
14
|
+
: return_type(return_type_p), begin_is_empty(begin_is_empty_p), end_is_empty(end_is_empty_p) {
|
15
|
+
}
|
16
|
+
~ListSliceBindData() override;
|
17
|
+
|
18
|
+
LogicalType return_type;
|
19
|
+
|
20
|
+
bool begin_is_empty;
|
21
|
+
bool end_is_empty;
|
22
|
+
|
23
|
+
public:
|
24
|
+
bool Equals(const FunctionData &other_p) const override;
|
25
|
+
unique_ptr<FunctionData> Copy() const override;
|
26
|
+
};
|
27
|
+
|
28
|
+
// Defined out of line; the bind data owns nothing that needs explicit cleanup.
ListSliceBindData::~ListSliceBindData() = default;
|
30
|
+
|
31
|
+
bool ListSliceBindData::Equals(const FunctionData &other_p) const {
|
32
|
+
auto &other = other_p.Cast<ListSliceBindData>();
|
33
|
+
return return_type == other.return_type && begin_is_empty == other.begin_is_empty &&
|
34
|
+
end_is_empty == other.end_is_empty;
|
35
|
+
}
|
36
|
+
|
37
|
+
// Builds a fresh bind data object carrying the same return type and flags.
unique_ptr<FunctionData> ListSliceBindData::Copy() const {
	auto duplicate = make_uniq<ListSliceBindData>(return_type, begin_is_empty, end_is_empty);
	return std::move(duplicate);
}
|
40
|
+
|
41
|
+
template <typename INDEX_TYPE>
|
42
|
+
static int CalculateSliceLength(idx_t begin, idx_t end, INDEX_TYPE step, bool svalid) {
|
43
|
+
if (step < 0) {
|
44
|
+
step = abs(step);
|
45
|
+
}
|
46
|
+
if (step == 0 && svalid) {
|
47
|
+
throw InvalidInputException("Slice step cannot be zero");
|
48
|
+
}
|
49
|
+
if (step == 1) {
|
50
|
+
return end - begin;
|
51
|
+
} else if (static_cast<idx_t>(step) >= (end - begin)) {
|
52
|
+
return 1;
|
53
|
+
}
|
54
|
+
if ((end - begin) % step != 0) {
|
55
|
+
return (end - begin) / step + 1;
|
56
|
+
}
|
57
|
+
return (end - begin) / step;
|
58
|
+
}
|
59
|
+
|
12
60
|
template <typename INPUT_TYPE, typename INDEX_TYPE>
|
13
61
|
INDEX_TYPE ValueLength(const INPUT_TYPE &value) {
|
14
62
|
return 0;
|
@@ -20,33 +68,44 @@ int64_t ValueLength(const list_entry_t &value) {
|
|
20
68
|
}
|
21
69
|
|
22
70
|
template <>
|
23
|
-
|
24
|
-
return LengthFun::Length<string_t,
|
71
|
+
int64_t ValueLength(const string_t &value) {
|
72
|
+
return LengthFun::Length<string_t, int64_t>(value);
|
25
73
|
}
|
26
74
|
|
27
75
|
//! Normalises a single slice bound in place.
//! A non-negative index is capped at length. A negative index is rebased onto the end of the
//! value by adding length; unless is_min is set it is first moved one position to the right.
//! \param index  the bound to normalise (modified in place)
//! \param value  the value being sliced (not inspected here)
//! \param length the length of the value being sliced
//! \param is_min when true, a negative index is rebased without the one-position shift
template <typename INPUT_TYPE, typename INDEX_TYPE>
static void ClampIndex(INDEX_TYPE &index, const INPUT_TYPE &value, const INDEX_TYPE length, bool is_min) {
	if (index >= 0) {
		// counted from the front: never point past the end
		if (index > length) {
			index = length;
		}
		return;
	}
	// counted from the back
	if (!is_min) {
		index = index + 1;
	}
	index = length + index;
}
|
40
86
|
|
41
87
|
template <typename INPUT_TYPE, typename INDEX_TYPE>
|
42
|
-
static bool ClampSlice(const INPUT_TYPE &value, INDEX_TYPE &begin, INDEX_TYPE &end
|
88
|
+
static bool ClampSlice(const INPUT_TYPE &value, INDEX_TYPE &begin, INDEX_TYPE &end) {
|
43
89
|
// Clamp offsets
|
44
|
-
begin =
|
45
|
-
|
46
|
-
|
47
|
-
if (
|
48
|
-
|
90
|
+
begin = (begin != 0 && begin != (INDEX_TYPE)NumericLimits<int64_t>::Minimum()) ? begin - 1 : begin;
|
91
|
+
|
92
|
+
bool is_min = false;
|
93
|
+
if (begin == (INDEX_TYPE)NumericLimits<int64_t>::Minimum()) {
|
94
|
+
begin++;
|
95
|
+
is_min = true;
|
96
|
+
}
|
97
|
+
|
98
|
+
const auto length = ValueLength<INPUT_TYPE, INDEX_TYPE>(value);
|
99
|
+
if (begin < 0 && -begin > length && end < 0 && -end > length) {
|
100
|
+
begin = 0;
|
101
|
+
end = 0;
|
102
|
+
return true;
|
103
|
+
}
|
104
|
+
if (begin < 0 && -begin > length) {
|
105
|
+
begin = 0;
|
49
106
|
}
|
107
|
+
ClampIndex(begin, value, length, is_min);
|
108
|
+
ClampIndex(end, value, length, false);
|
50
109
|
end = MaxValue<INDEX_TYPE>(begin, end);
|
51
110
|
|
52
111
|
return true;
|
@@ -65,108 +124,262 @@ list_entry_t SliceValue(Vector &result, list_entry_t input, int64_t begin, int64
|
|
65
124
|
}
|
66
125
|
|
67
126
|
template <>
|
68
|
-
string_t SliceValue(Vector &result, string_t input,
|
127
|
+
string_t SliceValue(Vector &result, string_t input, int64_t begin, int64_t end) {
|
69
128
|
// one-based - zero has strange semantics
|
70
129
|
return SubstringFun::SubstringUnicode(result, input, begin + 1, end - begin);
|
71
130
|
}
|
72
131
|
|
73
132
|
template <typename INPUT_TYPE, typename INDEX_TYPE>
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
133
|
+
INPUT_TYPE SliceValueWithSteps(Vector &result, SelectionVector &sel, INPUT_TYPE input, INDEX_TYPE begin, INDEX_TYPE end,
|
134
|
+
INDEX_TYPE step, idx_t &sel_idx) {
|
135
|
+
return input;
|
136
|
+
}
|
137
|
+
|
138
|
+
template <>
|
139
|
+
list_entry_t SliceValueWithSteps(Vector &result, SelectionVector &sel, list_entry_t input, int64_t begin, int64_t end,
|
140
|
+
int64_t step, idx_t &sel_idx) {
|
141
|
+
if (end - begin == 0) {
|
142
|
+
input.length = 0;
|
143
|
+
input.offset = sel_idx;
|
144
|
+
return input;
|
145
|
+
}
|
146
|
+
input.length = CalculateSliceLength(begin, end, step, true);
|
147
|
+
idx_t child_idx = input.offset + begin;
|
148
|
+
if (step < 0) {
|
149
|
+
child_idx = input.offset + end - 1;
|
150
|
+
}
|
151
|
+
input.offset = sel_idx;
|
152
|
+
for (idx_t i = 0; i < input.length; i++) {
|
153
|
+
sel.set_index(sel_idx, child_idx);
|
154
|
+
child_idx += step;
|
155
|
+
sel_idx++;
|
156
|
+
}
|
157
|
+
return input;
|
158
|
+
}
|
159
|
+
|
160
|
+
template <typename INPUT_TYPE, typename INDEX_TYPE>
|
161
|
+
static void ExecuteConstantSlice(Vector &result, Vector &str_vector, Vector &begin_vector, Vector &end_vector,
|
162
|
+
optional_ptr<Vector> step_vector, const idx_t count, SelectionVector &sel,
|
163
|
+
idx_t &sel_idx, optional_ptr<Vector> result_child_vector, bool begin_is_empty,
|
164
|
+
bool end_is_empty) {
|
165
|
+
auto result_data = ConstantVector::GetData<INPUT_TYPE>(result);
|
166
|
+
auto str_data = ConstantVector::GetData<INPUT_TYPE>(str_vector);
|
167
|
+
auto begin_data = ConstantVector::GetData<INDEX_TYPE>(begin_vector);
|
168
|
+
auto end_data = ConstantVector::GetData<INDEX_TYPE>(end_vector);
|
169
|
+
auto step_data = step_vector ? ConstantVector::GetData<INDEX_TYPE>(*step_vector) : nullptr;
|
170
|
+
|
171
|
+
auto str = str_data[0];
|
172
|
+
auto begin = begin_is_empty ? 0 : begin_data[0];
|
173
|
+
auto end = end_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(str) : end_data[0];
|
174
|
+
auto step = step_data ? step_data[0] : 1;
|
175
|
+
|
176
|
+
if (step < 0) {
|
177
|
+
swap(begin, end);
|
178
|
+
begin = end_is_empty ? 0 : begin;
|
179
|
+
end = begin_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(str) : end;
|
180
|
+
}
|
181
|
+
|
182
|
+
auto str_valid = !ConstantVector::IsNull(str_vector);
|
183
|
+
auto begin_valid = !ConstantVector::IsNull(begin_vector);
|
184
|
+
auto end_valid = !ConstantVector::IsNull(end_vector);
|
185
|
+
auto step_valid = step_vector && !ConstantVector::IsNull(*step_vector);
|
186
|
+
|
187
|
+
// Clamp offsets
|
188
|
+
bool clamp_result = false;
|
189
|
+
if (str_valid && begin_valid && end_valid && (step_valid || step == 1)) {
|
190
|
+
clamp_result = ClampSlice(str, begin, end);
|
191
|
+
}
|
192
|
+
|
193
|
+
auto sel_length = 0;
|
194
|
+
if (step_vector && step_valid && str_valid && begin_valid && end_valid && step != 1 && end - begin > 0) {
|
195
|
+
sel_length = CalculateSliceLength(begin, end, step, step_valid);
|
196
|
+
sel.Initialize(sel_length);
|
197
|
+
}
|
198
|
+
|
199
|
+
// Try to slice
|
200
|
+
if (!str_valid || !begin_valid || !end_valid || (step_vector && !step_valid) || !clamp_result) {
|
201
|
+
ConstantVector::SetNull(result, true);
|
202
|
+
} else if (step == 1) {
|
203
|
+
result_data[0] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, str, begin, end);
|
95
204
|
} else {
|
96
|
-
|
205
|
+
result_data[0] = SliceValueWithSteps<INPUT_TYPE, INDEX_TYPE>(result, sel, str, begin, end, step, sel_idx);
|
206
|
+
}
|
207
|
+
|
208
|
+
if (step_vector && step != 0 && end - begin > 0) {
|
209
|
+
result_child_vector->Slice(sel, sel_length);
|
210
|
+
}
|
211
|
+
}
|
212
|
+
|
213
|
+
template <typename INPUT_TYPE, typename INDEX_TYPE>
|
214
|
+
static void ExecuteFlatSlice(Vector &result, Vector &list_vector, Vector &begin_vector, Vector &end_vector,
|
215
|
+
optional_ptr<Vector> step_vector, const idx_t count, SelectionVector &sel, idx_t &sel_idx,
|
216
|
+
optional_ptr<Vector> result_child_vector, bool begin_is_empty, bool end_is_empty) {
|
217
|
+
UnifiedVectorFormat list_data, begin_data, end_data, step_data;
|
218
|
+
idx_t sel_length = 0;
|
97
219
|
|
98
|
-
|
99
|
-
|
100
|
-
|
220
|
+
list_vector.ToUnifiedFormat(count, list_data);
|
221
|
+
begin_vector.ToUnifiedFormat(count, begin_data);
|
222
|
+
end_vector.ToUnifiedFormat(count, end_data);
|
223
|
+
if (step_vector) {
|
224
|
+
step_vector->ToUnifiedFormat(count, step_data);
|
225
|
+
sel.Initialize(ListVector::GetListSize(list_vector));
|
226
|
+
}
|
101
227
|
|
102
|
-
|
103
|
-
|
228
|
+
auto result_data = FlatVector::GetData<INPUT_TYPE>(result);
|
229
|
+
auto &result_mask = FlatVector::Validity(result);
|
104
230
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
231
|
+
for (idx_t i = 0; i < count; ++i) {
|
232
|
+
auto list_idx = list_data.sel->get_index(i);
|
233
|
+
auto begin_idx = begin_data.sel->get_index(i);
|
234
|
+
auto end_idx = end_data.sel->get_index(i);
|
235
|
+
auto step_idx = step_vector ? step_data.sel->get_index(i) : 0;
|
109
236
|
|
110
|
-
|
111
|
-
|
112
|
-
|
237
|
+
auto sliced = reinterpret_cast<INPUT_TYPE *>(list_data.data)[list_idx];
|
238
|
+
auto begin = begin_is_empty ? 0 : reinterpret_cast<INDEX_TYPE *>(begin_data.data)[begin_idx];
|
239
|
+
auto end = end_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(sliced)
|
240
|
+
: reinterpret_cast<INDEX_TYPE *>(end_data.data)[end_idx];
|
241
|
+
auto step = step_vector ? reinterpret_cast<INDEX_TYPE *>(step_data.data)[step_idx] : 1;
|
113
242
|
|
114
|
-
|
115
|
-
|
116
|
-
|
243
|
+
if (step < 0) {
|
244
|
+
swap(begin, end);
|
245
|
+
begin = end_is_empty ? 0 : begin;
|
246
|
+
end = begin_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(sliced) : end;
|
247
|
+
}
|
248
|
+
auto list_valid = list_data.validity.RowIsValid(list_idx);
|
249
|
+
auto begin_valid = begin_data.validity.RowIsValid(begin_idx);
|
250
|
+
auto end_valid = end_data.validity.RowIsValid(end_idx);
|
251
|
+
auto step_valid = step_vector && step_data.validity.RowIsValid(step_idx);
|
117
252
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
} else {
|
122
|
-
rdata[i] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, sliced, begin, end);
|
123
|
-
}
|
253
|
+
bool clamp_result = false;
|
254
|
+
if (list_valid && begin_valid && end_valid && (step_valid || step == 1)) {
|
255
|
+
clamp_result = ClampSlice(sliced, begin, end);
|
124
256
|
}
|
257
|
+
|
258
|
+
auto length = 0;
|
259
|
+
if (step_vector && step_valid && list_valid && begin_valid && end_valid && end - begin > 0) {
|
260
|
+
length = CalculateSliceLength(begin, end, step, step_valid);
|
261
|
+
}
|
262
|
+
sel_length += length;
|
263
|
+
|
264
|
+
if (!list_valid || !begin_valid || !end_valid || (step_vector && !step_valid) || !clamp_result) {
|
265
|
+
result_mask.SetInvalid(i);
|
266
|
+
} else if (!step_vector) {
|
267
|
+
result_data[i] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, sliced, begin, end);
|
268
|
+
} else {
|
269
|
+
result_data[i] =
|
270
|
+
SliceValueWithSteps<INPUT_TYPE, INDEX_TYPE>(result, sel, sliced, begin, end, step, sel_idx);
|
271
|
+
}
|
272
|
+
}
|
273
|
+
if (step_vector) {
|
274
|
+
SelectionVector new_sel(sel_length);
|
275
|
+
for (idx_t i = 0; i < sel_length; ++i) {
|
276
|
+
new_sel.set_index(i, sel.get_index(i));
|
277
|
+
}
|
278
|
+
result_child_vector->Slice(new_sel, sel_length);
|
279
|
+
}
|
280
|
+
}
|
281
|
+
|
282
|
+
template <typename INPUT_TYPE, typename INDEX_TYPE>
|
283
|
+
static void ExecuteSlice(Vector &result, Vector &list_or_str_vector, Vector &begin_vector, Vector &end_vector,
|
284
|
+
optional_ptr<Vector> step_vector, const idx_t count, bool begin_is_empty, bool end_is_empty) {
|
285
|
+
optional_ptr<Vector> result_child_vector;
|
286
|
+
if (step_vector) {
|
287
|
+
result_child_vector = &ListVector::GetEntry(result);
|
125
288
|
}
|
126
289
|
|
290
|
+
SelectionVector sel;
|
291
|
+
idx_t sel_idx = 0;
|
292
|
+
|
293
|
+
if (result.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
294
|
+
ExecuteConstantSlice<INPUT_TYPE, INDEX_TYPE>(result, list_or_str_vector, begin_vector, end_vector, step_vector,
|
295
|
+
count, sel, sel_idx, result_child_vector, begin_is_empty,
|
296
|
+
end_is_empty);
|
297
|
+
} else {
|
298
|
+
ExecuteFlatSlice<INPUT_TYPE, INDEX_TYPE>(result, list_or_str_vector, begin_vector, end_vector, step_vector,
|
299
|
+
count, sel, sel_idx, result_child_vector, begin_is_empty,
|
300
|
+
end_is_empty);
|
301
|
+
}
|
127
302
|
result.Verify(count);
|
128
303
|
}
|
129
304
|
|
130
305
|
//! Scalar function body of the slice function.
//! Expects three arguments (value, begin, end) or four (value, begin, end, step). The flags for
//! empty bounds come from the ListSliceBindData attached to the bound function expression.
//! Throws NotImplementedException when the result type is neither LIST nor VARCHAR.
static void ArraySliceFunction(DataChunk &args, ExpressionState &state, Vector &result) {
	D_ASSERT(args.ColumnCount() == 3 || args.ColumnCount() == 4);
	D_ASSERT(args.data.size() == 3 || args.data.size() == 4);
	auto row_count = args.size();

	Vector &input = args.data[0];
	Vector &begin_arg = args.data[1];
	Vector &end_arg = args.data[2];

	// the step is optional
	optional_ptr<Vector> step_arg;
	if (args.ColumnCount() == 4) {
		step_arg = &args.data[3];
	}

	auto &bound_expr = state.expr.Cast<BoundFunctionExpression>();
	auto &bind_data = bound_expr.bind_info->Cast<ListSliceBindData>();
	auto begin_is_empty = bind_data.begin_is_empty;
	auto end_is_empty = bind_data.end_is_empty;

	result.SetVectorType(args.AllConstant() ? VectorType::CONSTANT_VECTOR : VectorType::FLAT_VECTOR);
	switch (result.GetType().id()) {
	case LogicalTypeId::LIST: {
		// Share the value dictionary as we are just going to slice it
		const auto input_vector_type = input.GetVectorType();
		const bool flat_or_constant =
		    input_vector_type == VectorType::FLAT_VECTOR || input_vector_type == VectorType::CONSTANT_VECTOR;
		if (!flat_or_constant) {
			input.Flatten(row_count);
		}
		ListVector::ReferenceEntry(result, input);
		ExecuteSlice<list_entry_t, int64_t>(result, input, begin_arg, end_arg, step_arg, row_count, begin_is_empty,
		                                    end_is_empty);
		break;
	}
	case LogicalTypeId::VARCHAR: {
		ExecuteSlice<string_t, int64_t>(result, input, begin_arg, end_arg, step_arg, row_count, begin_is_empty,
		                                end_is_empty);
		break;
	}
	default:
		throw NotImplementedException("Specifier type not implemented");
	}
}
|
156
346
|
|
347
|
+
static bool CheckIfParamIsEmpty(duckdb::unique_ptr<duckdb::Expression> ¶m) {
|
348
|
+
bool is_empty = false;
|
349
|
+
if (param->return_type.id() == LogicalTypeId::LIST) {
|
350
|
+
auto empty_list = make_uniq<BoundConstantExpression>(Value::LIST(LogicalType::INTEGER, vector<Value>()));
|
351
|
+
is_empty = param->Equals(*empty_list);
|
352
|
+
if (!is_empty) {
|
353
|
+
// if the param is not empty, the user has entered a list instead of a BIGINT
|
354
|
+
throw BinderException("The upper and lower bounds of the slice must be a BIGINT");
|
355
|
+
}
|
356
|
+
}
|
357
|
+
return is_empty;
|
358
|
+
}
|
359
|
+
|
157
360
|
static unique_ptr<FunctionData> ArraySliceBind(ClientContext &context, ScalarFunction &bound_function,
|
158
361
|
vector<unique_ptr<Expression>> &arguments) {
|
159
|
-
D_ASSERT(
|
362
|
+
D_ASSERT(arguments.size() == 3 || arguments.size() == 4);
|
363
|
+
D_ASSERT(bound_function.arguments.size() == 3 || bound_function.arguments.size() == 4);
|
364
|
+
|
160
365
|
switch (arguments[0]->return_type.id()) {
|
161
366
|
case LogicalTypeId::LIST:
|
162
367
|
// The result is the same type
|
163
368
|
bound_function.return_type = arguments[0]->return_type;
|
164
369
|
break;
|
165
370
|
case LogicalTypeId::VARCHAR:
|
166
|
-
// string slice returns a string
|
371
|
+
// string slice returns a string
|
372
|
+
if (bound_function.arguments.size() == 4) {
|
373
|
+
throw NotImplementedException(
|
374
|
+
"Slice with steps has not been implemented for string types, you can consider rewriting your query as "
|
375
|
+
"follows:\n SELECT array_to_string((str_split(string, '')[begin:end:step], '');");
|
376
|
+
}
|
167
377
|
bound_function.return_type = arguments[0]->return_type;
|
168
|
-
|
169
|
-
|
378
|
+
for (idx_t i = 1; i < 3; i++) {
|
379
|
+
if (arguments[i]->return_type.id() != LogicalTypeId::LIST) {
|
380
|
+
bound_function.arguments[i] = LogicalType::BIGINT;
|
381
|
+
}
|
382
|
+
}
|
170
383
|
break;
|
171
384
|
case LogicalTypeId::SQLNULL:
|
172
385
|
case LogicalTypeId::UNKNOWN:
|
@@ -177,16 +390,29 @@ static unique_ptr<FunctionData> ArraySliceBind(ClientContext &context, ScalarFun
|
|
177
390
|
throw BinderException("ARRAY_SLICE can only operate on LISTs and VARCHARs");
|
178
391
|
}
|
179
392
|
|
180
|
-
|
393
|
+
bool begin_is_empty = CheckIfParamIsEmpty(arguments[1]);
|
394
|
+
if (!begin_is_empty) {
|
395
|
+
bound_function.arguments[1] = LogicalType::BIGINT;
|
396
|
+
}
|
397
|
+
bool end_is_empty = CheckIfParamIsEmpty(arguments[2]);
|
398
|
+
if (!end_is_empty) {
|
399
|
+
bound_function.arguments[2] = LogicalType::BIGINT;
|
400
|
+
}
|
401
|
+
|
402
|
+
return make_uniq<ListSliceBindData>(bound_function.return_type, begin_is_empty, end_is_empty);
|
181
403
|
}
|
182
404
|
|
183
|
-
|
405
|
+
//! Builds the function set of the slice function: a three-argument overload and a
//! four-argument overload whose extra trailing argument is a BIGINT.
ScalarFunctionSet ListSliceFun::GetFunctions() {
	// the arguments and return types are actually set in the binder function
	ScalarFunction without_step({LogicalType::ANY, LogicalType::ANY, LogicalType::ANY}, LogicalType::ANY,
	                            ArraySliceFunction, ArraySliceBind);
	without_step.null_handling = FunctionNullHandling::SPECIAL_HANDLING;

	// same function, with the additional fourth argument
	ScalarFunction with_step = without_step;
	with_step.arguments.push_back(LogicalType::BIGINT);

	ScalarFunctionSet overloads;
	overloads.AddFunction(without_step);
	overloads.AddFunction(with_step);
	return overloads;
}
|
191
417
|
|
192
418
|
} // namespace duckdb
|
@@ -584,6 +584,12 @@ void GroupedAggregateHashTable::Combine(GroupedAggregateHashTable &other) {
|
|
584
584
|
}
|
585
585
|
|
586
586
|
Verify();
|
587
|
+
|
588
|
+
// if we combine states, then we also need to combine the arena allocators
|
589
|
+
for (auto &stored_allocator : other.stored_allocators) {
|
590
|
+
stored_allocators.push_back(stored_allocator);
|
591
|
+
}
|
592
|
+
stored_allocators.push_back(other.aggregate_allocator);
|
587
593
|
}
|
588
594
|
|
589
595
|
void GroupedAggregateHashTable::Append(GroupedAggregateHashTable &other) {
|
@@ -12,7 +12,8 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(ClientContext &context, All
|
|
12
12
|
vector<Value> group_minima_p, vector<idx_t> required_bits_p)
|
13
13
|
: BaseAggregateHashTable(context, allocator, aggregate_objects_p, std::move(payload_types_p)),
|
14
14
|
addresses(LogicalType::POINTER), required_bits(std::move(required_bits_p)), total_required_bits(0),
|
15
|
-
group_minima(std::move(group_minima_p)), sel(STANDARD_VECTOR_SIZE),
|
15
|
+
group_minima(std::move(group_minima_p)), sel(STANDARD_VECTOR_SIZE),
|
16
|
+
aggregate_allocator(make_uniq<ArenaAllocator>(allocator)) {
|
16
17
|
for (auto &group_bits : required_bits) {
|
17
18
|
total_required_bits += group_bits;
|
18
19
|
}
|
@@ -136,7 +137,7 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload)
|
|
136
137
|
// after finding the group location we update the aggregates
|
137
138
|
idx_t payload_idx = 0;
|
138
139
|
auto &aggregates = layout.GetAggregates();
|
139
|
-
RowOperationsState row_state(aggregate_allocator);
|
140
|
+
RowOperationsState row_state(*aggregate_allocator);
|
140
141
|
for (idx_t aggr_idx = 0; aggr_idx < aggregates.size(); aggr_idx++) {
|
141
142
|
auto &aggregate = aggregates[aggr_idx];
|
142
143
|
auto input_count = (idx_t)aggregate.child_count;
|
@@ -165,7 +166,7 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
|
|
165
166
|
data_ptr_t source_ptr = other.data;
|
166
167
|
data_ptr_t target_ptr = data;
|
167
168
|
idx_t combine_count = 0;
|
168
|
-
RowOperationsState row_state(aggregate_allocator);
|
169
|
+
RowOperationsState row_state(*aggregate_allocator);
|
169
170
|
for (idx_t i = 0; i < total_groups; i++) {
|
170
171
|
auto has_entry_source = other.group_is_set[i];
|
171
172
|
// we only have any work to do if the source has an entry for this group
|
@@ -183,6 +184,11 @@ void PerfectAggregateHashTable::Combine(PerfectAggregateHashTable &other) {
|
|
183
184
|
target_ptr += tuple_size;
|
184
185
|
}
|
185
186
|
RowOperations::CombineStates(row_state, layout, source_addresses, target_addresses, combine_count);
|
187
|
+
|
188
|
+
// FIXME: after moving the arena allocator, we currently have to ensure that the pointer is not nullptr, because the
|
189
|
+
// FIXME: Destroy()-function of the hash table expects an allocator in some cases (e.g., for sorted aggregates)
|
190
|
+
stored_allocators.push_back(std::move(other.aggregate_allocator));
|
191
|
+
other.aggregate_allocator = make_uniq<ArenaAllocator>(allocator);
|
186
192
|
}
|
187
193
|
|
188
194
|
template <class T>
|
@@ -268,7 +274,7 @@ void PerfectAggregateHashTable::Scan(idx_t &scan_position, DataChunk &result) {
|
|
268
274
|
}
|
269
275
|
// then construct the payloads
|
270
276
|
result.SetCardinality(entry_count);
|
271
|
-
RowOperationsState row_state(aggregate_allocator);
|
277
|
+
RowOperationsState row_state(*aggregate_allocator);
|
272
278
|
RowOperations::FinalizeStates(row_state, layout, addresses, result, grouping_columns);
|
273
279
|
}
|
274
280
|
|
@@ -289,7 +295,7 @@ void PerfectAggregateHashTable::Destroy() {
|
|
289
295
|
idx_t count = 0;
|
290
296
|
|
291
297
|
// iterate over all initialised slots of the hash table
|
292
|
-
RowOperationsState row_state(aggregate_allocator);
|
298
|
+
RowOperationsState row_state(*aggregate_allocator);
|
293
299
|
data_ptr_t payload_ptr = data;
|
294
300
|
for (idx_t i = 0; i < total_groups; i++) {
|
295
301
|
if (group_is_set[i]) {
|
@@ -204,19 +204,19 @@ static idx_t FindTypedRangeBound(const WindowInputColumn &over, const idx_t orde
|
|
204
204
|
WindowColumnIterator<T> begin(over, order_begin);
|
205
205
|
WindowColumnIterator<T> end(over, order_end);
|
206
206
|
|
207
|
-
if (order_begin < prev.
|
208
|
-
const auto first = over.GetCell<T>(prev.
|
207
|
+
if (order_begin < prev.start && prev.start < order_end) {
|
208
|
+
const auto first = over.GetCell<T>(prev.start);
|
209
209
|
if (!comp(val, first)) {
|
210
210
|
// prev.first <= val, so we can start further forward
|
211
|
-
begin += (prev.
|
211
|
+
begin += (prev.start - order_begin);
|
212
212
|
}
|
213
213
|
}
|
214
|
-
if (order_begin <= prev.
|
215
|
-
const auto second = over.GetCell<T>(prev.
|
214
|
+
if (order_begin <= prev.end && prev.end < order_end) {
|
215
|
+
const auto second = over.GetCell<T>(prev.end);
|
216
216
|
if (!comp(second, val)) {
|
217
217
|
// val <= prev.second, so we can end further back
|
218
218
|
// (prev.second is the largest peer)
|
219
|
-
end -= (order_end - prev.
|
219
|
+
end -= (order_end - prev.end - 1);
|
220
220
|
}
|
221
221
|
}
|
222
222
|
|
@@ -278,8 +278,6 @@ static idx_t FindOrderedRangeBound(const WindowInputColumn &over, const OrderTyp
|
|
278
278
|
}
|
279
279
|
|
280
280
|
struct WindowBoundariesState {
|
281
|
-
using FrameBounds = std::pair<idx_t, idx_t>;
|
282
|
-
|
283
281
|
static inline bool IsScalar(const unique_ptr<Expression> &expr) {
|
284
282
|
return expr ? expr->IsScalar() : true;
|
285
283
|
}
|
@@ -375,8 +373,8 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
|
|
375
373
|
}
|
376
374
|
|
377
375
|
// Reset range hints
|
378
|
-
prev.
|
379
|
-
prev.
|
376
|
+
prev.start = valid_start;
|
377
|
+
prev.end = valid_end;
|
380
378
|
}
|
381
379
|
} else if (!is_peer) {
|
382
380
|
peer_start = row_idx;
|
@@ -427,9 +425,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
|
|
427
425
|
if (boundary_start.CellIsNull(chunk_idx)) {
|
428
426
|
window_start = peer_start;
|
429
427
|
} else {
|
430
|
-
prev.
|
428
|
+
prev.start = FindOrderedRangeBound<true>(range_collection, range_sense, valid_start, row_idx,
|
431
429
|
boundary_start, chunk_idx, prev);
|
432
|
-
window_start = prev.
|
430
|
+
window_start = prev.start;
|
433
431
|
}
|
434
432
|
break;
|
435
433
|
}
|
@@ -437,9 +435,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
|
|
437
435
|
if (boundary_start.CellIsNull(chunk_idx)) {
|
438
436
|
window_start = peer_start;
|
439
437
|
} else {
|
440
|
-
prev.
|
438
|
+
prev.start = FindOrderedRangeBound<true>(range_collection, range_sense, row_idx, valid_end, boundary_start,
|
441
439
|
chunk_idx, prev);
|
442
|
-
window_start = prev.
|
440
|
+
window_start = prev.start;
|
443
441
|
}
|
444
442
|
break;
|
445
443
|
}
|
@@ -472,9 +470,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
|
|
472
470
|
if (boundary_end.CellIsNull(chunk_idx)) {
|
473
471
|
window_end = peer_end;
|
474
472
|
} else {
|
475
|
-
prev.
|
476
|
-
|
477
|
-
window_end = prev.
|
473
|
+
prev.end = FindOrderedRangeBound<false>(range_collection, range_sense, valid_start, row_idx, boundary_end,
|
474
|
+
chunk_idx, prev);
|
475
|
+
window_end = prev.end;
|
478
476
|
}
|
479
477
|
break;
|
480
478
|
}
|
@@ -482,9 +480,9 @@ void WindowBoundariesState::Update(const idx_t row_idx, const WindowInputColumn
|
|
482
480
|
if (boundary_end.CellIsNull(chunk_idx)) {
|
483
481
|
window_end = peer_end;
|
484
482
|
} else {
|
485
|
-
prev.
|
486
|
-
|
487
|
-
window_end = prev.
|
483
|
+
prev.end = FindOrderedRangeBound<false>(range_collection, range_sense, row_idx, valid_end, boundary_end,
|
484
|
+
chunk_idx, prev);
|
485
|
+
window_end = prev.end;
|
488
486
|
}
|
489
487
|
break;
|
490
488
|
}
|
@@ -39,8 +39,8 @@ struct CountStarFunction : public BaseCountFunction {
|
|
39
39
|
Vector &result, idx_t rid, idx_t bias) {
|
40
40
|
D_ASSERT(input_count == 0);
|
41
41
|
auto data = FlatVector::GetData<RESULT_TYPE>(result);
|
42
|
-
const auto begin = frame.
|
43
|
-
const auto end = frame.
|
42
|
+
const auto begin = frame.start;
|
43
|
+
const auto end = frame.end;
|
44
44
|
// Slice to any filtered rows
|
45
45
|
if (!filter_mask.AllValid()) {
|
46
46
|
RESULT_TYPE filtered = 0;
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev2509"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "785b11edd5"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|