duckdb 0.8.2-dev2356.0 → 0.8.2-dev2509.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +7 -7
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -0
- package/src/duckdb/extension/icu/icu-datepart.cpp +7 -5
- package/src/duckdb/extension/icu/icu-strptime.cpp +1 -20
- package/src/duckdb/src/common/http_state.cpp +78 -0
- package/src/duckdb/src/common/types/list_segment.cpp +42 -134
- package/src/duckdb/src/common/types/vector.cpp +21 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +17 -19
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +80 -61
- package/src/duckdb/src/core_functions/function_list.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +308 -82
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +6 -0
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +11 -5
- package/src/duckdb/src/execution/window_executor.cpp +18 -20
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +9 -11
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -2
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +20 -3
- package/src/duckdb/src/main/extension/extension_helper.cpp +2 -1
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +2 -4
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11077 -10674
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
@@ -37,8 +37,6 @@ inline interval_t operator-(const interval_t &lhs, const interval_t &rhs) {
|
|
37
37
|
return Interval::FromMicro(Interval::GetMicro(lhs) - Interval::GetMicro(rhs));
|
38
38
|
}
|
39
39
|
|
40
|
-
using FrameBounds = std::pair<idx_t, idx_t>;
|
41
|
-
|
42
40
|
template <typename SAVE_TYPE>
|
43
41
|
struct QuantileState {
|
44
42
|
using SaveType = SAVE_TYPE;
|
@@ -89,7 +87,7 @@ void ReuseIndexes(idx_t *index, const FrameBounds &frame, const FrameBounds &pre
|
|
89
87
|
idx_t j = 0;
|
90
88
|
|
91
89
|
// Copy overlapping indices
|
92
|
-
for (idx_t p = 0; p < (prev.second - prev.first); ++p) {
|
90
|
+
for (idx_t p = 0; p < (prev.end - prev.start); ++p) {
|
93
91
|
auto idx = index[p];
|
94
92
|
|
95
93
|
// Shift down into any hole
|
@@ -98,7 +96,7 @@ void ReuseIndexes(idx_t *index, const FrameBounds &frame, const FrameBounds &pre
|
|
98
96
|
}
|
99
97
|
|
100
98
|
// Skip overlapping values
|
101
|
-
if (frame.first <= idx && idx < frame.second) {
|
99
|
+
if (frame.start <= idx && idx < frame.end) {
|
102
100
|
++j;
|
103
101
|
}
|
104
102
|
}
|
@@ -106,15 +104,15 @@ void ReuseIndexes(idx_t *index, const FrameBounds &frame, const FrameBounds &pre
|
|
106
104
|
// Insert new indices
|
107
105
|
if (j > 0) {
|
108
106
|
// Overlap: append the new ends
|
109
|
-
for (auto f = frame.first; f < prev.first; ++f, ++j) {
|
107
|
+
for (auto f = frame.start; f < prev.start; ++f, ++j) {
|
110
108
|
index[j] = f;
|
111
109
|
}
|
112
|
-
for (auto f = prev.second; f < frame.second; ++f, ++j) {
|
110
|
+
for (auto f = prev.end; f < frame.end; ++f, ++j) {
|
113
111
|
index[j] = f;
|
114
112
|
}
|
115
113
|
} else {
|
116
114
|
// No overlap: overwrite with new values
|
117
|
-
for (auto f = frame.first; f < frame.second; ++f, ++j) {
|
115
|
+
for (auto f = frame.start; f < frame.end; ++f, ++j) {
|
118
116
|
index[j] = f;
|
119
117
|
}
|
120
118
|
}
|
@@ -124,17 +122,17 @@ static idx_t ReplaceIndex(idx_t *index, const FrameBounds &frame, const FrameBou
|
|
124
122
|
D_ASSERT(index);
|
125
123
|
|
126
124
|
idx_t j = 0;
|
127
|
-
for (idx_t p = 0; p < (prev.second - prev.first); ++p) {
|
125
|
+
for (idx_t p = 0; p < (prev.end - prev.start); ++p) {
|
128
126
|
auto idx = index[p];
|
129
127
|
if (j != p) {
|
130
128
|
break;
|
131
129
|
}
|
132
130
|
|
133
|
-
if (frame.first <= idx && idx < frame.second) {
|
131
|
+
if (frame.start <= idx && idx < frame.end) {
|
134
132
|
++j;
|
135
133
|
}
|
136
134
|
}
|
137
|
-
index[j] = frame.second - 1;
|
135
|
+
index[j] = frame.end - 1;
|
138
136
|
|
139
137
|
return j;
|
140
138
|
}
|
@@ -560,7 +558,7 @@ struct QuantileScalarOperation : public QuantileOperation {
|
|
560
558
|
|
561
559
|
// Lazily initialise frame state
|
562
560
|
auto prev_pos = state.pos;
|
563
|
-
state.SetPos(frame.second - frame.first);
|
561
|
+
state.SetPos(frame.end - frame.start);
|
564
562
|
|
565
563
|
auto index = state.w.data();
|
566
564
|
D_ASSERT(index);
|
@@ -572,11 +570,11 @@ struct QuantileScalarOperation : public QuantileOperation {
|
|
572
570
|
const auto q = bind_data.quantiles[0];
|
573
571
|
|
574
572
|
bool replace = false;
|
575
|
-
if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
|
573
|
+
if (frame.start == prev.start + 1 && frame.end == prev.end + 1) {
|
576
574
|
// Fixed frame size
|
577
575
|
const auto j = ReplaceIndex(index, frame, prev);
|
578
576
|
// We can only replace if the number of NULLs has not changed
|
579
|
-
if (included.AllValid() || included(prev.first) == included(prev.second)) {
|
577
|
+
if (included.AllValid() || included(prev.start) == included(prev.end)) {
|
580
578
|
Interpolator<DISCRETE> interp(q, prev_pos, false);
|
581
579
|
replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
|
582
580
|
if (replace) {
|
@@ -720,7 +718,7 @@ struct QuantileListOperation : public QuantileOperation {
|
|
720
718
|
|
721
719
|
// Lazily initialise frame state
|
722
720
|
auto prev_pos = state.pos;
|
723
|
-
state.SetPos(frame.second - frame.first);
|
721
|
+
state.SetPos(frame.end - frame.start);
|
724
722
|
|
725
723
|
auto index = state.w.data();
|
726
724
|
|
@@ -731,11 +729,11 @@ struct QuantileListOperation : public QuantileOperation {
|
|
731
729
|
// then Q25 must be recomputed, but Q50 and Q75 are unaffected.
|
732
730
|
// For a single element list, this reduces to the scalar case.
|
733
731
|
std::pair<idx_t, idx_t> replaceable {state.pos, 0};
|
734
|
-
if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
|
732
|
+
if (frame.start == prev.start + 1 && frame.end == prev.end + 1) {
|
735
733
|
// Fixed frame size
|
736
734
|
const auto j = ReplaceIndex(index, frame, prev);
|
737
735
|
// We can only replace if the number of NULLs has not changed
|
738
|
-
if (included.AllValid() || included(prev.first) == included(prev.second)) {
|
736
|
+
if (included.AllValid() || included(prev.start) == included(prev.end)) {
|
739
737
|
for (const auto &q : bind_data.order) {
|
740
738
|
const auto &quantile = bind_data.quantiles[q];
|
741
739
|
Interpolator<DISCRETE> interp(quantile, prev_pos, false);
|
@@ -1062,7 +1060,7 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
1062
1060
|
|
1063
1061
|
// Lazily initialise frame state
|
1064
1062
|
auto prev_pos = state.pos;
|
1065
|
-
state.SetPos(frame.second - frame.first);
|
1063
|
+
state.SetPos(frame.end - frame.start);
|
1066
1064
|
|
1067
1065
|
auto index = state.w.data();
|
1068
1066
|
D_ASSERT(index);
|
@@ -1085,11 +1083,11 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
|
|
1085
1083
|
const float q = 0.5;
|
1086
1084
|
|
1087
1085
|
bool replace = false;
|
1088
|
-
if (frame.first == prev.first + 1 && frame.second == prev.second + 1) {
|
1086
|
+
if (frame.start == prev.start + 1 && frame.end == prev.end + 1) {
|
1089
1087
|
// Fixed frame size
|
1090
1088
|
const auto j = ReplaceIndex(index, frame, prev);
|
1091
1089
|
// We can only replace if the number of NULLs has not changed
|
1092
|
-
if (included.AllValid() || included(prev.first) == included(prev.second)) {
|
1090
|
+
if (included.AllValid() || included(prev.start) == included(prev.end)) {
|
1093
1091
|
Interpolator<false> interp(q, prev_pos, false);
|
1094
1092
|
replace = CanReplace(index, data, j, interp.FRN, interp.CRN, included);
|
1095
1093
|
if (replace) {
|
@@ -5,24 +5,6 @@
|
|
5
5
|
|
6
6
|
namespace duckdb {
|
7
7
|
|
8
|
-
static void RecursiveFlatten(Vector &vector, idx_t &count) {
|
9
|
-
if (vector.GetVectorType() != VectorType::FLAT_VECTOR) {
|
10
|
-
vector.Flatten(count);
|
11
|
-
}
|
12
|
-
|
13
|
-
auto internal_type = vector.GetType().InternalType();
|
14
|
-
if (internal_type == PhysicalType::LIST) {
|
15
|
-
auto &child_vector = ListVector::GetEntry(vector);
|
16
|
-
auto child_vector_count = ListVector::GetListSize(vector);
|
17
|
-
RecursiveFlatten(child_vector, child_vector_count);
|
18
|
-
} else if (internal_type == PhysicalType::STRUCT) {
|
19
|
-
auto &children = StructVector::GetEntries(vector);
|
20
|
-
for (auto &child : children) {
|
21
|
-
RecursiveFlatten(*child, count);
|
22
|
-
}
|
23
|
-
}
|
24
|
-
}
|
25
|
-
|
26
8
|
struct ListBindData : public FunctionData {
|
27
9
|
explicit ListBindData(const LogicalType &stype_p);
|
28
10
|
~ListBindData() override;
|
@@ -60,12 +42,6 @@ struct ListFunction {
|
|
60
42
|
state.linked_list.first_segment = nullptr;
|
61
43
|
state.linked_list.last_segment = nullptr;
|
62
44
|
}
|
63
|
-
|
64
|
-
template <class STATE>
|
65
|
-
static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
|
66
|
-
// nop
|
67
|
-
}
|
68
|
-
|
69
45
|
static bool IgnoreNull() {
|
70
46
|
return false;
|
71
47
|
}
|
@@ -73,58 +49,54 @@ struct ListFunction {
|
|
73
49
|
|
74
50
|
static void ListUpdateFunction(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count,
|
75
51
|
Vector &state_vector, idx_t count) {
|
76
|
-
D_ASSERT(input_count == 1);
|
77
52
|
|
53
|
+
D_ASSERT(input_count == 1);
|
78
54
|
auto &input = inputs[0];
|
79
|
-
|
80
|
-
|
55
|
+
RecursiveUnifiedVectorFormat input_data;
|
56
|
+
Vector::RecursiveToUnifiedFormat(input, count, input_data);
|
81
57
|
|
82
|
-
|
83
|
-
|
58
|
+
UnifiedVectorFormat states_data;
|
59
|
+
state_vector.ToUnifiedFormat(count, states_data);
|
60
|
+
auto states = UnifiedVectorFormat::GetData<ListAggState *>(states_data);
|
84
61
|
|
85
62
|
auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
|
86
63
|
|
87
64
|
for (idx_t i = 0; i < count; i++) {
|
88
|
-
auto &state = *states[
|
89
|
-
list_bind_data.functions.AppendRow(aggr_input_data.allocator, state.linked_list,
|
65
|
+
auto &state = *states[states_data.sel->get_index(i)];
|
66
|
+
list_bind_data.functions.AppendRow(aggr_input_data.allocator, state.linked_list, input_data, i);
|
90
67
|
}
|
91
68
|
}
|
92
69
|
|
93
|
-
static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &aggr_input_data, idx_t count) {
|
94
|
-
UnifiedVectorFormat sdata;
|
95
|
-
state.ToUnifiedFormat(count, sdata);
|
96
|
-
auto states_ptr = UnifiedVectorFormat::GetData<ListAggState *>(sdata);
|
70
|
+
static void ListCombineFunction(Vector &states_vector, Vector &combined, AggregateInputData &, idx_t count) {
|
97
71
|
|
98
|
-
|
72
|
+
UnifiedVectorFormat states_data;
|
73
|
+
states_vector.ToUnifiedFormat(count, states_data);
|
74
|
+
auto states_ptr = UnifiedVectorFormat::GetData<ListAggState *>(states_data);
|
99
75
|
|
100
76
|
auto combined_ptr = FlatVector::GetData<ListAggState *>(combined);
|
101
77
|
for (idx_t i = 0; i < count; i++) {
|
102
|
-
auto &state = *states_ptr[sdata.sel->get_index(i)];
|
103
|
-
if (state.linked_list.total_capacity == 0) {
|
104
|
-
// NULL, no need to append.
|
105
|
-
continue;
|
106
|
-
}
|
107
78
|
|
108
|
-
|
109
|
-
|
110
|
-
list_bind_data.functions.CopyLinkedList(state.linked_list, copied_linked_list, aggr_input_data.allocator);
|
79
|
+
auto &state = *states_ptr[states_data.sel->get_index(i)];
|
80
|
+
D_ASSERT(state.linked_list.total_capacity != 0);
|
111
81
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
} else {
|
116
|
-
combined_ptr[i]->linked_list.first_segment = copied_linked_list.first_segment;
|
82
|
+
if (combined_ptr[i]->linked_list.total_capacity == 0) {
|
83
|
+
combined_ptr[i]->linked_list = state.linked_list;
|
84
|
+
continue;
|
117
85
|
}
|
118
|
-
|
119
|
-
|
86
|
+
|
87
|
+
// append the linked list
|
88
|
+
combined_ptr[i]->linked_list.last_segment->next = state.linked_list.first_segment;
|
89
|
+
combined_ptr[i]->linked_list.last_segment = state.linked_list.last_segment;
|
90
|
+
combined_ptr[i]->linked_list.total_capacity += state.linked_list.total_capacity;
|
120
91
|
}
|
121
92
|
}
|
122
93
|
|
123
|
-
static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
|
94
|
+
static void ListFinalize(Vector &states_vector, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
|
124
95
|
idx_t offset) {
|
125
|
-
|
126
|
-
|
127
|
-
|
96
|
+
|
97
|
+
UnifiedVectorFormat states_data;
|
98
|
+
states_vector.ToUnifiedFormat(count, states_data);
|
99
|
+
auto states = UnifiedVectorFormat::GetData<ListAggState *>(states_data);
|
128
100
|
|
129
101
|
D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
|
130
102
|
|
@@ -133,9 +105,11 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
|
|
133
105
|
size_t total_len = ListVector::GetListSize(result);
|
134
106
|
|
135
107
|
auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
|
136
|
-
|
108
|
+
|
109
|
+
// first iterate over all entries and set up the list entries, and get the newly required total length
|
137
110
|
for (idx_t i = 0; i < count; i++) {
|
138
|
-
|
111
|
+
|
112
|
+
auto &state = *states[states_data.sel->get_index(i)];
|
139
113
|
const auto rid = i + offset;
|
140
114
|
result_data[rid].offset = total_len;
|
141
115
|
if (state.linked_list.total_capacity == 0) {
|
@@ -143,16 +117,19 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
|
|
143
117
|
result_data[rid].length = 0;
|
144
118
|
continue;
|
145
119
|
}
|
120
|
+
|
146
121
|
// set the length and offset of this list in the result vector
|
147
122
|
auto total_capacity = state.linked_list.total_capacity;
|
148
123
|
result_data[rid].length = total_capacity;
|
149
124
|
total_len += total_capacity;
|
150
125
|
}
|
151
|
-
|
126
|
+
|
127
|
+
// reserve capacity, then iterate over all entries again and copy over the data to the child vector
|
152
128
|
ListVector::Reserve(result, total_len);
|
153
129
|
auto &result_child = ListVector::GetEntry(result);
|
154
130
|
for (idx_t i = 0; i < count; i++) {
|
155
|
-
|
131
|
+
|
132
|
+
auto &state = *states[states_data.sel->get_index(i)];
|
156
133
|
const auto rid = i + offset;
|
157
134
|
if (state.linked_list.total_capacity == 0) {
|
158
135
|
continue;
|
@@ -161,6 +138,48 @@ static void ListFinalize(Vector &state_vector, AggregateInputData &aggr_input_da
|
|
161
138
|
idx_t current_offset = result_data[rid].offset;
|
162
139
|
list_bind_data.functions.BuildListVector(state.linked_list, result_child, current_offset);
|
163
140
|
}
|
141
|
+
|
142
|
+
ListVector::SetListSize(result, total_len);
|
143
|
+
}
|
144
|
+
|
145
|
+
static void ListWindow(Vector inputs[], const ValidityMask &filter_mask, AggregateInputData &aggr_input_data,
|
146
|
+
idx_t input_count, data_ptr_t state, const FrameBounds &frame, const FrameBounds &prev,
|
147
|
+
Vector &result, idx_t rid, idx_t bias) {
|
148
|
+
|
149
|
+
auto &list_bind_data = aggr_input_data.bind_data->Cast<ListBindData>();
|
150
|
+
LinkedList linked_list;
|
151
|
+
|
152
|
+
// UPDATE step
|
153
|
+
|
154
|
+
D_ASSERT(input_count == 1);
|
155
|
+
auto &input = inputs[0];
|
156
|
+
|
157
|
+
// FIXME: we unify more values than necessary (count is frame.end)
|
158
|
+
RecursiveUnifiedVectorFormat input_data;
|
159
|
+
Vector::RecursiveToUnifiedFormat(input, frame.end, input_data);
|
160
|
+
|
161
|
+
for (idx_t i = frame.start; i < frame.end; i++) {
|
162
|
+
list_bind_data.functions.AppendRow(aggr_input_data.allocator, linked_list, input_data, i);
|
163
|
+
}
|
164
|
+
|
165
|
+
// FINALIZE step
|
166
|
+
|
167
|
+
D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
|
168
|
+
auto result_data = FlatVector::GetData<list_entry_t>(result);
|
169
|
+
size_t total_len = ListVector::GetListSize(result);
|
170
|
+
|
171
|
+
// set the length and offset of this list in the result vector
|
172
|
+
result_data[rid].offset = total_len;
|
173
|
+
result_data[rid].length = linked_list.total_capacity;
|
174
|
+
D_ASSERT(linked_list.total_capacity != 0);
|
175
|
+
total_len += linked_list.total_capacity;
|
176
|
+
|
177
|
+
// reserve capacity, then copy over the data to the child vector
|
178
|
+
ListVector::Reserve(result, total_len);
|
179
|
+
auto &result_child = ListVector::GetEntry(result);
|
180
|
+
idx_t offset = result_data[rid].offset;
|
181
|
+
list_bind_data.functions.BuildListVector(linked_list, result_child, offset);
|
182
|
+
|
164
183
|
ListVector::SetListSize(result, total_len);
|
165
184
|
}
|
166
185
|
|
@@ -182,8 +201,8 @@ unique_ptr<FunctionData> ListBindFunction(ClientContext &context, AggregateFunct
|
|
182
201
|
AggregateFunction ListFun::GetFunction() {
|
183
202
|
return AggregateFunction({LogicalType::ANY}, LogicalTypeId::LIST, AggregateFunction::StateSize<ListAggState>,
|
184
203
|
AggregateFunction::StateInitialize<ListAggState, ListFunction>, ListUpdateFunction,
|
185
|
-
ListCombineFunction, ListFinalize, nullptr, ListBindFunction,
|
186
|
-
|
204
|
+
ListCombineFunction, ListFinalize, nullptr, ListBindFunction, nullptr, nullptr,
|
205
|
+
ListWindow);
|
187
206
|
}
|
188
207
|
|
189
208
|
} // namespace duckdb
|
@@ -73,7 +73,7 @@ static StaticFunctionDefinition internal_functions[] = {
|
|
73
73
|
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayDistinctFun),
|
74
74
|
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayFilterFun),
|
75
75
|
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArrayReverseSortFun),
|
76
|
-
|
76
|
+
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArraySliceFun),
|
77
77
|
DUCKDB_SCALAR_FUNCTION_SET_ALIAS(ArraySortFun),
|
78
78
|
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayTransformFun),
|
79
79
|
DUCKDB_SCALAR_FUNCTION_ALIAS(ArrayUniqueFun),
|
@@ -200,7 +200,7 @@ static StaticFunctionDefinition internal_functions[] = {
|
|
200
200
|
DUCKDB_SCALAR_FUNCTION(ListFilterFun),
|
201
201
|
DUCKDB_SCALAR_FUNCTION_ALIAS(ListPackFun),
|
202
202
|
DUCKDB_SCALAR_FUNCTION_SET(ListReverseSortFun),
|
203
|
-
|
203
|
+
DUCKDB_SCALAR_FUNCTION_SET(ListSliceFun),
|
204
204
|
DUCKDB_SCALAR_FUNCTION_SET(ListSortFun),
|
205
205
|
DUCKDB_SCALAR_FUNCTION(ListTransformFun),
|
206
206
|
DUCKDB_SCALAR_FUNCTION(ListUniqueFun),
|