duckdb 0.8.2-dev1791.0 → 0.8.2-dev1862.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/src/common/constants.cpp +2 -1
- package/src/duckdb/src/common/enum_util.cpp +5 -5
- package/src/duckdb/src/core_functions/function_list.cpp +2 -0
- package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
- package/src/duckdb/src/execution/index/art/art.cpp +49 -108
- package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +10 -14
- package/src/duckdb/src/execution/index/art/iterator.cpp +13 -19
- package/src/duckdb/src/execution/index/art/leaf.cpp +290 -241
- package/src/duckdb/src/execution/index/art/node.cpp +104 -95
- package/src/duckdb/src/execution/index/art/node16.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node256.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node4.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node48.cpp +6 -6
- package/src/duckdb/src/execution/index/art/prefix.cpp +49 -39
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +34 -1175
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +4 -14
- package/src/duckdb/src/execution/window_executor.cpp +1280 -0
- package/src/duckdb/src/execution/window_segment_tree.cpp +224 -117
- package/src/duckdb/src/function/table/read_csv.cpp +4 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +43 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +119 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +60 -53
- package/src/duckdb/src/storage/compression/rle.cpp +52 -12
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_src_execution.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -4
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
@@ -0,0 +1,313 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/window_executor.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/execution/expression_executor.hpp"
|
12
|
+
#include "duckdb/execution/window_segment_tree.hpp"
|
13
|
+
#include "duckdb/planner/expression/bound_window_expression.hpp"
|
14
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
15
|
+
|
16
|
+
namespace duckdb {
|
17
|
+
|
18
|
+
struct WindowInputExpression {
|
19
|
+
static void PrepareInputExpression(Expression &expr, ExpressionExecutor &executor, DataChunk &chunk) {
|
20
|
+
vector<LogicalType> types;
|
21
|
+
types.push_back(expr.return_type);
|
22
|
+
executor.AddExpression(expr);
|
23
|
+
|
24
|
+
auto &allocator = executor.GetAllocator();
|
25
|
+
chunk.Initialize(allocator, types);
|
26
|
+
}
|
27
|
+
|
28
|
+
WindowInputExpression(optional_ptr<Expression> expr_p, ClientContext &context)
|
29
|
+
: expr(expr_p), ptype(PhysicalType::INVALID), scalar(true), executor(context) {
|
30
|
+
if (expr) {
|
31
|
+
PrepareInputExpression(*expr, executor, chunk);
|
32
|
+
ptype = expr->return_type.InternalType();
|
33
|
+
scalar = expr->IsScalar();
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
void Execute(DataChunk &input_chunk) {
|
38
|
+
if (expr) {
|
39
|
+
chunk.Reset();
|
40
|
+
executor.Execute(input_chunk, chunk);
|
41
|
+
chunk.Verify();
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
template <typename T>
|
46
|
+
inline T GetCell(idx_t i) const {
|
47
|
+
D_ASSERT(!chunk.data.empty());
|
48
|
+
const auto data = FlatVector::GetData<T>(chunk.data[0]);
|
49
|
+
return data[scalar ? 0 : i];
|
50
|
+
}
|
51
|
+
|
52
|
+
inline bool CellIsNull(idx_t i) const {
|
53
|
+
D_ASSERT(!chunk.data.empty());
|
54
|
+
if (chunk.data[0].GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
55
|
+
return ConstantVector::IsNull(chunk.data[0]);
|
56
|
+
}
|
57
|
+
return FlatVector::IsNull(chunk.data[0], i);
|
58
|
+
}
|
59
|
+
|
60
|
+
inline void CopyCell(Vector &target, idx_t target_offset) const {
|
61
|
+
D_ASSERT(!chunk.data.empty());
|
62
|
+
auto &source = chunk.data[0];
|
63
|
+
auto source_offset = scalar ? 0 : target_offset;
|
64
|
+
VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset);
|
65
|
+
}
|
66
|
+
|
67
|
+
optional_ptr<Expression> expr;
|
68
|
+
PhysicalType ptype;
|
69
|
+
bool scalar;
|
70
|
+
ExpressionExecutor executor;
|
71
|
+
DataChunk chunk;
|
72
|
+
};
|
73
|
+
|
74
|
+
struct WindowInputColumn {
|
75
|
+
WindowInputColumn(Expression *expr_p, ClientContext &context, idx_t capacity_p)
|
76
|
+
: input_expr(expr_p, context), count(0), capacity(capacity_p) {
|
77
|
+
if (input_expr.expr) {
|
78
|
+
target = make_uniq<Vector>(input_expr.chunk.data[0].GetType(), capacity);
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
void Append(DataChunk &input_chunk) {
|
83
|
+
if (input_expr.expr) {
|
84
|
+
const auto source_count = input_chunk.size();
|
85
|
+
D_ASSERT(count + source_count <= capacity);
|
86
|
+
if (!input_expr.scalar || !count) {
|
87
|
+
input_expr.Execute(input_chunk);
|
88
|
+
auto &source = input_expr.chunk.data[0];
|
89
|
+
VectorOperations::Copy(source, *target, source_count, 0, count);
|
90
|
+
}
|
91
|
+
count += source_count;
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
inline bool CellIsNull(idx_t i) const {
|
96
|
+
D_ASSERT(target);
|
97
|
+
D_ASSERT(i < count);
|
98
|
+
return FlatVector::IsNull(*target, input_expr.scalar ? 0 : i);
|
99
|
+
}
|
100
|
+
|
101
|
+
template <typename T>
|
102
|
+
inline T GetCell(idx_t i) const {
|
103
|
+
D_ASSERT(target);
|
104
|
+
D_ASSERT(i < count);
|
105
|
+
const auto data = FlatVector::GetData<T>(*target);
|
106
|
+
return data[input_expr.scalar ? 0 : i];
|
107
|
+
}
|
108
|
+
|
109
|
+
WindowInputExpression input_expr;
|
110
|
+
|
111
|
+
private:
|
112
|
+
unique_ptr<Vector> target;
|
113
|
+
idx_t count;
|
114
|
+
idx_t capacity;
|
115
|
+
};
|
116
|
+
|
117
|
+
// Column indexes of the bounds chunk.
// The explicit values spell out the implicit numbering; they must stay contiguous from 0.
enum WindowBounds : uint8_t {
	PARTITION_BEGIN = 0,
	PARTITION_END = 1,
	PEER_BEGIN = 2,
	PEER_END = 3,
	WINDOW_BEGIN = 4,
	WINDOW_END = 5
};
|
119
|
+
|
120
|
+
//! Polymorphic base for the state object that WindowExecutor::GetExecutorState creates and
//! WindowExecutor::Evaluate receives. It carries no data of its own.
class WindowExecutorState {
public:
	WindowExecutorState() {
	}
	virtual ~WindowExecutorState() {
	}

	//! Reinterprets this state as TARGET. The dynamic type is only checked by the debug assertion;
	//! the cast itself is unchecked.
	template <class TARGET>
	TARGET &Cast() {
		D_ASSERT(dynamic_cast<TARGET *>(this));
		return reinterpret_cast<TARGET &>(*this);
	}

	//! Const overload of Cast().
	template <class TARGET>
	const TARGET &Cast() const {
		D_ASSERT(dynamic_cast<const TARGET *>(this));
		return reinterpret_cast<const TARGET &>(*this);
	}
};
|
137
|
+
|
138
|
+
class WindowExecutor {
|
139
|
+
public:
|
140
|
+
WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
141
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
142
|
+
virtual ~WindowExecutor() {
|
143
|
+
}
|
144
|
+
|
145
|
+
virtual void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
|
146
|
+
range.Append(input_chunk);
|
147
|
+
}
|
148
|
+
|
149
|
+
virtual void Finalize() {
|
150
|
+
}
|
151
|
+
|
152
|
+
virtual unique_ptr<WindowExecutorState> GetExecutorState() const;
|
153
|
+
|
154
|
+
void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, WindowExecutorState &lstate) const;
|
155
|
+
|
156
|
+
protected:
|
157
|
+
// The function
|
158
|
+
BoundWindowExpression &wexpr;
|
159
|
+
ClientContext &context;
|
160
|
+
const idx_t payload_count;
|
161
|
+
const ValidityMask &partition_mask;
|
162
|
+
const ValidityMask &order_mask;
|
163
|
+
|
164
|
+
// Expression collections
|
165
|
+
DataChunk payload_collection;
|
166
|
+
ExpressionExecutor payload_executor;
|
167
|
+
DataChunk payload_chunk;
|
168
|
+
|
169
|
+
// evaluate RANGE expressions, if needed
|
170
|
+
WindowInputColumn range;
|
171
|
+
|
172
|
+
virtual void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const = 0;
|
173
|
+
};
|
174
|
+
|
175
|
+
class WindowAggregateExecutor : public WindowExecutor {
|
176
|
+
public:
|
177
|
+
bool IsConstantAggregate();
|
178
|
+
bool IsCustomAggregate();
|
179
|
+
|
180
|
+
WindowAggregateExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
181
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask,
|
182
|
+
WindowAggregationMode mode);
|
183
|
+
|
184
|
+
void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) override;
|
185
|
+
void Finalize() override;
|
186
|
+
|
187
|
+
unique_ptr<WindowExecutorState> GetExecutorState() const override;
|
188
|
+
|
189
|
+
const WindowAggregationMode mode;
|
190
|
+
|
191
|
+
protected:
|
192
|
+
ExpressionExecutor filter_executor;
|
193
|
+
SelectionVector filter_sel;
|
194
|
+
|
195
|
+
// aggregate computation algorithm
|
196
|
+
unique_ptr<WindowAggregator> aggregator;
|
197
|
+
|
198
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
199
|
+
};
|
200
|
+
|
201
|
+
class WindowRowNumberExecutor : public WindowExecutor {
|
202
|
+
public:
|
203
|
+
WindowRowNumberExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
204
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
205
|
+
|
206
|
+
protected:
|
207
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
208
|
+
};
|
209
|
+
|
210
|
+
// Base class for non-aggregate functions that use peer boundaries
|
211
|
+
class WindowRankExecutor : public WindowExecutor {
|
212
|
+
public:
|
213
|
+
WindowRankExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
214
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
215
|
+
|
216
|
+
unique_ptr<WindowExecutorState> GetExecutorState() const override;
|
217
|
+
|
218
|
+
protected:
|
219
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
220
|
+
};
|
221
|
+
|
222
|
+
class WindowDenseRankExecutor : public WindowExecutor {
|
223
|
+
public:
|
224
|
+
WindowDenseRankExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
225
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
226
|
+
|
227
|
+
unique_ptr<WindowExecutorState> GetExecutorState() const override;
|
228
|
+
|
229
|
+
protected:
|
230
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
231
|
+
};
|
232
|
+
|
233
|
+
class WindowPercentRankExecutor : public WindowExecutor {
|
234
|
+
public:
|
235
|
+
WindowPercentRankExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
236
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
237
|
+
|
238
|
+
unique_ptr<WindowExecutorState> GetExecutorState() const override;
|
239
|
+
|
240
|
+
protected:
|
241
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
242
|
+
};
|
243
|
+
|
244
|
+
class WindowCumeDistExecutor : public WindowExecutor {
|
245
|
+
public:
|
246
|
+
WindowCumeDistExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
247
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
248
|
+
|
249
|
+
protected:
|
250
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
251
|
+
};
|
252
|
+
|
253
|
+
// Base class for non-aggregate functions that have a payload
|
254
|
+
class WindowValueExecutor : public WindowExecutor {
|
255
|
+
public:
|
256
|
+
WindowValueExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
257
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
258
|
+
|
259
|
+
void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) override;
|
260
|
+
|
261
|
+
protected:
|
262
|
+
// IGNORE NULLS
|
263
|
+
ValidityMask ignore_nulls;
|
264
|
+
};
|
265
|
+
|
266
|
+
//
|
267
|
+
class WindowNtileExecutor : public WindowValueExecutor {
|
268
|
+
public:
|
269
|
+
WindowNtileExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
270
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
271
|
+
|
272
|
+
protected:
|
273
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
274
|
+
};
|
275
|
+
class WindowLeadLagExecutor : public WindowValueExecutor {
|
276
|
+
public:
|
277
|
+
WindowLeadLagExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
278
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
279
|
+
|
280
|
+
unique_ptr<WindowExecutorState> GetExecutorState() const override;
|
281
|
+
|
282
|
+
protected:
|
283
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
284
|
+
};
|
285
|
+
|
286
|
+
class WindowFirstValueExecutor : public WindowValueExecutor {
|
287
|
+
public:
|
288
|
+
WindowFirstValueExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
289
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
290
|
+
|
291
|
+
protected:
|
292
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
293
|
+
};
|
294
|
+
|
295
|
+
class WindowLastValueExecutor : public WindowValueExecutor {
|
296
|
+
public:
|
297
|
+
WindowLastValueExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
298
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
299
|
+
|
300
|
+
protected:
|
301
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
302
|
+
};
|
303
|
+
|
304
|
+
class WindowNthValueExecutor : public WindowValueExecutor {
|
305
|
+
public:
|
306
|
+
WindowNthValueExecutor(BoundWindowExpression &wexpr, ClientContext &context, const idx_t payload_count,
|
307
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask);
|
308
|
+
|
309
|
+
protected:
|
310
|
+
void EvaluateInternal(WindowExecutorState &lstate, Vector &result, idx_t count, idx_t row_idx) const override;
|
311
|
+
};
|
312
|
+
|
313
|
+
} // namespace duckdb
|
@@ -16,20 +16,42 @@
|
|
16
16
|
|
17
17
|
namespace duckdb {
|
18
18
|
|
19
|
-
class
|
19
|
+
class WindowAggregatorState {
|
20
20
|
public:
|
21
|
-
|
22
|
-
virtual ~
|
21
|
+
WindowAggregatorState();
|
22
|
+
virtual ~WindowAggregatorState() {
|
23
|
+
}
|
24
|
+
|
25
|
+
template <class TARGET>
|
26
|
+
TARGET &Cast() {
|
27
|
+
D_ASSERT(dynamic_cast<TARGET *>(this));
|
28
|
+
return reinterpret_cast<TARGET &>(*this);
|
29
|
+
}
|
30
|
+
template <class TARGET>
|
31
|
+
const TARGET &Cast() const {
|
32
|
+
D_ASSERT(dynamic_cast<const TARGET *>(this));
|
33
|
+
return reinterpret_cast<const TARGET &>(*this);
|
34
|
+
}
|
23
35
|
|
36
|
+
//! Allocator for aggregates
|
37
|
+
ArenaAllocator allocator;
|
38
|
+
};
|
39
|
+
|
40
|
+
class WindowAggregator {
|
41
|
+
public:
|
42
|
+
WindowAggregator(AggregateObject aggr, const LogicalType &result_type_p, idx_t partition_count);
|
43
|
+
virtual ~WindowAggregator();
|
44
|
+
|
45
|
+
// Build
|
24
46
|
virtual void Sink(DataChunk &payload_chunk, SelectionVector *filter_sel, idx_t filtered);
|
25
47
|
virtual void Finalize();
|
26
|
-
virtual void Compute(Vector &result, idx_t rid, idx_t start, idx_t end);
|
27
|
-
virtual void Evaluate(const idx_t *begins, const idx_t *ends, Vector &result, idx_t count);
|
28
48
|
|
29
|
-
|
30
|
-
|
31
|
-
void
|
49
|
+
// Probe
|
50
|
+
virtual unique_ptr<WindowAggregatorState> GetLocalState() const = 0;
|
51
|
+
virtual void Evaluate(WindowAggregatorState &lstate, const idx_t *begins, const idx_t *ends, Vector &result,
|
52
|
+
idx_t count) const = 0;
|
32
53
|
|
54
|
+
protected:
|
33
55
|
AggregateObject aggr;
|
34
56
|
//! The result type of the window function
|
35
57
|
LogicalType result_type;
|
@@ -38,10 +60,6 @@ protected:
|
|
38
60
|
const idx_t partition_count;
|
39
61
|
//! The size of a single aggregate state
|
40
62
|
const idx_t state_size;
|
41
|
-
//! Data pointer that contains a single state, used for intermediate window segment aggregation
|
42
|
-
vector<data_t> state;
|
43
|
-
//! Reused result state container for the window functions
|
44
|
-
Vector statef;
|
45
63
|
//! Partition data chunk
|
46
64
|
DataChunk inputs;
|
47
65
|
|
@@ -49,23 +67,28 @@ protected:
|
|
49
67
|
vector<validity_t> filter_bits;
|
50
68
|
ValidityMask filter_mask;
|
51
69
|
idx_t filter_pos;
|
52
|
-
|
53
|
-
|
54
|
-
ArenaAllocator allocator;
|
70
|
+
//! The state used by the aggregator to build.
|
71
|
+
unique_ptr<WindowAggregatorState> gstate;
|
55
72
|
};
|
56
73
|
|
57
|
-
class
|
74
|
+
class WindowConstantAggregator : public WindowAggregator {
|
58
75
|
public:
|
59
|
-
|
60
|
-
|
61
|
-
~
|
76
|
+
WindowConstantAggregator(AggregateObject aggr, const LogicalType &result_type_p, const ValidityMask &partition_mask,
|
77
|
+
const idx_t count);
|
78
|
+
~WindowConstantAggregator() override {
|
62
79
|
}
|
63
80
|
|
64
81
|
void Sink(DataChunk &payload_chunk, SelectionVector *filter_sel, idx_t filtered) override;
|
65
82
|
void Finalize() override;
|
66
|
-
|
83
|
+
|
84
|
+
unique_ptr<WindowAggregatorState> GetLocalState() const override;
|
85
|
+
void Evaluate(WindowAggregatorState &lstate, const idx_t *begins, const idx_t *ends, Vector &result,
|
86
|
+
idx_t count) const override;
|
67
87
|
|
68
88
|
private:
|
89
|
+
void AggregateInit();
|
90
|
+
void AggegateFinal(Vector &result, idx_t rid);
|
91
|
+
|
69
92
|
//! Partition starts
|
70
93
|
vector<idx_t> partition_offsets;
|
71
94
|
//! Aggregate results
|
@@ -74,25 +97,25 @@ private:
|
|
74
97
|
idx_t partition;
|
75
98
|
//! The current input row being built/read
|
76
99
|
idx_t row;
|
100
|
+
//! Data pointer that contains a single state, used for intermediate window segment aggregation
|
101
|
+
vector<data_t> state;
|
77
102
|
//! A vector of pointers to "state", used for intermediate window segment aggregation
|
78
103
|
Vector statep;
|
79
|
-
//!
|
80
|
-
|
104
|
+
//! Reused result state container for the window functions
|
105
|
+
Vector statef;
|
81
106
|
};
|
82
107
|
|
83
|
-
class
|
108
|
+
class WindowCustomAggregator : public WindowAggregator {
|
84
109
|
public:
|
85
|
-
|
86
|
-
~
|
110
|
+
WindowCustomAggregator(AggregateObject aggr, const LogicalType &result_type_p, idx_t partition_count);
|
111
|
+
~WindowCustomAggregator() override;
|
87
112
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
//! The frame boundaries, used for the window functions
|
92
|
-
FrameBounds frame;
|
113
|
+
unique_ptr<WindowAggregatorState> GetLocalState() const override;
|
114
|
+
void Evaluate(WindowAggregatorState &lstate, const idx_t *begins, const idx_t *ends, Vector &result,
|
115
|
+
idx_t count) const override;
|
93
116
|
};
|
94
117
|
|
95
|
-
class WindowSegmentTree : public
|
118
|
+
class WindowSegmentTree : public WindowAggregator {
|
96
119
|
public:
|
97
120
|
using FrameBounds = std::pair<idx_t, idx_t>;
|
98
121
|
|
@@ -100,32 +123,19 @@ public:
|
|
100
123
|
~WindowSegmentTree() override;
|
101
124
|
|
102
125
|
void Finalize() override;
|
103
|
-
void Evaluate(const idx_t *begins, const idx_t *ends, Vector &result, idx_t count) override;
|
104
126
|
|
105
|
-
|
127
|
+
unique_ptr<WindowAggregatorState> GetLocalState() const override;
|
128
|
+
void Evaluate(WindowAggregatorState &lstate, const idx_t *begins, const idx_t *ends, Vector &result,
|
129
|
+
idx_t count) const override;
|
130
|
+
|
131
|
+
public:
|
106
132
|
void ConstructTree();
|
107
|
-
void ExtractFrame(idx_t begin, idx_t end, data_ptr_t current_state);
|
108
|
-
void FlushStates(bool combining);
|
109
|
-
void WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end, data_ptr_t current_state);
|
110
133
|
|
111
134
|
//! Use the combine API, if available
|
112
135
|
inline bool UseCombineAPI() const {
|
113
136
|
return mode < WindowAggregationMode::SEPARATE;
|
114
137
|
}
|
115
138
|
|
116
|
-
//! Input data chunk, used for leaf segment aggregation
|
117
|
-
DataChunk leaves;
|
118
|
-
//! The filtered rows in inputs.
|
119
|
-
SelectionVector filter_sel;
|
120
|
-
//! A vector of pointers to "state", used for intermediate window segment aggregation
|
121
|
-
Vector statep;
|
122
|
-
//! The frame boundaries, used for the window functions
|
123
|
-
FrameBounds frame;
|
124
|
-
//! Reused state pointers for combining segment tree levels
|
125
|
-
Vector statel;
|
126
|
-
//! Count of buffered values
|
127
|
-
idx_t flush_count;
|
128
|
-
|
129
139
|
//! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
|
130
140
|
unsafe_unique_array<data_t> levels_flat_native;
|
131
141
|
//! For each level, the starting location in the levels_flat_native array
|
@@ -134,12 +144,9 @@ private:
|
|
134
144
|
//! The total number of internal nodes of the tree, stored in levels_flat_native
|
135
145
|
idx_t internal_nodes;
|
136
146
|
|
137
|
-
//! Use the
|
147
|
+
//! Use the combine API, if available
|
138
148
|
WindowAggregationMode mode;
|
139
149
|
|
140
|
-
//! Aggregate allocator
|
141
|
-
ArenaAllocator allocator;
|
142
|
-
|
143
150
|
// TREE_FANOUT needs to cleanly divide STANDARD_VECTOR_SIZE
|
144
151
|
static constexpr idx_t TREE_FANOUT = 16;
|
145
152
|
};
|
@@ -166,8 +166,8 @@ struct RLECompressState : public CompressionState {
|
|
166
166
|
void WriteValue(T value, rle_count_t count, bool is_null) {
|
167
167
|
// write the RLE entry
|
168
168
|
auto handle_ptr = handle.Ptr() + RLEConstants::RLE_HEADER_SIZE;
|
169
|
-
auto data_pointer =
|
170
|
-
auto index_pointer =
|
169
|
+
auto data_pointer = reinterpret_cast<T *>(handle_ptr);
|
170
|
+
auto index_pointer = reinterpret_cast<rle_count_t *>(handle_ptr + max_rle_count * sizeof(T));
|
171
171
|
data_pointer[entry_count] = value;
|
172
172
|
index_pointer[entry_count] = count;
|
173
173
|
entry_count++;
|
@@ -257,7 +257,7 @@ struct RLEScanState : public SegmentScanState {
|
|
257
257
|
|
258
258
|
void Skip(ColumnSegment &segment, idx_t skip_count) {
|
259
259
|
auto data = handle.Ptr() + segment.GetBlockOffset();
|
260
|
-
auto index_pointer =
|
260
|
+
auto index_pointer = reinterpret_cast<rle_count_t *>(data + rle_count_offset);
|
261
261
|
|
262
262
|
for (idx_t i = 0; i < skip_count; i++) {
|
263
263
|
// assign the current value
|
@@ -292,14 +292,58 @@ void RLESkip(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count) {
|
|
292
292
|
scan_state.Skip(segment, skip_count);
|
293
293
|
}
|
294
294
|
|
295
|
+
//! Decides whether a scan of scan_count values, starting at offset position inside a run of run_length
//! values, can be answered with a single constant vector.
//! Requires that the scan fills an entire vector and that the current run still holds at least
//! scan_count values.
static bool CanEmitConstantVector(idx_t position, idx_t run_length, idx_t scan_count) {
	// Only when we can fill an entire Vector can we emit a ConstantVector, because subsequent scans require the
	// input Vector to be flat
	const bool fills_entire_vector = scan_count == STANDARD_VECTOR_SIZE;
	if (!fills_entire_vector) {
		return false;
	}
	D_ASSERT(position < run_length);
	// The run must still contain at least as many values as we need to scan
	const idx_t values_left = run_length - position;
	return scan_count <= values_left;
}
|
306
|
+
|
307
|
+
template <class T>
|
308
|
+
inline static void ForwardToNextRun(RLEScanState<T> &scan_state) {
|
309
|
+
// handled all entries in this RLE value
|
310
|
+
// move to the next entry
|
311
|
+
scan_state.entry_pos++;
|
312
|
+
scan_state.position_in_entry = 0;
|
313
|
+
}
|
314
|
+
|
315
|
+
template <class T>
|
316
|
+
inline static bool ExhaustedRun(RLEScanState<T> &scan_state, rle_count_t *index_pointer) {
|
317
|
+
return scan_state.position_in_entry >= index_pointer[scan_state.entry_pos];
|
318
|
+
}
|
319
|
+
|
320
|
+
template <class T>
|
321
|
+
static void RLEScanConstant(RLEScanState<T> &scan_state, rle_count_t *index_pointer, T *data_pointer, idx_t scan_count,
|
322
|
+
Vector &result) {
|
323
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
324
|
+
auto result_data = ConstantVector::GetData<T>(result);
|
325
|
+
result_data[0] = data_pointer[scan_state.entry_pos];
|
326
|
+
scan_state.position_in_entry += scan_count;
|
327
|
+
if (ExhaustedRun(scan_state, index_pointer)) {
|
328
|
+
ForwardToNextRun(scan_state);
|
329
|
+
}
|
330
|
+
return;
|
331
|
+
}
|
332
|
+
|
295
333
|
template <class T>
|
296
334
|
void RLEScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result,
|
297
335
|
idx_t result_offset) {
|
298
336
|
auto &scan_state = state.scan_state->Cast<RLEScanState<T>>();
|
299
337
|
|
300
338
|
auto data = scan_state.handle.Ptr() + segment.GetBlockOffset();
|
301
|
-
auto data_pointer =
|
302
|
-
auto index_pointer =
|
339
|
+
auto data_pointer = reinterpret_cast<T *>(data + RLEConstants::RLE_HEADER_SIZE);
|
340
|
+
auto index_pointer = reinterpret_cast<rle_count_t *>(data + scan_state.rle_count_offset);
|
341
|
+
|
342
|
+
// If we are scanning an entire Vector and it contains only a single run
|
343
|
+
if (CanEmitConstantVector(scan_state.position_in_entry, index_pointer[scan_state.entry_pos], scan_count)) {
|
344
|
+
RLEScanConstant<T>(scan_state, index_pointer, data_pointer, scan_count, result);
|
345
|
+
return;
|
346
|
+
}
|
303
347
|
|
304
348
|
auto result_data = FlatVector::GetData<T>(result);
|
305
349
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
@@ -307,18 +351,14 @@ void RLEScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_c
|
|
307
351
|
// assign the current value
|
308
352
|
result_data[result_offset + i] = data_pointer[scan_state.entry_pos];
|
309
353
|
scan_state.position_in_entry++;
|
310
|
-
if (scan_state
|
311
|
-
|
312
|
-
// move to the next entry
|
313
|
-
scan_state.entry_pos++;
|
314
|
-
scan_state.position_in_entry = 0;
|
354
|
+
if (ExhaustedRun(scan_state, index_pointer)) {
|
355
|
+
ForwardToNextRun(scan_state);
|
315
356
|
}
|
316
357
|
}
|
317
358
|
}
|
318
359
|
|
319
360
|
template <class T>
|
320
361
|
void RLEScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) {
|
321
|
-
// FIXME: emit constant vector if repetition of single value is >= scan_count
|
322
362
|
RLEScanPartial<T>(segment, state, scan_count, result, 0);
|
323
363
|
}
|
324
364
|
|
@@ -331,7 +371,7 @@ void RLEFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id,
|
|
331
371
|
scan_state.Skip(segment, row_id);
|
332
372
|
|
333
373
|
auto data = scan_state.handle.Ptr() + segment.GetBlockOffset();
|
334
|
-
auto data_pointer =
|
374
|
+
auto data_pointer = reinterpret_cast<T *>(data + RLEConstants::RLE_HEADER_SIZE);
|
335
375
|
auto result_data = FlatVector::GetData<T>(result);
|
336
376
|
result_data[result_idx] = data_pointer[scan_state.entry_pos];
|
337
377
|
}
|
@@ -8,8 +8,6 @@
|
|
8
8
|
|
9
9
|
#include "src/execution/index/art/leaf.cpp"
|
10
10
|
|
11
|
-
#include "src/execution/index/art/leaf_segment.cpp"
|
12
|
-
|
13
11
|
#include "src/execution/index/art/node4.cpp"
|
14
12
|
|
15
13
|
#include "src/execution/index/art/node16.cpp"
|
@@ -18,8 +16,6 @@
|
|
18
16
|
|
19
17
|
#include "src/execution/index/art/node256.cpp"
|
20
18
|
|
21
|
-
#include "src/execution/index/art/swizzleable_pointer.cpp"
|
22
|
-
|
23
19
|
#include "src/execution/index/art/prefix.cpp"
|
24
20
|
|
25
21
|
#include "src/execution/index/art/art.cpp"
|
@@ -1,52 +0,0 @@
|
|
1
|
-
#include "duckdb/execution/index/art/leaf_segment.hpp"
|
2
|
-
|
3
|
-
#include "duckdb/execution/index/art/art.hpp"
|
4
|
-
#include "duckdb/execution/index/art/node.hpp"
|
5
|
-
|
6
|
-
namespace duckdb {
|
7
|
-
|
8
|
-
LeafSegment &LeafSegment::New(ART &art, Node &node) {
|
9
|
-
|
10
|
-
node.SetPtr(Node::GetAllocator(art, NType::LEAF_SEGMENT).New());
|
11
|
-
node.type = (uint8_t)NType::LEAF_SEGMENT;
|
12
|
-
|
13
|
-
auto &segment = LeafSegment::Get(art, node);
|
14
|
-
segment.next.Reset();
|
15
|
-
return segment;
|
16
|
-
}
|
17
|
-
|
18
|
-
void LeafSegment::Free(ART &art, Node &node) {
|
19
|
-
|
20
|
-
D_ASSERT(node.IsSet());
|
21
|
-
D_ASSERT(!node.IsSwizzled());
|
22
|
-
|
23
|
-
// free next segment
|
24
|
-
auto next_segment = LeafSegment::Get(art, node).next;
|
25
|
-
Node::Free(art, next_segment);
|
26
|
-
}
|
27
|
-
|
28
|
-
LeafSegment &LeafSegment::Append(ART &art, uint32_t &count, const row_t row_id) {
|
29
|
-
|
30
|
-
reference<LeafSegment> segment(*this);
|
31
|
-
auto position = count % Node::LEAF_SEGMENT_SIZE;
|
32
|
-
|
33
|
-
// we need a new segment
|
34
|
-
if (position == 0 && count != 0) {
|
35
|
-
segment = LeafSegment::New(art, next);
|
36
|
-
}
|
37
|
-
|
38
|
-
segment.get().row_ids[position] = row_id;
|
39
|
-
count++;
|
40
|
-
return segment.get();
|
41
|
-
}
|
42
|
-
|
43
|
-
LeafSegment &LeafSegment::GetTail(const ART &art) {
|
44
|
-
|
45
|
-
reference<LeafSegment> segment(*this);
|
46
|
-
while (segment.get().next.IsSet()) {
|
47
|
-
segment = LeafSegment::Get(art, segment.get().next);
|
48
|
-
}
|
49
|
-
return segment.get();
|
50
|
-
}
|
51
|
-
|
52
|
-
} // namespace duckdb
|