duckdb 0.7.2-dev1867.0 → 0.7.2-dev1901.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/data_chunk.cpp +13 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +2 -2
- package/src/duckdb/src/common/sort/sorted_block.cpp +9 -4
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +4 -0
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +169 -60
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +15 -9
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +19 -0
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -7
- package/src/duckdb/src/include/duckdb/parser/parser_options.hpp +23 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -3
- package/src/duckdb/src/main/client_context.cpp +4 -2
- package/src/duckdb/src/main/config.cpp +2 -0
- package/src/duckdb/src/main/settings/settings.cpp +36 -0
- package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +4 -3
- package/src/duckdb/src/parser/parser.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +7 -3
- package/src/duckdb/src/parser/transformer.cpp +6 -5
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +949 -947
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +16431 -16385
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +503 -493
- package/test/udf.test.ts +9 -0
package/package.json
CHANGED
package/src/data_chunk.cpp
CHANGED
@@ -145,7 +145,19 @@ Napi::Array EncodeDataChunk(Napi::Env env, duckdb::DataChunk &chunk, bool with_t
|
|
145
145
|
}
|
146
146
|
break;
|
147
147
|
}
|
148
|
-
case duckdb::LogicalTypeId::BLOB:
|
148
|
+
case duckdb::LogicalTypeId::BLOB: {
|
149
|
+
if (with_data) {
|
150
|
+
auto array = Napi::Array::New(env, chunk.size());
|
151
|
+
auto data = duckdb::FlatVector::GetData<duckdb::string_t>(*vec);
|
152
|
+
|
153
|
+
for (size_t i = 0; i < chunk.size(); ++i) {
|
154
|
+
auto buf = Napi::Buffer<char>::Copy(env, data[i].GetDataUnsafe(), data[i].GetSize());
|
155
|
+
array.Set(i, buf);
|
156
|
+
}
|
157
|
+
desc.Set("data", array);
|
158
|
+
}
|
159
|
+
break;
|
160
|
+
}
|
149
161
|
case duckdb::LogicalTypeId::VARCHAR: {
|
150
162
|
if (with_data) {
|
151
163
|
auto array = Napi::Array::New(env, chunk.size());
|
@@ -53,10 +53,10 @@ static DefaultMacro internal_macros[] = {
|
|
53
53
|
|
54
54
|
// various postgres system functions
|
55
55
|
{"pg_catalog", "pg_get_viewdef", {"oid", nullptr}, "(select sql from duckdb_views() v where v.view_oid=oid)"},
|
56
|
-
{"pg_catalog", "pg_get_constraintdef", {"constraint_oid", "pretty_bool", nullptr}, "(select constraint_text from duckdb_constraints() d_constraint where d_constraint.table_oid=constraint_oid
|
56
|
+
{"pg_catalog", "pg_get_constraintdef", {"constraint_oid", "pretty_bool", nullptr}, "(select constraint_text from duckdb_constraints() d_constraint where d_constraint.table_oid=constraint_oid//1000000 and d_constraint.constraint_index=constraint_oid%1000000)"},
|
57
57
|
{"pg_catalog", "pg_get_expr", {"pg_node_tree", "relation_oid", nullptr}, "pg_node_tree"},
|
58
58
|
{"pg_catalog", "format_pg_type", {"type_name", nullptr}, "case when logical_type='FLOAT' then 'real' when logical_type='DOUBLE' then 'double precision' when logical_type='DECIMAL' then 'numeric' when logical_type='ENUM' then lower(type_name) when logical_type='VARCHAR' then 'character varying' when logical_type='BLOB' then 'bytea' when logical_type='TIMESTAMP' then 'timestamp without time zone' when logical_type='TIME' then 'time without time zone' else lower(logical_type) end"},
|
59
|
-
{"pg_catalog", "format_type", {"type_oid", "typemod", nullptr}, "(select format_pg_type(type_name) from duckdb_types() t where t.type_oid=type_oid) || case when typemod>0 then concat('(', typemod
|
59
|
+
{"pg_catalog", "format_type", {"type_oid", "typemod", nullptr}, "(select format_pg_type(type_name) from duckdb_types() t where t.type_oid=type_oid) || case when typemod>0 then concat('(', typemod//1000, ',', typemod%1000, ')') else '' end"},
|
60
60
|
|
61
61
|
{"pg_catalog", "pg_has_role", {"user", "role", "privilege", nullptr}, "true"}, //boolean //does user have privilege for role
|
62
62
|
{"pg_catalog", "pg_has_role", {"role", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for role
|
@@ -366,11 +366,16 @@ int SBIterator::ComparisonValue(ExpressionType comparison) {
|
|
366
366
|
}
|
367
367
|
}
|
368
368
|
|
369
|
+
static idx_t GetBlockCountWithEmptyCheck(const GlobalSortState &gss) {
|
370
|
+
D_ASSERT(gss.sorted_blocks.size() > 0);
|
371
|
+
return gss.sorted_blocks[0]->radix_sorting_data.size();
|
372
|
+
}
|
373
|
+
|
369
374
|
SBIterator::SBIterator(GlobalSortState &gss, ExpressionType comparison, idx_t entry_idx_p)
|
370
|
-
: sort_layout(gss.sort_layout), block_count(gss
|
371
|
-
|
372
|
-
|
373
|
-
|
375
|
+
: sort_layout(gss.sort_layout), block_count(GetBlockCountWithEmptyCheck(gss)), block_capacity(gss.block_capacity),
|
376
|
+
cmp_size(sort_layout.comparison_size), entry_size(sort_layout.entry_size), all_constant(sort_layout.all_constant),
|
377
|
+
external(gss.external), cmp(ComparisonValue(comparison)), scan(gss.buffer_manager, gss), block_ptr(nullptr),
|
378
|
+
entry_ptr(nullptr) {
|
374
379
|
|
375
380
|
scan.sb = gss.sorted_blocks[0].get();
|
376
381
|
scan.block_idx = block_count;
|
@@ -402,6 +402,10 @@ IEJoinUnion::IEJoinUnion(ClientContext &context, const PhysicalIEJoin &op, Sorte
|
|
402
402
|
r_executor.AddExpression(*op.rhs_orders[1][0].expression);
|
403
403
|
AppendKey(t2, r_executor, *l1, -1, -1, b2);
|
404
404
|
|
405
|
+
if (l1->global_sort_state.sorted_blocks.empty()) {
|
406
|
+
return;
|
407
|
+
}
|
408
|
+
|
405
409
|
Sort(*l1);
|
406
410
|
|
407
411
|
op1 = make_uniq<SBIterator>(l1->global_sort_state, cmp1);
|
@@ -4,6 +4,7 @@
|
|
4
4
|
#include "duckdb/function/function_binder.hpp"
|
5
5
|
#include "duckdb/storage/buffer_manager.hpp"
|
6
6
|
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
|
7
|
+
#include "duckdb/planner/expression/bound_constant_expression.hpp"
|
7
8
|
#include "duckdb/parser/expression_map.hpp"
|
8
9
|
#include "duckdb/function/aggregate/distributive_functions.hpp"
|
9
10
|
|
@@ -12,7 +13,8 @@ namespace duckdb {
|
|
12
13
|
struct SortedAggregateBindData : public FunctionData {
|
13
14
|
SortedAggregateBindData(ClientContext &context, BoundAggregateExpression &expr)
|
14
15
|
: buffer_manager(BufferManager::GetBufferManager(context)), function(expr.function),
|
15
|
-
bind_info(std::move(expr.bind_info))
|
16
|
+
bind_info(std::move(expr.bind_info)), threshold(ClientConfig::GetConfig(context).ordered_aggregate_threshold),
|
17
|
+
external(ClientConfig::GetConfig(context).force_external) {
|
16
18
|
auto &children = expr.children;
|
17
19
|
arg_types.reserve(children.size());
|
18
20
|
for (const auto &child : children) {
|
@@ -32,7 +34,8 @@ struct SortedAggregateBindData : public FunctionData {
|
|
32
34
|
|
33
35
|
SortedAggregateBindData(const SortedAggregateBindData &other)
|
34
36
|
: buffer_manager(other.buffer_manager), function(other.function), arg_types(other.arg_types),
|
35
|
-
sort_types(other.sort_types), sorted_on_args(other.sorted_on_args)
|
37
|
+
sort_types(other.sort_types), sorted_on_args(other.sorted_on_args), threshold(other.threshold),
|
38
|
+
external(other.external) {
|
36
39
|
if (other.bind_info) {
|
37
40
|
bind_info = other.bind_info->Copy();
|
38
41
|
}
|
@@ -76,13 +79,17 @@ struct SortedAggregateBindData : public FunctionData {
|
|
76
79
|
vector<BoundOrderByNode> orders;
|
77
80
|
vector<LogicalType> sort_types;
|
78
81
|
bool sorted_on_args;
|
82
|
+
|
83
|
+
//! The sort flush threshold
|
84
|
+
const idx_t threshold;
|
85
|
+
const bool external;
|
79
86
|
};
|
80
87
|
|
81
88
|
struct SortedAggregateState {
|
82
89
|
//! Default buffer size, optimised for small group to avoid blowing out memory.
|
83
90
|
static const idx_t BUFFER_CAPACITY = 16;
|
84
91
|
|
85
|
-
SortedAggregateState() : nsel(0), offset(0) {
|
92
|
+
SortedAggregateState() : count(0), nsel(0), offset(0) {
|
86
93
|
}
|
87
94
|
|
88
95
|
static inline void InitializeBuffer(DataChunk &chunk, const vector<LogicalType> &types) {
|
@@ -98,7 +105,7 @@ struct SortedAggregateState {
|
|
98
105
|
chunk.Initialize(Allocator::DefaultAllocator(), types);
|
99
106
|
}
|
100
107
|
|
101
|
-
void Flush(SortedAggregateBindData &order_bind) {
|
108
|
+
void Flush(const SortedAggregateBindData &order_bind) {
|
102
109
|
if (ordering) {
|
103
110
|
return;
|
104
111
|
}
|
@@ -116,7 +123,9 @@ struct SortedAggregateState {
|
|
116
123
|
}
|
117
124
|
}
|
118
125
|
|
119
|
-
void Update(SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
|
126
|
+
void Update(const SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
|
127
|
+
count += sort_chunk.size();
|
128
|
+
|
120
129
|
// Lazy instantiation of the buffer chunks
|
121
130
|
InitializeBuffer(sort_buffer, order_bind.sort_types);
|
122
131
|
if (!order_bind.sorted_on_args) {
|
@@ -139,7 +148,9 @@ struct SortedAggregateState {
|
|
139
148
|
}
|
140
149
|
}
|
141
150
|
|
142
|
-
void UpdateSlice(SortedAggregateBindData &order_bind, DataChunk &sort_inputs, DataChunk &arg_inputs) {
|
151
|
+
void UpdateSlice(const SortedAggregateBindData &order_bind, DataChunk &sort_inputs, DataChunk &arg_inputs) {
|
152
|
+
count += nsel;
|
153
|
+
|
143
154
|
// Lazy instantiation of the buffer chunks
|
144
155
|
InitializeBuffer(sort_buffer, order_bind.sort_types);
|
145
156
|
if (!order_bind.sorted_on_args) {
|
@@ -178,25 +189,35 @@ struct SortedAggregateState {
|
|
178
189
|
Flush(order_bind);
|
179
190
|
ordering->Combine(*other.ordering);
|
180
191
|
arguments->Combine(*other.arguments);
|
192
|
+
count += other.count;
|
181
193
|
} else if (other.ordering) {
|
182
194
|
// Force CDC if the other has it
|
183
195
|
Flush(order_bind);
|
184
196
|
ordering->Combine(*other.ordering);
|
197
|
+
count += other.count;
|
185
198
|
} else if (other.sort_buffer.size()) {
|
186
199
|
Update(order_bind, other.sort_buffer, other.arg_buffer);
|
187
200
|
}
|
188
201
|
}
|
189
202
|
|
190
|
-
void
|
203
|
+
void PrefixSortBuffer(DataChunk &prefixed) {
|
204
|
+
for (column_t col_idx = 0; col_idx < sort_buffer.ColumnCount(); ++col_idx) {
|
205
|
+
prefixed.data[col_idx + 1].Reference(sort_buffer.data[col_idx]);
|
206
|
+
}
|
207
|
+
prefixed.SetCardinality(sort_buffer);
|
208
|
+
}
|
209
|
+
|
210
|
+
void Finalize(const SortedAggregateBindData &order_bind, DataChunk &prefixed, LocalSortState &local_sort) {
|
191
211
|
if (arguments) {
|
192
212
|
ColumnDataScanState sort_state;
|
193
213
|
ordering->InitializeScan(sort_state);
|
194
214
|
ColumnDataScanState arg_state;
|
195
215
|
arguments->InitializeScan(arg_state);
|
196
216
|
for (sort_buffer.Reset(); ordering->Scan(sort_state, sort_buffer); sort_buffer.Reset()) {
|
217
|
+
PrefixSortBuffer(prefixed);
|
197
218
|
arg_buffer.Reset();
|
198
219
|
arguments->Scan(arg_state, arg_buffer);
|
199
|
-
local_sort.SinkChunk(
|
220
|
+
local_sort.SinkChunk(prefixed, arg_buffer);
|
200
221
|
}
|
201
222
|
ordering->Reset();
|
202
223
|
arguments->Reset();
|
@@ -204,16 +225,20 @@ struct SortedAggregateState {
|
|
204
225
|
ColumnDataScanState sort_state;
|
205
226
|
ordering->InitializeScan(sort_state);
|
206
227
|
for (sort_buffer.Reset(); ordering->Scan(sort_state, sort_buffer); sort_buffer.Reset()) {
|
207
|
-
|
228
|
+
PrefixSortBuffer(prefixed);
|
229
|
+
local_sort.SinkChunk(prefixed, sort_buffer);
|
208
230
|
}
|
209
231
|
ordering->Reset();
|
210
232
|
} else if (order_bind.sorted_on_args) {
|
211
|
-
|
233
|
+
PrefixSortBuffer(prefixed);
|
234
|
+
local_sort.SinkChunk(prefixed, sort_buffer);
|
212
235
|
} else {
|
213
|
-
|
236
|
+
PrefixSortBuffer(prefixed);
|
237
|
+
local_sort.SinkChunk(prefixed, arg_buffer);
|
214
238
|
}
|
215
239
|
}
|
216
240
|
|
241
|
+
idx_t count;
|
217
242
|
unique_ptr<ColumnDataCollection> arguments;
|
218
243
|
unique_ptr<ColumnDataCollection> ordering;
|
219
244
|
|
@@ -237,19 +262,19 @@ struct SortedAggregateFunction {
|
|
237
262
|
state->~STATE();
|
238
263
|
}
|
239
264
|
|
240
|
-
static void ProjectInputs(Vector inputs[], SortedAggregateBindData
|
241
|
-
DataChunk &arg_chunk, DataChunk &sort_chunk) {
|
265
|
+
static void ProjectInputs(Vector inputs[], const SortedAggregateBindData &order_bind, idx_t input_count,
|
266
|
+
idx_t count, DataChunk &arg_chunk, DataChunk &sort_chunk) {
|
242
267
|
idx_t col = 0;
|
243
268
|
|
244
|
-
if (!order_bind
|
245
|
-
arg_chunk.InitializeEmpty(order_bind
|
269
|
+
if (!order_bind.sorted_on_args) {
|
270
|
+
arg_chunk.InitializeEmpty(order_bind.arg_types);
|
246
271
|
for (auto &dst : arg_chunk.data) {
|
247
272
|
dst.Reference(inputs[col++]);
|
248
273
|
}
|
249
274
|
arg_chunk.SetCardinality(count);
|
250
275
|
}
|
251
276
|
|
252
|
-
sort_chunk.InitializeEmpty(order_bind
|
277
|
+
sort_chunk.InitializeEmpty(order_bind.sort_types);
|
253
278
|
for (auto &dst : sort_chunk.data) {
|
254
279
|
dst.Reference(inputs[col++]);
|
255
280
|
}
|
@@ -258,13 +283,13 @@ struct SortedAggregateFunction {
|
|
258
283
|
|
259
284
|
static void SimpleUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, data_ptr_t state,
|
260
285
|
idx_t count) {
|
261
|
-
const auto order_bind =
|
286
|
+
const auto order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
|
262
287
|
DataChunk arg_chunk;
|
263
288
|
DataChunk sort_chunk;
|
264
289
|
ProjectInputs(inputs, order_bind, input_count, count, arg_chunk, sort_chunk);
|
265
290
|
|
266
291
|
const auto order_state = (SortedAggregateState *)state;
|
267
|
-
order_state->Update(
|
292
|
+
order_state->Update(order_bind, sort_chunk, arg_chunk);
|
268
293
|
}
|
269
294
|
|
270
295
|
static void ScatterUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, Vector &states,
|
@@ -274,7 +299,7 @@ struct SortedAggregateFunction {
|
|
274
299
|
}
|
275
300
|
|
276
301
|
// Append the arguments to the two sub-collections
|
277
|
-
const auto order_bind =
|
302
|
+
const auto &order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
|
278
303
|
DataChunk arg_inputs;
|
279
304
|
DataChunk sort_inputs;
|
280
305
|
ProjectInputs(inputs, order_bind, input_count, count, arg_inputs, sort_inputs);
|
@@ -315,7 +340,7 @@ struct SortedAggregateFunction {
|
|
315
340
|
continue;
|
316
341
|
}
|
317
342
|
|
318
|
-
order_state->UpdateSlice(
|
343
|
+
order_state->UpdateSlice(order_bind, sort_inputs, arg_inputs);
|
319
344
|
}
|
320
345
|
}
|
321
346
|
|
@@ -333,78 +358,162 @@ struct SortedAggregateFunction {
|
|
333
358
|
}
|
334
359
|
|
335
360
|
static void Finalize(Vector &states, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
|
336
|
-
idx_t offset) {
|
337
|
-
const auto order_bind =
|
338
|
-
auto &buffer_manager = order_bind
|
339
|
-
auto &orders = order_bind->orders;
|
361
|
+
const idx_t offset) {
|
362
|
+
const auto &order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
|
363
|
+
auto &buffer_manager = order_bind.buffer_manager;
|
340
364
|
RowLayout payload_layout;
|
341
|
-
payload_layout.Initialize(order_bind
|
365
|
+
payload_layout.Initialize(order_bind.arg_types);
|
342
366
|
DataChunk chunk;
|
343
|
-
chunk.Initialize(Allocator::DefaultAllocator(), order_bind
|
367
|
+
chunk.Initialize(Allocator::DefaultAllocator(), order_bind.arg_types);
|
368
|
+
DataChunk sliced;
|
369
|
+
sliced.Initialize(Allocator::DefaultAllocator(), order_bind.arg_types);
|
344
370
|
|
345
371
|
// Reusable inner state
|
346
|
-
vector<data_t> agg_state(order_bind
|
372
|
+
vector<data_t> agg_state(order_bind.function.state_size());
|
347
373
|
Vector agg_state_vec(Value::POINTER((idx_t)agg_state.data()));
|
348
374
|
|
349
375
|
// State variables
|
350
|
-
|
351
|
-
auto bind_info = order_bind->bind_info.get();
|
376
|
+
auto bind_info = order_bind.bind_info.get();
|
352
377
|
AggregateInputData aggr_bind_info(bind_info, Allocator::DefaultAllocator());
|
353
378
|
|
354
379
|
// Inner aggregate APIs
|
355
|
-
auto initialize = order_bind
|
356
|
-
auto destructor = order_bind
|
357
|
-
auto simple_update = order_bind
|
358
|
-
auto update = order_bind
|
359
|
-
auto finalize = order_bind
|
380
|
+
auto initialize = order_bind.function.initialize;
|
381
|
+
auto destructor = order_bind.function.destructor;
|
382
|
+
auto simple_update = order_bind.function.simple_update;
|
383
|
+
auto update = order_bind.function.update;
|
384
|
+
auto finalize = order_bind.function.finalize;
|
360
385
|
|
361
386
|
auto sdata = FlatVector::GetData<SortedAggregateState *>(states);
|
387
|
+
|
388
|
+
vector<idx_t> state_unprocessed(count, 0);
|
362
389
|
for (idx_t i = 0; i < count; ++i) {
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
390
|
+
state_unprocessed[i] = sdata[i]->count;
|
391
|
+
}
|
392
|
+
|
393
|
+
// Sort the input payloads on (state_idx ASC, orders)
|
394
|
+
vector<BoundOrderByNode> orders;
|
395
|
+
orders.emplace_back(BoundOrderByNode(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST,
|
396
|
+
make_uniq<BoundConstantExpression>(Value::USMALLINT(0))));
|
397
|
+
for (const auto &order : order_bind.orders) {
|
398
|
+
orders.emplace_back(order.Copy());
|
399
|
+
}
|
400
|
+
|
401
|
+
auto global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
|
402
|
+
global_sort->external = order_bind.external;
|
403
|
+
auto local_sort = make_uniq<LocalSortState>();
|
404
|
+
local_sort->Initialize(*global_sort, global_sort->buffer_manager);
|
405
|
+
|
406
|
+
DataChunk prefixed;
|
407
|
+
prefixed.Initialize(Allocator::DefaultAllocator(), global_sort->sort_layout.logical_types);
|
408
|
+
|
409
|
+
// Go through the states accumulating values to sort until we hit the sort threshold
|
410
|
+
idx_t unsorted_count = 0;
|
411
|
+
idx_t sorted = 0;
|
412
|
+
for (idx_t finalized = 0; finalized < count;) {
|
413
|
+
if (unsorted_count < order_bind.threshold) {
|
414
|
+
auto state = sdata[finalized];
|
415
|
+
prefixed.Reset();
|
416
|
+
prefixed.data[0].Reference(Value::USMALLINT(finalized));
|
417
|
+
state->Finalize(order_bind, prefixed, *local_sort);
|
418
|
+
unsorted_count += state_unprocessed[finalized];
|
419
|
+
|
420
|
+
// Go to the next aggregate unless this is the last one
|
421
|
+
if (++finalized < count) {
|
422
|
+
continue;
|
380
423
|
}
|
424
|
+
}
|
425
|
+
|
426
|
+
// If they were all empty (filtering) flush them
|
427
|
+
// (This can only happen on the last range)
|
428
|
+
if (!unsorted_count) {
|
429
|
+
break;
|
430
|
+
}
|
431
|
+
|
432
|
+
// Sort all the data
|
433
|
+
global_sort->AddLocalState(*local_sort);
|
434
|
+
global_sort->PrepareMergePhase();
|
435
|
+
while (global_sort->sorted_blocks.size() > 1) {
|
436
|
+
global_sort->InitializeMergeRound();
|
437
|
+
MergeSorter merge_sorter(*global_sort, global_sort->buffer_manager);
|
438
|
+
merge_sorter.PerformInMergeRound();
|
439
|
+
global_sort->CompleteMergeRound(false);
|
440
|
+
}
|
381
441
|
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
442
|
+
auto scanner = make_uniq<PayloadScanner>(*global_sort);
|
443
|
+
initialize(agg_state.data());
|
444
|
+
while (scanner->Remaining()) {
|
445
|
+
chunk.Reset();
|
446
|
+
scanner->Scan(chunk);
|
447
|
+
idx_t consumed = 0;
|
448
|
+
|
449
|
+
// Distribute the scanned chunk to the aggregates
|
450
|
+
while (consumed < chunk.size()) {
|
451
|
+
// Find the next aggregate that needs data
|
452
|
+
for (; !state_unprocessed[sorted]; ++sorted) {
|
453
|
+
// Finalize a single value at the next offset
|
454
|
+
agg_state_vec.SetVectorType(states.GetVectorType());
|
455
|
+
finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
|
456
|
+
if (destructor) {
|
457
|
+
destructor(agg_state_vec, aggr_bind_info, 1);
|
458
|
+
}
|
459
|
+
|
460
|
+
initialize(agg_state.data());
|
461
|
+
}
|
462
|
+
const auto input_count = MinValue(state_unprocessed[sorted], chunk.size() - consumed);
|
463
|
+
for (column_t col_idx = 0; col_idx < chunk.ColumnCount(); ++col_idx) {
|
464
|
+
sliced.data[col_idx].Slice(chunk.data[col_idx], consumed, consumed + input_count);
|
388
465
|
}
|
466
|
+
sliced.SetCardinality(input_count);
|
467
|
+
|
389
468
|
// These are all simple updates, so use it if available
|
390
469
|
if (simple_update) {
|
391
|
-
simple_update(
|
470
|
+
simple_update(sliced.data.data(), aggr_bind_info, 1, agg_state.data(), sliced.size());
|
392
471
|
} else {
|
393
472
|
// We are only updating a constant state
|
394
473
|
agg_state_vec.SetVectorType(VectorType::CONSTANT_VECTOR);
|
395
|
-
update(
|
474
|
+
update(sliced.data.data(), aggr_bind_info, 1, agg_state_vec, sliced.size());
|
396
475
|
}
|
476
|
+
|
477
|
+
consumed += input_count;
|
478
|
+
state_unprocessed[sorted] -= input_count;
|
397
479
|
}
|
398
480
|
}
|
399
481
|
|
482
|
+
// Finalize the last state for this sort
|
483
|
+
agg_state_vec.SetVectorType(states.GetVectorType());
|
484
|
+
finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
|
485
|
+
if (destructor) {
|
486
|
+
destructor(agg_state_vec, aggr_bind_info, 1);
|
487
|
+
}
|
488
|
+
++sorted;
|
489
|
+
|
490
|
+
// Stop if we are done
|
491
|
+
if (finalized >= count) {
|
492
|
+
break;
|
493
|
+
}
|
494
|
+
|
495
|
+
// Create a new sort
|
496
|
+
scanner.reset();
|
497
|
+
global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
|
498
|
+
global_sort->external = order_bind.external;
|
499
|
+
local_sort = make_uniq<LocalSortState>();
|
500
|
+
local_sort->Initialize(*global_sort, global_sort->buffer_manager);
|
501
|
+
unsorted_count = 0;
|
502
|
+
}
|
503
|
+
|
504
|
+
for (; sorted < count; ++sorted) {
|
505
|
+
initialize(agg_state.data());
|
506
|
+
|
400
507
|
// Finalize a single value at the next offset
|
401
508
|
agg_state_vec.SetVectorType(states.GetVectorType());
|
402
|
-
finalize(agg_state_vec, aggr_bind_info, result, 1,
|
509
|
+
finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
|
403
510
|
|
404
511
|
if (destructor) {
|
405
512
|
destructor(agg_state_vec, aggr_bind_info, 1);
|
406
513
|
}
|
407
514
|
}
|
515
|
+
|
516
|
+
result.Verify(count);
|
408
517
|
}
|
409
518
|
|
410
519
|
static void Serialize(FieldWriter &writer, const FunctionData *bind_data, const AggregateFunction &function) {
|
@@ -907,23 +907,29 @@ static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
|
|
907
907
|
}
|
908
908
|
|
909
909
|
void DivideFun::RegisterFunction(BuiltinFunctions &set) {
|
910
|
-
ScalarFunctionSet
|
910
|
+
ScalarFunctionSet fp_divide("/");
|
911
|
+
fp_divide.AddFunction(ScalarFunction({LogicalType::FLOAT, LogicalType::FLOAT}, LogicalType::FLOAT,
|
912
|
+
GetBinaryFunctionIgnoreZero<DivideOperator>(LogicalType::FLOAT)));
|
913
|
+
fp_divide.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE,
|
914
|
+
GetBinaryFunctionIgnoreZero<DivideOperator>(LogicalType::DOUBLE)));
|
915
|
+
fp_divide.AddFunction(
|
916
|
+
ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
|
917
|
+
BinaryScalarFunctionIgnoreZero<interval_t, int64_t, interval_t, DivideOperator>));
|
918
|
+
set.AddFunction(fp_divide);
|
919
|
+
|
920
|
+
ScalarFunctionSet full_divide("//");
|
911
921
|
for (auto &type : LogicalType::Numeric()) {
|
912
922
|
if (type.id() == LogicalTypeId::DECIMAL) {
|
913
923
|
continue;
|
914
924
|
} else {
|
915
|
-
|
925
|
+
full_divide.AddFunction(
|
916
926
|
ScalarFunction({type, type}, type, GetBinaryFunctionIgnoreZero<DivideOperator>(type)));
|
917
927
|
}
|
918
928
|
}
|
919
|
-
|
920
|
-
ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
|
921
|
-
BinaryScalarFunctionIgnoreZero<interval_t, int64_t, interval_t, DivideOperator>));
|
929
|
+
set.AddFunction(full_divide);
|
922
930
|
|
923
|
-
|
924
|
-
|
925
|
-
functions.name = "divide";
|
926
|
-
set.AddFunction(functions);
|
931
|
+
full_divide.name = "divide";
|
932
|
+
set.AddFunction(full_divide);
|
927
933
|
}
|
928
934
|
|
929
935
|
//===--------------------------------------------------------------------===//
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.7.2-
|
2
|
+
#define DUCKDB_VERSION "0.7.2-dev1901"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "5aa369b4b1"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -77,6 +77,8 @@ struct ClientConfig {
|
|
77
77
|
//! Maximum bits allowed for using a perfect hash table (i.e. the perfect HT can hold up to 2^perfect_ht_threshold
|
78
78
|
//! elements)
|
79
79
|
idx_t perfect_ht_threshold = 12;
|
80
|
+
//! The maximum number of rows to accumulate before sorting ordered aggregates.
|
81
|
+
idx_t ordered_aggregate_threshold = (idx_t(1) << 18);
|
80
82
|
|
81
83
|
//! Callback to create a progress bar display
|
82
84
|
progress_bar_display_create_func_t display_create_func = nullptr;
|
@@ -90,6 +92,9 @@ struct ClientConfig {
|
|
90
92
|
//! The maximum amount of pivot columns
|
91
93
|
idx_t pivot_limit = 100000;
|
92
94
|
|
95
|
+
//! Whether or not the "/" division operator defaults to integer division or floating point division
|
96
|
+
bool integer_division = false;
|
97
|
+
|
93
98
|
//! Generic options
|
94
99
|
case_insensitive_map_t<Value> set_variables;
|
95
100
|
|
@@ -65,6 +65,15 @@ struct DebugForceNoCrossProduct {
|
|
65
65
|
static Value GetSetting(ClientContext &context);
|
66
66
|
};
|
67
67
|
|
68
|
+
struct OrderedAggregateThreshold {
|
69
|
+
static constexpr const char *Name = "ordered_aggregate_threshold";
|
70
|
+
static constexpr const char *Description = "the number of rows to accumulate before sorting, used for tuning";
|
71
|
+
static constexpr const LogicalTypeId InputType = LogicalTypeId::UBIGINT;
|
72
|
+
static void SetLocal(ClientContext &context, const Value ¶meter);
|
73
|
+
static void ResetLocal(ClientContext &context);
|
74
|
+
static Value GetSetting(ClientContext &context);
|
75
|
+
};
|
76
|
+
|
68
77
|
struct DebugWindowMode {
|
69
78
|
static constexpr const char *Name = "debug_window_mode";
|
70
79
|
static constexpr const char *Description = "DEBUG SETTING: switch window mode to use";
|
@@ -270,6 +279,16 @@ struct HomeDirectorySetting {
|
|
270
279
|
static Value GetSetting(ClientContext &context);
|
271
280
|
};
|
272
281
|
|
282
|
+
struct IntegerDivisionSetting {
|
283
|
+
static constexpr const char *Name = "integer_division";
|
284
|
+
static constexpr const char *Description =
|
285
|
+
"Whether or not the / operator defaults to integer division, or to floating point division";
|
286
|
+
static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
|
287
|
+
static void SetLocal(ClientContext &context, const Value ¶meter);
|
288
|
+
static void ResetLocal(ClientContext &context);
|
289
|
+
static Value GetSetting(ClientContext &context);
|
290
|
+
};
|
291
|
+
|
273
292
|
struct LogQueryPathSetting {
|
274
293
|
static constexpr const char *Name = "log_query_path";
|
275
294
|
static constexpr const char *Description =
|
@@ -13,6 +13,7 @@
|
|
13
13
|
#include "duckdb/parser/query_node.hpp"
|
14
14
|
#include "duckdb/parser/column_list.hpp"
|
15
15
|
#include "duckdb/parser/simplified_token.hpp"
|
16
|
+
#include "duckdb/parser/parser_options.hpp"
|
16
17
|
|
17
18
|
namespace duckdb_libpgquery {
|
18
19
|
struct PGNode;
|
@@ -20,13 +21,6 @@ struct PGList;
|
|
20
21
|
} // namespace duckdb_libpgquery
|
21
22
|
|
22
23
|
namespace duckdb {
|
23
|
-
class ParserExtension;
|
24
|
-
|
25
|
-
struct ParserOptions {
|
26
|
-
bool preserve_identifier_case = true;
|
27
|
-
idx_t max_expression_depth = 1000;
|
28
|
-
const vector<ParserExtension> *extensions = nullptr;
|
29
|
-
};
|
30
24
|
|
31
25
|
//! The parser is responsible for parsing the query and converting it into a set
|
32
26
|
//! of parsed statements. The parsed statements can then be converted into a
|
@@ -0,0 +1,23 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/parser/parser_options.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/common.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
class ParserExtension;
|
15
|
+
|
16
|
+
struct ParserOptions {
|
17
|
+
bool preserve_identifier_case = true;
|
18
|
+
bool integer_division = false;
|
19
|
+
idx_t max_expression_depth = 1000;
|
20
|
+
const vector<ParserExtension> *extensions = nullptr;
|
21
|
+
};
|
22
|
+
|
23
|
+
} // namespace duckdb
|
@@ -33,6 +33,7 @@ struct CommonTableExpressionInfo;
|
|
33
33
|
struct GroupingExpressionMap;
|
34
34
|
class OnConflictInfo;
|
35
35
|
class UpdateSetInfo;
|
36
|
+
struct ParserOptions;
|
36
37
|
struct PivotColumn;
|
37
38
|
|
38
39
|
//! The transformer class is responsible for transforming the internal Postgres
|
@@ -47,7 +48,7 @@ class Transformer {
|
|
47
48
|
};
|
48
49
|
|
49
50
|
public:
|
50
|
-
explicit Transformer(
|
51
|
+
explicit Transformer(ParserOptions &options);
|
51
52
|
explicit Transformer(Transformer *parent);
|
52
53
|
~Transformer();
|
53
54
|
|
@@ -61,7 +62,8 @@ public:
|
|
61
62
|
|
62
63
|
private:
|
63
64
|
Transformer *parent;
|
64
|
-
|
65
|
+
//! Parser options
|
66
|
+
ParserOptions &options;
|
65
67
|
//! The current prepared statement parameter index
|
66
68
|
idx_t prepared_statement_parameter_index = 0;
|
67
69
|
//! Map from named parameter to parameter index;
|
@@ -292,7 +294,7 @@ private:
|
|
292
294
|
CommonTableExpressionInfo &info);
|
293
295
|
|
294
296
|
unique_ptr<ParsedExpression> TransformUnaryOperator(const string &op, unique_ptr<ParsedExpression> child);
|
295
|
-
unique_ptr<ParsedExpression> TransformBinaryOperator(
|
297
|
+
unique_ptr<ParsedExpression> TransformBinaryOperator(string op, unique_ptr<ParsedExpression> left,
|
296
298
|
unique_ptr<ParsedExpression> right);
|
297
299
|
//===--------------------------------------------------------------------===//
|
298
300
|
// TableRef transform
|