duckdb 0.7.2-dev1867.0 → 0.7.2-dev1898.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.7.2-dev1867.0",
+  "version": "0.7.2-dev1898.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {
@@ -53,10 +53,10 @@ static DefaultMacro internal_macros[] = {
 
     // various postgres system functions
     {"pg_catalog", "pg_get_viewdef", {"oid", nullptr}, "(select sql from duckdb_views() v where v.view_oid=oid)"},
-    {"pg_catalog", "pg_get_constraintdef", {"constraint_oid", "pretty_bool", nullptr}, "(select constraint_text from duckdb_constraints() d_constraint where d_constraint.table_oid=constraint_oid/1000000 and d_constraint.constraint_index=constraint_oid%1000000)"},
+    {"pg_catalog", "pg_get_constraintdef", {"constraint_oid", "pretty_bool", nullptr}, "(select constraint_text from duckdb_constraints() d_constraint where d_constraint.table_oid=constraint_oid//1000000 and d_constraint.constraint_index=constraint_oid%1000000)"},
     {"pg_catalog", "pg_get_expr", {"pg_node_tree", "relation_oid", nullptr}, "pg_node_tree"},
     {"pg_catalog", "format_pg_type", {"type_name", nullptr}, "case when logical_type='FLOAT' then 'real' when logical_type='DOUBLE' then 'double precision' when logical_type='DECIMAL' then 'numeric' when logical_type='ENUM' then lower(type_name) when logical_type='VARCHAR' then 'character varying' when logical_type='BLOB' then 'bytea' when logical_type='TIMESTAMP' then 'timestamp without time zone' when logical_type='TIME' then 'time without time zone' else lower(logical_type) end"},
-    {"pg_catalog", "format_type", {"type_oid", "typemod", nullptr}, "(select format_pg_type(type_name) from duckdb_types() t where t.type_oid=type_oid) || case when typemod>0 then concat('(', typemod/1000, ',', typemod%1000, ')') else '' end"},
+    {"pg_catalog", "format_type", {"type_oid", "typemod", nullptr}, "(select format_pg_type(type_name) from duckdb_types() t where t.type_oid=type_oid) || case when typemod>0 then concat('(', typemod//1000, ',', typemod%1000, ')') else '' end"},
 
     {"pg_catalog", "pg_has_role", {"user", "role", "privilege", nullptr}, "true"}, //boolean //does user have privilege for role
     {"pg_catalog", "pg_has_role", {"role", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for role
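
Note on the two macro changes above: pg_get_constraintdef decodes a constraint OID that packs the table OID and the constraint index as table_oid * 1000000 + constraint_index, and format_type unpacks typemod the same way, so both need truncating division. They switch from / to // presumably because this release re-registers / with floating-point semantics (see the DivideFun::RegisterFunction hunk further down). A minimal sketch of the same decode through this package's Node API; the packed value 42000003 is made up for illustration:

    import * as duckdb from "duckdb";

    const db = new duckdb.Database(":memory:");

    // `//` truncates to an integer; with this build, `/` on integer inputs is
    // expected to produce a DOUBLE instead (hence the macro change above).
    db.all(
      "SELECT 42000003 // 1000000 AS table_oid, 42000003 % 1000000 AS constraint_index",
      (err, rows) => {
        if (err) throw err;
        console.log(rows); // expected: [ { table_oid: 42, constraint_index: 3 } ]
      }
    );
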
@@ -366,11 +366,16 @@ int SBIterator::ComparisonValue(ExpressionType comparison) {
     }
 }
 
+static idx_t GetBlockCountWithEmptyCheck(const GlobalSortState &gss) {
+    D_ASSERT(gss.sorted_blocks.size() > 0);
+    return gss.sorted_blocks[0]->radix_sorting_data.size();
+}
+
 SBIterator::SBIterator(GlobalSortState &gss, ExpressionType comparison, idx_t entry_idx_p)
-    : sort_layout(gss.sort_layout), block_count(gss.sorted_blocks[0]->radix_sorting_data.size()),
-      block_capacity(gss.block_capacity), cmp_size(sort_layout.comparison_size), entry_size(sort_layout.entry_size),
-      all_constant(sort_layout.all_constant), external(gss.external), cmp(ComparisonValue(comparison)),
-      scan(gss.buffer_manager, gss), block_ptr(nullptr), entry_ptr(nullptr) {
+    : sort_layout(gss.sort_layout), block_count(GetBlockCountWithEmptyCheck(gss)), block_capacity(gss.block_capacity),
+      cmp_size(sort_layout.comparison_size), entry_size(sort_layout.entry_size), all_constant(sort_layout.all_constant),
+      external(gss.external), cmp(ComparisonValue(comparison)), scan(gss.buffer_manager, gss), block_ptr(nullptr),
+      entry_ptr(nullptr) {
 
     scan.sb = gss.sorted_blocks[0].get();
     scan.block_idx = block_count;
@@ -402,6 +402,10 @@ IEJoinUnion::IEJoinUnion(ClientContext &context, const PhysicalIEJoin &op, Sorte
     r_executor.AddExpression(*op.rhs_orders[1][0].expression);
     AppendKey(t2, r_executor, *l1, -1, -1, b2);
 
+    if (l1->global_sort_state.sorted_blocks.empty()) {
+        return;
+    }
+
     Sort(*l1);
 
     op1 = make_uniq<SBIterator>(l1->global_sort_state, cmp1);
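
The two guards above cover the case where the left input of the IEJoin has no sorted blocks at all, for example when every row is filtered out before the join, so iterator construction never dereferences sorted_blocks[0]. A rough way to exercise that path from this package, assuming (my reading of the class names, not something the diff states) that DuckDB plans a join with two inequality conditions as an IEJoin; the table and column names are made up:

    import * as duckdb from "duckdb";

    const db = new duckdb.Database(":memory:");

    // An empty table on both sides of a two-inequality join: with the early
    // return above, the empty sort state is handled before any block access.
    db.run("CREATE TABLE events(id INTEGER, ts INTEGER, te INTEGER)", (err) => {
      if (err) throw err;
      db.all(
        `SELECT a.id, b.id AS other_id
           FROM events a JOIN events b
             ON a.ts < b.te AND a.te > b.ts`,
        (err2, rows) => {
          if (err2) throw err2;
          console.log(rows.length); // expected: 0
        }
      );
    });
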
@@ -4,6 +4,7 @@
 #include "duckdb/function/function_binder.hpp"
 #include "duckdb/storage/buffer_manager.hpp"
 #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
+#include "duckdb/planner/expression/bound_constant_expression.hpp"
 #include "duckdb/parser/expression_map.hpp"
 #include "duckdb/function/aggregate/distributive_functions.hpp"
 
@@ -12,7 +13,8 @@ namespace duckdb {
 struct SortedAggregateBindData : public FunctionData {
     SortedAggregateBindData(ClientContext &context, BoundAggregateExpression &expr)
         : buffer_manager(BufferManager::GetBufferManager(context)), function(expr.function),
-          bind_info(std::move(expr.bind_info)) {
+          bind_info(std::move(expr.bind_info)), threshold(ClientConfig::GetConfig(context).ordered_aggregate_threshold),
+          external(ClientConfig::GetConfig(context).force_external) {
         auto &children = expr.children;
         arg_types.reserve(children.size());
         for (const auto &child : children) {
@@ -32,7 +34,8 @@ struct SortedAggregateBindData : public FunctionData {
 
     SortedAggregateBindData(const SortedAggregateBindData &other)
         : buffer_manager(other.buffer_manager), function(other.function), arg_types(other.arg_types),
-          sort_types(other.sort_types), sorted_on_args(other.sorted_on_args) {
+          sort_types(other.sort_types), sorted_on_args(other.sorted_on_args), threshold(other.threshold),
+          external(other.external) {
         if (other.bind_info) {
             bind_info = other.bind_info->Copy();
         }
@@ -76,13 +79,17 @@ struct SortedAggregateBindData : public FunctionData {
     vector<BoundOrderByNode> orders;
     vector<LogicalType> sort_types;
     bool sorted_on_args;
+
+    //! The sort flush threshold
+    const idx_t threshold;
+    const bool external;
 };
 
 struct SortedAggregateState {
     //! Default buffer size, optimised for small group to avoid blowing out memory.
     static const idx_t BUFFER_CAPACITY = 16;
 
-    SortedAggregateState() : nsel(0), offset(0) {
+    SortedAggregateState() : count(0), nsel(0), offset(0) {
     }
 
     static inline void InitializeBuffer(DataChunk &chunk, const vector<LogicalType> &types) {
@@ -98,7 +105,7 @@ struct SortedAggregateState {
         chunk.Initialize(Allocator::DefaultAllocator(), types);
     }
 
-    void Flush(SortedAggregateBindData &order_bind) {
+    void Flush(const SortedAggregateBindData &order_bind) {
         if (ordering) {
             return;
         }
@@ -116,7 +123,9 @@ struct SortedAggregateState {
         }
     }
 
-    void Update(SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
+    void Update(const SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
+        count += sort_chunk.size();
+
         // Lazy instantiation of the buffer chunks
         InitializeBuffer(sort_buffer, order_bind.sort_types);
         if (!order_bind.sorted_on_args) {
@@ -139,7 +148,9 @@ struct SortedAggregateState {
         }
     }
 
-    void UpdateSlice(SortedAggregateBindData &order_bind, DataChunk &sort_inputs, DataChunk &arg_inputs) {
+    void UpdateSlice(const SortedAggregateBindData &order_bind, DataChunk &sort_inputs, DataChunk &arg_inputs) {
+        count += nsel;
+
         // Lazy instantiation of the buffer chunks
         InitializeBuffer(sort_buffer, order_bind.sort_types);
         if (!order_bind.sorted_on_args) {
@@ -178,25 +189,35 @@ struct SortedAggregateState {
             Flush(order_bind);
             ordering->Combine(*other.ordering);
             arguments->Combine(*other.arguments);
+            count += other.count;
         } else if (other.ordering) {
             // Force CDC if the other has it
             Flush(order_bind);
             ordering->Combine(*other.ordering);
+            count += other.count;
         } else if (other.sort_buffer.size()) {
             Update(order_bind, other.sort_buffer, other.arg_buffer);
         }
     }
 
-    void Finalize(SortedAggregateBindData &order_bind, LocalSortState &local_sort) {
+    void PrefixSortBuffer(DataChunk &prefixed) {
+        for (column_t col_idx = 0; col_idx < sort_buffer.ColumnCount(); ++col_idx) {
+            prefixed.data[col_idx + 1].Reference(sort_buffer.data[col_idx]);
+        }
+        prefixed.SetCardinality(sort_buffer);
+    }
+
+    void Finalize(const SortedAggregateBindData &order_bind, DataChunk &prefixed, LocalSortState &local_sort) {
         if (arguments) {
             ColumnDataScanState sort_state;
             ordering->InitializeScan(sort_state);
             ColumnDataScanState arg_state;
             arguments->InitializeScan(arg_state);
             for (sort_buffer.Reset(); ordering->Scan(sort_state, sort_buffer); sort_buffer.Reset()) {
+                PrefixSortBuffer(prefixed);
                 arg_buffer.Reset();
                 arguments->Scan(arg_state, arg_buffer);
-                local_sort.SinkChunk(sort_buffer, arg_buffer);
+                local_sort.SinkChunk(prefixed, arg_buffer);
             }
             ordering->Reset();
             arguments->Reset();
@@ -204,16 +225,20 @@ struct SortedAggregateState {
             ColumnDataScanState sort_state;
             ordering->InitializeScan(sort_state);
             for (sort_buffer.Reset(); ordering->Scan(sort_state, sort_buffer); sort_buffer.Reset()) {
-                local_sort.SinkChunk(sort_buffer, sort_buffer);
+                PrefixSortBuffer(prefixed);
+                local_sort.SinkChunk(prefixed, sort_buffer);
             }
             ordering->Reset();
         } else if (order_bind.sorted_on_args) {
-            local_sort.SinkChunk(sort_buffer, sort_buffer);
+            PrefixSortBuffer(prefixed);
+            local_sort.SinkChunk(prefixed, sort_buffer);
         } else {
-            local_sort.SinkChunk(sort_buffer, arg_buffer);
+            PrefixSortBuffer(prefixed);
+            local_sort.SinkChunk(prefixed, arg_buffer);
         }
     }
 
+    idx_t count;
     unique_ptr<ColumnDataCollection> arguments;
     unique_ptr<ColumnDataCollection> ordering;
 
@@ -237,19 +262,19 @@ struct SortedAggregateFunction {
         state->~STATE();
     }
 
-    static void ProjectInputs(Vector inputs[], SortedAggregateBindData *order_bind, idx_t input_count, idx_t count,
-                              DataChunk &arg_chunk, DataChunk &sort_chunk) {
+    static void ProjectInputs(Vector inputs[], const SortedAggregateBindData &order_bind, idx_t input_count,
+                              idx_t count, DataChunk &arg_chunk, DataChunk &sort_chunk) {
         idx_t col = 0;
 
-        if (!order_bind->sorted_on_args) {
-            arg_chunk.InitializeEmpty(order_bind->arg_types);
+        if (!order_bind.sorted_on_args) {
+            arg_chunk.InitializeEmpty(order_bind.arg_types);
             for (auto &dst : arg_chunk.data) {
                 dst.Reference(inputs[col++]);
             }
             arg_chunk.SetCardinality(count);
         }
 
-        sort_chunk.InitializeEmpty(order_bind->sort_types);
+        sort_chunk.InitializeEmpty(order_bind.sort_types);
         for (auto &dst : sort_chunk.data) {
             dst.Reference(inputs[col++]);
         }
@@ -258,13 +283,13 @@ struct SortedAggregateFunction {
 
     static void SimpleUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, data_ptr_t state,
                              idx_t count) {
-        const auto order_bind = (SortedAggregateBindData *)aggr_input_data.bind_data;
+        const auto order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
         DataChunk arg_chunk;
         DataChunk sort_chunk;
         ProjectInputs(inputs, order_bind, input_count, count, arg_chunk, sort_chunk);
 
         const auto order_state = (SortedAggregateState *)state;
-        order_state->Update(*order_bind, sort_chunk, arg_chunk);
+        order_state->Update(order_bind, sort_chunk, arg_chunk);
     }
 
     static void ScatterUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, Vector &states,
@@ -274,7 +299,7 @@ struct SortedAggregateFunction {
         }
 
         // Append the arguments to the two sub-collections
-        const auto order_bind = (SortedAggregateBindData *)aggr_input_data.bind_data;
+        const auto &order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
         DataChunk arg_inputs;
         DataChunk sort_inputs;
         ProjectInputs(inputs, order_bind, input_count, count, arg_inputs, sort_inputs);
@@ -315,7 +340,7 @@ struct SortedAggregateFunction {
                 continue;
             }
 
-            order_state->UpdateSlice(*order_bind, sort_inputs, arg_inputs);
+            order_state->UpdateSlice(order_bind, sort_inputs, arg_inputs);
         }
     }
 
@@ -333,78 +358,162 @@ struct SortedAggregateFunction {
     }
 
     static void Finalize(Vector &states, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
-                         idx_t offset) {
-        const auto order_bind = (SortedAggregateBindData *)aggr_input_data.bind_data;
-        auto &buffer_manager = order_bind->buffer_manager;
-        auto &orders = order_bind->orders;
+                         const idx_t offset) {
+        const auto &order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
+        auto &buffer_manager = order_bind.buffer_manager;
         RowLayout payload_layout;
-        payload_layout.Initialize(order_bind->arg_types);
+        payload_layout.Initialize(order_bind.arg_types);
         DataChunk chunk;
-        chunk.Initialize(Allocator::DefaultAllocator(), order_bind->arg_types);
+        chunk.Initialize(Allocator::DefaultAllocator(), order_bind.arg_types);
+        DataChunk sliced;
+        sliced.Initialize(Allocator::DefaultAllocator(), order_bind.arg_types);
 
         // Reusable inner state
-        vector<data_t> agg_state(order_bind->function.state_size());
+        vector<data_t> agg_state(order_bind.function.state_size());
         Vector agg_state_vec(Value::POINTER((idx_t)agg_state.data()));
 
         // State variables
-        const auto input_count = order_bind->function.arguments.size();
-        auto bind_info = order_bind->bind_info.get();
+        auto bind_info = order_bind.bind_info.get();
        AggregateInputData aggr_bind_info(bind_info, Allocator::DefaultAllocator());
 
         // Inner aggregate APIs
-        auto initialize = order_bind->function.initialize;
-        auto destructor = order_bind->function.destructor;
-        auto simple_update = order_bind->function.simple_update;
-        auto update = order_bind->function.update;
-        auto finalize = order_bind->function.finalize;
+        auto initialize = order_bind.function.initialize;
+        auto destructor = order_bind.function.destructor;
+        auto simple_update = order_bind.function.simple_update;
+        auto update = order_bind.function.update;
+        auto finalize = order_bind.function.finalize;
 
         auto sdata = FlatVector::GetData<SortedAggregateState *>(states);
+
+        vector<idx_t> state_unprocessed(count, 0);
         for (idx_t i = 0; i < count; ++i) {
-            initialize(agg_state.data());
-            auto state = sdata[i];
-
-            // Apply the sort before delegating the chunks
-            auto global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
-            LocalSortState local_sort;
-            local_sort.Initialize(*global_sort, global_sort->buffer_manager);
-            state->Finalize(*order_bind, local_sort);
-            global_sort->AddLocalState(local_sort);
-
-            if (!global_sort->sorted_blocks.empty()) {
-                global_sort->PrepareMergePhase();
-                while (global_sort->sorted_blocks.size() > 1) {
-                    global_sort->InitializeMergeRound();
-                    MergeSorter merge_sorter(*global_sort, global_sort->buffer_manager);
-                    merge_sorter.PerformInMergeRound();
-                    global_sort->CompleteMergeRound(false);
+            state_unprocessed[i] = sdata[i]->count;
+        }
+
+        // Sort the input payloads on (state_idx ASC, orders)
+        vector<BoundOrderByNode> orders;
+        orders.emplace_back(BoundOrderByNode(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST,
+            make_uniq<BoundConstantExpression>(Value::USMALLINT(0))));
+        for (const auto &order : order_bind.orders) {
+            orders.emplace_back(order.Copy());
+        }
+
+        auto global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
+        global_sort->external = order_bind.external;
+        auto local_sort = make_uniq<LocalSortState>();
+        local_sort->Initialize(*global_sort, global_sort->buffer_manager);
+
+        DataChunk prefixed;
+        prefixed.Initialize(Allocator::DefaultAllocator(), global_sort->sort_layout.logical_types);
+
+        // Go through the states accumulating values to sort until we hit the sort threshold
+        idx_t unsorted_count = 0;
+        idx_t sorted = 0;
+        for (idx_t finalized = 0; finalized < count;) {
+            if (unsorted_count < order_bind.threshold) {
+                auto state = sdata[finalized];
+                prefixed.Reset();
+                prefixed.data[0].Reference(Value::USMALLINT(finalized));
+                state->Finalize(order_bind, prefixed, *local_sort);
+                unsorted_count += state_unprocessed[finalized];
+
+                // Go to the next aggregate unless this is the last one
+                if (++finalized < count) {
+                    continue;
                 }
+            }
+
+            // If they were all empty (filtering) flush them
+            // (This can only happen on the last range)
+            if (!unsorted_count) {
+                break;
+            }
+
+            // Sort all the data
+            global_sort->AddLocalState(*local_sort);
+            global_sort->PrepareMergePhase();
+            while (global_sort->sorted_blocks.size() > 1) {
+                global_sort->InitializeMergeRound();
+                MergeSorter merge_sorter(*global_sort, global_sort->buffer_manager);
+                merge_sorter.PerformInMergeRound();
+                global_sort->CompleteMergeRound(false);
+            }
 
-                PayloadScanner scanner(*global_sort);
-                for (;;) {
-                    chunk.Reset();
-                    scanner.Scan(chunk);
-                    if (chunk.size() == 0) {
-                        break;
+            auto scanner = make_uniq<PayloadScanner>(*global_sort);
+            initialize(agg_state.data());
+            while (scanner->Remaining()) {
+                chunk.Reset();
+                scanner->Scan(chunk);
+                idx_t consumed = 0;
+
+                // Distribute the scanned chunk to the aggregates
+                while (consumed < chunk.size()) {
+                    // Find the next aggregate that needs data
+                    for (; !state_unprocessed[sorted]; ++sorted) {
+                        // Finalize a single value at the next offset
+                        agg_state_vec.SetVectorType(states.GetVectorType());
+                        finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
+                        if (destructor) {
+                            destructor(agg_state_vec, aggr_bind_info, 1);
+                        }
+
+                        initialize(agg_state.data());
+                    }
+                    const auto input_count = MinValue(state_unprocessed[sorted], chunk.size() - consumed);
+                    for (column_t col_idx = 0; col_idx < chunk.ColumnCount(); ++col_idx) {
+                        sliced.data[col_idx].Slice(chunk.data[col_idx], consumed, consumed + input_count);
                    }
+                    sliced.SetCardinality(input_count);
+
                    // These are all simple updates, so use it if available
                    if (simple_update) {
-                        simple_update(chunk.data.data(), aggr_bind_info, input_count, agg_state.data(), chunk.size());
+                        simple_update(sliced.data.data(), aggr_bind_info, 1, agg_state.data(), sliced.size());
                    } else {
                        // We are only updating a constant state
                        agg_state_vec.SetVectorType(VectorType::CONSTANT_VECTOR);
-                        update(chunk.data.data(), aggr_bind_info, input_count, agg_state_vec, chunk.size());
+                        update(sliced.data.data(), aggr_bind_info, 1, agg_state_vec, sliced.size());
                    }
+
+                    consumed += input_count;
+                    state_unprocessed[sorted] -= input_count;
                }
            }
 
+            // Finalize the last state for this sort
+            agg_state_vec.SetVectorType(states.GetVectorType());
+            finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
+            if (destructor) {
+                destructor(agg_state_vec, aggr_bind_info, 1);
+            }
+            ++sorted;
+
+            // Stop if we are done
+            if (finalized >= count) {
+                break;
+            }
+
+            // Create a new sort
+            scanner.reset();
+            global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
+            global_sort->external = order_bind.external;
+            local_sort = make_uniq<LocalSortState>();
+            local_sort->Initialize(*global_sort, global_sort->buffer_manager);
+            unsorted_count = 0;
+        }
+
+        for (; sorted < count; ++sorted) {
+            initialize(agg_state.data());
+
            // Finalize a single value at the next offset
            agg_state_vec.SetVectorType(states.GetVectorType());
-            finalize(agg_state_vec, aggr_bind_info, result, 1, i + offset);
+            finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
 
            if (destructor) {
                destructor(agg_state_vec, aggr_bind_info, 1);
            }
        }
+
+        result.Verify(count);
    }
 
    static void Serialize(FieldWriter &writer, const FunctionData *bind_data, const AggregateFunction &function) {
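
The Finalize rework above stops sorting each group state separately: it prefixes every sorted row with the group index (PrefixSortBuffer plus the extra USMALLINT order key), accumulates rows from many states until the ordered_aggregate_threshold is reached, runs one shared sort, and then slices the scanned payload back out per state. From the SQL side, this machinery is driven by aggregates that carry an ORDER BY clause; a small sketch through this package's Node API (the table data is invented, the expected output is my reading of the semantics):

    import * as duckdb from "duckdb";

    const db = new duckdb.Database(":memory:");

    // An ordered aggregate: the ORDER BY inside string_agg is what routes the
    // query through the sorted-aggregate path changed above.
    db.all(
      `SELECT g, string_agg(v, ',' ORDER BY v DESC) AS vs
         FROM (VALUES (1, 'a'), (1, 'c'), (1, 'b'), (2, 'z'), (2, 'y')) t(g, v)
        GROUP BY g
        ORDER BY g`,
      (err, rows) => {
        if (err) throw err;
        console.log(rows); // expected: [ { g: 1, vs: 'c,b,a' }, { g: 2, vs: 'z,y' } ]
      }
    );
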
@@ -907,23 +907,29 @@ static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
 }
 
 void DivideFun::RegisterFunction(BuiltinFunctions &set) {
-    ScalarFunctionSet functions("/");
+    ScalarFunctionSet fp_divide("/");
+    fp_divide.AddFunction(ScalarFunction({LogicalType::FLOAT, LogicalType::FLOAT}, LogicalType::FLOAT,
+        GetBinaryFunctionIgnoreZero<DivideOperator>(LogicalType::FLOAT)));
+    fp_divide.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE,
+        GetBinaryFunctionIgnoreZero<DivideOperator>(LogicalType::DOUBLE)));
+    fp_divide.AddFunction(
+        ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
+        BinaryScalarFunctionIgnoreZero<interval_t, int64_t, interval_t, DivideOperator>));
+    set.AddFunction(fp_divide);
+
+    ScalarFunctionSet full_divide("//");
     for (auto &type : LogicalType::Numeric()) {
         if (type.id() == LogicalTypeId::DECIMAL) {
             continue;
         } else {
-            functions.AddFunction(
+            full_divide.AddFunction(
                 ScalarFunction({type, type}, type, GetBinaryFunctionIgnoreZero<DivideOperator>(type)));
         }
     }
-    functions.AddFunction(
-        ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
-        BinaryScalarFunctionIgnoreZero<interval_t, int64_t, interval_t, DivideOperator>));
+    set.AddFunction(full_divide);
 
-    set.AddFunction(functions);
-
-    functions.name = "divide";
-    set.AddFunction(functions);
+    full_divide.name = "divide";
+    set.AddFunction(full_divide);
 }
 
 //===--------------------------------------------------------------------===//
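
With the registration above, "/" only carries FLOAT, DOUBLE and INTERVAL/BIGINT overloads, while the non-DECIMAL numeric overloads move to the new "//" set, which is also exposed under the alias "divide". My expectation from that, not something the diff spells out, is that integer operands to "/" get promoted and yield a fractional result, while "//" keeps the old truncating behaviour. A quick check through the Node API:

    import * as duckdb from "duckdb";

    const db = new duckdb.Database(":memory:");

    db.all("SELECT 7 / 2 AS float_div, 7 // 2 AS int_div", (err, rows) => {
      if (err) throw err;
      // Expected with this build: float_div = 3.5 (DOUBLE), int_div = 3
      console.log(rows);
    });
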
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.7.2-dev1867"
+#define DUCKDB_VERSION "0.7.2-dev1898"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "084890df27"
+#define DUCKDB_SOURCE_ID "eaf507009f"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
@@ -77,6 +77,8 @@ struct ClientConfig {
     //! Maximum bits allowed for using a perfect hash table (i.e. the perfect HT can hold up to 2^perfect_ht_threshold
     //! elements)
     idx_t perfect_ht_threshold = 12;
+    //! The maximum number of rows to accumulate before sorting ordered aggregates.
+    idx_t ordered_aggregate_threshold = (idx_t(1) << 18);
 
     //! Callback to create a progress bar display
     progress_bar_display_create_func_t display_create_func = nullptr;
@@ -90,6 +92,9 @@ struct ClientConfig {
     //! The maximum amount of pivot columns
     idx_t pivot_limit = 100000;
 
+    //! Whether or not the "/" division operator defaults to integer division or floating point division
+    bool integer_division = false;
+
     //! Generic options
     case_insensitive_map_t<Value> set_variables;
 
@@ -65,6 +65,15 @@ struct DebugForceNoCrossProduct {
     static Value GetSetting(ClientContext &context);
 };
 
+struct OrderedAggregateThreshold {
+    static constexpr const char *Name = "ordered_aggregate_threshold";
+    static constexpr const char *Description = "the number of rows to accumulate before sorting, used for tuning";
+    static constexpr const LogicalTypeId InputType = LogicalTypeId::UBIGINT;
+    static void SetLocal(ClientContext &context, const Value &parameter);
+    static void ResetLocal(ClientContext &context);
+    static Value GetSetting(ClientContext &context);
+};
+
 struct DebugWindowMode {
     static constexpr const char *Name = "debug_window_mode";
     static constexpr const char *Description = "DEBUG SETTING: switch window mode to use";
@@ -270,6 +279,16 @@ struct HomeDirectorySetting {
     static Value GetSetting(ClientContext &context);
 };
 
+struct IntegerDivisionSetting {
+    static constexpr const char *Name = "integer_division";
+    static constexpr const char *Description =
+        "Whether or not the / operator defaults to integer division, or to floating point division";
+    static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
+    static void SetLocal(ClientContext &context, const Value &parameter);
+    static void ResetLocal(ClientContext &context);
+    static Value GetSetting(ClientContext &context);
+};
+
 struct LogQueryPathSetting {
     static constexpr const char *Name = "log_query_path";
     static constexpr const char *Description =
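
Both new settings declare only SetLocal/ResetLocal, so they appear to be session-scoped: ordered_aggregate_threshold (UBIGINT) tunes how many rows the sorted-aggregate Finalize accumulates before sorting, and integer_division (BOOLEAN) makes the parser treat "/" as integer division again. A sketch of setting them through this package; the threshold value 16384 is arbitrary and the 7 / 2 result is my expectation of the flag's effect:

    import * as duckdb from "duckdb";

    const db = new duckdb.Database(":memory:");

    db.run("SET ordered_aggregate_threshold = 16384", (err) => {
      if (err) throw err;
      db.run("SET integer_division = true", (err2) => {
        if (err2) throw err2;
        db.all("SELECT 7 / 2 AS q", (err3, rows) => {
          if (err3) throw err3;
          console.log(rows); // expected: [ { q: 3 } ] once '/' parses as integer division
        });
      });
    });
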
@@ -13,6 +13,7 @@
 #include "duckdb/parser/query_node.hpp"
 #include "duckdb/parser/column_list.hpp"
 #include "duckdb/parser/simplified_token.hpp"
+#include "duckdb/parser/parser_options.hpp"
 
 namespace duckdb_libpgquery {
 struct PGNode;
@@ -20,13 +21,6 @@ struct PGList;
 } // namespace duckdb_libpgquery
 
 namespace duckdb {
-class ParserExtension;
-
-struct ParserOptions {
-    bool preserve_identifier_case = true;
-    idx_t max_expression_depth = 1000;
-    const vector<ParserExtension> *extensions = nullptr;
-};
 
 //! The parser is responsible for parsing the query and converting it into a set
 //! of parsed statements. The parsed statements can then be converted into a
@@ -0,0 +1,23 @@
+//===----------------------------------------------------------------------===//
+// DuckDB
+//
+// duckdb/parser/parser_options.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb/common/common.hpp"
+
+namespace duckdb {
+class ParserExtension;
+
+struct ParserOptions {
+    bool preserve_identifier_case = true;
+    bool integer_division = false;
+    idx_t max_expression_depth = 1000;
+    const vector<ParserExtension> *extensions = nullptr;
+};
+
+} // namespace duckdb
@@ -33,6 +33,7 @@ struct CommonTableExpressionInfo;
 struct GroupingExpressionMap;
 class OnConflictInfo;
 class UpdateSetInfo;
+struct ParserOptions;
 struct PivotColumn;
 
 //! The transformer class is responsible for transforming the internal Postgres
@@ -47,7 +48,7 @@ class Transformer {
     };
 
 public:
-    explicit Transformer(idx_t max_expression_depth_p);
+    explicit Transformer(ParserOptions &options);
     explicit Transformer(Transformer *parent);
     ~Transformer();
 
@@ -61,7 +62,8 @@ public:
 
 private:
     Transformer *parent;
-    idx_t max_expression_depth;
+    //! Parser options
+    ParserOptions &options;
     //! The current prepared statement parameter index
     idx_t prepared_statement_parameter_index = 0;
     //! Map from named parameter to parameter index;
@@ -292,7 +294,7 @@ private:
                                          CommonTableExpressionInfo &info);
 
     unique_ptr<ParsedExpression> TransformUnaryOperator(const string &op, unique_ptr<ParsedExpression> child);
-    unique_ptr<ParsedExpression> TransformBinaryOperator(const string &op, unique_ptr<ParsedExpression> left,
+    unique_ptr<ParsedExpression> TransformBinaryOperator(string op, unique_ptr<ParsedExpression> left,
                                                          unique_ptr<ParsedExpression> right);
     //===--------------------------------------------------------------------===//
     // TableRef transform
@@ -1144,9 +1144,11 @@ bool ClientContext::TryGetCurrentSetting(const std::string &key, Value &result)
 }
 
 ParserOptions ClientContext::GetParserOptions() const {
+    auto &client_config = ClientConfig::GetConfig(*this);
     ParserOptions options;
-    options.preserve_identifier_case = ClientConfig::GetConfig(*this).preserve_identifier_case;
-    options.max_expression_depth = ClientConfig::GetConfig(*this).max_expression_depth;
+    options.preserve_identifier_case = client_config.preserve_identifier_case;
+    options.integer_division = client_config.integer_division;
+    options.max_expression_depth = client_config.max_expression_depth;
     options.extensions = &DBConfig::GetConfig(*this).parser_extensions;
     return options;
 }