duckdb 0.7.2-dev1188.0 → 0.7.2-dev1238.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev1188.0",
5
+ "version": "0.7.2-dev1238.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -1401,7 +1401,7 @@ unique_ptr<ColumnReader> ColumnReader::CreateReader(ParquetReader &reader, const
1401
1401
  case LogicalTypeId::TIME_TZ:
1402
1402
  if (schema_p.__isset.logicalType && schema_p.logicalType.__isset.TIME) {
1403
1403
  if (schema_p.logicalType.TIME.unit.__isset.MILLIS) {
1404
- return make_unique<CallbackColumnReader<int64_t, dtime_t, ParquetIntToTimeMs>>(
1404
+ return make_unique<CallbackColumnReader<int32_t, dtime_t, ParquetIntToTimeMs>>(
1405
1405
  reader, type_p, schema_p, file_idx_p, max_define, max_repeat);
1406
1406
  } else if (schema_p.logicalType.TIME.unit.__isset.MICROS) {
1407
1407
  return make_unique<CallbackColumnReader<int64_t, dtime_t, ParquetIntToTime>>(
@@ -1416,7 +1416,7 @@ unique_ptr<ColumnReader> ColumnReader::CreateReader(ParquetReader &reader, const
1416
1416
  return make_unique<CallbackColumnReader<int64_t, dtime_t, ParquetIntToTime>>(
1417
1417
  reader, type_p, schema_p, file_idx_p, max_define, max_repeat);
1418
1418
  case ConvertedType::TIME_MILLIS:
1419
- return make_unique<CallbackColumnReader<int64_t, dtime_t, ParquetIntToTimeMs>>(
1419
+ return make_unique<CallbackColumnReader<int32_t, dtime_t, ParquetIntToTimeMs>>(
1420
1420
  reader, type_p, schema_p, file_idx_p, max_define, max_repeat);
1421
1421
  default:
1422
1422
  break;
@@ -22,7 +22,7 @@ timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts);
22
22
  timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts);
23
23
  timestamp_t ParquetTimestampNsToTimestamp(const int64_t &raw_ts);
24
24
  date_t ParquetIntToDate(const int32_t &raw_date);
25
- dtime_t ParquetIntToTimeMs(const int64_t &raw_time);
25
+ dtime_t ParquetIntToTimeMs(const int32_t &raw_time);
26
26
  dtime_t ParquetIntToTime(const int64_t &raw_time);
27
27
  dtime_t ParquetIntToTimeNs(const int64_t &raw_time);
28
28
 
@@ -200,6 +200,11 @@ LogicalType ParquetReader::DeriveLogicalType(const SchemaElement &s_ele, bool bi
200
200
  throw IOException("UTF8 converted type can only be set for Type::(FIXED_LEN_)BYTE_ARRAY");
201
201
  }
202
202
  case ConvertedType::TIME_MILLIS:
203
+ if (s_ele.type == Type::INT32) {
204
+ return LogicalType::TIME;
205
+ } else {
206
+ throw IOException("TIME_MILLIS converted type can only be set for value of Type::INT32");
207
+ }
203
208
  case ConvertedType::TIME_MICROS:
204
209
  if (s_ele.type == Type::INT64) {
205
210
  return LogicalType::TIME;
@@ -1,12 +1,11 @@
1
1
  #include "parquet_statistics.hpp"
2
2
  #include "parquet_decimal_utils.hpp"
3
3
  #include "parquet_timestamp.hpp"
4
-
5
4
  #include "duckdb.hpp"
6
5
  #ifndef DUCKDB_AMALGAMATION
7
6
  #include "duckdb/common/types/blob.hpp"
8
7
  #include "duckdb/common/types/value.hpp"
9
-
8
+ #include "duckdb/common/types/time.hpp"
10
9
  #endif
11
10
 
12
11
  namespace duckdb {
@@ -155,11 +154,31 @@ Value ParquetStatisticsUtils::ConvertValue(const LogicalType &type,
155
154
  return Value::DATE(date_t(Load<int32_t>((data_ptr_t)stats.c_str())));
156
155
  case LogicalTypeId::TIME:
157
156
  case LogicalTypeId::TIME_TZ: {
158
- if (stats.size() != sizeof(int64_t)) {
157
+ int64_t val;
158
+ if (stats.size() == sizeof(int32_t)) {
159
+ val = Load<int32_t>((data_ptr_t)stats.c_str());
160
+ } else if (stats.size() == sizeof(int64_t)) {
161
+ val = Load<int64_t>((data_ptr_t)stats.c_str());
162
+ } else {
159
163
  throw InternalException("Incorrect stats size for type TIME");
160
164
  }
161
- auto time = dtime_t(Load<int64_t>((data_ptr_t)stats.c_str()));
162
- return Value::TIME(time);
165
+ if (schema_ele.__isset.logicalType && schema_ele.logicalType.__isset.TIME) {
166
+ // logical type
167
+ if (schema_ele.logicalType.TIME.unit.__isset.MILLIS) {
168
+ return Value::TIME(Time::FromTimeMs(val));
169
+ } else if (schema_ele.logicalType.TIME.unit.__isset.NANOS) {
170
+ return Value::TIME(Time::FromTimeNs(val));
171
+ } else if (schema_ele.logicalType.TIME.unit.__isset.MICROS) {
172
+ return Value::TIME(dtime_t(val));
173
+ } else {
174
+ throw InternalException("Time logicalType is set but unit is not defined");
175
+ }
176
+ }
177
+ if (schema_ele.converted_type == duckdb_parquet::format::ConvertedType::TIME_MILLIS) {
178
+ return Value::TIME(Time::FromTimeMs(val));
179
+ } else {
180
+ return Value::TIME(dtime_t(val));
181
+ }
163
182
  }
164
183
  case LogicalTypeId::TIMESTAMP:
165
184
  case LogicalTypeId::TIMESTAMP_TZ: {
@@ -54,7 +54,7 @@ date_t ParquetIntToDate(const int32_t &raw_date) {
54
54
  return date_t(raw_date);
55
55
  }
56
56
 
57
- dtime_t ParquetIntToTimeMs(const int64_t &raw_time) {
57
+ dtime_t ParquetIntToTimeMs(const int32_t &raw_time) {
58
58
  return Time::FromTimeMs(raw_time);
59
59
  }
60
60
 
@@ -11,9 +11,23 @@
11
11
  #include <sstream>
12
12
  #include <stdarg.h>
13
13
  #include <string.h>
14
+ #include <random>
14
15
 
15
16
  namespace duckdb {
16
17
 
18
+ string StringUtil::GenerateRandomName(idx_t length) {
19
+ std::random_device rd;
20
+ std::mt19937 gen(rd());
21
+ std::uniform_int_distribution<> dis(0, 15);
22
+
23
+ std::stringstream ss;
24
+ ss << std::hex;
25
+ for (idx_t i = 0; i < length; i++) {
26
+ ss << dis(gen);
27
+ }
28
+ return ss.str();
29
+ }
30
+
17
31
  bool StringUtil::Contains(const string &haystack, const string &needle) {
18
32
  return (haystack.find(needle) != string::npos);
19
33
  }
@@ -21,9 +21,9 @@ using ValidityBytes = RowLayout::ValidityBytes;
21
21
  GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
22
22
  vector<LogicalType> group_types, vector<LogicalType> payload_types,
23
23
  const vector<BoundAggregateExpression *> &bindings,
24
- HtEntryType entry_type)
24
+ HtEntryType entry_type, idx_t initial_capacity)
25
25
  : GroupedAggregateHashTable(context, allocator, std::move(group_types), std::move(payload_types),
26
- AggregateObject::CreateAggregateObjects(bindings), entry_type) {
26
+ AggregateObject::CreateAggregateObjects(bindings), entry_type, initial_capacity) {
27
27
  }
28
28
 
29
29
  GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
@@ -31,17 +31,19 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
31
31
  : GroupedAggregateHashTable(context, allocator, std::move(group_types), {}, vector<AggregateObject>()) {
32
32
  }
33
33
 
34
+ AggregateHTAppendState::AggregateHTAppendState()
35
+ : ht_offsets(LogicalTypeId::BIGINT), hash_salts(LogicalTypeId::SMALLINT),
36
+ group_compare_vector(STANDARD_VECTOR_SIZE), no_match_vector(STANDARD_VECTOR_SIZE),
37
+ empty_vector(STANDARD_VECTOR_SIZE), new_groups(STANDARD_VECTOR_SIZE), addresses(LogicalType::POINTER) {
38
+ }
39
+
34
40
  GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, Allocator &allocator,
35
41
  vector<LogicalType> group_types_p,
36
42
  vector<LogicalType> payload_types_p,
37
43
  vector<AggregateObject> aggregate_objects_p,
38
- HtEntryType entry_type)
44
+ HtEntryType entry_type, idx_t initial_capacity)
39
45
  : BaseAggregateHashTable(context, allocator, aggregate_objects_p, std::move(payload_types_p)),
40
- entry_type(entry_type), capacity(0), entries(0), payload_page_offset(0), is_finalized(false),
41
- ht_offsets(LogicalTypeId::BIGINT), hash_salts(LogicalTypeId::SMALLINT),
42
- group_compare_vector(STANDARD_VECTOR_SIZE), no_match_vector(STANDARD_VECTOR_SIZE),
43
- empty_vector(STANDARD_VECTOR_SIZE) {
44
-
46
+ entry_type(entry_type), capacity(0), entries(0), payload_page_offset(0), is_finalized(false) {
45
47
  // Append hash column to the end and initialise the row layout
46
48
  group_types_p.emplace_back(LogicalType::HASH);
47
49
  layout.Initialize(std::move(group_types_p), std::move(aggregate_objects_p));
@@ -59,12 +61,12 @@ GroupedAggregateHashTable::GroupedAggregateHashTable(ClientContext &context, All
59
61
  switch (entry_type) {
60
62
  case HtEntryType::HT_WIDTH_64: {
61
63
  hash_prefix_shift = (HASH_WIDTH - sizeof(aggr_ht_entry_64::salt)) * 8;
62
- Resize<aggr_ht_entry_64>(STANDARD_VECTOR_SIZE * 2L);
64
+ Resize<aggr_ht_entry_64>(initial_capacity);
63
65
  break;
64
66
  }
65
67
  case HtEntryType::HT_WIDTH_32: {
66
68
  hash_prefix_shift = (HASH_WIDTH - sizeof(aggr_ht_entry_32::salt)) * 8;
67
- Resize<aggr_ht_entry_32>(STANDARD_VECTOR_SIZE * 2L);
69
+ Resize<aggr_ht_entry_32>(initial_capacity);
68
70
  break;
69
71
  }
70
72
  default:
@@ -155,6 +157,10 @@ void GroupedAggregateHashTable::VerifyInternal() {
155
157
  D_ASSERT(count == entries);
156
158
  }
157
159
 
160
+ idx_t GroupedAggregateHashTable::InitialCapacity() {
161
+ return STANDARD_VECTOR_SIZE * 2ULL;
162
+ }
163
+
158
164
  idx_t GroupedAggregateHashTable::GetMaxCapacity(HtEntryType entry_type, idx_t tuple_size) {
159
165
  idx_t max_pages;
160
166
  idx_t max_tuples;
@@ -213,7 +219,6 @@ void GroupedAggregateHashTable::Resize(idx_t size) {
213
219
  hashes_hdl_ptr = hashes_hdl.Ptr();
214
220
  }
215
221
  memset(hashes_hdl_ptr, 0, byte_size);
216
- hashes_end_ptr = hashes_hdl_ptr + byte_size;
217
222
  capacity = size;
218
223
 
219
224
  auto hashes_arr = (ENTRY *)hashes_hdl_ptr;
@@ -240,7 +245,8 @@ void GroupedAggregateHashTable::Resize(idx_t size) {
240
245
  Verify();
241
246
  }
242
247
 
243
- idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload, AggregateType filter) {
248
+ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
249
+ AggregateType filter) {
244
250
  vector<idx_t> aggregate_filter;
245
251
 
246
252
  auto &aggregates = layout.GetAggregates();
@@ -250,34 +256,32 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload,
250
256
  aggregate_filter.push_back(i);
251
257
  }
252
258
  }
253
- return AddChunk(groups, payload, aggregate_filter);
259
+ return AddChunk(state, groups, payload, aggregate_filter);
254
260
  }
255
261
 
256
- idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload, const vector<idx_t> &filter) {
262
+ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
263
+ const vector<idx_t> &filter) {
257
264
  Vector hashes(LogicalType::HASH);
258
265
  groups.Hash(hashes);
259
266
 
260
- return AddChunk(groups, hashes, payload, filter);
267
+ return AddChunk(state, groups, hashes, payload, filter);
261
268
  }
262
269
 
263
- idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashes, DataChunk &payload,
264
- const vector<idx_t> &filter) {
270
+ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
271
+ DataChunk &payload, const vector<idx_t> &filter) {
265
272
  D_ASSERT(!is_finalized);
266
273
 
267
274
  if (groups.size() == 0) {
268
275
  return 0;
269
276
  }
270
- // dummy
271
- SelectionVector new_groups(STANDARD_VECTOR_SIZE);
272
277
 
273
278
  D_ASSERT(groups.ColumnCount() + 1 == layout.ColumnCount());
274
279
  for (idx_t i = 0; i < groups.ColumnCount(); i++) {
275
280
  D_ASSERT(groups.GetTypes()[i] == layout.GetTypes()[i]);
276
281
  }
277
282
 
278
- Vector addresses(LogicalType::POINTER);
279
- auto new_group_count = FindOrCreateGroups(groups, group_hashes, addresses, new_groups);
280
- VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), payload.size());
283
+ auto new_group_count = FindOrCreateGroups(state, groups, group_hashes, state.addresses, state.new_groups);
284
+ VectorOperations::AddInPlace(state.addresses, layout.GetAggrOffset(), payload.size());
281
285
 
282
286
  // now every cell has an entry
283
287
  // update the aggregates
@@ -290,20 +294,21 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashe
290
294
  if (filter_idx >= filter.size() || i < filter[filter_idx]) {
291
295
  // Skip all the aggregates that are not in the filter
292
296
  payload_idx += aggr.child_count;
293
- VectorOperations::AddInPlace(addresses, aggr.payload_size, payload.size());
297
+ VectorOperations::AddInPlace(state.addresses, aggr.payload_size, payload.size());
294
298
  continue;
295
299
  }
296
300
  D_ASSERT(i == filter[filter_idx]);
297
301
 
298
302
  if (aggr.aggr_type != AggregateType::DISTINCT && aggr.filter) {
299
- RowOperations::UpdateFilteredStates(filter_set.GetFilterData(i), aggr, addresses, payload, payload_idx);
303
+ RowOperations::UpdateFilteredStates(filter_set.GetFilterData(i), aggr, state.addresses, payload,
304
+ payload_idx);
300
305
  } else {
301
- RowOperations::UpdateStates(aggr, addresses, payload, payload_idx, payload.size());
306
+ RowOperations::UpdateStates(aggr, state.addresses, payload, payload_idx, payload.size());
302
307
  }
303
308
 
304
309
  // move to the next aggregate
305
310
  payload_idx += aggr.child_count;
306
- VectorOperations::AddInPlace(addresses, aggr.payload_size, payload.size());
311
+ VectorOperations::AddInPlace(state.addresses, aggr.payload_size, payload.size());
307
312
  filter_idx++;
308
313
  }
309
314
 
@@ -321,16 +326,23 @@ void GroupedAggregateHashTable::FetchAggregates(DataChunk &groups, DataChunk &re
321
326
  if (groups.size() == 0) {
322
327
  return;
323
328
  }
329
+
324
330
  // find the groups associated with the addresses
325
331
  // FIXME: this should not use the FindOrCreateGroups, creating them is unnecessary
332
+ AggregateHTAppendState append_state;
326
333
  Vector addresses(LogicalType::POINTER);
327
- FindOrCreateGroups(groups, addresses);
334
+ FindOrCreateGroups(append_state, groups, addresses);
328
335
  // now fetch the aggregates
329
336
  RowOperations::FinalizeStates(layout, addresses, result, 0);
330
337
  }
331
338
 
339
+ idx_t GroupedAggregateHashTable::ResizeThreshold() {
340
+ return capacity / LOAD_FACTOR;
341
+ }
342
+
332
343
  template <class ENTRY>
333
- idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes, Vector &addresses,
344
+ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendState &state, DataChunk &groups,
345
+ Vector &group_hashes, Vector &addresses,
334
346
  SelectionVector &new_groups_out) {
335
347
  D_ASSERT(!is_finalized);
336
348
 
@@ -339,7 +351,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
339
351
  }
340
352
 
341
353
  // resize at 50% capacity, also need to fit the entire vector
342
- if (capacity - entries <= groups.size() || entries > capacity / LOAD_FACTOR) {
354
+ if (capacity - entries <= groups.size() || entries > ResizeThreshold()) {
343
355
  Resize<ENTRY>(capacity * 2);
344
356
  }
345
357
 
@@ -352,42 +364,47 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
352
364
  group_hashes.Flatten(groups.size());
353
365
  auto group_hashes_ptr = FlatVector::GetData<hash_t>(group_hashes);
354
366
 
355
- D_ASSERT(ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR);
356
- D_ASSERT(ht_offsets.GetType() == LogicalType::BIGINT);
367
+ D_ASSERT(state.ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR);
368
+ D_ASSERT(state.ht_offsets.GetType() == LogicalType::BIGINT);
357
369
 
358
370
  D_ASSERT(addresses.GetType() == LogicalType::POINTER);
359
371
  addresses.Flatten(groups.size());
360
372
  auto addresses_ptr = FlatVector::GetData<data_ptr_t>(addresses);
361
373
 
362
- // now compute the entry in the table based on the hash using a modulo
363
- UnaryExecutor::Execute<hash_t, uint64_t>(group_hashes, ht_offsets, groups.size(), [&](hash_t element) {
374
+ // compute the entry in the table based on the hash using a modulo
375
+ // and precompute the hash salts for faster comparison below
376
+ D_ASSERT(state.hash_salts.GetType() == LogicalType::SMALLINT);
377
+ auto ht_offsets_ptr = FlatVector::GetData<uint64_t>(state.ht_offsets);
378
+ auto hash_salts_ptr = FlatVector::GetData<uint16_t>(state.hash_salts);
379
+ for (idx_t r = 0; r < groups.size(); r++) {
380
+ auto element = group_hashes_ptr[r];
364
381
  D_ASSERT((element & bitmask) == (element % capacity));
365
- return (element & bitmask);
366
- });
367
- auto ht_offsets_ptr = FlatVector::GetData<uint64_t>(ht_offsets);
368
-
369
- // precompute the hash salts for faster comparison below
370
- D_ASSERT(hash_salts.GetType() == LogicalType::SMALLINT);
371
- UnaryExecutor::Execute<hash_t, uint16_t>(group_hashes, hash_salts, groups.size(),
372
- [&](hash_t element) { return (element >> hash_prefix_shift); });
373
- auto hash_salts_ptr = FlatVector::GetData<uint16_t>(hash_salts);
374
-
382
+ ht_offsets_ptr[r] = element & bitmask;
383
+ hash_salts_ptr[r] = element >> hash_prefix_shift;
384
+ }
375
385
  // we start out with all entries [0, 1, 2, ..., groups.size()]
376
386
  const SelectionVector *sel_vector = FlatVector::IncrementalSelectionVector();
377
387
 
378
388
  idx_t remaining_entries = groups.size();
379
389
 
380
390
  // make a chunk that references the groups and the hashes
381
- DataChunk group_chunk;
382
- group_chunk.InitializeEmpty(layout.GetTypes());
391
+ if (state.group_chunk.ColumnCount() == 0) {
392
+ state.group_chunk.InitializeEmpty(layout.GetTypes());
393
+ }
394
+ D_ASSERT(state.group_chunk.ColumnCount() == layout.GetTypes().size());
383
395
  for (idx_t grp_idx = 0; grp_idx < groups.ColumnCount(); grp_idx++) {
384
- group_chunk.data[grp_idx].Reference(groups.data[grp_idx]);
396
+ state.group_chunk.data[grp_idx].Reference(groups.data[grp_idx]);
385
397
  }
386
- group_chunk.data[groups.ColumnCount()].Reference(group_hashes);
387
- group_chunk.SetCardinality(groups);
398
+ state.group_chunk.data[groups.ColumnCount()].Reference(group_hashes);
399
+ state.group_chunk.SetCardinality(groups);
388
400
 
389
401
  // convert all vectors to unified format
390
- auto group_data = group_chunk.ToUnifiedFormat();
402
+ if (!state.group_data) {
403
+ state.group_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[state.group_chunk.ColumnCount()]);
404
+ }
405
+ for (idx_t col_idx = 0; col_idx < state.group_chunk.ColumnCount(); col_idx++) {
406
+ state.group_chunk.data[col_idx].ToUnifiedFormat(state.group_chunk.size(), state.group_data[col_idx]);
407
+ }
391
408
 
392
409
  idx_t new_group_count = 0;
393
410
  while (remaining_entries > 0) {
@@ -420,7 +437,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
420
437
  ht_entry_ptr->page_offset = payload_page_offset++;
421
438
 
422
439
  // update selection lists for outer loops
423
- empty_vector.set_index(new_entry_count++, index);
440
+ state.empty_vector.set_index(new_entry_count++, index);
424
441
  new_groups_out.set_index(new_group_count++, index);
425
442
  entries++;
426
443
 
@@ -430,37 +447,37 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
430
447
  // cell is occupied: add to check list
431
448
  // only need to check if hash salt in ptr == prefix of hash in payload
432
449
  if (ht_entry_ptr->salt == hash_salts_ptr[index]) {
433
- group_compare_vector.set_index(need_compare_count++, index);
450
+ state.group_compare_vector.set_index(need_compare_count++, index);
434
451
 
435
452
  auto page_ptr = payload_hds_ptrs[ht_entry_ptr->page_nr - 1];
436
453
  auto page_offset = ht_entry_ptr->page_offset * tuple_size;
437
454
  addresses_ptr[index] = page_ptr + page_offset;
438
455
 
439
456
  } else {
440
- no_match_vector.set_index(no_match_count++, index);
457
+ state.no_match_vector.set_index(no_match_count++, index);
441
458
  }
442
459
  }
443
460
  }
444
461
 
445
462
  // for each of the locations that are empty, serialize the group columns to the locations
446
- RowOperations::Scatter(group_chunk, group_data.get(), layout, addresses, *string_heap, empty_vector,
447
- new_entry_count);
448
- RowOperations::InitializeStates(layout, addresses, empty_vector, new_entry_count);
463
+ RowOperations::Scatter(state.group_chunk, state.group_data.get(), layout, addresses, *string_heap,
464
+ state.empty_vector, new_entry_count);
465
+ RowOperations::InitializeStates(layout, addresses, state.empty_vector, new_entry_count);
449
466
 
450
467
  // now we have only the tuples remaining that might match to an existing group
451
468
  // start performing comparisons with each of the groups
452
- RowOperations::Match(group_chunk, group_data.get(), layout, addresses, predicates, group_compare_vector,
453
- need_compare_count, &no_match_vector, no_match_count);
469
+ RowOperations::Match(state.group_chunk, state.group_data.get(), layout, addresses, predicates,
470
+ state.group_compare_vector, need_compare_count, &state.no_match_vector, no_match_count);
454
471
 
455
472
  // each of the entries that do not match we move them to the next entry in the HT
456
473
  for (idx_t i = 0; i < no_match_count; i++) {
457
- idx_t index = no_match_vector.get_index(i);
474
+ idx_t index = state.no_match_vector.get_index(i);
458
475
  ht_offsets_ptr[index]++;
459
476
  if (ht_offsets_ptr[index] >= capacity) {
460
477
  ht_offsets_ptr[index] = 0;
461
478
  }
462
479
  }
463
- sel_vector = &no_match_vector;
480
+ sel_vector = &state.no_match_vector;
464
481
  remaining_entries = no_match_count;
465
482
  }
466
483
 
@@ -469,29 +486,30 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
469
486
 
470
487
  // this is to support distinct aggregations where we need to record whether we
471
488
  // have already seen a value for a group
472
- idx_t GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &group_hashes, Vector &addresses_out,
489
+ idx_t GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups,
490
+ Vector &group_hashes, Vector &addresses_out,
473
491
  SelectionVector &new_groups_out) {
474
492
  switch (entry_type) {
475
493
  case HtEntryType::HT_WIDTH_64:
476
- return FindOrCreateGroupsInternal<aggr_ht_entry_64>(groups, group_hashes, addresses_out, new_groups_out);
494
+ return FindOrCreateGroupsInternal<aggr_ht_entry_64>(state, groups, group_hashes, addresses_out, new_groups_out);
477
495
  case HtEntryType::HT_WIDTH_32:
478
- return FindOrCreateGroupsInternal<aggr_ht_entry_32>(groups, group_hashes, addresses_out, new_groups_out);
496
+ return FindOrCreateGroupsInternal<aggr_ht_entry_32>(state, groups, group_hashes, addresses_out, new_groups_out);
479
497
  default:
480
498
  throw InternalException("Unknown HT entry width");
481
499
  }
482
500
  }
483
501
 
484
- void GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &addresses) {
502
+ void GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups,
503
+ Vector &addresses) {
485
504
  // create a dummy new_groups sel vector
486
- SelectionVector new_groups(STANDARD_VECTOR_SIZE);
487
- FindOrCreateGroups(groups, addresses, new_groups);
505
+ FindOrCreateGroups(state, groups, addresses, state.new_groups);
488
506
  }
489
507
 
490
- idx_t GroupedAggregateHashTable::FindOrCreateGroups(DataChunk &groups, Vector &addresses_out,
491
- SelectionVector &new_groups_out) {
508
+ idx_t GroupedAggregateHashTable::FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups,
509
+ Vector &addresses_out, SelectionVector &new_groups_out) {
492
510
  Vector hashes(LogicalType::HASH);
493
511
  groups.Hash(hashes);
494
- return FindOrCreateGroups(groups, hashes, addresses_out, new_groups_out);
512
+ return FindOrCreateGroups(state, groups, hashes, addresses_out, new_groups_out);
495
513
  }
496
514
 
497
515
  struct FlushMoveState {
@@ -521,7 +539,8 @@ void GroupedAggregateHashTable::FlushMove(FlushMoveState &state, Vector &source_
521
539
  *FlatVector::IncrementalSelectionVector(), count, layout, col_no);
522
540
  }
523
541
 
524
- FindOrCreateGroups(state.groups, source_hashes, state.group_addresses, state.new_groups_sel);
542
+ AggregateHTAppendState append_state;
543
+ FindOrCreateGroups(append_state, state.groups, source_hashes, state.group_addresses, state.new_groups_sel);
525
544
 
526
545
  RowOperations::CombineStates(layout, source_addresses, state.group_addresses, count);
527
546
  }
@@ -219,7 +219,9 @@ void JoinHashTable::Build(DataChunk &keys, DataChunk &payload) {
219
219
  }
220
220
  info.correlated_payload.SetCardinality(keys);
221
221
  info.correlated_payload.data[0].Reference(keys.data[info.correlated_types.size()]);
222
- info.correlated_counts->AddChunk(info.group_chunk, info.correlated_payload, AggregateType::NON_DISTINCT);
222
+ AggregateHTAppendState append_state;
223
+ info.correlated_counts->AddChunk(append_state, info.group_chunk, info.correlated_payload,
224
+ AggregateType::NON_DISTINCT);
223
225
  }
224
226
 
225
227
  // prepare the keys for processing
@@ -42,6 +42,7 @@ public:
42
42
  bool initialized = false;
43
43
  bool finished_scan = false;
44
44
  SelectionVector new_groups;
45
+ AggregateHTAppendState append_state;
45
46
  };
46
47
 
47
48
  unique_ptr<GlobalSinkState> PhysicalRecursiveCTE::GetGlobalSinkState(ClientContext &context) const {
@@ -52,7 +53,7 @@ idx_t PhysicalRecursiveCTE::ProbeHT(DataChunk &chunk, RecursiveCTEState &state)
52
53
  Vector dummy_addresses(LogicalType::POINTER);
53
54
 
54
55
  // Use the HT to eliminate duplicate rows
55
- idx_t new_group_count = state.ht->FindOrCreateGroups(chunk, dummy_addresses, state.new_groups);
56
+ idx_t new_group_count = state.ht->FindOrCreateGroups(state.append_state, chunk, dummy_addresses, state.new_groups);
56
57
 
57
58
  // we only return entries we have not seen before (i.e. new groups)
58
59
  chunk.Slice(state.new_groups, new_group_count);
@@ -80,15 +80,17 @@ idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &group
80
80
  DataChunk &payload, const vector<idx_t> &filter) {
81
81
  // If this is false, a single AddChunk would overflow the max capacity
82
82
  D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
83
- if (list.empty() || list.back()->Size() + groups.size() > list.back()->MaxCapacity()) {
83
+ if (list.empty() || list.back()->Size() + groups.size() >= list.back()->MaxCapacity()) {
84
+ idx_t new_capacity = GroupedAggregateHashTable::InitialCapacity();
84
85
  if (!list.empty()) {
86
+ new_capacity = list.back()->Capacity();
85
87
  // early release first part of ht and prevent adding of more data
86
88
  list.back()->Finalize();
87
89
  }
88
90
  list.push_back(make_unique<GroupedAggregateHashTable>(context, allocator, group_types, payload_types, bindings,
89
- GetHTEntrySize()));
91
+ GetHTEntrySize(), new_capacity));
90
92
  }
91
- return list.back()->AddChunk(groups, group_hashes, payload, filter);
93
+ return list.back()->AddChunk(append_state, groups, group_hashes, payload, filter);
92
94
  }
93
95
 
94
96
  idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition,
@@ -150,6 +152,7 @@ void PartitionableHashTable::Partition() {
150
152
  D_ASSERT(partition_info.n_partitions > 1);
151
153
 
152
154
  vector<GroupedAggregateHashTable *> partition_hts(partition_info.n_partitions);
155
+ radix_partitioned_hts.resize(partition_info.n_partitions);
153
156
  for (auto &unpartitioned_ht : unpartitioned_hts) {
154
157
  for (idx_t r = 0; r < partition_info.n_partitions; r++) {
155
158
  radix_partitioned_hts[r].push_back(make_unique<GroupedAggregateHashTable>(
@@ -181,7 +184,7 @@ HashTableList PartitionableHashTable::GetUnpartitioned() {
181
184
  void PartitionableHashTable::Finalize() {
182
185
  if (IsPartitioned()) {
183
186
  for (auto &ht_list : radix_partitioned_hts) {
184
- for (auto &ht : ht_list.second) {
187
+ for (auto &ht : ht_list) {
185
188
  D_ASSERT(ht);
186
189
  ht->Finalize();
187
190
  }
@@ -78,6 +78,7 @@ public:
78
78
  bool is_partitioned = false;
79
79
 
80
80
  RadixPartitionInfo partition_info;
81
+ AggregateHTAppendState append_state;
81
82
  };
82
83
 
83
84
  class RadixHTLocalState : public LocalSinkState {
@@ -151,7 +152,8 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState
151
152
  }
152
153
  D_ASSERT(gstate.finalized_hts.size() == 1);
153
154
  D_ASSERT(gstate.finalized_hts[0]);
154
- llstate.total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, payload_input, filter);
155
+ llstate.total_groups +=
156
+ gstate.finalized_hts[0]->AddChunk(gstate.append_state, group_chunk, payload_input, filter);
155
157
  return;
156
158
  }
157
159
 
@@ -194,15 +196,13 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
194
196
  llstate.ht->Partition();
195
197
  }
196
198
 
197
- lock_guard<mutex> glock(gstate.lock);
199
+ // we will never add new values to these HTs so we can drop the first part of the HT
200
+ llstate.ht->Finalize();
198
201
 
202
+ lock_guard<mutex> glock(gstate.lock);
199
203
  if (!llstate.is_empty) {
200
204
  gstate.is_empty = false;
201
205
  }
202
-
203
- // we will never add new values to these HTs so we can drop the first part of the HT
204
- llstate.ht->Finalize();
205
-
206
206
  // at this point we just collect them the PhysicalHashAggregateFinalizeTask (below) will merge them in parallel
207
207
  gstate.intermediate_hts.push_back(std::move(llstate.ht));
208
208
  }
@@ -10,25 +10,29 @@
10
10
  namespace duckdb {
11
11
 
12
12
  struct SortedAggregateBindData : public FunctionData {
13
- SortedAggregateBindData(ClientContext &context, const AggregateFunction &function_p,
14
- vector<unique_ptr<Expression>> &children, unique_ptr<FunctionData> bind_info_p,
15
- const BoundOrderModifier &order_bys)
16
- : buffer_manager(BufferManager::GetBufferManager(context)), function(function_p),
17
- bind_info(std::move(bind_info_p)) {
13
+ SortedAggregateBindData(ClientContext &context, BoundAggregateExpression &expr)
14
+ : buffer_manager(BufferManager::GetBufferManager(context)), function(expr.function),
15
+ bind_info(std::move(expr.bind_info)) {
16
+ auto &children = expr.children;
18
17
  arg_types.reserve(children.size());
19
18
  for (const auto &child : children) {
20
19
  arg_types.emplace_back(child->return_type);
21
20
  }
21
+ auto &order_bys = *expr.order_bys;
22
22
  sort_types.reserve(order_bys.orders.size());
23
23
  for (auto &order : order_bys.orders) {
24
24
  orders.emplace_back(order.Copy());
25
25
  sort_types.emplace_back(order.expression->return_type);
26
26
  }
27
+ sorted_on_args = (children.size() == order_bys.orders.size());
28
+ for (size_t i = 0; sorted_on_args && i < children.size(); ++i) {
29
+ sorted_on_args = children[i]->Equals(order_bys.orders[i].expression.get());
30
+ }
27
31
  }
28
32
 
29
33
  SortedAggregateBindData(const SortedAggregateBindData &other)
30
34
  : buffer_manager(other.buffer_manager), function(other.function), arg_types(other.arg_types),
31
- sort_types(other.sort_types) {
35
+ sort_types(other.sort_types), sorted_on_args(other.sorted_on_args) {
32
36
  if (other.bind_info) {
33
37
  bind_info = other.bind_info->Copy();
34
38
  }
@@ -71,13 +75,14 @@ struct SortedAggregateBindData : public FunctionData {
71
75
 
72
76
  vector<BoundOrderByNode> orders;
73
77
  vector<LogicalType> sort_types;
78
+ bool sorted_on_args;
74
79
  };
75
80
 
76
81
  struct SortedAggregateState {
77
82
  //! Default buffer size, optimised for small group to avoid blowing out memory.
78
83
  static const idx_t BUFFER_CAPACITY = 16;
79
84
 
80
- SortedAggregateState() : nsel(0) {
85
+ SortedAggregateState() : nsel(0), offset(0) {
81
86
  }
82
87
 
83
88
  static inline void InitializeBuffer(DataChunk &chunk, const vector<LogicalType> &types) {
@@ -103,23 +108,31 @@ struct SortedAggregateState {
103
108
  ordering->Append(sort_buffer);
104
109
  ResetBuffer(sort_buffer, order_bind.sort_types);
105
110
 
106
- arguments = make_unique<ColumnDataCollection>(order_bind.buffer_manager, order_bind.arg_types);
107
- InitializeBuffer(arg_buffer, order_bind.arg_types);
108
- arguments->Append(arg_buffer);
109
- ResetBuffer(arg_buffer, order_bind.arg_types);
111
+ if (!order_bind.sorted_on_args) {
112
+ arguments = make_unique<ColumnDataCollection>(order_bind.buffer_manager, order_bind.arg_types);
113
+ InitializeBuffer(arg_buffer, order_bind.arg_types);
114
+ arguments->Append(arg_buffer);
115
+ ResetBuffer(arg_buffer, order_bind.arg_types);
116
+ }
110
117
  }
111
118
 
112
119
  void Update(SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
113
120
  // Lazy instantiation of the buffer chunks
114
121
  InitializeBuffer(sort_buffer, order_bind.sort_types);
115
- InitializeBuffer(arg_buffer, order_bind.arg_types);
122
+ if (!order_bind.sorted_on_args) {
123
+ InitializeBuffer(arg_buffer, order_bind.arg_types);
124
+ }
116
125
 
117
126
  if (sort_chunk.size() + sort_buffer.size() > STANDARD_VECTOR_SIZE) {
118
127
  Flush(order_bind);
119
128
  }
120
- if (ordering) {
129
+ if (arguments) {
121
130
  ordering->Append(sort_chunk);
122
131
  arguments->Append(arg_chunk);
132
+ } else if (ordering) {
133
+ ordering->Append(sort_chunk);
134
+ } else if (order_bind.sorted_on_args) {
135
+ sort_buffer.Append(sort_chunk, true);
123
136
  } else {
124
137
  sort_buffer.Append(sort_chunk, true);
125
138
  arg_buffer.Append(arg_chunk, true);
@@ -129,12 +142,14 @@ struct SortedAggregateState {
129
142
  void UpdateSlice(SortedAggregateBindData &order_bind, DataChunk &sort_inputs, DataChunk &arg_inputs) {
130
143
  // Lazy instantiation of the buffer chunks
131
144
  InitializeBuffer(sort_buffer, order_bind.sort_types);
132
- InitializeBuffer(arg_buffer, order_bind.arg_types);
145
+ if (!order_bind.sorted_on_args) {
146
+ InitializeBuffer(arg_buffer, order_bind.arg_types);
147
+ }
133
148
 
134
149
  if (nsel + sort_buffer.size() > STANDARD_VECTOR_SIZE) {
135
150
  Flush(order_bind);
136
151
  }
137
- if (ordering) {
152
+ if (arguments) {
138
153
  sort_buffer.Reset();
139
154
  sort_buffer.Slice(sort_inputs, sel, nsel);
140
155
  ordering->Append(sort_buffer);
@@ -142,27 +157,38 @@ struct SortedAggregateState {
142
157
  arg_buffer.Reset();
143
158
  arg_buffer.Slice(arg_inputs, sel, nsel);
144
159
  arguments->Append(arg_buffer);
160
+ } else if (ordering) {
161
+ sort_buffer.Reset();
162
+ sort_buffer.Slice(sort_inputs, sel, nsel);
163
+ ordering->Append(sort_buffer);
164
+ } else if (order_bind.sorted_on_args) {
165
+ sort_buffer.Append(sort_inputs, true, &sel, nsel);
145
166
  } else {
146
167
  sort_buffer.Append(sort_inputs, true, &sel, nsel);
147
168
  arg_buffer.Append(arg_inputs, true, &sel, nsel);
148
169
  }
149
170
 
150
171
  nsel = 0;
172
+ offset = 0;
151
173
  }
152
174
 
153
175
  void Combine(SortedAggregateBindData &order_bind, SortedAggregateState &other) {
154
- if (other.ordering) {
155
- // Force CDC if the other hash it
176
+ if (other.arguments) {
177
+ // Force CDC if the other has it
156
178
  Flush(order_bind);
157
179
  ordering->Combine(*other.ordering);
158
180
  arguments->Combine(*other.arguments);
181
+ } else if (other.ordering) {
182
+ // Force CDC if the other has it
183
+ Flush(order_bind);
184
+ ordering->Combine(*other.ordering);
159
185
  } else if (other.sort_buffer.size()) {
160
186
  Update(order_bind, other.sort_buffer, other.arg_buffer);
161
187
  }
162
188
  }
163
189
 
164
- void Finalize(LocalSortState &local_sort) {
165
- if (ordering) {
190
+ void Finalize(SortedAggregateBindData &order_bind, LocalSortState &local_sort) {
191
+ if (arguments) {
166
192
  ColumnDataScanState sort_state;
167
193
  ordering->InitializeScan(sort_state);
168
194
  ColumnDataScanState arg_state;
@@ -174,6 +200,15 @@ struct SortedAggregateState {
174
200
  }
175
201
  ordering->Reset();
176
202
  arguments->Reset();
203
+ } else if (ordering) {
204
+ ColumnDataScanState sort_state;
205
+ ordering->InitializeScan(sort_state);
206
+ for (sort_buffer.Reset(); ordering->Scan(sort_state, sort_buffer); sort_buffer.Reset()) {
207
+ local_sort.SinkChunk(sort_buffer, sort_buffer);
208
+ }
209
+ ordering->Reset();
210
+ } else if (order_bind.sorted_on_args) {
211
+ local_sort.SinkChunk(sort_buffer, sort_buffer);
177
212
  } else {
178
213
  local_sort.SinkChunk(sort_buffer, arg_buffer);
179
214
  }
@@ -188,6 +223,7 @@ struct SortedAggregateState {
188
223
  // Selection for scattering
189
224
  SelectionVector sel;
190
225
  idx_t nsel;
226
+ idx_t offset;
191
227
  };
192
228
 
193
229
  struct SortedAggregateFunction {
@@ -205,11 +241,13 @@ struct SortedAggregateFunction {
205
241
  DataChunk &arg_chunk, DataChunk &sort_chunk) {
206
242
  idx_t col = 0;
207
243
 
208
- arg_chunk.InitializeEmpty(order_bind->arg_types);
209
- for (auto &dst : arg_chunk.data) {
210
- dst.Reference(inputs[col++]);
244
+ if (!order_bind->sorted_on_args) {
245
+ arg_chunk.InitializeEmpty(order_bind->arg_types);
246
+ for (auto &dst : arg_chunk.data) {
247
+ dst.Reference(inputs[col++]);
248
+ }
249
+ arg_chunk.SetCardinality(count);
211
250
  }
212
- arg_chunk.SetCardinality(count);
213
251
 
214
252
  sort_chunk.InitializeEmpty(order_bind->sort_types);
215
253
  for (auto &dst : sort_chunk.data) {
@@ -246,15 +284,27 @@ struct SortedAggregateFunction {
246
284
  UnifiedVectorFormat svdata;
247
285
  states.ToUnifiedFormat(count, svdata);
248
286
 
249
- // Build the selection vector for each state.
287
+ // Size the selection vector for each state.
250
288
  auto sdata = (SortedAggregateState **)svdata.data;
251
289
  for (idx_t i = 0; i < count; ++i) {
252
290
  auto sidx = svdata.sel->get_index(i);
253
291
  auto order_state = sdata[sidx];
254
- if (!order_state->sel.data()) {
255
- order_state->sel.Initialize();
292
+ order_state->nsel++;
293
+ }
294
+
295
+ // Build the selection vector for each state.
296
+ vector<sel_t> sel_data(count);
297
+ idx_t start = 0;
298
+ for (idx_t i = 0; i < count; ++i) {
299
+ auto sidx = svdata.sel->get_index(i);
300
+ auto order_state = sdata[sidx];
301
+ if (!order_state->offset) {
302
+ // First one
303
+ order_state->offset = start;
304
+ order_state->sel.Initialize(sel_data.data() + order_state->offset);
305
+ start += order_state->nsel;
256
306
  }
257
- order_state->sel.set_index(order_state->nsel++, i);
307
+ sel_data[order_state->offset++] = sidx;
258
308
  }
259
309
 
260
310
  // Append nonempty slices to the arguments
@@ -317,7 +367,7 @@ struct SortedAggregateFunction {
317
367
  auto global_sort = make_unique<GlobalSortState>(buffer_manager, orders, payload_layout);
318
368
  LocalSortState local_sort;
319
369
  local_sort.Initialize(*global_sort, global_sort->buffer_manager);
320
- state->Finalize(local_sort);
370
+ state->Finalize(*order_bind, local_sort);
321
371
  global_sort->AddLocalState(local_sort);
322
372
 
323
373
  if (!global_sort->sorted_blocks.empty()) {
@@ -399,12 +449,13 @@ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateE
399
449
  auto &bound_function = expr.function;
400
450
  auto &children = expr.children;
401
451
  auto &order_bys = *expr.order_bys;
402
- auto sorted_bind = make_unique<SortedAggregateBindData>(context, bound_function, expr.children,
403
- std::move(expr.bind_info), order_bys);
452
+ auto sorted_bind = make_unique<SortedAggregateBindData>(context, expr);
404
453
 
405
- // The arguments are the children plus the sort columns.
406
- for (auto &order : order_bys.orders) {
407
- children.emplace_back(std::move(order.expression));
454
+ if (!sorted_bind->sorted_on_args) {
455
+ // The arguments are the children plus the sort columns.
456
+ for (auto &order : order_bys.orders) {
457
+ children.emplace_back(std::move(order.expression));
458
+ }
408
459
  }
409
460
 
410
461
  vector<LogicalType> arguments;
@@ -1,8 +1,11 @@
1
+ #include "duckdb/catalog/catalog_search_path.hpp"
1
2
  #include "duckdb/common/constants.hpp"
2
3
  #include "duckdb/common/file_system.hpp"
3
4
  #include "duckdb/common/string_util.hpp"
4
5
  #include "duckdb/function/pragma/pragma_functions.hpp"
5
6
  #include "duckdb/main/config.hpp"
7
+ #include "duckdb/main/database_manager.hpp"
8
+ #include "duckdb/main/client_data.hpp"
6
9
  #include "duckdb/parser/parser.hpp"
7
10
  #include "duckdb/parser/qualified_name.hpp"
8
11
  #include "duckdb/parser/statement/copy_statement.hpp"
@@ -15,7 +18,27 @@ string PragmaTableInfo(ClientContext &context, const FunctionParameters &paramet
15
18
  }
16
19
 
17
20
  string PragmaShowTables(ClientContext &context, const FunctionParameters &parameters) {
18
- return "SELECT name FROM sqlite_master ORDER BY name;";
21
+ auto catalog = DatabaseManager::GetDefaultDatabase(context);
22
+ auto schema = ClientData::Get(context).catalog_search_path->GetDefault().schema;
23
+ schema = (schema == INVALID_SCHEMA) ? DEFAULT_SCHEMA : schema; // NOLINT
24
+
25
+ auto where_clause =
26
+ StringUtil::Join({"where database_name = '", catalog, "' and schema_name = '", schema, "'"}, "");
27
+ // clang-format off
28
+ auto pragma_query = StringUtil::Join(
29
+ {"with tables as (",
30
+ " SELECT table_name as name FROM duckdb_tables ", where_clause,
31
+ "), views as (",
32
+ " SELECT view_name as name FROM duckdb_views ", where_clause,
33
+ "), indexes as (",
34
+ " SELECT index_name as name FROM duckdb_indexes ", where_clause,
35
+ "), db_objects as (",
36
+ " SELECT name FROM tables UNION ALL SELECT name FROM views UNION ALL SELECT name FROM indexes",
37
+ ") SELECT name FROM db_objects ORDER BY name;"
38
+ }, "");
39
+ // clang-format on
40
+
41
+ return pragma_query;
19
42
  }
20
43
 
21
44
  string PragmaShowTablesExpanded(ClientContext &context, const FunctionParameters &parameters) {
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev1188"
2
+ #define DUCKDB_VERSION "0.7.2-dev1238"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "d1518bdfe8"
5
+ #define DUCKDB_SOURCE_ID "4be6bdb565"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -21,6 +21,8 @@ namespace duckdb {
21
21
  */
22
22
  class StringUtil {
23
23
  public:
24
+ static string GenerateRandomName(idx_t length = 16);
25
+
24
26
  static uint8_t GetHexValue(char c) {
25
27
  if (c >= '0' && c <= '9') {
26
28
  return c - '0';
@@ -62,6 +62,20 @@ struct AggregateHTScanState {
62
62
  idx_t scan_position = 0;
63
63
  };
64
64
 
65
+ struct AggregateHTAppendState {
66
+ AggregateHTAppendState();
67
+
68
+ Vector ht_offsets;
69
+ Vector hash_salts;
70
+ SelectionVector group_compare_vector;
71
+ SelectionVector no_match_vector;
72
+ SelectionVector empty_vector;
73
+ SelectionVector new_groups;
74
+ Vector addresses;
75
+ unique_ptr<UnifiedVectorFormat[]> group_data;
76
+ DataChunk group_chunk;
77
+ };
78
+
65
79
  class GroupedAggregateHashTable : public BaseAggregateHashTable {
66
80
  public:
67
81
  //! The hash table load factor, when a resize is triggered
@@ -71,10 +85,12 @@ public:
71
85
  public:
72
86
  GroupedAggregateHashTable(ClientContext &context, Allocator &allocator, vector<LogicalType> group_types,
73
87
  vector<LogicalType> payload_types, const vector<BoundAggregateExpression *> &aggregates,
74
- HtEntryType entry_type = HtEntryType::HT_WIDTH_64);
88
+ HtEntryType entry_type = HtEntryType::HT_WIDTH_64,
89
+ idx_t initial_capacity = InitialCapacity());
75
90
  GroupedAggregateHashTable(ClientContext &context, Allocator &allocator, vector<LogicalType> group_types,
76
91
  vector<LogicalType> payload_types, vector<AggregateObject> aggregates,
77
- HtEntryType entry_type = HtEntryType::HT_WIDTH_64);
92
+ HtEntryType entry_type = HtEntryType::HT_WIDTH_64,
93
+ idx_t initial_capacity = InitialCapacity());
78
94
  GroupedAggregateHashTable(ClientContext &context, Allocator &allocator, vector<LogicalType> group_types);
79
95
  ~GroupedAggregateHashTable() override;
80
96
 
@@ -85,9 +101,10 @@ public:
85
101
  //! Add the given data to the HT, computing the aggregates grouped by the
86
102
  //! data in the group chunk. When resize = true, aggregates will not be
87
103
  //! computed but instead just assigned.
88
- idx_t AddChunk(DataChunk &groups, DataChunk &payload, const vector<idx_t> &filter);
89
- idx_t AddChunk(DataChunk &groups, Vector &group_hashes, DataChunk &payload, const vector<idx_t> &filter);
90
- idx_t AddChunk(DataChunk &groups, DataChunk &payload, AggregateType filter);
104
+ idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload, const vector<idx_t> &filter);
105
+ idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes, DataChunk &payload,
106
+ const vector<idx_t> &filter);
107
+ idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload, AggregateType filter);
91
108
 
92
109
  //! Scan the HT starting from the scan_position until the result and group
93
110
  //! chunks are filled. scan_position will be updated by this function.
@@ -100,18 +117,24 @@ public:
100
117
  //! Finds or creates groups in the hashtable using the specified group keys. The addresses vector will be filled
101
118
  //! with pointers to the groups in the hash table, and the new_groups selection vector will point to the newly
102
119
  //! created groups. The return value is the amount of newly created groups.
103
- idx_t FindOrCreateGroups(DataChunk &groups, Vector &group_hashes, Vector &addresses_out,
120
+ idx_t FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
121
+ Vector &addresses_out, SelectionVector &new_groups_out);
122
+ idx_t FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups, Vector &addresses_out,
104
123
  SelectionVector &new_groups_out);
105
- idx_t FindOrCreateGroups(DataChunk &groups, Vector &addresses_out, SelectionVector &new_groups_out);
106
- void FindOrCreateGroups(DataChunk &groups, Vector &addresses_out);
124
+ void FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups, Vector &addresses_out);
107
125
 
108
126
  //! Executes the filter(if any) and update the aggregates
109
127
  void Combine(GroupedAggregateHashTable &other);
110
128
 
129
+ static idx_t InitialCapacity();
111
130
  idx_t Size() {
112
131
  return entries;
113
132
  }
133
+ idx_t Capacity() {
134
+ return capacity;
135
+ }
114
136
 
137
+ idx_t ResizeThreshold();
115
138
  idx_t MaxCapacity();
116
139
  static idx_t GetMaxCapacity(HtEntryType entry_type, idx_t tuple_size);
117
140
 
@@ -138,8 +161,7 @@ private:
138
161
  //! The hashes of the HT
139
162
  BufferHandle hashes_hdl;
140
163
  data_ptr_t hashes_hdl_ptr;
141
- data_ptr_t hashes_end_ptr; // of hashes
142
- idx_t hash_offset; // Offset into the layout of the hash column
164
+ idx_t hash_offset; // Offset into the layout of the hash column
143
165
 
144
166
  hash_t hash_prefix_shift;
145
167
  idx_t payload_page_offset;
@@ -147,16 +169,8 @@ private:
147
169
  //! Bitmask for getting relevant bits from the hashes to determine the position
148
170
  hash_t bitmask;
149
171
 
150
- vector<unique_ptr<GroupedAggregateHashTable>> distinct_hashes;
151
-
152
172
  bool is_finalized;
153
173
 
154
- // some stuff from FindOrCreateGroupsInternal() to avoid allocation there
155
- Vector ht_offsets;
156
- Vector hash_salts;
157
- SelectionVector group_compare_vector;
158
- SelectionVector no_match_vector;
159
- SelectionVector empty_vector;
160
174
  vector<ExpressionType> predicates;
161
175
 
162
176
  private:
@@ -176,8 +190,8 @@ private:
176
190
  template <class ENTRY>
177
191
  void Resize(idx_t size);
178
192
  template <class ENTRY>
179
- idx_t FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes, Vector &addresses,
180
- SelectionVector &new_groups);
193
+ idx_t FindOrCreateGroupsInternal(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
194
+ Vector &addresses, SelectionVector &new_groups);
181
195
 
182
196
  template <class FUNC = std::function<void(idx_t, idx_t, data_ptr_t)>>
183
197
  void PayloadApply(FUNC fun);
@@ -54,9 +54,10 @@ private:
54
54
  vector<idx_t> sel_vector_sizes;
55
55
  DataChunk group_subset, payload_subset;
56
56
  Vector hashes, hashes_subset;
57
+ AggregateHTAppendState append_state;
57
58
 
58
59
  HashTableList unpartitioned_hts;
59
- unordered_map<hash_t, HashTableList> radix_partitioned_hts;
60
+ vector<HashTableList> radix_partitioned_hts;
60
61
  idx_t tuple_size;
61
62
 
62
63
  private: