duckdb 0.8.2-dev1724.0 → 0.8.2-dev1764.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev1724.0",
5
+ "version": "0.8.2-dev1764.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -87,16 +87,22 @@ PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
87
87
  const vector<unique_ptr<BaseStatistics>> &partition_stats,
88
88
  idx_t estimated_cardinality)
89
89
  : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
90
- fixed_bits(0), payload_types(payload_types), memory_per_thread(0), count(0) {
90
+ fixed_bits(0), payload_types(payload_types), memory_per_thread(0), max_bits(1), count(0) {
91
91
 
92
92
  GenerateOrderings(partitions, orders, partition_bys, order_bys, partition_stats);
93
93
 
94
94
  memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
95
95
  external = ClientConfig::GetConfig(context).force_external;
96
96
 
97
+ const auto thread_pages = PreviousPowerOfTwo(memory_per_thread / (4 * idx_t(Storage::BLOCK_ALLOC_SIZE)));
98
+ while (max_bits < 10 && (thread_pages >> max_bits) > 1) {
99
+ ++max_bits;
100
+ }
101
+
97
102
  if (!orders.empty()) {
98
- grouping_types = payload_types;
99
- grouping_types.push_back(LogicalType::HASH);
103
+ auto types = payload_types;
104
+ types.push_back(LogicalType::HASH);
105
+ grouping_types.Initialize(types);
100
106
 
101
107
  ResizeGroupingData(estimated_cardinality);
102
108
  }
@@ -108,10 +114,15 @@ void PartitionGlobalSinkState::SyncPartitioning(const PartitionGlobalSinkState &
108
114
  const auto old_bits = grouping_data ? grouping_data->GetRadixBits() : 0;
109
115
  if (fixed_bits != old_bits) {
110
116
  const auto hash_col_idx = payload_types.size();
111
- grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, fixed_bits, hash_col_idx);
117
+ grouping_data = make_uniq<RadixPartitionedTupleData>(buffer_manager, grouping_types, fixed_bits, hash_col_idx);
112
118
  }
113
119
  }
114
120
 
121
+ unique_ptr<RadixPartitionedTupleData> PartitionGlobalSinkState::CreatePartition(idx_t new_bits) const {
122
+ const auto hash_col_idx = payload_types.size();
123
+ return make_uniq<RadixPartitionedTupleData>(buffer_manager, grouping_types, new_bits, hash_col_idx);
124
+ }
125
+
115
126
  void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
116
127
  // Have we started to combine? Then just live with it.
117
128
  if (fixed_bits || (grouping_data && !grouping_data->GetPartitions().empty())) {
@@ -121,47 +132,31 @@ void PartitionGlobalSinkState::ResizeGroupingData(idx_t cardinality) {
121
132
  const idx_t partition_size = STANDARD_ROW_GROUPS_SIZE;
122
133
  const auto bits = grouping_data ? grouping_data->GetRadixBits() : 0;
123
134
  auto new_bits = bits ? bits : 4;
124
- while (new_bits < 10 && (cardinality / RadixPartitioning::NumberOfPartitions(new_bits)) > partition_size) {
135
+ while (new_bits < max_bits && (cardinality / RadixPartitioning::NumberOfPartitions(new_bits)) > partition_size) {
125
136
  ++new_bits;
126
137
  }
127
138
 
128
139
  // Repartition the grouping data
129
140
  if (new_bits != bits) {
130
- const auto hash_col_idx = payload_types.size();
131
- grouping_data = make_uniq<RadixPartitionedColumnData>(context, grouping_types, new_bits, hash_col_idx);
141
+ grouping_data = CreatePartition(new_bits);
132
142
  }
133
143
  }
134
144
 
135
145
  void PartitionGlobalSinkState::SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append) {
136
146
  // We are done if the local_partition is right sized.
137
- auto &local_radix = local_partition->Cast<RadixPartitionedColumnData>();
138
- if (local_radix.GetRadixBits() == grouping_data->GetRadixBits()) {
147
+ auto &local_radix = local_partition->Cast<RadixPartitionedTupleData>();
148
+ const auto new_bits = grouping_data->GetRadixBits();
149
+ if (local_radix.GetRadixBits() == new_bits) {
139
150
  return;
140
151
  }
141
152
 
142
153
  // If the local partition is now too small, flush it and reallocate
143
- auto new_partition = grouping_data->CreateShared();
144
- auto new_append = make_uniq<PartitionedColumnDataAppendState>();
145
- new_partition->InitializeAppendState(*new_append);
146
-
154
+ auto new_partition = CreatePartition(new_bits);
147
155
  local_partition->FlushAppendState(*local_append);
148
- auto &local_groups = local_partition->GetPartitions();
149
- for (auto &local_group : local_groups) {
150
- ColumnDataScanState scanner;
151
- local_group->InitializeScan(scanner);
152
-
153
- DataChunk scan_chunk;
154
- local_group->InitializeScanChunk(scan_chunk);
155
- for (scan_chunk.Reset(); local_group->Scan(scanner, scan_chunk); scan_chunk.Reset()) {
156
- new_partition->Append(*new_append, scan_chunk);
157
- }
158
- }
159
-
160
- // The append state has stale pointers to the old local partition, so nuke it from orbit.
161
- new_partition->FlushAppendState(*new_append);
156
+ local_partition->Repartition(*new_partition);
162
157
 
163
158
  local_partition = std::move(new_partition);
164
- local_append = make_uniq<PartitionedColumnDataAppendState>();
159
+ local_append = make_uniq<PartitionedTupleDataAppendState>();
165
160
  local_partition->InitializeAppendState(*local_append);
166
161
  }
167
162
 
@@ -170,8 +165,8 @@ void PartitionGlobalSinkState::UpdateLocalPartition(GroupingPartition &local_par
170
165
  lock_guard<mutex> guard(lock);
171
166
 
172
167
  if (!local_partition) {
173
- local_partition = grouping_data->CreateShared();
174
- local_append = make_uniq<PartitionedColumnDataAppendState>();
168
+ local_partition = CreatePartition(grouping_data->GetRadixBits());
169
+ local_append = make_uniq<PartitionedTupleDataAppendState>();
175
170
  local_partition->InitializeAppendState(*local_append);
176
171
  return;
177
172
  }
@@ -196,7 +191,7 @@ void PartitionGlobalSinkState::CombineLocalPartition(GroupingPartition &local_pa
196
191
  grouping_data->Combine(*local_partition);
197
192
  }
198
193
 
199
- void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, GlobalSortState &global_sort) const {
194
+ void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const {
200
195
  // Set up the sort expression computation.
201
196
  vector<LogicalType> sort_types;
202
197
  ExpressionExecutor executor(context);
@@ -221,16 +216,9 @@ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data,
221
216
  for (column_t i = 0; i < payload_types.size(); ++i) {
222
217
  column_ids.emplace_back(i);
223
218
  }
224
- ColumnDataConsumer scanner(group_data, column_ids);
225
- ColumnDataConsumerScanState chunk_state;
226
- chunk_state.current_chunk_state.properties = ColumnDataScanProperties::ALLOW_ZERO_COPY;
227
- scanner.InitializeScan();
228
- for (auto chunk_idx = scanner.ChunkCount(); chunk_idx-- > 0;) {
229
- if (!scanner.AssignChunk(chunk_state)) {
230
- break;
231
- }
232
- scanner.ScanChunk(chunk_state, payload_chunk);
233
-
219
+ TupleDataScanState chunk_state;
220
+ group_data.InitializeScan(chunk_state, column_ids);
221
+ while (group_data.Scan(chunk_state, payload_chunk)) {
234
222
  sort_chunk.Reset();
235
223
  executor.Execute(payload_chunk, sort_chunk);
236
224
 
@@ -238,13 +226,12 @@ void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data,
238
226
  if (local_sort.SizeInBytes() > memory_per_thread) {
239
227
  local_sort.Sort(global_sort, true);
240
228
  }
241
- scanner.FinishChunk(chunk_state);
242
229
  }
243
230
 
244
231
  global_sort.AddLocalState(local_sort);
245
232
  }
246
233
 
247
- void PartitionGlobalSinkState::BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
234
+ void PartitionGlobalSinkState::BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &hash_group) {
248
235
  BuildSortState(group_data, *hash_group.global_sort);
249
236
 
250
237
  hash_group.count += group_data.Count();
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev1724"
2
+ #define DUCKDB_VERSION "0.8.2-dev1764"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "0e0fd210cd"
5
+ #define DUCKDB_SOURCE_ID "07b0b0a2a4"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -42,8 +42,8 @@ public:
42
42
  using Orders = vector<BoundOrderByNode>;
43
43
  using Types = vector<LogicalType>;
44
44
 
45
- using GroupingPartition = unique_ptr<PartitionedColumnData>;
46
- using GroupingAppend = unique_ptr<PartitionedColumnDataAppendState>;
45
+ using GroupingPartition = unique_ptr<PartitionedTupleData>;
46
+ using GroupingAppend = unique_ptr<PartitionedTupleDataAppendState>;
47
47
 
48
48
  static void GenerateOrderings(Orders &partitions, Orders &orders,
49
49
  const vector<unique_ptr<Expression>> &partition_bys, const Orders &order_bys,
@@ -53,13 +53,14 @@ public:
53
53
  const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
54
54
  const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
55
55
 
56
+ unique_ptr<RadixPartitionedTupleData> CreatePartition(idx_t new_bits) const;
56
57
  void SyncPartitioning(const PartitionGlobalSinkState &other);
57
58
 
58
59
  void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
59
60
  void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
60
61
 
61
- void BuildSortState(ColumnDataCollection &group_data, GlobalSortState &global_sort) const;
62
- void BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
62
+ void BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const;
63
+ void BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
63
64
 
64
65
  ClientContext &context;
65
66
  BufferManager &buffer_manager;
@@ -67,9 +68,9 @@ public:
67
68
  mutex lock;
68
69
 
69
70
  // OVER(PARTITION BY...) (hash grouping)
70
- unique_ptr<RadixPartitionedColumnData> grouping_data;
71
+ unique_ptr<RadixPartitionedTupleData> grouping_data;
71
72
  //! Payload plus hash column
72
- Types grouping_types;
73
+ TupleDataLayout grouping_types;
73
74
  //! The number of radix bits if this partition is being synced with another
74
75
  idx_t fixed_bits;
75
76
 
@@ -88,6 +89,7 @@ public:
88
89
 
89
90
  // Threading
90
91
  idx_t memory_per_thread;
92
+ idx_t max_bits;
91
93
  atomic<idx_t> count;
92
94
 
93
95
  private:
@@ -107,8 +109,8 @@ public:
107
109
  ExpressionExecutor executor;
108
110
  DataChunk group_chunk;
109
111
  DataChunk payload_chunk;
110
- unique_ptr<PartitionedColumnData> local_partition;
111
- unique_ptr<PartitionedColumnDataAppendState> local_append;
112
+ unique_ptr<PartitionedTupleData> local_partition;
113
+ unique_ptr<PartitionedTupleDataAppendState> local_append;
112
114
 
113
115
  // OVER(...) (sorting)
114
116
  size_t sort_cols;
@@ -132,7 +134,7 @@ class PartitionLocalMergeState;
132
134
 
133
135
  class PartitionGlobalMergeState {
134
136
  public:
135
- using GroupDataPtr = unique_ptr<ColumnDataCollection>;
137
+ using GroupDataPtr = unique_ptr<TupleDataCollection>;
136
138
 
137
139
  PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
138
140
 
@@ -123,7 +123,11 @@ protected:
123
123
  void BuildBufferSpace(PartitionedTupleDataAppendState &state);
124
124
  //! Create a collection for a specific a partition
125
125
  unique_ptr<TupleDataCollection> CreatePartitionCollection(idx_t partition_index) const {
126
- return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
126
+ if (allocators) {
127
+ return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
128
+ } else {
129
+ return make_uniq<TupleDataCollection>(buffer_manager, layout);
130
+ }
127
131
  }
128
132
 
129
133
  protected:
@@ -81,9 +81,7 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
81
81
 
82
82
  switch (plan_p->type) {
83
83
  case LogicalOperatorType::LOGICAL_TRANSACTION:
84
- case LogicalOperatorType::LOGICAL_SET:
85
- case LogicalOperatorType::LOGICAL_PRAGMA:
86
- return plan_p;
84
+ return plan_p; // skip optimizing simple & often-occurring plans unaffected by rewrites
87
85
  default:
88
86
  break;
89
87
  }