duckdb 0.6.1-dev86.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/connection.cpp +100 -99
- package/src/duckdb.cpp +1515 -799
- package/src/duckdb.hpp +92 -35
- package/src/duckdb_node.hpp +0 -1
- package/src/parquet-amalgamation.cpp +23249 -23239
- package/test/arrow.test.js +36 -45
package/src/duckdb.cpp
CHANGED
|
@@ -1405,7 +1405,7 @@ CopyFunctionCatalogEntry::CopyFunctionCatalogEntry(Catalog *catalog, SchemaCatal
|
|
|
1405
1405
|
//===----------------------------------------------------------------------===//
|
|
1406
1406
|
// DuckDB
|
|
1407
1407
|
//
|
|
1408
|
-
// duckdb/common/
|
|
1408
|
+
// duckdb/common/radix.hpp
|
|
1409
1409
|
//
|
|
1410
1410
|
//
|
|
1411
1411
|
//===----------------------------------------------------------------------===//
|
|
@@ -4121,6 +4121,20 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, A
|
|
|
4121
4121
|
}
|
|
4122
4122
|
}
|
|
4123
4123
|
|
|
4124
|
+
void TableCatalogEntry::UndoAlter(ClientContext &context, AlterInfo *info) {
|
|
4125
|
+
D_ASSERT(!internal);
|
|
4126
|
+
D_ASSERT(info->type == AlterType::ALTER_TABLE);
|
|
4127
|
+
auto table_info = (AlterTableInfo *)info;
|
|
4128
|
+
switch (table_info->alter_table_type) {
|
|
4129
|
+
case AlterTableType::RENAME_TABLE: {
|
|
4130
|
+
storage->info->table = this->name;
|
|
4131
|
+
break;
|
|
4132
|
+
default:
|
|
4133
|
+
break;
|
|
4134
|
+
}
|
|
4135
|
+
}
|
|
4136
|
+
}
|
|
4137
|
+
|
|
4124
4138
|
static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) {
|
|
4125
4139
|
if (expr.type == ExpressionType::COLUMN_REF) {
|
|
4126
4140
|
auto &colref = (ColumnRefExpression &)expr;
|
|
@@ -4219,6 +4233,8 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AddColumn(ClientContext &context, Ad
|
|
|
4219
4233
|
create_info->constraints.push_back(constraint->Copy());
|
|
4220
4234
|
}
|
|
4221
4235
|
Binder::BindLogicalType(context, info.new_column.TypeMutable(), schema->name);
|
|
4236
|
+
info.new_column.SetOid(columns.LogicalColumnCount());
|
|
4237
|
+
info.new_column.SetStorageOid(columns.PhysicalColumnCount());
|
|
4222
4238
|
auto col = info.new_column.Copy();
|
|
4223
4239
|
|
|
4224
4240
|
create_info->columns.AddColumn(move(col));
|
|
@@ -4966,6 +4982,9 @@ unique_ptr<CatalogEntry> CatalogEntry::AlterEntry(ClientContext &context, AlterI
|
|
|
4966
4982
|
throw InternalException("Unsupported alter type for catalog entry!");
|
|
4967
4983
|
}
|
|
4968
4984
|
|
|
4985
|
+
void CatalogEntry::UndoAlter(ClientContext &context, AlterInfo *info) {
|
|
4986
|
+
}
|
|
4987
|
+
|
|
4969
4988
|
unique_ptr<CatalogEntry> CatalogEntry::Copy(ClientContext &context) {
|
|
4970
4989
|
throw InternalException("Unsupported copy type for catalog entry!");
|
|
4971
4990
|
}
|
|
@@ -5144,6 +5163,98 @@ private:
|
|
|
5144
5163
|
|
|
5145
5164
|
|
|
5146
5165
|
|
|
5166
|
+
//===----------------------------------------------------------------------===//
|
|
5167
|
+
// DuckDB
|
|
5168
|
+
//
|
|
5169
|
+
// duckdb/catalog/mapping_value.hpp
|
|
5170
|
+
//
|
|
5171
|
+
//
|
|
5172
|
+
//===----------------------------------------------------------------------===//
|
|
5173
|
+
|
|
5174
|
+
|
|
5175
|
+
|
|
5176
|
+
|
|
5177
|
+
|
|
5178
|
+
|
|
5179
|
+
namespace duckdb {
|
|
5180
|
+
struct AlterInfo;
|
|
5181
|
+
|
|
5182
|
+
class ClientContext;
|
|
5183
|
+
|
|
5184
|
+
struct EntryIndex {
|
|
5185
|
+
EntryIndex() : catalog(nullptr), index(DConstants::INVALID_INDEX) {
|
|
5186
|
+
}
|
|
5187
|
+
EntryIndex(CatalogSet &catalog, idx_t index) : catalog(&catalog), index(index) {
|
|
5188
|
+
auto entry = catalog.entries.find(index);
|
|
5189
|
+
if (entry == catalog.entries.end()) {
|
|
5190
|
+
throw InternalException("EntryIndex - Catalog entry not found in constructor!?");
|
|
5191
|
+
}
|
|
5192
|
+
catalog.entries[index].reference_count++;
|
|
5193
|
+
}
|
|
5194
|
+
~EntryIndex() {
|
|
5195
|
+
if (!catalog) {
|
|
5196
|
+
return;
|
|
5197
|
+
}
|
|
5198
|
+
auto entry = catalog->entries.find(index);
|
|
5199
|
+
D_ASSERT(entry != catalog->entries.end());
|
|
5200
|
+
auto remaining_ref = --entry->second.reference_count;
|
|
5201
|
+
if (remaining_ref == 0) {
|
|
5202
|
+
catalog->entries.erase(index);
|
|
5203
|
+
}
|
|
5204
|
+
catalog = nullptr;
|
|
5205
|
+
}
|
|
5206
|
+
// disable copy constructors
|
|
5207
|
+
EntryIndex(const EntryIndex &other) = delete;
|
|
5208
|
+
EntryIndex &operator=(const EntryIndex &) = delete;
|
|
5209
|
+
//! enable move constructors
|
|
5210
|
+
EntryIndex(EntryIndex &&other) noexcept {
|
|
5211
|
+
catalog = nullptr;
|
|
5212
|
+
index = DConstants::INVALID_INDEX;
|
|
5213
|
+
std::swap(catalog, other.catalog);
|
|
5214
|
+
std::swap(index, other.index);
|
|
5215
|
+
}
|
|
5216
|
+
EntryIndex &operator=(EntryIndex &&other) noexcept {
|
|
5217
|
+
std::swap(catalog, other.catalog);
|
|
5218
|
+
std::swap(index, other.index);
|
|
5219
|
+
return *this;
|
|
5220
|
+
}
|
|
5221
|
+
|
|
5222
|
+
unique_ptr<CatalogEntry> &GetEntry() {
|
|
5223
|
+
auto entry = catalog->entries.find(index);
|
|
5224
|
+
if (entry == catalog->entries.end()) {
|
|
5225
|
+
throw InternalException("EntryIndex - Catalog entry not found!?");
|
|
5226
|
+
}
|
|
5227
|
+
return entry->second.entry;
|
|
5228
|
+
}
|
|
5229
|
+
idx_t GetIndex() {
|
|
5230
|
+
return index;
|
|
5231
|
+
}
|
|
5232
|
+
EntryIndex Copy() {
|
|
5233
|
+
if (catalog) {
|
|
5234
|
+
return EntryIndex(*catalog, index);
|
|
5235
|
+
} else {
|
|
5236
|
+
return EntryIndex();
|
|
5237
|
+
}
|
|
5238
|
+
}
|
|
5239
|
+
|
|
5240
|
+
private:
|
|
5241
|
+
CatalogSet *catalog;
|
|
5242
|
+
idx_t index;
|
|
5243
|
+
};
|
|
5244
|
+
|
|
5245
|
+
struct MappingValue {
|
|
5246
|
+
explicit MappingValue(EntryIndex index_p) : index(move(index_p)), timestamp(0), deleted(false), parent(nullptr) {
|
|
5247
|
+
}
|
|
5248
|
+
|
|
5249
|
+
EntryIndex index;
|
|
5250
|
+
transaction_t timestamp;
|
|
5251
|
+
bool deleted;
|
|
5252
|
+
unique_ptr<MappingValue> child;
|
|
5253
|
+
MappingValue *parent;
|
|
5254
|
+
};
|
|
5255
|
+
|
|
5256
|
+
} // namespace duckdb
|
|
5257
|
+
|
|
5147
5258
|
|
|
5148
5259
|
namespace duckdb {
|
|
5149
5260
|
|
|
@@ -5157,27 +5268,44 @@ namespace duckdb {
|
|
|
5157
5268
|
class EntryDropper {
|
|
5158
5269
|
public:
|
|
5159
5270
|
//! Both constructor and destructor are privates because they should only be called by DropEntryDependencies
|
|
5160
|
-
explicit EntryDropper(
|
|
5161
|
-
|
|
5162
|
-
old_deleted = catalog_set.entries[entry_index].get()->deleted;
|
|
5271
|
+
explicit EntryDropper(EntryIndex &entry_index_p) : entry_index(entry_index_p) {
|
|
5272
|
+
old_deleted = entry_index.GetEntry()->deleted;
|
|
5163
5273
|
}
|
|
5164
5274
|
|
|
5165
5275
|
~EntryDropper() {
|
|
5166
|
-
|
|
5276
|
+
entry_index.GetEntry()->deleted = old_deleted;
|
|
5167
5277
|
}
|
|
5168
5278
|
|
|
5169
5279
|
private:
|
|
5170
|
-
//! The current catalog_set
|
|
5171
|
-
CatalogSet &catalog_set;
|
|
5172
5280
|
//! Keeps track of the state of the entry before starting the delete
|
|
5173
5281
|
bool old_deleted;
|
|
5174
5282
|
//! Index of entry to be deleted
|
|
5175
|
-
|
|
5283
|
+
EntryIndex &entry_index;
|
|
5176
5284
|
};
|
|
5177
5285
|
|
|
5178
5286
|
CatalogSet::CatalogSet(Catalog &catalog, unique_ptr<DefaultGenerator> defaults)
|
|
5179
5287
|
: catalog(catalog), defaults(move(defaults)) {
|
|
5180
5288
|
}
|
|
5289
|
+
CatalogSet::~CatalogSet() {
|
|
5290
|
+
}
|
|
5291
|
+
|
|
5292
|
+
EntryIndex CatalogSet::PutEntry(idx_t entry_index, unique_ptr<CatalogEntry> entry) {
|
|
5293
|
+
if (entries.find(entry_index) != entries.end()) {
|
|
5294
|
+
throw InternalException("Entry with entry index \"%llu\" already exists", entry_index);
|
|
5295
|
+
}
|
|
5296
|
+
entries.insert(make_pair(entry_index, EntryValue(move(entry))));
|
|
5297
|
+
return EntryIndex(*this, entry_index);
|
|
5298
|
+
}
|
|
5299
|
+
|
|
5300
|
+
void CatalogSet::PutEntry(EntryIndex index, unique_ptr<CatalogEntry> catalog_entry) {
|
|
5301
|
+
auto entry = entries.find(index.GetIndex());
|
|
5302
|
+
if (entry == entries.end()) {
|
|
5303
|
+
throw InternalException("Entry with entry index \"%llu\" does not exist", index.GetIndex());
|
|
5304
|
+
}
|
|
5305
|
+
catalog_entry->child = move(entry->second.entry);
|
|
5306
|
+
catalog_entry->child->parent = catalog_entry.get();
|
|
5307
|
+
entry->second.entry = move(catalog_entry);
|
|
5308
|
+
}
|
|
5181
5309
|
|
|
5182
5310
|
bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_ptr<CatalogEntry> value,
|
|
5183
5311
|
unordered_set<CatalogEntry *> &dependencies) {
|
|
@@ -5188,7 +5316,7 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_
|
|
|
5188
5316
|
unique_lock<mutex> read_lock(catalog_lock);
|
|
5189
5317
|
|
|
5190
5318
|
// first check if the entry exists in the unordered set
|
|
5191
|
-
idx_t
|
|
5319
|
+
idx_t index;
|
|
5192
5320
|
auto mapping_value = GetMapping(context, name);
|
|
5193
5321
|
if (mapping_value == nullptr || mapping_value->deleted) {
|
|
5194
5322
|
// if it does not: entry has never been created
|
|
@@ -5202,17 +5330,17 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_
|
|
|
5202
5330
|
// first create a dummy deleted entry for this entry
|
|
5203
5331
|
// so transactions started before the commit of this transaction don't
|
|
5204
5332
|
// see it yet
|
|
5205
|
-
entry_index = current_entry++;
|
|
5206
5333
|
auto dummy_node = make_unique<CatalogEntry>(CatalogType::INVALID, value->catalog, name);
|
|
5207
5334
|
dummy_node->timestamp = 0;
|
|
5208
5335
|
dummy_node->deleted = true;
|
|
5209
5336
|
dummy_node->set = this;
|
|
5210
5337
|
|
|
5211
|
-
|
|
5212
|
-
|
|
5338
|
+
auto entry_index = PutEntry(current_entry++, move(dummy_node));
|
|
5339
|
+
index = entry_index.GetIndex();
|
|
5340
|
+
PutMapping(context, name, move(entry_index));
|
|
5213
5341
|
} else {
|
|
5214
|
-
|
|
5215
|
-
auto &current = *
|
|
5342
|
+
index = mapping_value->index.GetIndex();
|
|
5343
|
+
auto &current = *mapping_value->index.GetEntry();
|
|
5216
5344
|
// if it does, we have to check version numbers
|
|
5217
5345
|
if (HasConflict(context, current.timestamp)) {
|
|
5218
5346
|
// current version has been written to by a currently active
|
|
@@ -5234,16 +5362,16 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_
|
|
|
5234
5362
|
// now add the dependency set of this object to the dependency manager
|
|
5235
5363
|
catalog.dependency_manager->AddObject(context, value.get(), dependencies);
|
|
5236
5364
|
|
|
5237
|
-
|
|
5238
|
-
|
|
5365
|
+
auto value_ptr = value.get();
|
|
5366
|
+
EntryIndex entry_index(*this, index);
|
|
5367
|
+
PutEntry(move(entry_index), move(value));
|
|
5239
5368
|
// push the old entry in the undo buffer for this transaction
|
|
5240
|
-
transaction.PushCatalogEntry(
|
|
5241
|
-
entries[entry_index] = move(value);
|
|
5369
|
+
transaction.PushCatalogEntry(value_ptr->child.get());
|
|
5242
5370
|
return true;
|
|
5243
5371
|
}
|
|
5244
5372
|
|
|
5245
|
-
bool CatalogSet::GetEntryInternal(ClientContext &context,
|
|
5246
|
-
catalog_entry =
|
|
5373
|
+
bool CatalogSet::GetEntryInternal(ClientContext &context, EntryIndex &entry_index, CatalogEntry *&catalog_entry) {
|
|
5374
|
+
catalog_entry = entry_index.GetEntry().get();
|
|
5247
5375
|
// if it does: we have to retrieve the entry and to check version numbers
|
|
5248
5376
|
if (HasConflict(context, catalog_entry->timestamp)) {
|
|
5249
5377
|
// current version has been written to by a currently active
|
|
@@ -5259,21 +5387,22 @@ bool CatalogSet::GetEntryInternal(ClientContext &context, idx_t entry_index, Cat
|
|
|
5259
5387
|
return true;
|
|
5260
5388
|
}
|
|
5261
5389
|
|
|
5262
|
-
bool CatalogSet::GetEntryInternal(ClientContext &context, const string &name,
|
|
5390
|
+
bool CatalogSet::GetEntryInternal(ClientContext &context, const string &name, EntryIndex *entry_index,
|
|
5263
5391
|
CatalogEntry *&catalog_entry) {
|
|
5264
5392
|
auto mapping_value = GetMapping(context, name);
|
|
5265
5393
|
if (mapping_value == nullptr || mapping_value->deleted) {
|
|
5266
5394
|
// the entry does not exist, check if we can create a default entry
|
|
5267
5395
|
return false;
|
|
5268
5396
|
}
|
|
5269
|
-
entry_index
|
|
5270
|
-
|
|
5397
|
+
if (entry_index) {
|
|
5398
|
+
*entry_index = mapping_value->index.Copy();
|
|
5399
|
+
}
|
|
5400
|
+
return GetEntryInternal(context, mapping_value->index, catalog_entry);
|
|
5271
5401
|
}
|
|
5272
5402
|
|
|
5273
5403
|
bool CatalogSet::AlterOwnership(ClientContext &context, ChangeOwnershipInfo *info) {
|
|
5274
|
-
idx_t entry_index;
|
|
5275
5404
|
CatalogEntry *entry;
|
|
5276
|
-
if (!GetEntryInternal(context, info->name,
|
|
5405
|
+
if (!GetEntryInternal(context, info->name, nullptr, entry)) {
|
|
5277
5406
|
return false;
|
|
5278
5407
|
}
|
|
5279
5408
|
|
|
@@ -5293,9 +5422,9 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
5293
5422
|
lock_guard<mutex> write_lock(catalog.write_lock);
|
|
5294
5423
|
|
|
5295
5424
|
// first check if the entry exists in the unordered set
|
|
5296
|
-
|
|
5425
|
+
EntryIndex entry_index;
|
|
5297
5426
|
CatalogEntry *entry;
|
|
5298
|
-
if (!GetEntryInternal(context, name, entry_index, entry)) {
|
|
5427
|
+
if (!GetEntryInternal(context, name, &entry_index, entry)) {
|
|
5299
5428
|
return false;
|
|
5300
5429
|
}
|
|
5301
5430
|
if (entry->internal) {
|
|
@@ -5318,8 +5447,9 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
5318
5447
|
if (value->name != original_name) {
|
|
5319
5448
|
auto mapping_value = GetMapping(context, value->name);
|
|
5320
5449
|
if (mapping_value && !mapping_value->deleted) {
|
|
5321
|
-
auto
|
|
5322
|
-
if (!
|
|
5450
|
+
auto original_entry = GetEntryForTransaction(context, mapping_value->index.GetEntry().get());
|
|
5451
|
+
if (!original_entry->deleted) {
|
|
5452
|
+
entry->UndoAlter(context, alter_info);
|
|
5323
5453
|
string rename_err_msg =
|
|
5324
5454
|
"Could not rename \"%s\" to \"%s\": another entry with this name already exists!";
|
|
5325
5455
|
throw CatalogException(rename_err_msg, original_name, value->name);
|
|
@@ -5329,25 +5459,22 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
5329
5459
|
|
|
5330
5460
|
if (value->name != original_name) {
|
|
5331
5461
|
// Do PutMapping and DeleteMapping after dependency check
|
|
5332
|
-
PutMapping(context, value->name, entry_index);
|
|
5462
|
+
PutMapping(context, value->name, entry_index.Copy());
|
|
5333
5463
|
DeleteMapping(context, original_name);
|
|
5334
5464
|
}
|
|
5335
5465
|
|
|
5336
5466
|
value->timestamp = transaction.transaction_id;
|
|
5337
|
-
value->child = move(entries[entry_index]);
|
|
5338
|
-
value->child->parent = value.get();
|
|
5339
5467
|
value->set = this;
|
|
5468
|
+
auto new_entry = value.get();
|
|
5469
|
+
PutEntry(move(entry_index), move(value));
|
|
5340
5470
|
|
|
5341
5471
|
// serialize the AlterInfo into a temporary buffer
|
|
5342
5472
|
BufferedSerializer serializer;
|
|
5343
5473
|
alter_info->Serialize(serializer);
|
|
5344
5474
|
BinaryData serialized_alter = serializer.GetData();
|
|
5345
5475
|
|
|
5346
|
-
auto new_entry = value.get();
|
|
5347
|
-
|
|
5348
5476
|
// push the old entry in the undo buffer for this transaction
|
|
5349
|
-
transaction.PushCatalogEntry(
|
|
5350
|
-
entries[entry_index] = move(value);
|
|
5477
|
+
transaction.PushCatalogEntry(new_entry->child.get(), serialized_alter.data.get(), serialized_alter.size);
|
|
5351
5478
|
|
|
5352
5479
|
// Check the dependency manager to verify that there are no conflicting dependencies with this alter
|
|
5353
5480
|
// Note that we do this AFTER the new entry has been entirely set up in the catalog set
|
|
@@ -5358,13 +5485,13 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
5358
5485
|
return true;
|
|
5359
5486
|
}
|
|
5360
5487
|
|
|
5361
|
-
void CatalogSet::DropEntryDependencies(ClientContext &context,
|
|
5362
|
-
|
|
5488
|
+
void CatalogSet::DropEntryDependencies(ClientContext &context, EntryIndex &entry_index, CatalogEntry &entry,
|
|
5489
|
+
bool cascade) {
|
|
5363
5490
|
// Stores the deleted value of the entry before starting the process
|
|
5364
|
-
EntryDropper dropper(
|
|
5491
|
+
EntryDropper dropper(entry_index);
|
|
5365
5492
|
|
|
5366
5493
|
// To correctly delete the object and its dependencies, it temporarily is set to deleted.
|
|
5367
|
-
|
|
5494
|
+
entry_index.GetEntry()->deleted = true;
|
|
5368
5495
|
|
|
5369
5496
|
// check any dependencies of this object
|
|
5370
5497
|
entry.catalog->dependency_manager->DropObject(context, &entry, cascade);
|
|
@@ -5374,7 +5501,7 @@ void CatalogSet::DropEntryDependencies(ClientContext &context, idx_t entry_index
|
|
|
5374
5501
|
// dropper.~EntryDropper()
|
|
5375
5502
|
}
|
|
5376
5503
|
|
|
5377
|
-
void CatalogSet::DropEntryInternal(ClientContext &context,
|
|
5504
|
+
void CatalogSet::DropEntryInternal(ClientContext &context, EntryIndex entry_index, CatalogEntry &entry, bool cascade) {
|
|
5378
5505
|
auto &transaction = Transaction::GetTransaction(context);
|
|
5379
5506
|
|
|
5380
5507
|
DropEntryDependencies(context, entry_index, entry, cascade);
|
|
@@ -5384,31 +5511,30 @@ void CatalogSet::DropEntryInternal(ClientContext &context, idx_t entry_index, Ca
|
|
|
5384
5511
|
// and point it at the dummy node
|
|
5385
5512
|
auto value = make_unique<CatalogEntry>(CatalogType::DELETED_ENTRY, entry.catalog, entry.name);
|
|
5386
5513
|
value->timestamp = transaction.transaction_id;
|
|
5387
|
-
value->child = move(entries[entry_index]);
|
|
5388
|
-
value->child->parent = value.get();
|
|
5389
5514
|
value->set = this;
|
|
5390
5515
|
value->deleted = true;
|
|
5516
|
+
auto value_ptr = value.get();
|
|
5517
|
+
PutEntry(move(entry_index), move(value));
|
|
5391
5518
|
|
|
5392
5519
|
// push the old entry in the undo buffer for this transaction
|
|
5393
|
-
transaction.PushCatalogEntry(
|
|
5394
|
-
|
|
5395
|
-
entries[entry_index] = move(value);
|
|
5520
|
+
transaction.PushCatalogEntry(value_ptr->child.get());
|
|
5396
5521
|
}
|
|
5397
5522
|
|
|
5398
5523
|
bool CatalogSet::DropEntry(ClientContext &context, const string &name, bool cascade) {
|
|
5399
5524
|
// lock the catalog for writing
|
|
5400
5525
|
lock_guard<mutex> write_lock(catalog.write_lock);
|
|
5401
5526
|
// we can only delete an entry that exists
|
|
5402
|
-
|
|
5527
|
+
EntryIndex entry_index;
|
|
5403
5528
|
CatalogEntry *entry;
|
|
5404
|
-
if (!GetEntryInternal(context, name, entry_index, entry)) {
|
|
5529
|
+
if (!GetEntryInternal(context, name, &entry_index, entry)) {
|
|
5405
5530
|
return false;
|
|
5406
5531
|
}
|
|
5407
5532
|
if (entry->internal) {
|
|
5408
5533
|
throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name);
|
|
5409
5534
|
}
|
|
5410
5535
|
|
|
5411
|
-
|
|
5536
|
+
lock_guard<mutex> read_lock(catalog_lock);
|
|
5537
|
+
DropEntryInternal(context, move(entry_index), *entry, cascade);
|
|
5412
5538
|
return true;
|
|
5413
5539
|
}
|
|
5414
5540
|
|
|
@@ -5426,12 +5552,10 @@ void CatalogSet::CleanupEntry(CatalogEntry *catalog_entry) {
|
|
|
5426
5552
|
if (parent->deleted && !parent->child && !parent->parent) {
|
|
5427
5553
|
auto mapping_entry = mapping.find(parent->name);
|
|
5428
5554
|
D_ASSERT(mapping_entry != mapping.end());
|
|
5429
|
-
auto
|
|
5430
|
-
|
|
5431
|
-
|
|
5432
|
-
if (entry->second.get() == parent) {
|
|
5555
|
+
auto entry = mapping_entry->second->index.GetEntry().get();
|
|
5556
|
+
D_ASSERT(entry);
|
|
5557
|
+
if (entry == parent) {
|
|
5433
5558
|
mapping.erase(mapping_entry);
|
|
5434
|
-
entries.erase(entry);
|
|
5435
5559
|
}
|
|
5436
5560
|
}
|
|
5437
5561
|
}
|
|
@@ -5465,9 +5589,9 @@ MappingValue *CatalogSet::GetMapping(ClientContext &context, const string &name,
|
|
|
5465
5589
|
return mapping_value;
|
|
5466
5590
|
}
|
|
5467
5591
|
|
|
5468
|
-
void CatalogSet::PutMapping(ClientContext &context, const string &name,
|
|
5592
|
+
void CatalogSet::PutMapping(ClientContext &context, const string &name, EntryIndex entry_index) {
|
|
5469
5593
|
auto entry = mapping.find(name);
|
|
5470
|
-
auto new_value = make_unique<MappingValue>(entry_index);
|
|
5594
|
+
auto new_value = make_unique<MappingValue>(move(entry_index));
|
|
5471
5595
|
new_value->timestamp = Transaction::GetTransaction(context).transaction_id;
|
|
5472
5596
|
if (entry != mapping.end()) {
|
|
5473
5597
|
if (HasConflict(context, entry->second->timestamp)) {
|
|
@@ -5482,7 +5606,7 @@ void CatalogSet::PutMapping(ClientContext &context, const string &name, idx_t en
|
|
|
5482
5606
|
void CatalogSet::DeleteMapping(ClientContext &context, const string &name) {
|
|
5483
5607
|
auto entry = mapping.find(name);
|
|
5484
5608
|
D_ASSERT(entry != mapping.end());
|
|
5485
|
-
auto delete_marker = make_unique<MappingValue>(entry->second->index);
|
|
5609
|
+
auto delete_marker = make_unique<MappingValue>(entry->second->index.Copy());
|
|
5486
5610
|
delete_marker->deleted = true;
|
|
5487
5611
|
delete_marker->timestamp = Transaction::GetTransaction(context).transaction_id;
|
|
5488
5612
|
delete_marker->child = move(entry->second);
|
|
@@ -5550,15 +5674,14 @@ CatalogEntry *CatalogSet::CreateEntryInternal(ClientContext &context, unique_ptr
|
|
|
5550
5674
|
return nullptr;
|
|
5551
5675
|
}
|
|
5552
5676
|
auto &name = entry->name;
|
|
5553
|
-
auto entry_index = current_entry++;
|
|
5554
5677
|
auto catalog_entry = entry.get();
|
|
5555
5678
|
|
|
5556
5679
|
entry->set = this;
|
|
5557
5680
|
entry->timestamp = 0;
|
|
5558
5681
|
|
|
5559
|
-
|
|
5682
|
+
auto entry_index = PutEntry(current_entry++, move(entry));
|
|
5683
|
+
PutMapping(context, name, move(entry_index));
|
|
5560
5684
|
mapping[name]->timestamp = 0;
|
|
5561
|
-
entries[entry_index] = move(entry);
|
|
5562
5685
|
return catalog_entry;
|
|
5563
5686
|
}
|
|
5564
5687
|
|
|
@@ -5597,7 +5720,7 @@ CatalogEntry *CatalogSet::GetEntry(ClientContext &context, const string &name) {
|
|
|
5597
5720
|
// we found an entry for this name
|
|
5598
5721
|
// check the version numbers
|
|
5599
5722
|
|
|
5600
|
-
auto catalog_entry =
|
|
5723
|
+
auto catalog_entry = mapping_value->index.GetEntry().get();
|
|
5601
5724
|
CatalogEntry *current = GetEntryForTransaction(context, catalog_entry);
|
|
5602
5725
|
if (current->deleted || (current->name != name && !UseTimestamp(context, mapping_value->timestamp))) {
|
|
5603
5726
|
return nullptr;
|
|
@@ -5706,7 +5829,7 @@ void CatalogSet::Undo(CatalogEntry *entry) {
|
|
|
5706
5829
|
// otherwise we need to update the base entry tables
|
|
5707
5830
|
auto &name = entry->name;
|
|
5708
5831
|
to_be_removed_node->child->SetAsRoot();
|
|
5709
|
-
|
|
5832
|
+
mapping[name]->index.GetEntry() = move(to_be_removed_node->child);
|
|
5710
5833
|
entry->parent = nullptr;
|
|
5711
5834
|
}
|
|
5712
5835
|
|
|
@@ -5721,7 +5844,7 @@ void CatalogSet::Undo(CatalogEntry *entry) {
|
|
|
5721
5844
|
}
|
|
5722
5845
|
}
|
|
5723
5846
|
// we mark the catalog as being modified, since this action can lead to e.g. tables being dropped
|
|
5724
|
-
|
|
5847
|
+
catalog.ModifyCatalog();
|
|
5725
5848
|
}
|
|
5726
5849
|
|
|
5727
5850
|
void CatalogSet::CreateDefaultEntries(ClientContext &context, unique_lock<mutex> &lock) {
|
|
@@ -5754,7 +5877,7 @@ void CatalogSet::Scan(ClientContext &context, const std::function<void(CatalogEn
|
|
|
5754
5877
|
CreateDefaultEntries(context, lock);
|
|
5755
5878
|
|
|
5756
5879
|
for (auto &kv : entries) {
|
|
5757
|
-
auto entry = kv.second.get();
|
|
5880
|
+
auto entry = kv.second.entry.get();
|
|
5758
5881
|
entry = GetEntryForTransaction(context, entry);
|
|
5759
5882
|
if (!entry->deleted) {
|
|
5760
5883
|
callback(entry);
|
|
@@ -5766,7 +5889,7 @@ void CatalogSet::Scan(const std::function<void(CatalogEntry *)> &callback) {
|
|
|
5766
5889
|
// lock the catalog set
|
|
5767
5890
|
lock_guard<mutex> lock(catalog_lock);
|
|
5768
5891
|
for (auto &kv : entries) {
|
|
5769
|
-
auto entry = kv.second.get();
|
|
5892
|
+
auto entry = kv.second.entry.get();
|
|
5770
5893
|
entry = GetCommittedEntry(entry);
|
|
5771
5894
|
if (!entry->deleted) {
|
|
5772
5895
|
callback(entry);
|
|
@@ -6182,14 +6305,17 @@ static DefaultView internal_views[] = {
|
|
|
6182
6305
|
{"pg_catalog", "pg_attrdef", "SELECT column_index oid, table_oid adrelid, column_index adnum, column_default adbin from duckdb_columns() where column_default is not null;"},
|
|
6183
6306
|
{"pg_catalog", "pg_class", "SELECT table_oid oid, table_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, estimated_size::real reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, index_count > 0 relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'r' relkind, column_count relnatts, check_constraint_count relchecks, false relhasoids, has_primary_key relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_tables() UNION ALL SELECT view_oid oid, view_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'v' relkind, column_count relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_views() UNION ALL SELECT sequence_oid oid, sequence_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'S' relkind, 0 relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 
relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_sequences() UNION ALL SELECT index_oid oid, index_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, 't' relpersistence, 'i' relkind, NULL relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_indexes()"},
|
|
6184
6307
|
{"pg_catalog", "pg_constraint", "SELECT table_oid*1000000+constraint_index oid, constraint_text conname, schema_oid connamespace, CASE constraint_type WHEN 'CHECK' then 'c' WHEN 'UNIQUE' then 'u' WHEN 'PRIMARY KEY' THEN 'p' WHEN 'FOREIGN KEY' THEN 'f' ELSE 'x' END contype, false condeferrable, false condeferred, true convalidated, table_oid conrelid, 0 contypid, 0 conindid, 0 conparentid, 0 confrelid, NULL confupdtype, NULL confdeltype, NULL confmatchtype, true conislocal, 0 coninhcount, false connoinherit, constraint_column_indexes conkey, NULL confkey, NULL conpfeqop, NULL conppeqop, NULL conffeqop, NULL conexclop, expression conbin FROM duckdb_constraints()"},
|
|
6308
|
+
{"pg_catalog", "pg_database", "SELECT 0 oid, 'main' datname"},
|
|
6185
6309
|
{"pg_catalog", "pg_depend", "SELECT * FROM duckdb_dependencies()"},
|
|
6186
6310
|
{"pg_catalog", "pg_description", "SELECT NULL objoid, NULL classoid, NULL objsubid, NULL description WHERE 1=0"},
|
|
6187
6311
|
{"pg_catalog", "pg_enum", "SELECT NULL oid, NULL enumtypid, NULL enumsortorder, NULL enumlabel WHERE 1=0"},
|
|
6188
6312
|
{"pg_catalog", "pg_index", "SELECT index_oid indexrelid, table_oid indrelid, 0 indnatts, 0 indnkeyatts, is_unique indisunique, is_primary indisprimary, false indisexclusion, true indimmediate, false indisclustered, true indisvalid, false indcheckxmin, true indisready, true indislive, false indisreplident, NULL::INT[] indkey, NULL::OID[] indcollation, NULL::OID[] indclass, NULL::INT[] indoption, expressions indexprs, NULL indpred FROM duckdb_indexes()"},
|
|
6189
6313
|
{"pg_catalog", "pg_indexes", "SELECT schema_name schemaname, table_name tablename, index_name indexname, NULL \"tablespace\", sql indexdef FROM duckdb_indexes()"},
|
|
6190
6314
|
{"pg_catalog", "pg_namespace", "SELECT oid, schema_name nspname, 0 nspowner, NULL nspacl FROM duckdb_schemas()"},
|
|
6315
|
+
{"pg_catalog", "pg_proc", "SELECT f.function_oid oid, function_name proname, s.oid pronamespace FROM duckdb_functions() f LEFT JOIN duckdb_schemas() s USING (schema_name)"},
|
|
6191
6316
|
{"pg_catalog", "pg_sequence", "SELECT sequence_oid seqrelid, 0 seqtypid, start_value seqstart, increment_by seqincrement, max_value seqmax, min_value seqmin, 0 seqcache, cycle seqcycle FROM duckdb_sequences()"},
|
|
6192
6317
|
{"pg_catalog", "pg_sequences", "SELECT schema_name schemaname, sequence_name sequencename, 'duckdb' sequenceowner, 0 data_type, start_value, min_value, max_value, increment_by, cycle, 0 cache_size, last_value FROM duckdb_sequences()"},
|
|
6318
|
+
{"pg_catalog", "pg_settings", "SELECT name, value setting, description short_desc, CASE WHEN input_type = 'VARCHAR' THEN 'string' WHEN input_type = 'BOOLEAN' THEN 'bool' WHEN input_type IN ('BIGINT', 'UBIGINT') THEN 'integer' ELSE input_type END vartype FROM duckdb_settings()"},
|
|
6193
6319
|
{"pg_catalog", "pg_tables", "SELECT schema_name schemaname, table_name tablename, 'duckdb' tableowner, NULL \"tablespace\", index_count > 0 hasindexes, false hasrules, false hastriggers FROM duckdb_tables()"},
|
|
6194
6320
|
{"pg_catalog", "pg_tablespace", "SELECT 0 oid, 'pg_default' spcname, 0 spcowner, NULL spcacl, NULL spcoptions"},
|
|
6195
6321
|
{"pg_catalog", "pg_type", "SELECT type_oid oid, format_pg_type(type_name) typname, schema_oid typnamespace, 0 typowner, type_size typlen, false typbyval, 'b' typtype, CASE WHEN type_category='NUMERIC' THEN 'N' WHEN type_category='STRING' THEN 'S' WHEN type_category='DATETIME' THEN 'D' WHEN type_category='BOOLEAN' THEN 'B' WHEN type_category='COMPOSITE' THEN 'C' WHEN type_category='USER' THEN 'U' ELSE 'X' END typcategory, false typispreferred, true typisdefined, NULL typdelim, NULL typrelid, NULL typsubscript, NULL typelem, NULL typarray, NULL typinput, NULL typoutput, NULL typreceive, NULL typsend, NULL typmodin, NULL typmodout, NULL typanalyze, 'd' typalign, 'p' typstorage, NULL typnotnull, NULL typbasetype, NULL typtypmod, NULL typndims, NULL typcollation, NULL typdefaultbin, NULL typdefault, NULL typacl FROM duckdb_types();"},
|
|
@@ -6256,6 +6382,7 @@ vector<string> DefaultViewGenerator::GetDefaultEntries() {
|
|
|
6256
6382
|
|
|
6257
6383
|
|
|
6258
6384
|
|
|
6385
|
+
|
|
6259
6386
|
namespace duckdb {
|
|
6260
6387
|
|
|
6261
6388
|
DependencyManager::DependencyManager(Catalog &catalog) : catalog(catalog) {
|
|
@@ -6265,12 +6392,11 @@ void DependencyManager::AddObject(ClientContext &context, CatalogEntry *object,
|
|
|
6265
6392
|
unordered_set<CatalogEntry *> &dependencies) {
|
|
6266
6393
|
// check for each object in the sources if they were not deleted yet
|
|
6267
6394
|
for (auto &dependency : dependencies) {
|
|
6268
|
-
idx_t entry_index;
|
|
6269
6395
|
CatalogEntry *catalog_entry;
|
|
6270
6396
|
if (!dependency->set) {
|
|
6271
6397
|
throw InternalException("Dependency has no set");
|
|
6272
6398
|
}
|
|
6273
|
-
if (!dependency->set->GetEntryInternal(context, dependency->name,
|
|
6399
|
+
if (!dependency->set->GetEntryInternal(context, dependency->name, nullptr, catalog_entry)) {
|
|
6274
6400
|
throw InternalException("Dependency has already been deleted?");
|
|
6275
6401
|
}
|
|
6276
6402
|
}
|
|
@@ -6298,10 +6424,9 @@ void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object,
|
|
|
6298
6424
|
if (mapping_value == nullptr) {
|
|
6299
6425
|
continue;
|
|
6300
6426
|
}
|
|
6301
|
-
idx_t entry_index = mapping_value->index;
|
|
6302
6427
|
CatalogEntry *dependency_entry;
|
|
6303
6428
|
|
|
6304
|
-
if (!catalog_set.GetEntryInternal(context,
|
|
6429
|
+
if (!catalog_set.GetEntryInternal(context, mapping_value->index, dependency_entry)) {
|
|
6305
6430
|
// the dependent object was already deleted, no conflict
|
|
6306
6431
|
continue;
|
|
6307
6432
|
}
|
|
@@ -6309,7 +6434,7 @@ void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object,
|
|
|
6309
6434
|
if (cascade || dep.dependency_type == DependencyType::DEPENDENCY_AUTOMATIC ||
|
|
6310
6435
|
dep.dependency_type == DependencyType::DEPENDENCY_OWNS) {
|
|
6311
6436
|
// cascade: drop the dependent object
|
|
6312
|
-
catalog_set.DropEntryInternal(context,
|
|
6437
|
+
catalog_set.DropEntryInternal(context, mapping_value->index.Copy(), *dependency_entry, cascade);
|
|
6313
6438
|
} else {
|
|
6314
6439
|
// no cascade and there are objects that depend on this object: throw error
|
|
6315
6440
|
throw DependencyException("Cannot drop entry \"%s\" because there are entries that "
|
|
@@ -6329,9 +6454,8 @@ void DependencyManager::AlterObject(ClientContext &context, CatalogEntry *old_ob
|
|
|
6329
6454
|
for (auto &dep : dependent_objects) {
|
|
6330
6455
|
// look up the entry in the catalog set
|
|
6331
6456
|
auto &catalog_set = *dep.entry->set;
|
|
6332
|
-
idx_t entry_index;
|
|
6333
6457
|
CatalogEntry *dependency_entry;
|
|
6334
|
-
if (!catalog_set.GetEntryInternal(context, dep.entry->name,
|
|
6458
|
+
if (!catalog_set.GetEntryInternal(context, dep.entry->name, nullptr, dependency_entry)) {
|
|
6335
6459
|
// the dependent object was already deleted, no conflict
|
|
6336
6460
|
continue;
|
|
6337
6461
|
}
|
|
@@ -9213,6 +9337,13 @@ void BoxRenderer::Render(ClientContext &context, const vector<string> &names, co
|
|
|
9213
9337
|
// figure out how many/which rows to render
|
|
9214
9338
|
idx_t row_count = result.Count();
|
|
9215
9339
|
idx_t rows_to_render = MinValue<idx_t>(row_count, config.max_rows);
|
|
9340
|
+
if (row_count <= config.max_rows + 3) {
|
|
9341
|
+
// hiding rows adds 3 extra rows
|
|
9342
|
+
// so hiding rows makes no sense if we are only slightly over the limit
|
|
9343
|
+
// if we are 1 row over the limit hiding rows will actually increase the number of lines we display!
|
|
9344
|
+
// in this case render all the rows
|
|
9345
|
+
rows_to_render = row_count;
|
|
9346
|
+
}
|
|
9216
9347
|
idx_t top_rows;
|
|
9217
9348
|
idx_t bottom_rows;
|
|
9218
9349
|
if (rows_to_render == row_count) {
|
|
@@ -30473,7 +30604,7 @@ public:
|
|
|
30473
30604
|
|
|
30474
30605
|
private:
|
|
30475
30606
|
void AllocateEmptyBlock(idx_t size);
|
|
30476
|
-
|
|
30607
|
+
BufferHandle AllocateBlock();
|
|
30477
30608
|
BufferHandle Pin(uint32_t block_id);
|
|
30478
30609
|
BufferHandle PinInternal(uint32_t block_id);
|
|
30479
30610
|
|
|
@@ -30587,11 +30718,7 @@ protected:
|
|
|
30587
30718
|
return make_unique<ColumnDataCollection>(allocators->allocators[partition_index], types);
|
|
30588
30719
|
}
|
|
30589
30720
|
//! Create a DataChunk used for buffering appends to the partition
|
|
30590
|
-
unique_ptr<DataChunk> CreatePartitionBuffer() const
|
|
30591
|
-
auto result = make_unique<DataChunk>();
|
|
30592
|
-
result->Initialize(Allocator::Get(context), types, BufferSize());
|
|
30593
|
-
return result;
|
|
30594
|
-
}
|
|
30721
|
+
unique_ptr<DataChunk> CreatePartitionBuffer() const;
|
|
30595
30722
|
|
|
30596
30723
|
protected:
|
|
30597
30724
|
PartitionedColumnDataType type;
|
|
@@ -30968,6 +31095,9 @@ struct PartitionFunctor {
|
|
|
30968
31095
|
const auto row_width = layout.GetRowWidth();
|
|
30969
31096
|
const auto has_heap = !layout.AllConstant();
|
|
30970
31097
|
|
|
31098
|
+
block_collection.VerifyBlockSizes();
|
|
31099
|
+
string_heap.VerifyBlockSizes();
|
|
31100
|
+
|
|
30971
31101
|
// Fixed-size data
|
|
30972
31102
|
RowDataBlock *partition_data_blocks[CONSTANTS::NUM_PARTITIONS];
|
|
30973
31103
|
vector<BufferHandle> partition_data_handles;
|
|
@@ -31102,6 +31232,10 @@ struct PartitionFunctor {
|
|
|
31102
31232
|
#ifdef DEBUG
|
|
31103
31233
|
for (idx_t bin = 0; bin < CONSTANTS::NUM_PARTITIONS; bin++) {
|
|
31104
31234
|
auto &p_block_collection = *partition_block_collections[bin];
|
|
31235
|
+
p_block_collection.VerifyBlockSizes();
|
|
31236
|
+
if (!layout.AllConstant()) {
|
|
31237
|
+
partition_string_heaps[bin]->VerifyBlockSizes();
|
|
31238
|
+
}
|
|
31105
31239
|
idx_t p_count = 0;
|
|
31106
31240
|
for (idx_t b = 0; b < p_block_collection.blocks.size(); b++) {
|
|
31107
31241
|
auto &data_block = *p_block_collection.blocks[b];
|
|
@@ -39011,14 +39145,13 @@ static void SortTiedBlobs(BufferManager &buffer_manager, const data_ptr_t datapt
|
|
|
39011
39145
|
return order * Comparators::CompareVal(left_ptr, right_ptr, logical_type) < 0;
|
|
39012
39146
|
});
|
|
39013
39147
|
// Re-order
|
|
39014
|
-
auto temp_block =
|
|
39015
|
-
|
|
39016
|
-
data_ptr_t temp_ptr = temp_block.Ptr();
|
|
39148
|
+
auto temp_block = buffer_manager.GetBufferAllocator().Allocate((end - start) * sort_layout.entry_size);
|
|
39149
|
+
data_ptr_t temp_ptr = temp_block.get();
|
|
39017
39150
|
for (idx_t i = 0; i < end - start; i++) {
|
|
39018
39151
|
FastMemcpy(temp_ptr, entry_ptrs[i], sort_layout.entry_size);
|
|
39019
39152
|
temp_ptr += sort_layout.entry_size;
|
|
39020
39153
|
}
|
|
39021
|
-
memcpy(dataptr + start * sort_layout.entry_size, temp_block.
|
|
39154
|
+
memcpy(dataptr + start * sort_layout.entry_size, temp_block.get(), (end - start) * sort_layout.entry_size);
|
|
39022
39155
|
// Determine if there are still ties (if this is not the last column)
|
|
39023
39156
|
if (tie_col < sort_layout.column_count - 1) {
|
|
39024
39157
|
data_ptr_t idx_ptr = dataptr + start * sort_layout.entry_size + sort_layout.comparison_size;
|
|
@@ -39083,7 +39216,7 @@ static void ComputeTies(data_ptr_t dataptr, const idx_t &count, const idx_t &col
|
|
|
39083
39216
|
//! Textbook LSD radix sort
|
|
39084
39217
|
void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, const idx_t &count, const idx_t &col_offset,
|
|
39085
39218
|
const idx_t &row_width, const idx_t &sorting_size) {
|
|
39086
|
-
auto temp_block = buffer_manager.Allocate(
|
|
39219
|
+
auto temp_block = buffer_manager.GetBufferAllocator().Allocate(count * row_width);
|
|
39087
39220
|
bool swap = false;
|
|
39088
39221
|
|
|
39089
39222
|
idx_t counts[SortConstants::VALUES_PER_RADIX];
|
|
@@ -39091,8 +39224,8 @@ void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, cons
|
|
|
39091
39224
|
// Init counts to 0
|
|
39092
39225
|
memset(counts, 0, sizeof(counts));
|
|
39093
39226
|
// Const some values for convenience
|
|
39094
|
-
const data_ptr_t source_ptr = swap ? temp_block.
|
|
39095
|
-
const data_ptr_t target_ptr = swap ? dataptr : temp_block.
|
|
39227
|
+
const data_ptr_t source_ptr = swap ? temp_block.get() : dataptr;
|
|
39228
|
+
const data_ptr_t target_ptr = swap ? dataptr : temp_block.get();
|
|
39096
39229
|
const idx_t offset = col_offset + sorting_size - r;
|
|
39097
39230
|
// Collect counts
|
|
39098
39231
|
data_ptr_t offset_ptr = source_ptr + offset;
|
|
@@ -39120,7 +39253,7 @@ void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, cons
|
|
|
39120
39253
|
}
|
|
39121
39254
|
// Move data back to original buffer (if it was swapped)
|
|
39122
39255
|
if (swap) {
|
|
39123
|
-
memcpy(dataptr, temp_block.
|
|
39256
|
+
memcpy(dataptr, temp_block.get(), count * row_width);
|
|
39124
39257
|
}
|
|
39125
39258
|
}
|
|
39126
39259
|
|
|
@@ -39468,6 +39601,9 @@ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
|
|
|
39468
39601
|
}
|
|
39469
39602
|
|
|
39470
39603
|
LocalSortState::LocalSortState() : initialized(false) {
|
|
39604
|
+
if (!Radix::IsLittleEndian()) {
|
|
39605
|
+
throw NotImplementedException("Sorting is not supported on big endian architectures");
|
|
39606
|
+
}
|
|
39471
39607
|
}
|
|
39472
39608
|
|
|
39473
39609
|
void LocalSortState::Initialize(GlobalSortState &global_sort_state, BufferManager &buffer_manager_p) {
|
|
@@ -43143,13 +43279,14 @@ BufferHandle ColumnDataAllocator::PinInternal(uint32_t block_id) {
|
|
|
43143
43279
|
return alloc.buffer_manager->Pin(blocks[block_id].handle);
|
|
43144
43280
|
}
|
|
43145
43281
|
|
|
43146
|
-
|
|
43282
|
+
BufferHandle ColumnDataAllocator::AllocateBlock() {
|
|
43147
43283
|
D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR);
|
|
43148
43284
|
BlockMetaData data;
|
|
43149
43285
|
data.size = 0;
|
|
43150
43286
|
data.capacity = Storage::BLOCK_SIZE;
|
|
43151
|
-
|
|
43287
|
+
auto pin = alloc.buffer_manager->Allocate(Storage::BLOCK_SIZE, false, &data.handle);
|
|
43152
43288
|
blocks.push_back(move(data));
|
|
43289
|
+
return pin;
|
|
43153
43290
|
}
|
|
43154
43291
|
|
|
43155
43292
|
void ColumnDataAllocator::AllocateEmptyBlock(idx_t size) {
|
|
@@ -43183,11 +43320,10 @@ void ColumnDataAllocator::AllocateBuffer(idx_t size, uint32_t &block_id, uint32_
|
|
|
43183
43320
|
ChunkManagementState *chunk_state) {
|
|
43184
43321
|
D_ASSERT(allocated_data.empty());
|
|
43185
43322
|
if (blocks.empty() || blocks.back().Capacity() < size) {
|
|
43186
|
-
AllocateBlock();
|
|
43187
|
-
if (chunk_state
|
|
43188
|
-
|
|
43323
|
+
auto pinned_block = AllocateBlock();
|
|
43324
|
+
if (chunk_state) {
|
|
43325
|
+
D_ASSERT(!blocks.empty());
|
|
43189
43326
|
auto new_block_id = blocks.size() - 1;
|
|
43190
|
-
auto pinned_block = alloc.buffer_manager->Pin(last_block.handle);
|
|
43191
43327
|
chunk_state->handles[new_block_id] = move(pinned_block);
|
|
43192
43328
|
}
|
|
43193
43329
|
}
|
|
@@ -44132,7 +44268,7 @@ namespace duckdb {
|
|
|
44132
44268
|
|
|
44133
44269
|
ColumnDataCollectionSegment::ColumnDataCollectionSegment(shared_ptr<ColumnDataAllocator> allocator_p,
|
|
44134
44270
|
vector<LogicalType> types_p)
|
|
44135
|
-
: allocator(move(allocator_p)), types(move(types_p)), count(0) {
|
|
44271
|
+
: allocator(move(allocator_p)), types(move(types_p)), count(0), heap(allocator->GetAllocator()) {
|
|
44136
44272
|
}
|
|
44137
44273
|
|
|
44138
44274
|
idx_t ColumnDataCollectionSegment::GetDataSize(idx_t type_size) {
|
|
@@ -47530,6 +47666,12 @@ void PartitionedColumnData::InitializeAppendState(PartitionedColumnDataAppendSta
|
|
|
47530
47666
|
InitializeAppendStateInternal(state);
|
|
47531
47667
|
}
|
|
47532
47668
|
|
|
47669
|
+
unique_ptr<DataChunk> PartitionedColumnData::CreatePartitionBuffer() const {
|
|
47670
|
+
auto result = make_unique<DataChunk>();
|
|
47671
|
+
result->Initialize(BufferManager::GetBufferManager(context).GetBufferAllocator(), types, BufferSize());
|
|
47672
|
+
return result;
|
|
47673
|
+
}
|
|
47674
|
+
|
|
47533
47675
|
void PartitionedColumnData::Append(PartitionedColumnDataAppendState &state, DataChunk &input) {
|
|
47534
47676
|
// Compute partition indices and store them in state.partition_indices
|
|
47535
47677
|
ComputePartitionIndices(state, input);
|
|
@@ -48216,7 +48358,7 @@ buffer_ptr<SelectionData> SelectionVector::Slice(const SelectionVector &sel, idx
|
|
|
48216
48358
|
|
|
48217
48359
|
namespace duckdb {
|
|
48218
48360
|
|
|
48219
|
-
StringHeap::StringHeap() : allocator(
|
|
48361
|
+
StringHeap::StringHeap(Allocator &allocator) : allocator(allocator) {
|
|
48220
48362
|
}
|
|
48221
48363
|
|
|
48222
48364
|
void StringHeap::Destroy() {
|
|
@@ -57240,7 +57382,9 @@ static inline void ListLoopHash(Vector &input, Vector &hashes, const SelectionVe
|
|
|
57240
57382
|
const auto child_count = ListVector::GetListSize(input);
|
|
57241
57383
|
|
|
57242
57384
|
Vector child_hashes(LogicalType::HASH, child_count);
|
|
57243
|
-
|
|
57385
|
+
if (child_count > 0) {
|
|
57386
|
+
VectorOperations::Hash(child, child_hashes, child_count);
|
|
57387
|
+
}
|
|
57244
57388
|
auto chdata = FlatVector::GetData<hash_t>(child_hashes);
|
|
57245
57389
|
|
|
57246
57390
|
// Reduce the number of entries to check to the non-empty ones
|
|
@@ -58640,11 +58784,13 @@ public:
|
|
|
58640
58784
|
ColumnBindingResolver();
|
|
58641
58785
|
|
|
58642
58786
|
void VisitOperator(LogicalOperator &op) override;
|
|
58787
|
+
static void Verify(LogicalOperator &op);
|
|
58643
58788
|
|
|
58644
58789
|
protected:
|
|
58645
58790
|
vector<ColumnBinding> bindings;
|
|
58646
58791
|
|
|
58647
58792
|
unique_ptr<Expression> VisitReplace(BoundColumnRefExpression &expr, unique_ptr<Expression> *expr_ptr) override;
|
|
58793
|
+
static unordered_set<idx_t> VerifyInternal(LogicalOperator &op);
|
|
58648
58794
|
};
|
|
58649
58795
|
} // namespace duckdb
|
|
58650
58796
|
|
|
@@ -58986,6 +59132,35 @@ unique_ptr<Expression> ColumnBindingResolver::VisitReplace(BoundColumnRefExpress
|
|
|
58986
59132
|
// LCOV_EXCL_STOP
|
|
58987
59133
|
}
|
|
58988
59134
|
|
|
59135
|
+
unordered_set<idx_t> ColumnBindingResolver::VerifyInternal(LogicalOperator &op) {
|
|
59136
|
+
unordered_set<idx_t> result;
|
|
59137
|
+
for (auto &child : op.children) {
|
|
59138
|
+
auto child_indexes = VerifyInternal(*child);
|
|
59139
|
+
for (auto index : child_indexes) {
|
|
59140
|
+
D_ASSERT(index != DConstants::INVALID_INDEX);
|
|
59141
|
+
if (result.find(index) != result.end()) {
|
|
59142
|
+
throw InternalException("Duplicate table index \"%lld\" found", index);
|
|
59143
|
+
}
|
|
59144
|
+
result.insert(index);
|
|
59145
|
+
}
|
|
59146
|
+
}
|
|
59147
|
+
auto indexes = op.GetTableIndex();
|
|
59148
|
+
for (auto index : indexes) {
|
|
59149
|
+
D_ASSERT(index != DConstants::INVALID_INDEX);
|
|
59150
|
+
if (result.find(index) != result.end()) {
|
|
59151
|
+
throw InternalException("Duplicate table index \"%lld\" found", index);
|
|
59152
|
+
}
|
|
59153
|
+
result.insert(index);
|
|
59154
|
+
}
|
|
59155
|
+
return result;
|
|
59156
|
+
}
|
|
59157
|
+
|
|
59158
|
+
void ColumnBindingResolver::Verify(LogicalOperator &op) {
|
|
59159
|
+
#ifdef DEBUG
|
|
59160
|
+
VerifyInternal(op);
|
|
59161
|
+
#endif
|
|
59162
|
+
}
|
|
59163
|
+
|
|
58989
59164
|
} // namespace duckdb
|
|
58990
59165
|
|
|
58991
59166
|
|
|
@@ -60516,6 +60691,9 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
|
60516
60691
|
DatabaseInstance &db, idx_t block_id, idx_t block_offset)
|
|
60517
60692
|
: Index(IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type), db(db),
|
|
60518
60693
|
estimated_art_size(0), estimated_key_size(16) {
|
|
60694
|
+
if (!Radix::IsLittleEndian()) {
|
|
60695
|
+
throw NotImplementedException("ART indexes are not supported on big endian architectures");
|
|
60696
|
+
}
|
|
60519
60697
|
if (block_id != DConstants::INVALID_INDEX) {
|
|
60520
60698
|
tree = Node::Deserialize(*this, block_id, block_offset);
|
|
60521
60699
|
} else {
|
|
@@ -60799,7 +60977,7 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
|
|
|
60799
60977
|
auto payload_types = logical_types;
|
|
60800
60978
|
payload_types.emplace_back(LogicalType::ROW_TYPE);
|
|
60801
60979
|
|
|
60802
|
-
ArenaAllocator arena_allocator(
|
|
60980
|
+
ArenaAllocator arena_allocator(BufferAllocator::Get(db));
|
|
60803
60981
|
vector<Key> keys(STANDARD_VECTOR_SIZE);
|
|
60804
60982
|
|
|
60805
60983
|
auto temp_art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
|
|
@@ -60856,7 +61034,7 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
|
60856
61034
|
D_ASSERT(logical_types[0] == input.data[0].GetType());
|
|
60857
61035
|
|
|
60858
61036
|
// generate the keys for the given input
|
|
60859
|
-
ArenaAllocator arena_allocator(
|
|
61037
|
+
ArenaAllocator arena_allocator(BufferAllocator::Get(db));
|
|
60860
61038
|
vector<Key> keys(input.size());
|
|
60861
61039
|
GenerateKeys(arena_allocator, input, keys);
|
|
60862
61040
|
|
|
@@ -61016,7 +61194,7 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
|
61016
61194
|
estimated_art_size -= released_memory;
|
|
61017
61195
|
|
|
61018
61196
|
// then generate the keys for the given input
|
|
61019
|
-
ArenaAllocator arena_allocator(
|
|
61197
|
+
ArenaAllocator arena_allocator(BufferAllocator::Get(db));
|
|
61020
61198
|
vector<Key> keys(expression.size());
|
|
61021
61199
|
GenerateKeys(arena_allocator, expression, keys);
|
|
61022
61200
|
|
|
@@ -61260,7 +61438,7 @@ bool ART::Scan(Transaction &transaction, DataTable &table, IndexScanState &table
|
|
|
61260
61438
|
|
|
61261
61439
|
// FIXME: the key directly owning the data for a single key might be more efficient
|
|
61262
61440
|
D_ASSERT(state->values[0].type().InternalType() == types[0]);
|
|
61263
|
-
ArenaAllocator arena_allocator(Allocator::
|
|
61441
|
+
ArenaAllocator arena_allocator(Allocator::Get(db));
|
|
61264
61442
|
auto key = CreateKey(arena_allocator, types[0], state->values[0]);
|
|
61265
61443
|
|
|
61266
61444
|
if (state->values[1].IsNull()) {
|
|
@@ -61335,7 +61513,7 @@ void ART::VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, str
|
|
|
61335
61513
|
ExecuteExpressions(chunk, expression_chunk);
|
|
61336
61514
|
|
|
61337
61515
|
// generate the keys for the given input
|
|
61338
|
-
ArenaAllocator arena_allocator(
|
|
61516
|
+
ArenaAllocator arena_allocator(BufferAllocator::Get(db));
|
|
61339
61517
|
vector<Key> keys(expression_chunk.size());
|
|
61340
61518
|
GenerateKeys(arena_allocator, expression_chunk, keys);
|
|
61341
61519
|
|
|
@@ -63513,7 +63691,7 @@ private:
|
|
|
63513
63691
|
mutex pinned_handles_lock;
|
|
63514
63692
|
vector<BufferHandle> pinned_handles;
|
|
63515
63693
|
//! The hash map of the HT, created after finalization
|
|
63516
|
-
|
|
63694
|
+
AllocatedData hash_map;
|
|
63517
63695
|
//! Whether or not NULL values are considered equal in each of the comparisons
|
|
63518
63696
|
vector<bool> null_values_are_equal;
|
|
63519
63697
|
|
|
@@ -63597,9 +63775,10 @@ public:
|
|
|
63597
63775
|
idx_t SwizzledSize() const {
|
|
63598
63776
|
return swizzled_block_collection->SizeInBytes() + swizzled_string_heap->SizeInBytes();
|
|
63599
63777
|
}
|
|
63600
|
-
//! Capacity of the pointer table given the
|
|
63778
|
+
//! Capacity of the pointer table given the ht count
|
|
63779
|
+
//! (minimum of 1024 to prevent collision chance for small HT's)
|
|
63601
63780
|
static idx_t PointerTableCapacity(idx_t count) {
|
|
63602
|
-
return
|
|
63781
|
+
return MaxValue<idx_t>(NextPowerOfTwo(count * 2), 1 << 10);
|
|
63603
63782
|
}
|
|
63604
63783
|
|
|
63605
63784
|
//! Swizzle the blocks in this HT (moves from block_collection and string_heap to swizzled_...)
|
|
@@ -63770,7 +63949,7 @@ void JoinHashTable::ApplyBitmask(Vector &hashes, const SelectionVector &sel, idx
|
|
|
63770
63949
|
|
|
63771
63950
|
auto hash_data = (hash_t *)hdata.data;
|
|
63772
63951
|
auto result_data = FlatVector::GetData<data_ptr_t *>(pointers);
|
|
63773
|
-
auto main_ht = (data_ptr_t *)hash_map.
|
|
63952
|
+
auto main_ht = (data_ptr_t *)hash_map.get();
|
|
63774
63953
|
for (idx_t i = 0; i < count; i++) {
|
|
63775
63954
|
auto rindex = sel.get_index(i);
|
|
63776
63955
|
auto hindex = hdata.sel->get_index(rindex);
|
|
@@ -63952,7 +64131,7 @@ void JoinHashTable::InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_loc
|
|
|
63952
64131
|
hashes.Flatten(count);
|
|
63953
64132
|
D_ASSERT(hashes.GetVectorType() == VectorType::FLAT_VECTOR);
|
|
63954
64133
|
|
|
63955
|
-
auto pointers = (atomic<data_ptr_t> *)hash_map.
|
|
64134
|
+
auto pointers = (atomic<data_ptr_t> *)hash_map.get();
|
|
63956
64135
|
auto indices = FlatVector::GetData<hash_t>(hashes);
|
|
63957
64136
|
|
|
63958
64137
|
if (parallel) {
|
|
@@ -63969,19 +64148,19 @@ void JoinHashTable::InitializePointerTable() {
|
|
|
63969
64148
|
D_ASSERT((capacity & (capacity - 1)) == 0);
|
|
63970
64149
|
bitmask = capacity - 1;
|
|
63971
64150
|
|
|
63972
|
-
if (!hash_map.
|
|
64151
|
+
if (!hash_map.get()) {
|
|
63973
64152
|
// allocate the HT if not yet done
|
|
63974
|
-
hash_map = buffer_manager.Allocate(capacity * sizeof(data_ptr_t));
|
|
64153
|
+
hash_map = buffer_manager.GetBufferAllocator().Allocate(capacity * sizeof(data_ptr_t));
|
|
63975
64154
|
}
|
|
63976
|
-
D_ASSERT(hash_map.
|
|
64155
|
+
D_ASSERT(hash_map.GetSize() == capacity * sizeof(data_ptr_t));
|
|
63977
64156
|
|
|
63978
64157
|
// initialize HT with all-zero entries
|
|
63979
|
-
memset(hash_map.
|
|
64158
|
+
memset(hash_map.get(), 0, capacity * sizeof(data_ptr_t));
|
|
63980
64159
|
}
|
|
63981
64160
|
|
|
63982
64161
|
void JoinHashTable::Finalize(idx_t block_idx_start, idx_t block_idx_end, bool parallel) {
|
|
63983
64162
|
// Pointer table should be allocated
|
|
63984
|
-
D_ASSERT(hash_map.
|
|
64163
|
+
D_ASSERT(hash_map.get());
|
|
63985
64164
|
|
|
63986
64165
|
vector<BufferHandle> local_pinned_handles;
|
|
63987
64166
|
|
|
@@ -64863,7 +65042,8 @@ ProbeSpillLocalState ProbeSpill::RegisterThread() {
|
|
|
64863
65042
|
result.local_partition = local_partitions.back().get();
|
|
64864
65043
|
result.local_partition_append_state = local_partition_append_states.back().get();
|
|
64865
65044
|
} else {
|
|
64866
|
-
local_spill_collections.emplace_back(
|
|
65045
|
+
local_spill_collections.emplace_back(
|
|
65046
|
+
make_unique<ColumnDataCollection>(BufferManager::GetBufferManager(context), probe_types));
|
|
64867
65047
|
local_spill_append_states.emplace_back(make_unique<ColumnDataAppendState>());
|
|
64868
65048
|
local_spill_collections.back()->InitializeAppend(*local_spill_append_states.back());
|
|
64869
65049
|
|
|
@@ -64894,7 +65074,8 @@ void ProbeSpill::Finalize() {
|
|
|
64894
65074
|
local_partition_append_states.clear();
|
|
64895
65075
|
} else {
|
|
64896
65076
|
if (local_spill_collections.empty()) {
|
|
64897
|
-
global_spill_collection =
|
|
65077
|
+
global_spill_collection =
|
|
65078
|
+
make_unique<ColumnDataCollection>(BufferManager::GetBufferManager(context), probe_types);
|
|
64898
65079
|
} else {
|
|
64899
65080
|
global_spill_collection = move(local_spill_collections[0]);
|
|
64900
65081
|
for (idx_t i = 1; i < local_spill_collections.size(); i++) {
|
|
@@ -64911,7 +65092,8 @@ void ProbeSpill::PrepareNextProbe() {
|
|
|
64911
65092
|
auto &partitions = global_partitions->GetPartitions();
|
|
64912
65093
|
if (partitions.empty() || ht.partition_start == partitions.size()) {
|
|
64913
65094
|
// Can't probe, just make an empty one
|
|
64914
|
-
global_spill_collection =
|
|
65095
|
+
global_spill_collection =
|
|
65096
|
+
make_unique<ColumnDataCollection>(BufferManager::GetBufferManager(context), probe_types);
|
|
64915
65097
|
} else {
|
|
64916
65098
|
// Move specific partitions to the global spill collection
|
|
64917
65099
|
global_spill_collection = move(partitions[ht.partition_start]);
|
|
@@ -65185,6 +65367,44 @@ static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t r
|
|
|
65185
65367
|
}
|
|
65186
65368
|
}
|
|
65187
65369
|
|
|
65370
|
+
static void MarkJoinNested(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[],
|
|
65371
|
+
ExpressionType comparison_type) {
|
|
65372
|
+
Vector left_reference(left.GetType());
|
|
65373
|
+
SelectionVector true_sel(rcount);
|
|
65374
|
+
for (idx_t i = 0; i < lcount; i++) {
|
|
65375
|
+
if (found_match[i]) {
|
|
65376
|
+
continue;
|
|
65377
|
+
}
|
|
65378
|
+
ConstantVector::Reference(left_reference, left, i, rcount);
|
|
65379
|
+
idx_t count;
|
|
65380
|
+
switch (comparison_type) {
|
|
65381
|
+
case ExpressionType::COMPARE_EQUAL:
|
|
65382
|
+
count = VectorOperations::Equals(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65383
|
+
break;
|
|
65384
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
|
65385
|
+
count = VectorOperations::NotEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65386
|
+
break;
|
|
65387
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
|
65388
|
+
count = VectorOperations::LessThan(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65389
|
+
break;
|
|
65390
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
|
65391
|
+
count = VectorOperations::GreaterThan(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65392
|
+
break;
|
|
65393
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
65394
|
+
count = VectorOperations::LessThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65395
|
+
break;
|
|
65396
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
65397
|
+
count = VectorOperations::GreaterThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65398
|
+
break;
|
|
65399
|
+
default:
|
|
65400
|
+
throw InternalException("Unsupported comparison type for MarkJoinNested");
|
|
65401
|
+
}
|
|
65402
|
+
if (count > 0) {
|
|
65403
|
+
found_match[i] = true;
|
|
65404
|
+
}
|
|
65405
|
+
}
|
|
65406
|
+
}
|
|
65407
|
+
|
|
65188
65408
|
template <class OP>
|
|
65189
65409
|
static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) {
|
|
65190
65410
|
switch (left.GetType().InternalType()) {
|
|
@@ -65220,6 +65440,13 @@ static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcou
|
|
|
65220
65440
|
|
|
65221
65441
|
static void MarkJoinComparisonSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[],
|
|
65222
65442
|
ExpressionType comparison_type) {
|
|
65443
|
+
switch (left.GetType().InternalType()) {
|
|
65444
|
+
case PhysicalType::STRUCT:
|
|
65445
|
+
case PhysicalType::LIST:
|
|
65446
|
+
return MarkJoinNested(left, right, lcount, rcount, found_match, comparison_type);
|
|
65447
|
+
default:
|
|
65448
|
+
break;
|
|
65449
|
+
}
|
|
65223
65450
|
D_ASSERT(left.GetType() == right.GetType());
|
|
65224
65451
|
switch (comparison_type) {
|
|
65225
65452
|
case ExpressionType::COMPARE_EQUAL:
|
|
@@ -71250,6 +71477,7 @@ class LimitPercentOperatorState : public GlobalSourceState {
|
|
|
71250
71477
|
public:
|
|
71251
71478
|
explicit LimitPercentOperatorState(const PhysicalLimitPercent &op)
|
|
71252
71479
|
: limit(DConstants::INVALID_INDEX), current_offset(0) {
|
|
71480
|
+
D_ASSERT(op.sink_state);
|
|
71253
71481
|
auto &gstate = (LimitPercentGlobalState &)*op.sink_state;
|
|
71254
71482
|
gstate.data.InitializeScan(scan_state);
|
|
71255
71483
|
}
|
|
@@ -72271,7 +72499,12 @@ void PhysicalTransaction::GetData(ExecutionContext &context, DataChunk &chunk, G
|
|
|
72271
72499
|
LocalSourceState &lstate) const {
|
|
72272
72500
|
auto &client = context.client;
|
|
72273
72501
|
|
|
72274
|
-
|
|
72502
|
+
auto type = info->type;
|
|
72503
|
+
if (type == TransactionType::COMMIT && ValidChecker::IsInvalidated(client.ActiveTransaction())) {
|
|
72504
|
+
// transaction is invalidated - turn COMMIT into ROLLBACK
|
|
72505
|
+
type = TransactionType::ROLLBACK;
|
|
72506
|
+
}
|
|
72507
|
+
switch (type) {
|
|
72275
72508
|
case TransactionType::BEGIN_TRANSACTION: {
|
|
72276
72509
|
if (client.transaction.IsAutoCommit()) {
|
|
72277
72510
|
// start the active transaction
|
|
@@ -72493,6 +72726,7 @@ public:
|
|
|
72493
72726
|
public:
|
|
72494
72727
|
bool EmptyResultIfRHSIsEmpty() const;
|
|
72495
72728
|
|
|
72729
|
+
static bool HasNullValues(DataChunk &chunk);
|
|
72496
72730
|
static void ConstructSemiJoinResult(DataChunk &left, DataChunk &result, bool found_match[]);
|
|
72497
72731
|
static void ConstructAntiJoinResult(DataChunk &left, DataChunk &result, bool found_match[]);
|
|
72498
72732
|
static void ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left, DataChunk &result, bool found_match[],
|
|
@@ -74496,6 +74730,10 @@ public:
|
|
|
74496
74730
|
};
|
|
74497
74731
|
|
|
74498
74732
|
void HashJoinGlobalSinkState::ScheduleFinalize(Pipeline &pipeline, Event &event) {
|
|
74733
|
+
if (hash_table->Count() == 0) {
|
|
74734
|
+
hash_table->finalized = true;
|
|
74735
|
+
return;
|
|
74736
|
+
}
|
|
74499
74737
|
hash_table->InitializePointerTable();
|
|
74500
74738
|
auto new_event = make_shared<HashJoinFinalizeEvent>(pipeline, *this);
|
|
74501
74739
|
event.InsertEvent(move(new_event));
|
|
@@ -76494,7 +76732,7 @@ namespace duckdb {
|
|
|
76494
76732
|
class IndexJoinOperatorState : public CachingOperatorState {
|
|
76495
76733
|
public:
|
|
76496
76734
|
IndexJoinOperatorState(ClientContext &context, const PhysicalIndexJoin &op)
|
|
76497
|
-
: probe_executor(context), arena_allocator(
|
|
76735
|
+
: probe_executor(context), arena_allocator(BufferAllocator::Get(context)), keys(STANDARD_VECTOR_SIZE) {
|
|
76498
76736
|
auto &allocator = Allocator::Get(context);
|
|
76499
76737
|
rhs_rows.resize(STANDARD_VECTOR_SIZE);
|
|
76500
76738
|
result_sizes.resize(STANDARD_VECTOR_SIZE);
|
|
@@ -76862,7 +77100,7 @@ public:
|
|
|
76862
77100
|
return true;
|
|
76863
77101
|
}
|
|
76864
77102
|
|
|
76865
|
-
static bool IsSupported(const vector<JoinCondition> &conditions);
|
|
77103
|
+
static bool IsSupported(const vector<JoinCondition> &conditions, JoinType join_type);
|
|
76866
77104
|
|
|
76867
77105
|
public:
|
|
76868
77106
|
//! Returns a list of the types of the join conditions
|
|
@@ -76896,7 +77134,7 @@ PhysicalNestedLoopJoin::PhysicalNestedLoopJoin(LogicalOperator &op, unique_ptr<P
|
|
|
76896
77134
|
children.push_back(move(right));
|
|
76897
77135
|
}
|
|
76898
77136
|
|
|
76899
|
-
|
|
77137
|
+
bool PhysicalJoin::HasNullValues(DataChunk &chunk) {
|
|
76900
77138
|
for (idx_t col_idx = 0; col_idx < chunk.ColumnCount(); col_idx++) {
|
|
76901
77139
|
UnifiedVectorFormat vdata;
|
|
76902
77140
|
chunk.data[col_idx].ToUnifiedFormat(chunk.size(), vdata);
|
|
@@ -76985,7 +77223,10 @@ void PhysicalJoin::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left
|
|
|
76985
77223
|
}
|
|
76986
77224
|
}
|
|
76987
77225
|
|
|
76988
|
-
bool PhysicalNestedLoopJoin::IsSupported(const vector<JoinCondition> &conditions) {
|
|
77226
|
+
bool PhysicalNestedLoopJoin::IsSupported(const vector<JoinCondition> &conditions, JoinType join_type) {
|
|
77227
|
+
if (join_type == JoinType::MARK) {
|
|
77228
|
+
return true;
|
|
77229
|
+
}
|
|
76989
77230
|
for (auto &cond : conditions) {
|
|
76990
77231
|
if (cond.left->return_type.InternalType() == PhysicalType::STRUCT ||
|
|
76991
77232
|
cond.left->return_type.InternalType() == PhysicalType::LIST) {
|
|
@@ -77029,7 +77270,7 @@ public:
|
|
|
77029
77270
|
//! Materialized join condition of the RHS
|
|
77030
77271
|
ColumnDataCollection right_condition_data;
|
|
77031
77272
|
//! Whether or not the RHS of the nested loop join has NULL values
|
|
77032
|
-
bool has_null;
|
|
77273
|
+
atomic<bool> has_null;
|
|
77033
77274
|
//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
|
|
77034
77275
|
OuterJoinMarker right_outer;
|
|
77035
77276
|
};
|
|
@@ -85827,15 +86068,14 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
|
|
|
85827
86068
|
for (auto &pipeline : pipelines) {
|
|
85828
86069
|
auto sink = pipeline->GetSink();
|
|
85829
86070
|
if (sink != this) {
|
|
85830
|
-
|
|
85831
|
-
sink->sink_state = sink->GetGlobalSinkState(context.client);
|
|
86071
|
+
sink->sink_state.reset();
|
|
85832
86072
|
}
|
|
85833
86073
|
for (auto &op : pipeline->GetOperators()) {
|
|
85834
86074
|
if (op) {
|
|
85835
|
-
op->op_state
|
|
86075
|
+
op->op_state.reset();
|
|
85836
86076
|
}
|
|
85837
86077
|
}
|
|
85838
|
-
pipeline->
|
|
86078
|
+
pipeline->ClearSource();
|
|
85839
86079
|
}
|
|
85840
86080
|
|
|
85841
86081
|
// get the MetaPipelines in the recursive_meta_pipeline and reschedule them
|
|
@@ -86810,6 +87050,7 @@ public:
|
|
|
86810
87050
|
void Serialize(FieldWriter &writer) const override;
|
|
86811
87051
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
86812
87052
|
idx_t EstimateCardinality(ClientContext &context) override;
|
|
87053
|
+
vector<idx_t> GetTableIndex() const override;
|
|
86813
87054
|
|
|
86814
87055
|
protected:
|
|
86815
87056
|
void ResolveTypes() override;
|
|
@@ -87092,6 +87333,7 @@ public:
|
|
|
87092
87333
|
|
|
87093
87334
|
void Serialize(FieldWriter &writer) const override;
|
|
87094
87335
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
87336
|
+
vector<idx_t> GetTableIndex() const override;
|
|
87095
87337
|
|
|
87096
87338
|
protected:
|
|
87097
87339
|
void ResolveTypes() override {
|
|
@@ -87361,6 +87603,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
|
|
|
87361
87603
|
op.estimated_cardinality, perfect_join_stats);
|
|
87362
87604
|
|
|
87363
87605
|
} else {
|
|
87606
|
+
static constexpr const idx_t NESTED_LOOP_JOIN_THRESHOLD = 5;
|
|
87364
87607
|
bool can_merge = has_range > 0;
|
|
87365
87608
|
bool can_iejoin = has_range >= 2 && recursive_cte_tables.empty();
|
|
87366
87609
|
switch (op.join_type) {
|
|
@@ -87373,6 +87616,11 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
|
|
|
87373
87616
|
default:
|
|
87374
87617
|
break;
|
|
87375
87618
|
}
|
|
87619
|
+
if (left->estimated_cardinality <= NESTED_LOOP_JOIN_THRESHOLD ||
|
|
87620
|
+
right->estimated_cardinality <= NESTED_LOOP_JOIN_THRESHOLD) {
|
|
87621
|
+
can_iejoin = false;
|
|
87622
|
+
can_merge = false;
|
|
87623
|
+
}
|
|
87376
87624
|
if (can_iejoin) {
|
|
87377
87625
|
plan = make_unique<PhysicalIEJoin>(op, move(left), move(right), move(op.conditions), op.join_type,
|
|
87378
87626
|
op.estimated_cardinality);
|
|
@@ -87380,7 +87628,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
|
|
|
87380
87628
|
// range join: use piecewise merge join
|
|
87381
87629
|
plan = make_unique<PhysicalPiecewiseMergeJoin>(op, move(left), move(right), move(op.conditions),
|
|
87382
87630
|
op.join_type, op.estimated_cardinality);
|
|
87383
|
-
} else if (PhysicalNestedLoopJoin::IsSupported(op.conditions)) {
|
|
87631
|
+
} else if (PhysicalNestedLoopJoin::IsSupported(op.conditions, op.join_type)) {
|
|
87384
87632
|
// inequality join: use nested loop
|
|
87385
87633
|
plan = make_unique<PhysicalNestedLoopJoin>(op, move(left), move(right), move(op.conditions), op.join_type,
|
|
87386
87634
|
op.estimated_cardinality);
|
|
@@ -87604,7 +87852,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
|
|
|
87604
87852
|
|
|
87605
87853
|
|
|
87606
87854
|
|
|
87607
|
-
|
|
87608
87855
|
//===----------------------------------------------------------------------===//
|
|
87609
87856
|
// DuckDB
|
|
87610
87857
|
//
|
|
@@ -87646,25 +87893,11 @@ protected:
|
|
|
87646
87893
|
|
|
87647
87894
|
|
|
87648
87895
|
|
|
87649
|
-
namespace duckdb {
|
|
87650
87896
|
|
|
87651
|
-
|
|
87652
|
-
|
|
87653
|
-
auto &function = (BoundFunctionExpression &)expr;
|
|
87654
|
-
if (function.function.dependency) {
|
|
87655
|
-
function.function.dependency(function, dependencies);
|
|
87656
|
-
}
|
|
87657
|
-
}
|
|
87658
|
-
ExpressionIterator::EnumerateChildren(expr, [&](Expression &child) { ExtractDependencies(child, dependencies); });
|
|
87659
|
-
}
|
|
87897
|
+
|
|
87898
|
+
namespace duckdb {
|
|
87660
87899
|
|
|
87661
87900
|
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTable &op) {
|
|
87662
|
-
// extract dependencies from any default values
|
|
87663
|
-
for (auto &default_value : op.info->bound_defaults) {
|
|
87664
|
-
if (default_value) {
|
|
87665
|
-
ExtractDependencies(*default_value, op.info->dependencies);
|
|
87666
|
-
}
|
|
87667
|
-
}
|
|
87668
87901
|
auto &create_info = (CreateTableInfo &)*op.info->base;
|
|
87669
87902
|
auto &catalog = Catalog::GetCatalog(context);
|
|
87670
87903
|
auto existing_entry =
|
|
@@ -87675,13 +87908,14 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTabl
|
|
|
87675
87908
|
|
|
87676
87909
|
bool parallel_streaming_insert = !PreserveInsertionOrder(*plan);
|
|
87677
87910
|
bool use_batch_index = UseBatchIndex(*plan);
|
|
87911
|
+
auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
|
87678
87912
|
unique_ptr<PhysicalOperator> create;
|
|
87679
87913
|
if (!parallel_streaming_insert && use_batch_index) {
|
|
87680
87914
|
create = make_unique<PhysicalBatchInsert>(op, op.schema, move(op.info), op.estimated_cardinality);
|
|
87681
87915
|
|
|
87682
87916
|
} else {
|
|
87683
87917
|
create = make_unique<PhysicalInsert>(op, op.schema, move(op.info), op.estimated_cardinality,
|
|
87684
|
-
parallel_streaming_insert);
|
|
87918
|
+
parallel_streaming_insert && num_threads > 1);
|
|
87685
87919
|
}
|
|
87686
87920
|
|
|
87687
87921
|
D_ASSERT(op.children.size() == 1);
|
|
@@ -87763,8 +87997,9 @@ namespace duckdb {
|
|
|
87763
87997
|
|
|
87764
87998
|
class LogicalDelete : public LogicalOperator {
|
|
87765
87999
|
public:
|
|
87766
|
-
explicit LogicalDelete(TableCatalogEntry *table)
|
|
87767
|
-
: LogicalOperator(LogicalOperatorType::LOGICAL_DELETE), table(table), table_index(
|
|
88000
|
+
explicit LogicalDelete(TableCatalogEntry *table, idx_t table_index)
|
|
88001
|
+
: LogicalOperator(LogicalOperatorType::LOGICAL_DELETE), table(table), table_index(table_index),
|
|
88002
|
+
return_chunk(false) {
|
|
87768
88003
|
}
|
|
87769
88004
|
|
|
87770
88005
|
TableCatalogEntry *table;
|
|
@@ -87775,6 +88010,7 @@ public:
|
|
|
87775
88010
|
void Serialize(FieldWriter &writer) const override;
|
|
87776
88011
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
87777
88012
|
idx_t EstimateCardinality(ClientContext &context) override;
|
|
88013
|
+
vector<idx_t> GetTableIndex() const override;
|
|
87778
88014
|
|
|
87779
88015
|
protected:
|
|
87780
88016
|
vector<ColumnBinding> GetColumnBindings() override {
|
|
@@ -87851,6 +88087,7 @@ public:
|
|
|
87851
88087
|
}
|
|
87852
88088
|
void Serialize(FieldWriter &writer) const override;
|
|
87853
88089
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
88090
|
+
vector<idx_t> GetTableIndex() const override;
|
|
87854
88091
|
|
|
87855
88092
|
protected:
|
|
87856
88093
|
void ResolveTypes() override {
|
|
@@ -88091,6 +88328,7 @@ public:
|
|
|
88091
88328
|
}
|
|
88092
88329
|
void Serialize(FieldWriter &writer) const override;
|
|
88093
88330
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
88331
|
+
vector<idx_t> GetTableIndex() const override;
|
|
88094
88332
|
|
|
88095
88333
|
protected:
|
|
88096
88334
|
void ResolveTypes() override {
|
|
@@ -88474,6 +88712,7 @@ public:
|
|
|
88474
88712
|
idx_t EstimateCardinality(ClientContext &context) override {
|
|
88475
88713
|
return expressions.size();
|
|
88476
88714
|
}
|
|
88715
|
+
vector<idx_t> GetTableIndex() const override;
|
|
88477
88716
|
|
|
88478
88717
|
protected:
|
|
88479
88718
|
void ResolveTypes() override {
|
|
@@ -89145,8 +89384,9 @@ namespace duckdb {
|
|
|
89145
89384
|
//! LogicalInsert represents an insertion of data into a base table
|
|
89146
89385
|
class LogicalInsert : public LogicalOperator {
|
|
89147
89386
|
public:
|
|
89148
|
-
|
|
89149
|
-
: LogicalOperator(LogicalOperatorType::LOGICAL_INSERT), table(table), table_index(
|
|
89387
|
+
LogicalInsert(TableCatalogEntry *table, idx_t table_index)
|
|
89388
|
+
: LogicalOperator(LogicalOperatorType::LOGICAL_INSERT), table(table), table_index(table_index),
|
|
89389
|
+
return_chunk(false) {
|
|
89150
89390
|
}
|
|
89151
89391
|
|
|
89152
89392
|
vector<vector<unique_ptr<Expression>>> insert_values;
|
|
@@ -89183,6 +89423,7 @@ protected:
|
|
|
89183
89423
|
}
|
|
89184
89424
|
|
|
89185
89425
|
idx_t EstimateCardinality(ClientContext &context) override;
|
|
89426
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89186
89427
|
};
|
|
89187
89428
|
} // namespace duckdb
|
|
89188
89429
|
|
|
@@ -89237,6 +89478,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
|
|
|
89237
89478
|
|
|
89238
89479
|
bool parallel_streaming_insert = !PreserveInsertionOrder(*plan);
|
|
89239
89480
|
bool use_batch_index = UseBatchIndex(*plan);
|
|
89481
|
+
auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
|
89240
89482
|
if (op.return_chunk) {
|
|
89241
89483
|
// not supported for RETURNING (yet?)
|
|
89242
89484
|
parallel_streaming_insert = false;
|
|
@@ -89248,7 +89490,8 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
|
|
|
89248
89490
|
op.estimated_cardinality);
|
|
89249
89491
|
} else {
|
|
89250
89492
|
insert = make_unique<PhysicalInsert>(op.types, op.table, op.column_index_map, move(op.bound_defaults),
|
|
89251
|
-
op.estimated_cardinality, op.return_chunk,
|
|
89493
|
+
op.estimated_cardinality, op.return_chunk,
|
|
89494
|
+
parallel_streaming_insert && num_threads > 1);
|
|
89252
89495
|
}
|
|
89253
89496
|
if (plan) {
|
|
89254
89497
|
insert->children.push_back(move(plan));
|
|
@@ -89591,6 +89834,7 @@ public:
|
|
|
89591
89834
|
vector<ColumnBinding> GetColumnBindings() override;
|
|
89592
89835
|
void Serialize(FieldWriter &writer) const override;
|
|
89593
89836
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
89837
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89594
89838
|
|
|
89595
89839
|
protected:
|
|
89596
89840
|
void ResolveTypes() override;
|
|
@@ -89680,6 +89924,7 @@ public:
|
|
|
89680
89924
|
}
|
|
89681
89925
|
void Serialize(FieldWriter &writer) const override;
|
|
89682
89926
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
89927
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89683
89928
|
|
|
89684
89929
|
protected:
|
|
89685
89930
|
void ResolveTypes() override {
|
|
@@ -89727,6 +89972,7 @@ public:
|
|
|
89727
89972
|
}
|
|
89728
89973
|
void Serialize(FieldWriter &writer) const override;
|
|
89729
89974
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
89975
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89730
89976
|
|
|
89731
89977
|
protected:
|
|
89732
89978
|
void ResolveTypes() override {
|
|
@@ -89939,6 +90185,7 @@ public:
|
|
|
89939
90185
|
|
|
89940
90186
|
void Serialize(FieldWriter &writer) const override;
|
|
89941
90187
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
90188
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89942
90189
|
|
|
89943
90190
|
protected:
|
|
89944
90191
|
void ResolveTypes() override {
|
|
@@ -90243,6 +90490,7 @@ public:
|
|
|
90243
90490
|
vector<ColumnBinding> GetColumnBindings() override;
|
|
90244
90491
|
void Serialize(FieldWriter &writer) const override;
|
|
90245
90492
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
90493
|
+
vector<idx_t> GetTableIndex() const override;
|
|
90246
90494
|
|
|
90247
90495
|
protected:
|
|
90248
90496
|
void ResolveTypes() override;
|
|
@@ -90370,6 +90618,7 @@ public:
|
|
|
90370
90618
|
vector<ColumnBinding> GetColumnBindings() override;
|
|
90371
90619
|
void Serialize(FieldWriter &writer) const override;
|
|
90372
90620
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
90621
|
+
vector<idx_t> GetTableIndex() const override;
|
|
90373
90622
|
|
|
90374
90623
|
protected:
|
|
90375
90624
|
void ResolveTypes() override;
|
|
@@ -90528,6 +90777,8 @@ struct LogicalExtensionOperator : public LogicalOperator {
|
|
|
90528
90777
|
: LogicalOperator(LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR, move(expressions)) {
|
|
90529
90778
|
}
|
|
90530
90779
|
|
|
90780
|
+
static unique_ptr<LogicalExtensionOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
90781
|
+
|
|
90531
90782
|
virtual unique_ptr<PhysicalOperator> CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) = 0;
|
|
90532
90783
|
};
|
|
90533
90784
|
} // namespace duckdb
|
|
@@ -98818,13 +99069,16 @@ struct LinkedList {
|
|
|
98818
99069
|
// forward declarations
|
|
98819
99070
|
struct WriteDataToSegment;
|
|
98820
99071
|
struct ReadDataFromSegment;
|
|
99072
|
+
struct CopyDataFromSegment;
|
|
98821
99073
|
typedef ListSegment *(*create_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
|
|
98822
|
-
vector<AllocatedData> &owning_vector, uint16_t &capacity);
|
|
99074
|
+
vector<AllocatedData> &owning_vector, const uint16_t &capacity);
|
|
98823
99075
|
typedef void (*write_data_to_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
|
|
98824
99076
|
vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
|
|
98825
99077
|
idx_t &entry_idx, idx_t &count);
|
|
98826
|
-
typedef void (*read_data_from_segment_t)(ReadDataFromSegment &read_data_from_segment, ListSegment *segment,
|
|
99078
|
+
typedef void (*read_data_from_segment_t)(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
|
|
98827
99079
|
Vector &result, idx_t &total_count);
|
|
99080
|
+
typedef ListSegment *(*copy_data_from_segment_t)(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
|
|
99081
|
+
Allocator &allocator, vector<AllocatedData> &owning_vector);
|
|
98828
99082
|
|
|
98829
99083
|
struct WriteDataToSegment {
|
|
98830
99084
|
create_segment_t create_segment;
|
|
@@ -98835,6 +99089,10 @@ struct ReadDataFromSegment {
|
|
|
98835
99089
|
read_data_from_segment_t segment_function;
|
|
98836
99090
|
vector<ReadDataFromSegment> child_functions;
|
|
98837
99091
|
};
|
|
99092
|
+
struct CopyDataFromSegment {
|
|
99093
|
+
copy_data_from_segment_t segment_function;
|
|
99094
|
+
vector<CopyDataFromSegment> child_functions;
|
|
99095
|
+
};
|
|
98838
99096
|
|
|
98839
99097
|
// forward declarations
|
|
98840
99098
|
static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
|
|
@@ -98842,24 +99100,27 @@ static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allo
|
|
|
98842
99100
|
idx_t &count);
|
|
98843
99101
|
static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedList *linked_list, Vector &result,
|
|
98844
99102
|
idx_t &initial_total_count);
|
|
99103
|
+
static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list,
|
|
99104
|
+
LinkedList &target_list, Allocator &allocator, vector<AllocatedData> &owning_vector);
|
|
98845
99105
|
|
|
98846
99106
|
template <class T>
|
|
98847
99107
|
static data_ptr_t AllocatePrimitiveData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
98848
|
-
uint16_t &capacity) {
|
|
99108
|
+
const uint16_t &capacity) {
|
|
98849
99109
|
|
|
98850
99110
|
owning_vector.emplace_back(allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(T))));
|
|
98851
99111
|
return owning_vector.back().get();
|
|
98852
99112
|
}
|
|
98853
99113
|
|
|
98854
|
-
static data_ptr_t AllocateListData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
99114
|
+
static data_ptr_t AllocateListData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
99115
|
+
const uint16_t &capacity) {
|
|
98855
99116
|
|
|
98856
99117
|
owning_vector.emplace_back(
|
|
98857
99118
|
allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList)));
|
|
98858
99119
|
return owning_vector.back().get();
|
|
98859
99120
|
}
|
|
98860
99121
|
|
|
98861
|
-
static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
98862
|
-
idx_t child_count) {
|
|
99122
|
+
static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
99123
|
+
const uint16_t &capacity, const idx_t &child_count) {
|
|
98863
99124
|
|
|
98864
99125
|
owning_vector.emplace_back(
|
|
98865
99126
|
allocator.Allocate(sizeof(ListSegment) + capacity * sizeof(bool) + child_count * sizeof(ListSegment *)));
|
|
@@ -98867,28 +99128,28 @@ static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData>
|
|
|
98867
99128
|
}
|
|
98868
99129
|
|
|
98869
99130
|
template <class T>
|
|
98870
|
-
static T *GetPrimitiveData(ListSegment *segment) {
|
|
99131
|
+
static T *GetPrimitiveData(const ListSegment *segment) {
|
|
98871
99132
|
return (T *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
|
|
98872
99133
|
}
|
|
98873
99134
|
|
|
98874
|
-
static uint64_t *GetListLengthData(ListSegment *segment) {
|
|
99135
|
+
static uint64_t *GetListLengthData(const ListSegment *segment) {
|
|
98875
99136
|
return (uint64_t *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
|
|
98876
99137
|
}
|
|
98877
99138
|
|
|
98878
|
-
static LinkedList *GetListChildData(ListSegment *segment) {
|
|
99139
|
+
static LinkedList *GetListChildData(const ListSegment *segment) {
|
|
98879
99140
|
return (LinkedList *)(((char *)segment) + sizeof(ListSegment) +
|
|
98880
99141
|
segment->capacity * (sizeof(bool) + sizeof(uint64_t)));
|
|
98881
99142
|
}
|
|
98882
99143
|
|
|
98883
|
-
static ListSegment **GetStructData(ListSegment *segment) {
|
|
99144
|
+
static ListSegment **GetStructData(const ListSegment *segment) {
|
|
98884
99145
|
return (ListSegment **)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
|
|
98885
99146
|
}
|
|
98886
99147
|
|
|
98887
|
-
static bool *GetNullMask(ListSegment *segment) {
|
|
99148
|
+
static bool *GetNullMask(const ListSegment *segment) {
|
|
98888
99149
|
return (bool *)(((char *)segment) + sizeof(ListSegment));
|
|
98889
99150
|
}
|
|
98890
99151
|
|
|
98891
|
-
static uint16_t GetCapacityForNewSegment(LinkedList *linked_list) {
|
|
99152
|
+
static uint16_t GetCapacityForNewSegment(const LinkedList *linked_list) {
|
|
98892
99153
|
|
|
98893
99154
|
// consecutive segments grow by the power of two
|
|
98894
99155
|
uint16_t capacity = 4;
|
|
@@ -98901,7 +99162,7 @@ static uint16_t GetCapacityForNewSegment(LinkedList *linked_list) {
|
|
|
98901
99162
|
|
|
98902
99163
|
template <class T>
|
|
98903
99164
|
static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allocator,
|
|
98904
|
-
vector<AllocatedData> &owning_vector, uint16_t &capacity) {
|
|
99165
|
+
vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
|
|
98905
99166
|
|
|
98906
99167
|
// allocate data and set the header
|
|
98907
99168
|
auto segment = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, capacity);
|
|
@@ -98912,7 +99173,7 @@ static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allo
|
|
|
98912
99173
|
}
|
|
98913
99174
|
|
|
98914
99175
|
static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
98915
|
-
uint16_t &capacity) {
|
|
99176
|
+
const uint16_t &capacity) {
|
|
98916
99177
|
|
|
98917
99178
|
// allocate data and set the header
|
|
98918
99179
|
auto segment = (ListSegment *)AllocateListData(allocator, owning_vector, capacity);
|
|
@@ -98929,7 +99190,7 @@ static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator
|
|
|
98929
99190
|
}
|
|
98930
99191
|
|
|
98931
99192
|
static ListSegment *CreateStructSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
|
|
98932
|
-
vector<AllocatedData> &owning_vector, uint16_t &capacity) {
|
|
99193
|
+
vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
|
|
98933
99194
|
|
|
98934
99195
|
// allocate data and set header
|
|
98935
99196
|
auto segment = (ListSegment *)AllocateStructData(allocator, owning_vector, capacity,
|
|
@@ -99123,7 +99384,7 @@ static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allo
|
|
|
99123
99384
|
}
|
|
99124
99385
|
|
|
99125
99386
|
template <class T>
|
|
99126
|
-
static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, ListSegment *segment, Vector &result,
|
|
99387
|
+
static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
|
|
99127
99388
|
idx_t &total_count) {
|
|
99128
99389
|
|
|
99129
99390
|
auto &aggr_vector_validity = FlatVector::Validity(result);
|
|
@@ -99147,7 +99408,7 @@ static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, ListSegment *seg
|
|
|
99147
99408
|
}
|
|
99148
99409
|
}
|
|
99149
99410
|
|
|
99150
|
-
static void ReadDataFromVarcharSegment(ReadDataFromSegment &, ListSegment *segment, Vector &result,
|
|
99411
|
+
static void ReadDataFromVarcharSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
|
|
99151
99412
|
idx_t &total_count) {
|
|
99152
99413
|
|
|
99153
99414
|
auto &aggr_vector_validity = FlatVector::Validity(result);
|
|
@@ -99188,8 +99449,8 @@ static void ReadDataFromVarcharSegment(ReadDataFromSegment &, ListSegment *segme
|
|
|
99188
99449
|
}
|
|
99189
99450
|
}
|
|
99190
99451
|
|
|
99191
|
-
static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, ListSegment *segment,
|
|
99192
|
-
idx_t &total_count) {
|
|
99452
|
+
static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
|
|
99453
|
+
Vector &result, idx_t &total_count) {
|
|
99193
99454
|
|
|
99194
99455
|
auto &aggr_vector_validity = FlatVector::Validity(result);
|
|
99195
99456
|
|
|
@@ -99228,8 +99489,8 @@ static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment,
|
|
|
99228
99489
|
BuildListVector(read_data_from_segment.child_functions[0], &linked_child_list, child_vector, starting_offset);
|
|
99229
99490
|
}
|
|
99230
99491
|
|
|
99231
|
-
static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, ListSegment *segment,
|
|
99232
|
-
idx_t &total_count) {
|
|
99492
|
+
static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
|
|
99493
|
+
Vector &result, idx_t &total_count) {
|
|
99233
99494
|
|
|
99234
99495
|
auto &aggr_vector_validity = FlatVector::Validity(result);
|
|
99235
99496
|
|
|
@@ -99268,6 +99529,86 @@ static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedL
|
|
|
99268
99529
|
linked_list->last_segment = nullptr;
|
|
99269
99530
|
}
|
|
99270
99531
|
|
|
99532
|
+
template <class T>
|
|
99533
|
+
static ListSegment *CopyDataFromPrimitiveSegment(CopyDataFromSegment &, const ListSegment *source, Allocator &allocator,
|
|
99534
|
+
vector<AllocatedData> &owning_vector) {
|
|
99535
|
+
|
|
99536
|
+
auto target = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, source->capacity);
|
|
99537
|
+
memcpy(target, source, sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(T)));
|
|
99538
|
+
target->next = nullptr;
|
|
99539
|
+
return target;
|
|
99540
|
+
}
|
|
99541
|
+
|
|
99542
|
+
static ListSegment *CopyDataFromListSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
|
|
99543
|
+
Allocator &allocator, vector<AllocatedData> &owning_vector) {
|
|
99544
|
+
|
|
99545
|
+
// create an empty linked list for the child vector of target
|
|
99546
|
+
auto source_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(source));
|
|
99547
|
+
|
|
99548
|
+
// create the segment
|
|
99549
|
+
auto target = (ListSegment *)AllocateListData(allocator, owning_vector, source->capacity);
|
|
99550
|
+
memcpy(target, source,
|
|
99551
|
+
sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList));
|
|
99552
|
+
target->next = nullptr;
|
|
99553
|
+
|
|
99554
|
+
auto target_linked_list = GetListChildData(target);
|
|
99555
|
+
LinkedList linked_list(source_linked_child_list.total_capacity, nullptr, nullptr);
|
|
99556
|
+
Store<LinkedList>(linked_list, (data_ptr_t)target_linked_list);
|
|
99557
|
+
|
|
99558
|
+
// recurse to copy the linked child list
|
|
99559
|
+
auto target_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(target));
|
|
99560
|
+
D_ASSERT(copy_data_from_segment.child_functions.size() == 1);
|
|
99561
|
+
CopyLinkedList(copy_data_from_segment.child_functions[0], &source_linked_child_list, target_linked_child_list,
|
|
99562
|
+
allocator, owning_vector);
|
|
99563
|
+
|
|
99564
|
+
// store the updated linked list
|
|
99565
|
+
Store<LinkedList>(target_linked_child_list, (data_ptr_t)GetListChildData(target));
|
|
99566
|
+
return target;
|
|
99567
|
+
}
|
|
99568
|
+
|
|
99569
|
+
static ListSegment *CopyDataFromStructSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
|
|
99570
|
+
Allocator &allocator, vector<AllocatedData> &owning_vector) {
|
|
99571
|
+
|
|
99572
|
+
auto source_child_count = copy_data_from_segment.child_functions.size();
|
|
99573
|
+
auto target = (ListSegment *)AllocateStructData(allocator, owning_vector, source->capacity, source_child_count);
|
|
99574
|
+
memcpy(target, source,
|
|
99575
|
+
sizeof(ListSegment) + source->capacity * sizeof(bool) + source_child_count * sizeof(ListSegment *));
|
|
99576
|
+
target->next = nullptr;
|
|
99577
|
+
|
|
99578
|
+
// recurse and copy the children
|
|
99579
|
+
auto source_child_segments = GetStructData(source);
|
|
99580
|
+
auto target_child_segments = GetStructData(target);
|
|
99581
|
+
|
|
99582
|
+
for (idx_t i = 0; i < copy_data_from_segment.child_functions.size(); i++) {
|
|
99583
|
+
auto child_function = copy_data_from_segment.child_functions[i];
|
|
99584
|
+
auto source_child_segment = Load<ListSegment *>((data_ptr_t)(source_child_segments + i));
|
|
99585
|
+
auto target_child_segment =
|
|
99586
|
+
child_function.segment_function(child_function, source_child_segment, allocator, owning_vector);
|
|
99587
|
+
Store<ListSegment *>(target_child_segment, (data_ptr_t)(target_child_segments + i));
|
|
99588
|
+
}
|
|
99589
|
+
return target;
|
|
99590
|
+
}
|
|
99591
|
+
|
|
99592
|
+
static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list,
|
|
99593
|
+
LinkedList &target_list, Allocator &allocator, vector<AllocatedData> &owning_vector) {
|
|
99594
|
+
|
|
99595
|
+
auto source_segment = source_list->first_segment;
|
|
99596
|
+
|
|
99597
|
+
while (source_segment) {
|
|
99598
|
+
auto target_segment =
|
|
99599
|
+
copy_data_from_segment.segment_function(copy_data_from_segment, source_segment, allocator, owning_vector);
|
|
99600
|
+
source_segment = source_segment->next;
|
|
99601
|
+
|
|
99602
|
+
if (!target_list.first_segment) {
|
|
99603
|
+
target_list.first_segment = target_segment;
|
|
99604
|
+
}
|
|
99605
|
+
if (target_list.last_segment) {
|
|
99606
|
+
target_list.last_segment->next = target_segment;
|
|
99607
|
+
}
|
|
99608
|
+
target_list.last_segment = target_segment;
|
|
99609
|
+
}
|
|
99610
|
+
}
|
|
99611
|
+
|
|
99271
99612
|
static void InitializeValidities(Vector &vector, idx_t &capacity) {
|
|
99272
99613
|
|
|
99273
99614
|
auto &validity_mask = FlatVector::Validity(vector);
|
|
@@ -99311,6 +99652,7 @@ struct ListBindData : public FunctionData {
|
|
|
99311
99652
|
LogicalType stype;
|
|
99312
99653
|
WriteDataToSegment write_data_to_segment;
|
|
99313
99654
|
ReadDataFromSegment read_data_from_segment;
|
|
99655
|
+
CopyDataFromSegment copy_data_from_segment;
|
|
99314
99656
|
|
|
99315
99657
|
unique_ptr<FunctionData> Copy() const override {
|
|
99316
99658
|
return make_unique<ListBindData>(stype);
|
|
@@ -99323,7 +99665,8 @@ struct ListBindData : public FunctionData {
|
|
|
99323
99665
|
};
|
|
99324
99666
|
|
|
99325
99667
|
static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
|
|
99326
|
-
ReadDataFromSegment &read_data_from_segment,
|
|
99668
|
+
ReadDataFromSegment &read_data_from_segment,
|
|
99669
|
+
CopyDataFromSegment &copy_data_from_segment, const LogicalType &type) {
|
|
99327
99670
|
|
|
99328
99671
|
auto physical_type = type.InternalType();
|
|
99329
99672
|
switch (physical_type) {
|
|
@@ -99332,113 +99675,135 @@ static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
|
|
|
99332
99675
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<bool>;
|
|
99333
99676
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<bool>;
|
|
99334
99677
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<bool>;
|
|
99678
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<bool>;
|
|
99335
99679
|
break;
|
|
99336
99680
|
}
|
|
99337
99681
|
case PhysicalType::INT8: {
|
|
99338
99682
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<int8_t>;
|
|
99339
99683
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int8_t>;
|
|
99340
99684
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int8_t>;
|
|
99685
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int8_t>;
|
|
99341
99686
|
break;
|
|
99342
99687
|
}
|
|
99343
99688
|
case PhysicalType::INT16: {
|
|
99344
99689
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<int16_t>;
|
|
99345
99690
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int16_t>;
|
|
99346
99691
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int16_t>;
|
|
99692
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int16_t>;
|
|
99347
99693
|
break;
|
|
99348
99694
|
}
|
|
99349
99695
|
case PhysicalType::INT32: {
|
|
99350
99696
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<int32_t>;
|
|
99351
99697
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int32_t>;
|
|
99352
99698
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int32_t>;
|
|
99699
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int32_t>;
|
|
99353
99700
|
break;
|
|
99354
99701
|
}
|
|
99355
99702
|
case PhysicalType::INT64: {
|
|
99356
99703
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<int64_t>;
|
|
99357
99704
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int64_t>;
|
|
99358
99705
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int64_t>;
|
|
99706
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int64_t>;
|
|
99359
99707
|
break;
|
|
99360
99708
|
}
|
|
99361
99709
|
case PhysicalType::UINT8: {
|
|
99362
99710
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<uint8_t>;
|
|
99363
99711
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint8_t>;
|
|
99364
99712
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint8_t>;
|
|
99713
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint8_t>;
|
|
99365
99714
|
break;
|
|
99366
99715
|
}
|
|
99367
99716
|
case PhysicalType::UINT16: {
|
|
99368
99717
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<uint16_t>;
|
|
99369
99718
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint16_t>;
|
|
99370
99719
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint16_t>;
|
|
99720
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint16_t>;
|
|
99371
99721
|
break;
|
|
99372
99722
|
}
|
|
99373
99723
|
case PhysicalType::UINT32: {
|
|
99374
99724
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<uint32_t>;
|
|
99375
99725
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint32_t>;
|
|
99376
99726
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint32_t>;
|
|
99727
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint32_t>;
|
|
99377
99728
|
break;
|
|
99378
99729
|
}
|
|
99379
99730
|
case PhysicalType::UINT64: {
|
|
99380
99731
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<uint64_t>;
|
|
99381
99732
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint64_t>;
|
|
99382
99733
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint64_t>;
|
|
99734
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint64_t>;
|
|
99383
99735
|
break;
|
|
99384
99736
|
}
|
|
99385
99737
|
case PhysicalType::FLOAT: {
|
|
99386
99738
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<float>;
|
|
99387
99739
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<float>;
|
|
99388
99740
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<float>;
|
|
99741
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<float>;
|
|
99389
99742
|
break;
|
|
99390
99743
|
}
|
|
99391
99744
|
case PhysicalType::DOUBLE: {
|
|
99392
99745
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<double>;
|
|
99393
99746
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<double>;
|
|
99394
99747
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<double>;
|
|
99748
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<double>;
|
|
99395
99749
|
break;
|
|
99396
99750
|
}
|
|
99397
99751
|
case PhysicalType::INT128: {
|
|
99398
99752
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<hugeint_t>;
|
|
99399
99753
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<hugeint_t>;
|
|
99400
99754
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<hugeint_t>;
|
|
99755
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<hugeint_t>;
|
|
99401
99756
|
break;
|
|
99402
99757
|
}
|
|
99403
99758
|
case PhysicalType::INTERVAL: {
|
|
99404
99759
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<interval_t>;
|
|
99405
99760
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<interval_t>;
|
|
99406
99761
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<interval_t>;
|
|
99762
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<interval_t>;
|
|
99407
99763
|
break;
|
|
99408
99764
|
}
|
|
99409
99765
|
case PhysicalType::VARCHAR: {
|
|
99410
99766
|
write_data_to_segment.create_segment = CreateListSegment;
|
|
99411
99767
|
write_data_to_segment.segment_function = WriteDataToVarcharSegment;
|
|
99412
99768
|
read_data_from_segment.segment_function = ReadDataFromVarcharSegment;
|
|
99769
|
+
copy_data_from_segment.segment_function = CopyDataFromListSegment;
|
|
99413
99770
|
|
|
99414
99771
|
write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
|
|
99415
99772
|
write_data_to_segment.child_functions.back().create_segment = CreatePrimitiveSegment<char>;
|
|
99773
|
+
copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
|
|
99774
|
+
copy_data_from_segment.child_functions.back().segment_function = CopyDataFromPrimitiveSegment<char>;
|
|
99416
99775
|
break;
|
|
99417
99776
|
}
|
|
99418
99777
|
case PhysicalType::LIST: {
|
|
99419
99778
|
write_data_to_segment.create_segment = CreateListSegment;
|
|
99420
99779
|
write_data_to_segment.segment_function = WriteDataToListSegment;
|
|
99421
99780
|
read_data_from_segment.segment_function = ReadDataFromListSegment;
|
|
99781
|
+
copy_data_from_segment.segment_function = CopyDataFromListSegment;
|
|
99422
99782
|
|
|
99423
99783
|
// recurse
|
|
99424
99784
|
write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
|
|
99425
99785
|
read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment());
|
|
99786
|
+
copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
|
|
99426
99787
|
GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
|
|
99427
|
-
read_data_from_segment.child_functions.back(),
|
|
99788
|
+
read_data_from_segment.child_functions.back(),
|
|
99789
|
+
copy_data_from_segment.child_functions.back(), ListType::GetChildType(type));
|
|
99428
99790
|
break;
|
|
99429
99791
|
}
|
|
99430
99792
|
case PhysicalType::STRUCT: {
|
|
99431
99793
|
write_data_to_segment.create_segment = CreateStructSegment;
|
|
99432
99794
|
write_data_to_segment.segment_function = WriteDataToStructSegment;
|
|
99433
99795
|
read_data_from_segment.segment_function = ReadDataFromStructSegment;
|
|
99796
|
+
copy_data_from_segment.segment_function = CopyDataFromStructSegment;
|
|
99434
99797
|
|
|
99435
99798
|
// recurse
|
|
99436
99799
|
auto child_types = StructType::GetChildTypes(type);
|
|
99437
99800
|
for (idx_t i = 0; i < child_types.size(); i++) {
|
|
99438
99801
|
write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
|
|
99439
99802
|
read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment());
|
|
99803
|
+
copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
|
|
99440
99804
|
GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
|
|
99441
|
-
read_data_from_segment.child_functions.back(),
|
|
99805
|
+
read_data_from_segment.child_functions.back(),
|
|
99806
|
+
copy_data_from_segment.child_functions.back(), child_types[i].second);
|
|
99442
99807
|
}
|
|
99443
99808
|
break;
|
|
99444
99809
|
}
|
|
@@ -99451,7 +99816,7 @@ ListBindData::ListBindData(const LogicalType &stype_p) : stype(stype_p) {
|
|
|
99451
99816
|
|
|
99452
99817
|
// always unnest once because the result vector is of type LIST
|
|
99453
99818
|
auto type = ListType::GetChildType(stype_p);
|
|
99454
|
-
GetSegmentDataFunctions(write_data_to_segment, read_data_from_segment, type);
|
|
99819
|
+
GetSegmentDataFunctions(write_data_to_segment, read_data_from_segment, copy_data_from_segment, type);
|
|
99455
99820
|
}
|
|
99456
99821
|
|
|
99457
99822
|
ListBindData::~ListBindData() {
|
|
@@ -99519,11 +99884,13 @@ static void ListUpdateFunction(Vector inputs[], AggregateInputData &aggr_input_d
|
|
|
99519
99884
|
}
|
|
99520
99885
|
}
|
|
99521
99886
|
|
|
99522
|
-
static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData
|
|
99887
|
+
static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &aggr_input_data, idx_t count) {
|
|
99523
99888
|
UnifiedVectorFormat sdata;
|
|
99524
99889
|
state.ToUnifiedFormat(count, sdata);
|
|
99525
99890
|
auto states_ptr = (ListAggState **)sdata.data;
|
|
99526
99891
|
|
|
99892
|
+
auto &list_bind_data = (ListBindData &)*aggr_input_data.bind_data;
|
|
99893
|
+
|
|
99527
99894
|
auto combined_ptr = FlatVector::GetData<ListAggState *>(combined);
|
|
99528
99895
|
for (idx_t i = 0; i < count; i++) {
|
|
99529
99896
|
auto state = states_ptr[sdata.sel->get_index(i)];
|
|
@@ -99533,32 +99900,27 @@ static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputD
|
|
|
99533
99900
|
}
|
|
99534
99901
|
D_ASSERT(state->type);
|
|
99535
99902
|
D_ASSERT(state->owning_vector);
|
|
99536
|
-
if (!combined_ptr[i]->linked_list) {
|
|
99537
99903
|
|
|
99538
|
-
|
|
99904
|
+
if (!combined_ptr[i]->linked_list) {
|
|
99539
99905
|
combined_ptr[i]->linked_list = new LinkedList(0, nullptr, nullptr);
|
|
99540
|
-
combined_ptr[i]->
|
|
99541
|
-
combined_ptr[i]->linked_list->last_segment = state->linked_list->last_segment;
|
|
99542
|
-
combined_ptr[i]->linked_list->total_capacity = state->linked_list->total_capacity;
|
|
99543
|
-
|
|
99544
|
-
// copy the type
|
|
99906
|
+
combined_ptr[i]->owning_vector = new vector<AllocatedData>;
|
|
99545
99907
|
combined_ptr[i]->type = new LogicalType(*state->type);
|
|
99908
|
+
}
|
|
99909
|
+
auto owning_vector = combined_ptr[i]->owning_vector;
|
|
99546
99910
|
|
|
99547
|
-
|
|
99548
|
-
|
|
99911
|
+
// copy the linked list of the state
|
|
99912
|
+
auto copied_linked_list = LinkedList(state->linked_list->total_capacity, nullptr, nullptr);
|
|
99913
|
+
CopyLinkedList(list_bind_data.copy_data_from_segment, state->linked_list, copied_linked_list,
|
|
99914
|
+
aggr_input_data.allocator, *owning_vector);
|
|
99549
99915
|
|
|
99916
|
+
// append the copied linked list to the combined state
|
|
99917
|
+
if (combined_ptr[i]->linked_list->last_segment) {
|
|
99918
|
+
combined_ptr[i]->linked_list->last_segment->next = copied_linked_list.first_segment;
|
|
99550
99919
|
} else {
|
|
99551
|
-
combined_ptr[i]->linked_list->
|
|
99552
|
-
combined_ptr[i]->linked_list->last_segment = state->linked_list->last_segment;
|
|
99553
|
-
combined_ptr[i]->linked_list->total_capacity += state->linked_list->total_capacity;
|
|
99554
|
-
}
|
|
99555
|
-
|
|
99556
|
-
// copy the owning vector (and its unique pointers to the allocated data)
|
|
99557
|
-
// FIXME: more efficient way of copying the unique pointers?
|
|
99558
|
-
auto &owning_vector = *state->owning_vector;
|
|
99559
|
-
for (idx_t j = 0; j < state->owning_vector->size(); j++) {
|
|
99560
|
-
combined_ptr[i]->owning_vector->push_back(move(owning_vector[j]));
|
|
99920
|
+
combined_ptr[i]->linked_list->first_segment = copied_linked_list.first_segment;
|
|
99561
99921
|
}
|
|
99922
|
+
combined_ptr[i]->linked_list->last_segment = copied_linked_list.last_segment;
|
|
99923
|
+
combined_ptr[i]->linked_list->total_capacity += copied_linked_list.total_capacity;
|
|
99562
99924
|
}
|
|
99563
99925
|
}
|
|
99564
99926
|
|
|
@@ -99822,10 +100184,11 @@ struct RegrCountFunction {
|
|
|
99822
100184
|
namespace duckdb {
|
|
99823
100185
|
|
|
99824
100186
|
void RegrCountFun::RegisterFunction(BuiltinFunctions &set) {
|
|
99825
|
-
|
|
99826
|
-
|
|
99827
|
-
|
|
99828
|
-
|
|
100187
|
+
auto regr_count = AggregateFunction::BinaryAggregate<size_t, double, double, uint32_t, RegrCountFunction>(
|
|
100188
|
+
LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::UINTEGER);
|
|
100189
|
+
regr_count.name = "regr_count";
|
|
100190
|
+
regr_count.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
|
|
100191
|
+
set.AddFunction(regr_count);
|
|
99829
100192
|
}
|
|
99830
100193
|
|
|
99831
100194
|
} // namespace duckdb
|
|
@@ -108931,15 +109294,16 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
|
|
|
108931
109294
|
}
|
|
108932
109295
|
|
|
108933
109296
|
struct StrfTimeBindData : public FunctionData {
|
|
108934
|
-
explicit StrfTimeBindData(StrfTimeFormat format_p, string format_string_p)
|
|
108935
|
-
: format(move(format_p)), format_string(move(format_string_p)) {
|
|
109297
|
+
explicit StrfTimeBindData(StrfTimeFormat format_p, string format_string_p, bool is_null)
|
|
109298
|
+
: format(move(format_p)), format_string(move(format_string_p)), is_null(is_null) {
|
|
108936
109299
|
}
|
|
108937
109300
|
|
|
108938
109301
|
StrfTimeFormat format;
|
|
108939
109302
|
string format_string;
|
|
109303
|
+
bool is_null;
|
|
108940
109304
|
|
|
108941
109305
|
unique_ptr<FunctionData> Copy() const override {
|
|
108942
|
-
return make_unique<StrfTimeBindData>(format, format_string);
|
|
109306
|
+
return make_unique<StrfTimeBindData>(format, format_string, is_null);
|
|
108943
109307
|
}
|
|
108944
109308
|
|
|
108945
109309
|
bool Equals(const FunctionData &other_p) const override {
|
|
@@ -108962,13 +109326,14 @@ static unique_ptr<FunctionData> StrfTimeBindFunction(ClientContext &context, Sca
|
|
|
108962
109326
|
Value options_str = ExpressionExecutor::EvaluateScalar(context, *format_arg);
|
|
108963
109327
|
auto format_string = options_str.GetValue<string>();
|
|
108964
109328
|
StrfTimeFormat format;
|
|
108965
|
-
|
|
109329
|
+
bool is_null = options_str.IsNull();
|
|
109330
|
+
if (!is_null) {
|
|
108966
109331
|
string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
|
|
108967
109332
|
if (!error.empty()) {
|
|
108968
109333
|
throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
|
|
108969
109334
|
}
|
|
108970
109335
|
}
|
|
108971
|
-
return make_unique<StrfTimeBindData>(format, format_string);
|
|
109336
|
+
return make_unique<StrfTimeBindData>(format, format_string, is_null);
|
|
108972
109337
|
}
|
|
108973
109338
|
|
|
108974
109339
|
void StrfTimeFormat::ConvertDateVector(Vector &input, Vector &result, idx_t count) {
|
|
@@ -108995,7 +109360,7 @@ static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector
|
|
|
108995
109360
|
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
108996
109361
|
auto &info = (StrfTimeBindData &)*func_expr.bind_info;
|
|
108997
109362
|
|
|
108998
|
-
if (
|
|
109363
|
+
if (info.is_null) {
|
|
108999
109364
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
109000
109365
|
ConstantVector::SetNull(result, true);
|
|
109001
109366
|
return;
|
|
@@ -109029,7 +109394,7 @@ static void StrfTimeFunctionTimestamp(DataChunk &args, ExpressionState &state, V
|
|
|
109029
109394
|
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
109030
109395
|
auto &info = (StrfTimeBindData &)*func_expr.bind_info;
|
|
109031
109396
|
|
|
109032
|
-
if (
|
|
109397
|
+
if (info.is_null) {
|
|
109033
109398
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
109034
109399
|
ConstantVector::SetNull(result, true);
|
|
109035
109400
|
return;
|
|
@@ -122527,7 +122892,9 @@ static unique_ptr<FunctionData> StructInsertBind(ClientContext &context, ScalarF
|
|
|
122527
122892
|
unique_ptr<BaseStatistics> StructInsertStats(ClientContext &context, FunctionStatisticsInput &input) {
|
|
122528
122893
|
auto &child_stats = input.child_stats;
|
|
122529
122894
|
auto &expr = input.expr;
|
|
122530
|
-
|
|
122895
|
+
if (child_stats.empty() || !child_stats[0]) {
|
|
122896
|
+
return nullptr;
|
|
122897
|
+
}
|
|
122531
122898
|
auto &existing_struct_stats = (StructStatistics &)*child_stats[0];
|
|
122532
122899
|
auto new_struct_stats = make_unique<StructStatistics>(expr.return_type);
|
|
122533
122900
|
|
|
@@ -127954,6 +128321,9 @@ static unique_ptr<FunctionData> DuckDBFunctionsBind(ClientContext &context, Tabl
|
|
|
127954
128321
|
names.emplace_back("has_side_effects");
|
|
127955
128322
|
return_types.emplace_back(LogicalType::BOOLEAN);
|
|
127956
128323
|
|
|
128324
|
+
names.emplace_back("function_oid");
|
|
128325
|
+
return_types.emplace_back(LogicalType::BIGINT);
|
|
128326
|
+
|
|
127957
128327
|
return nullptr;
|
|
127958
128328
|
}
|
|
127959
128329
|
|
|
@@ -128340,6 +128710,9 @@ bool ExtractFunctionData(StandardEntry *entry, idx_t function_idx, DataChunk &ou
|
|
|
128340
128710
|
// has_side_effects, LogicalType::BOOLEAN
|
|
128341
128711
|
output.SetValue(9, output_offset, OP::HasSideEffects(function, function_idx));
|
|
128342
128712
|
|
|
128713
|
+
// function_oid, LogicalType::BIGINT
|
|
128714
|
+
output.SetValue(10, output_offset, Value::BIGINT(entry->oid));
|
|
128715
|
+
|
|
128343
128716
|
return function_idx + 1 == OP::FunctionCount(function);
|
|
128344
128717
|
}
|
|
128345
128718
|
|
|
@@ -135347,6 +135720,10 @@ public:
|
|
|
135347
135720
|
|
|
135348
135721
|
private:
|
|
135349
135722
|
void RunOptimizer(OptimizerType type, const std::function<void()> &callback);
|
|
135723
|
+
void Verify(LogicalOperator &op);
|
|
135724
|
+
|
|
135725
|
+
private:
|
|
135726
|
+
unique_ptr<LogicalOperator> plan;
|
|
135350
135727
|
};
|
|
135351
135728
|
|
|
135352
135729
|
} // namespace duckdb
|
|
@@ -136051,6 +136428,7 @@ unique_ptr<LogicalOperator> ClientContext::ExtractPlan(const string &query) {
|
|
|
136051
136428
|
}
|
|
136052
136429
|
|
|
136053
136430
|
ColumnBindingResolver resolver;
|
|
136431
|
+
resolver.Verify(*plan);
|
|
136054
136432
|
resolver.VisitOperator(*plan);
|
|
136055
136433
|
|
|
136056
136434
|
plan->ResolveOperatorTypes();
|
|
@@ -137142,6 +137520,14 @@ struct MaximumMemorySetting {
|
|
|
137142
137520
|
static Value GetSetting(ClientContext &context);
|
|
137143
137521
|
};
|
|
137144
137522
|
|
|
137523
|
+
struct PasswordSetting {
|
|
137524
|
+
static constexpr const char *Name = "password";
|
|
137525
|
+
static constexpr const char *Description = "The password to use. Ignored for legacy compatibility.";
|
|
137526
|
+
static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
|
|
137527
|
+
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
|
|
137528
|
+
static Value GetSetting(ClientContext &context);
|
|
137529
|
+
};
|
|
137530
|
+
|
|
137145
137531
|
struct PerfectHashThresholdSetting {
|
|
137146
137532
|
static constexpr const char *Name = "perfect_ht_threshold";
|
|
137147
137533
|
static constexpr const char *Description = "Threshold in bytes for when to use a perfect hash table (default: 12)";
|
|
@@ -137237,6 +137623,14 @@ struct ThreadsSetting {
|
|
|
137237
137623
|
static Value GetSetting(ClientContext &context);
|
|
137238
137624
|
};
|
|
137239
137625
|
|
|
137626
|
+
struct UsernameSetting {
|
|
137627
|
+
static constexpr const char *Name = "username";
|
|
137628
|
+
static constexpr const char *Description = "The username to use. Ignored for legacy compatibility.";
|
|
137629
|
+
static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
|
|
137630
|
+
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
|
|
137631
|
+
static Value GetSetting(ClientContext &context);
|
|
137632
|
+
};
|
|
137633
|
+
|
|
137240
137634
|
} // namespace duckdb
|
|
137241
137635
|
|
|
137242
137636
|
|
|
@@ -137286,6 +137680,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
|
|
|
137286
137680
|
DUCKDB_GLOBAL(MaximumMemorySetting),
|
|
137287
137681
|
DUCKDB_GLOBAL_ALIAS("memory_limit", MaximumMemorySetting),
|
|
137288
137682
|
DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting),
|
|
137683
|
+
DUCKDB_GLOBAL(PasswordSetting),
|
|
137289
137684
|
DUCKDB_LOCAL(PerfectHashThresholdSetting),
|
|
137290
137685
|
DUCKDB_LOCAL(PreserveIdentifierCase),
|
|
137291
137686
|
DUCKDB_GLOBAL(PreserveInsertionOrder),
|
|
@@ -137298,6 +137693,8 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
|
|
|
137298
137693
|
DUCKDB_LOCAL(SearchPathSetting),
|
|
137299
137694
|
DUCKDB_GLOBAL(TempDirectorySetting),
|
|
137300
137695
|
DUCKDB_GLOBAL(ThreadsSetting),
|
|
137696
|
+
DUCKDB_GLOBAL(UsernameSetting),
|
|
137697
|
+
DUCKDB_GLOBAL_ALIAS("user", UsernameSetting),
|
|
137301
137698
|
DUCKDB_GLOBAL_ALIAS("wal_autocheckpoint", CheckpointThresholdSetting),
|
|
137302
137699
|
DUCKDB_GLOBAL_ALIAS("worker_threads", ThreadsSetting),
|
|
137303
137700
|
FINAL_SETTING};
|
|
@@ -151761,6 +152158,17 @@ Value MaximumMemorySetting::GetSetting(ClientContext &context) {
|
|
|
151761
152158
|
return Value(StringUtil::BytesToHumanReadableString(config.options.maximum_memory));
|
|
151762
152159
|
}
|
|
151763
152160
|
|
|
152161
|
+
//===--------------------------------------------------------------------===//
|
|
152162
|
+
// Password Setting
|
|
152163
|
+
//===--------------------------------------------------------------------===//
|
|
152164
|
+
void PasswordSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
|
|
152165
|
+
// nop
|
|
152166
|
+
}
|
|
152167
|
+
|
|
152168
|
+
Value PasswordSetting::GetSetting(ClientContext &context) {
|
|
152169
|
+
return Value();
|
|
152170
|
+
}
|
|
152171
|
+
|
|
151764
152172
|
//===--------------------------------------------------------------------===//
|
|
151765
152173
|
// Perfect Hash Threshold
|
|
151766
152174
|
//===--------------------------------------------------------------------===//
|
|
@@ -151927,6 +152335,17 @@ Value ThreadsSetting::GetSetting(ClientContext &context) {
|
|
|
151927
152335
|
return Value::BIGINT(config.options.maximum_threads);
|
|
151928
152336
|
}
|
|
151929
152337
|
|
|
152338
|
+
//===--------------------------------------------------------------------===//
|
|
152339
|
+
// Username Setting
|
|
152340
|
+
//===--------------------------------------------------------------------===//
|
|
152341
|
+
void UsernameSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
|
|
152342
|
+
// nop
|
|
152343
|
+
}
|
|
152344
|
+
|
|
152345
|
+
Value UsernameSetting::GetSetting(ClientContext &context) {
|
|
152346
|
+
return Value();
|
|
152347
|
+
}
|
|
152348
|
+
|
|
151930
152349
|
} // namespace duckdb
|
|
151931
152350
|
|
|
151932
152351
|
|
|
@@ -153807,7 +154226,8 @@ bool Deliminator::RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, u
|
|
|
153807
154226
|
}
|
|
153808
154227
|
parent_expr =
|
|
153809
154228
|
make_unique<BoundColumnRefExpression>(parent_expr->alias, parent_expr->return_type, it->first);
|
|
153810
|
-
parent_cond.comparison =
|
|
154229
|
+
parent_cond.comparison =
|
|
154230
|
+
parent_delim_get_side == 0 ? child_cond.comparison : FlipComparisionExpression(child_cond.comparison);
|
|
153811
154231
|
break;
|
|
153812
154232
|
}
|
|
153813
154233
|
}
|
|
@@ -154266,6 +154686,9 @@ idx_t FilterCombiner::GetEquivalenceSet(Expression *expr) {
|
|
|
154266
154686
|
|
|
154267
154687
|
FilterResult FilterCombiner::AddConstantComparison(vector<ExpressionValueInformation> &info_list,
|
|
154268
154688
|
ExpressionValueInformation info) {
|
|
154689
|
+
if (info.constant.IsNull()) {
|
|
154690
|
+
return FilterResult::UNSATISFIABLE;
|
|
154691
|
+
}
|
|
154269
154692
|
for (idx_t i = 0; i < info_list.size(); i++) {
|
|
154270
154693
|
auto comparison = CompareValueInformation(info_list[i], info);
|
|
154271
154694
|
switch (comparison) {
|
|
@@ -155730,7 +156153,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownJoin(unique_ptr<LogicalOpera
|
|
|
155730
156153
|
void FilterPushdown::PushFilters() {
|
|
155731
156154
|
for (auto &f : filters) {
|
|
155732
156155
|
auto result = combiner.AddFilter(move(f->filter));
|
|
155733
|
-
D_ASSERT(result
|
|
156156
|
+
D_ASSERT(result != FilterResult::UNSUPPORTED);
|
|
155734
156157
|
(void)result;
|
|
155735
156158
|
}
|
|
155736
156159
|
filters.clear();
|
|
@@ -157927,6 +158350,7 @@ public:
|
|
|
157927
158350
|
|
|
157928
158351
|
|
|
157929
158352
|
|
|
158353
|
+
|
|
157930
158354
|
namespace duckdb {
|
|
157931
158355
|
|
|
157932
158356
|
Optimizer::Optimizer(Binder &binder, ClientContext &context) : context(context), binder(binder), rewriter(context) {
|
|
@@ -157963,9 +158387,18 @@ void Optimizer::RunOptimizer(OptimizerType type, const std::function<void()> &ca
|
|
|
157963
158387
|
profiler.StartPhase(OptimizerTypeToString(type));
|
|
157964
158388
|
callback();
|
|
157965
158389
|
profiler.EndPhase();
|
|
158390
|
+
if (plan) {
|
|
158391
|
+
Verify(*plan);
|
|
158392
|
+
}
|
|
157966
158393
|
}
|
|
157967
158394
|
|
|
157968
|
-
|
|
158395
|
+
void Optimizer::Verify(LogicalOperator &op) {
|
|
158396
|
+
ColumnBindingResolver::Verify(op);
|
|
158397
|
+
}
|
|
158398
|
+
|
|
158399
|
+
unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan_p) {
|
|
158400
|
+
Verify(*plan_p);
|
|
158401
|
+
this->plan = move(plan_p);
|
|
157969
158402
|
// first we perform expression rewrites using the ExpressionRewriter
|
|
157970
158403
|
// this does not change the logical plan structure, but only simplifies the expression trees
|
|
157971
158404
|
RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); });
|
|
@@ -158052,7 +158485,7 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
|
|
|
158052
158485
|
|
|
158053
158486
|
Planner::VerifyPlan(context, plan);
|
|
158054
158487
|
|
|
158055
|
-
return plan;
|
|
158488
|
+
return move(plan);
|
|
158056
158489
|
}
|
|
158057
158490
|
|
|
158058
158491
|
} // namespace duckdb
|
|
@@ -158091,7 +158524,8 @@ namespace duckdb {
|
|
|
158091
158524
|
unique_ptr<LogicalOperator> FilterPullup::PullupFilter(unique_ptr<LogicalOperator> op) {
|
|
158092
158525
|
D_ASSERT(op->type == LogicalOperatorType::LOGICAL_FILTER);
|
|
158093
158526
|
|
|
158094
|
-
|
|
158527
|
+
auto &filter = (LogicalFilter &)*op;
|
|
158528
|
+
if (can_pullup && filter.projection_map.empty()) {
|
|
158095
158529
|
unique_ptr<LogicalOperator> child = move(op->children[0]);
|
|
158096
158530
|
child = Rewrite(move(child));
|
|
158097
158531
|
// moving filter's expressions
|
|
@@ -158398,6 +158832,9 @@ using Filter = FilterPushdown::Filter;
|
|
|
158398
158832
|
unique_ptr<LogicalOperator> FilterPushdown::PushdownFilter(unique_ptr<LogicalOperator> op) {
|
|
158399
158833
|
D_ASSERT(op->type == LogicalOperatorType::LOGICAL_FILTER);
|
|
158400
158834
|
auto &filter = (LogicalFilter &)*op;
|
|
158835
|
+
if (!filter.projection_map.empty()) {
|
|
158836
|
+
return FinishPushdown(move(op));
|
|
158837
|
+
}
|
|
158401
158838
|
// filter: gather the filters and remove the filter from the set of operations
|
|
158402
158839
|
for (auto &expression : filter.expressions) {
|
|
158403
158840
|
if (AddFilter(move(expression)) == FilterResult::UNSATISFIABLE) {
|
|
@@ -158701,8 +159138,8 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
|
|
|
158701
159138
|
|
|
158702
159139
|
right_bindings.insert(comp_join.mark_index);
|
|
158703
159140
|
FilterPushdown left_pushdown(optimizer), right_pushdown(optimizer);
|
|
158704
|
-
#
|
|
158705
|
-
bool
|
|
159141
|
+
#ifdef DEBUG
|
|
159142
|
+
bool simplified_mark_join = false;
|
|
158706
159143
|
#endif
|
|
158707
159144
|
// now check the set of filters
|
|
158708
159145
|
for (idx_t i = 0; i < filters.size(); i++) {
|
|
@@ -158714,15 +159151,16 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
|
|
|
158714
159151
|
filters.erase(filters.begin() + i);
|
|
158715
159152
|
i--;
|
|
158716
159153
|
} else if (side == JoinSide::RIGHT) {
|
|
158717
|
-
|
|
158718
|
-
|
|
158719
|
-
D_ASSERT(!found_mark_reference);
|
|
158720
|
-
found_mark_reference = true;
|
|
159154
|
+
#ifdef DEBUG
|
|
159155
|
+
D_ASSERT(!simplified_mark_join);
|
|
158721
159156
|
#endif
|
|
158722
159157
|
// this filter references the marker
|
|
158723
159158
|
// we can turn this into a SEMI join if the filter is on only the marker
|
|
158724
159159
|
if (filters[i]->filter->type == ExpressionType::BOUND_COLUMN_REF) {
|
|
158725
159160
|
// filter just references the marker: turn into semi join
|
|
159161
|
+
#ifdef DEBUG
|
|
159162
|
+
simplified_mark_join = true;
|
|
159163
|
+
#endif
|
|
158726
159164
|
join.join_type = JoinType::SEMI;
|
|
158727
159165
|
filters.erase(filters.begin() + i);
|
|
158728
159166
|
i--;
|
|
@@ -158745,6 +159183,9 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
|
|
|
158745
159183
|
}
|
|
158746
159184
|
}
|
|
158747
159185
|
if (all_null_values_are_equal) {
|
|
159186
|
+
#ifdef DEBUG
|
|
159187
|
+
simplified_mark_join = true;
|
|
159188
|
+
#endif
|
|
158748
159189
|
// all null values are equal, convert to ANTI join
|
|
158749
159190
|
join.join_type = JoinType::ANTI;
|
|
158750
159191
|
filters.erase(filters.begin() + i);
|
|
@@ -159507,7 +159948,8 @@ unique_ptr<Expression> ComparisonSimplificationRule::Apply(LogicalOperator &op,
|
|
|
159507
159948
|
}
|
|
159508
159949
|
|
|
159509
159950
|
// Is the constant cast invertible?
|
|
159510
|
-
if (!
|
|
159951
|
+
if (!cast_constant.IsNull() &&
|
|
159952
|
+
!BoundCastExpression::CastIsInvertible(cast_expression->return_type, target_type)) {
|
|
159511
159953
|
// Is it actually invertible?
|
|
159512
159954
|
Value uncast_constant;
|
|
159513
159955
|
if (!cast_constant.DefaultTryCastAs(constant_value.type(), uncast_constant, &error_message, true) ||
|
|
@@ -188507,13 +188949,14 @@ protected:
|
|
|
188507
188949
|
|
|
188508
188950
|
|
|
188509
188951
|
|
|
188952
|
+
|
|
188510
188953
|
namespace duckdb {
|
|
188511
188954
|
|
|
188512
188955
|
//! The HAVING binder is responsible for binding an expression within the HAVING clause of a SQL statement
|
|
188513
188956
|
class HavingBinder : public SelectBinder {
|
|
188514
188957
|
public:
|
|
188515
188958
|
HavingBinder(Binder &binder, ClientContext &context, BoundSelectNode &node, BoundGroupInformation &info,
|
|
188516
|
-
case_insensitive_map_t<idx_t> &alias_map);
|
|
188959
|
+
case_insensitive_map_t<idx_t> &alias_map, AggregateHandling aggregate_handling);
|
|
188517
188960
|
|
|
188518
188961
|
protected:
|
|
188519
188962
|
BindResult BindExpression(unique_ptr<ParsedExpression> *expr_ptr, idx_t depth,
|
|
@@ -188523,6 +188966,7 @@ private:
|
|
|
188523
188966
|
BindResult BindColumnRef(unique_ptr<ParsedExpression> *expr_ptr, idx_t depth, bool root_expression);
|
|
188524
188967
|
|
|
188525
188968
|
ColumnAliasBinder column_alias_binder;
|
|
188969
|
+
AggregateHandling aggregate_handling;
|
|
188526
188970
|
};
|
|
188527
188971
|
|
|
188528
188972
|
} // namespace duckdb
|
|
@@ -188653,6 +189097,9 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
|
|
|
188653
189097
|
delimiter_value = ExpressionExecutor::EvaluateScalar(context, *expr).CastAs(context, type);
|
|
188654
189098
|
return nullptr;
|
|
188655
189099
|
}
|
|
189100
|
+
if (!new_binder->correlated_columns.empty()) {
|
|
189101
|
+
throw BinderException("Correlated columns not supported in LIMIT/OFFSET");
|
|
189102
|
+
}
|
|
188656
189103
|
// move any correlated columns to this binder
|
|
188657
189104
|
MoveCorrelatedExpressions(*new_binder);
|
|
188658
189105
|
return expr;
|
|
@@ -189018,16 +189465,22 @@ unique_ptr<BoundQueryNode> Binder::BindNode(SelectNode &statement) {
|
|
|
189018
189465
|
|
|
189019
189466
|
// bind the HAVING clause, if any
|
|
189020
189467
|
if (statement.having) {
|
|
189021
|
-
HavingBinder having_binder(*this, context, *result, info, alias_map);
|
|
189468
|
+
HavingBinder having_binder(*this, context, *result, info, alias_map, statement.aggregate_handling);
|
|
189022
189469
|
ExpressionBinder::QualifyColumnNames(*this, statement.having);
|
|
189023
189470
|
result->having = having_binder.Bind(statement.having);
|
|
189024
189471
|
}
|
|
189025
189472
|
|
|
189026
189473
|
// bind the QUALIFY clause, if any
|
|
189027
189474
|
if (statement.qualify) {
|
|
189475
|
+
if (statement.aggregate_handling == AggregateHandling::FORCE_AGGREGATES) {
|
|
189476
|
+
throw BinderException("Combining QUALIFY with GROUP BY ALL is not supported yet");
|
|
189477
|
+
}
|
|
189028
189478
|
QualifyBinder qualify_binder(*this, context, *result, info, alias_map);
|
|
189029
189479
|
ExpressionBinder::QualifyColumnNames(*this, statement.qualify);
|
|
189030
189480
|
result->qualify = qualify_binder.Bind(statement.qualify);
|
|
189481
|
+
if (qualify_binder.HasBoundColumns() && qualify_binder.BoundAggregates()) {
|
|
189482
|
+
throw BinderException("Cannot mix aggregates with non-aggregated columns!");
|
|
189483
|
+
}
|
|
189031
189484
|
}
|
|
189032
189485
|
|
|
189033
189486
|
// after that, we bind to the SELECT list
|
|
@@ -190230,7 +190683,7 @@ unique_ptr<Expression> Binder::PlanSubquery(BoundSubqueryExpression &expr, uniqu
|
|
|
190230
190683
|
D_ASSERT(root);
|
|
190231
190684
|
// first we translate the QueryNode of the subquery into a logical plan
|
|
190232
190685
|
// note that we do not plan nested subqueries yet
|
|
190233
|
-
auto sub_binder = Binder::CreateBinder(context);
|
|
190686
|
+
auto sub_binder = Binder::CreateBinder(context, this);
|
|
190234
190687
|
sub_binder->plan_subquery = false;
|
|
190235
190688
|
auto subquery_root = sub_binder->CreatePlan(*expr.subquery);
|
|
190236
190689
|
D_ASSERT(subquery_root);
|
|
@@ -190446,8 +190899,8 @@ BoundStatement Binder::BindCopyFrom(CopyStatement &stmt) {
|
|
|
190446
190899
|
|
|
190447
190900
|
auto function_data =
|
|
190448
190901
|
copy_function->function.copy_from_bind(context, *stmt.info, expected_names, bound_insert.expected_types);
|
|
190449
|
-
auto get = make_unique<LogicalGet>(
|
|
190450
|
-
bound_insert.expected_types, expected_names);
|
|
190902
|
+
auto get = make_unique<LogicalGet>(GenerateTableIndex(), copy_function->function.copy_from_function,
|
|
190903
|
+
move(function_data), bound_insert.expected_types, expected_names);
|
|
190451
190904
|
for (idx_t i = 0; i < bound_insert.expected_types.size(); i++) {
|
|
190452
190905
|
get->column_ids.push_back(i);
|
|
190453
190906
|
}
|
|
@@ -191126,6 +191579,7 @@ protected:
|
|
|
191126
191579
|
|
|
191127
191580
|
|
|
191128
191581
|
|
|
191582
|
+
|
|
191129
191583
|
#include <algorithm>
|
|
191130
191584
|
|
|
191131
191585
|
namespace duckdb {
|
|
@@ -191326,6 +191780,31 @@ void Binder::BindDefaultValues(ColumnList &columns, vector<unique_ptr<Expression
|
|
|
191326
191780
|
}
|
|
191327
191781
|
}
|
|
191328
191782
|
|
|
191783
|
+
static void ExtractExpressionDependencies(Expression &expr, unordered_set<CatalogEntry *> &dependencies) {
|
|
191784
|
+
if (expr.type == ExpressionType::BOUND_FUNCTION) {
|
|
191785
|
+
auto &function = (BoundFunctionExpression &)expr;
|
|
191786
|
+
if (function.function.dependency) {
|
|
191787
|
+
function.function.dependency(function, dependencies);
|
|
191788
|
+
}
|
|
191789
|
+
}
|
|
191790
|
+
ExpressionIterator::EnumerateChildren(
|
|
191791
|
+
expr, [&](Expression &child) { ExtractExpressionDependencies(child, dependencies); });
|
|
191792
|
+
}
|
|
191793
|
+
|
|
191794
|
+
static void ExtractDependencies(BoundCreateTableInfo &info) {
|
|
191795
|
+
for (auto &default_value : info.bound_defaults) {
|
|
191796
|
+
if (default_value) {
|
|
191797
|
+
ExtractExpressionDependencies(*default_value, info.dependencies);
|
|
191798
|
+
}
|
|
191799
|
+
}
|
|
191800
|
+
for (auto &constraint : info.bound_constraints) {
|
|
191801
|
+
if (constraint->type == ConstraintType::CHECK) {
|
|
191802
|
+
auto &bound_check = (BoundCheckConstraint &)*constraint;
|
|
191803
|
+
ExtractExpressionDependencies(*bound_check.expression, info.dependencies);
|
|
191804
|
+
}
|
|
191805
|
+
}
|
|
191806
|
+
}
|
|
191807
|
+
|
|
191329
191808
|
unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateInfo> info) {
|
|
191330
191809
|
auto &base = (CreateTableInfo &)*info;
|
|
191331
191810
|
|
|
@@ -191356,6 +191835,8 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
|
|
|
191356
191835
|
// bind the default values
|
|
191357
191836
|
BindDefaultValues(base.columns, result->bound_defaults);
|
|
191358
191837
|
}
|
|
191838
|
+
// extract dependencies from any default values or CHECK constraints
|
|
191839
|
+
ExtractDependencies(*result);
|
|
191359
191840
|
|
|
191360
191841
|
if (base.columns.PhysicalColumnCount() == 0) {
|
|
191361
191842
|
throw BinderException("Creating a table without physical (non-generated) columns is not supported");
|
|
@@ -191449,7 +191930,8 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) {
|
|
|
191449
191930
|
unique_ptr<LogicalOperator> child_operator;
|
|
191450
191931
|
for (auto &using_clause : stmt.using_clauses) {
|
|
191451
191932
|
// bind the using clause
|
|
191452
|
-
auto
|
|
191933
|
+
auto using_binder = Binder::CreateBinder(context, this);
|
|
191934
|
+
auto bound_node = using_binder->Bind(*using_clause);
|
|
191453
191935
|
auto op = CreatePlan(*bound_node);
|
|
191454
191936
|
if (child_operator) {
|
|
191455
191937
|
// already bound a child: create a cross product to unify the two
|
|
@@ -191457,6 +191939,7 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) {
|
|
|
191457
191939
|
} else {
|
|
191458
191940
|
child_operator = move(op);
|
|
191459
191941
|
}
|
|
191942
|
+
bind_context.AddContext(move(using_binder->bind_context));
|
|
191460
191943
|
}
|
|
191461
191944
|
if (child_operator) {
|
|
191462
191945
|
root = LogicalCrossProduct::Create(move(root), move(child_operator));
|
|
@@ -191475,7 +191958,7 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) {
|
|
|
191475
191958
|
root = move(filter);
|
|
191476
191959
|
}
|
|
191477
191960
|
// create the delete node
|
|
191478
|
-
auto del = make_unique<LogicalDelete>(table);
|
|
191961
|
+
auto del = make_unique<LogicalDelete>(table, GenerateTableIndex());
|
|
191479
191962
|
del->AddChild(move(root));
|
|
191480
191963
|
|
|
191481
191964
|
// set up the delete expression
|
|
@@ -191603,6 +192086,7 @@ BoundStatement Binder::Bind(ExecuteStatement &stmt) {
|
|
|
191603
192086
|
prepared = prepared_planner.PrepareSQLStatement(entry->second->unbound_statement->Copy());
|
|
191604
192087
|
rebound_plan = move(prepared_planner.plan);
|
|
191605
192088
|
D_ASSERT(prepared->properties.bound_all_parameters);
|
|
192089
|
+
this->bound_tables = prepared_planner.binder->bound_tables;
|
|
191606
192090
|
}
|
|
191607
192091
|
// copy the properties of the prepared statement into the planner
|
|
191608
192092
|
this->properties = prepared->properties;
|
|
@@ -191824,7 +192308,7 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
|
|
|
191824
192308
|
CopyStatement copy_stmt;
|
|
191825
192309
|
copy_stmt.info = move(info);
|
|
191826
192310
|
|
|
191827
|
-
auto copy_binder = Binder::CreateBinder(context);
|
|
192311
|
+
auto copy_binder = Binder::CreateBinder(context, this);
|
|
191828
192312
|
auto bound_statement = copy_binder->Bind(copy_stmt);
|
|
191829
192313
|
if (child_operator) {
|
|
191830
192314
|
// use UNION ALL to combine the individual copy statements into a single node
|
|
@@ -191954,7 +192438,7 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
|
|
|
191954
192438
|
properties.read_only = false;
|
|
191955
192439
|
}
|
|
191956
192440
|
|
|
191957
|
-
auto insert = make_unique<LogicalInsert>(table);
|
|
192441
|
+
auto insert = make_unique<LogicalInsert>(table, GenerateTableIndex());
|
|
191958
192442
|
|
|
191959
192443
|
// Add CTEs as bindable
|
|
191960
192444
|
AddCTEMap(stmt.cte_map);
|
|
@@ -192158,6 +192642,7 @@ namespace duckdb {
|
|
|
192158
192642
|
BoundStatement Binder::Bind(PrepareStatement &stmt) {
|
|
192159
192643
|
Planner prepared_planner(context);
|
|
192160
192644
|
auto prepared_data = prepared_planner.PrepareSQLStatement(move(stmt.statement));
|
|
192645
|
+
this->bound_tables = prepared_planner.binder->bound_tables;
|
|
192161
192646
|
|
|
192162
192647
|
auto prepare = make_unique<LogicalPrepare>(stmt.name, move(prepared_data), move(prepared_planner.plan));
|
|
192163
192648
|
// we can prepare in read-only mode: prepared statements are not written to the catalog
|
|
@@ -192284,7 +192769,7 @@ BoundStatement Binder::Bind(AlterStatement &stmt) {
|
|
|
192284
192769
|
|
|
192285
192770
|
BoundStatement Binder::Bind(TransactionStatement &stmt) {
|
|
192286
192771
|
// transaction statements do not require a valid transaction
|
|
192287
|
-
properties.requires_valid_transaction =
|
|
192772
|
+
properties.requires_valid_transaction = stmt.info->type == TransactionType::BEGIN_TRANSACTION;
|
|
192288
192773
|
|
|
192289
192774
|
BoundStatement result;
|
|
192290
192775
|
result.names = {"Success"};
|
|
@@ -192578,6 +193063,13 @@ static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, Log
|
|
|
192578
193063
|
BindExtraColumns(table, get, proj, update, check.bound_columns);
|
|
192579
193064
|
}
|
|
192580
193065
|
}
|
|
193066
|
+
if (update.return_chunk) {
|
|
193067
|
+
physical_index_set_t all_columns;
|
|
193068
|
+
for (idx_t i = 0; i < table.storage->column_definitions.size(); i++) {
|
|
193069
|
+
all_columns.insert(PhysicalIndex(i));
|
|
193070
|
+
}
|
|
193071
|
+
BindExtraColumns(table, get, proj, update, all_columns);
|
|
193072
|
+
}
|
|
192581
193073
|
// for index updates we always turn any update into an insert and a delete
|
|
192582
193074
|
// we thus need all the columns to be available, hence we check if the update touches any index columns
|
|
192583
193075
|
// If the returning keyword is used, we need access to the whole row in case the user requests it.
|
|
@@ -192600,7 +193092,7 @@ static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, Log
|
|
|
192600
193092
|
}
|
|
192601
193093
|
}
|
|
192602
193094
|
|
|
192603
|
-
if (update.update_is_del_and_insert
|
|
193095
|
+
if (update.update_is_del_and_insert) {
|
|
192604
193096
|
// the update updates a column required by an index or requires returning the updated rows,
|
|
192605
193097
|
// push projections for all columns
|
|
192606
193098
|
physical_index_set_t all_columns;
|
|
@@ -192711,16 +193203,15 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) {
|
|
|
192711
193203
|
// set the projection as child of the update node and finalize the result
|
|
192712
193204
|
update->AddChild(move(proj));
|
|
192713
193205
|
|
|
193206
|
+
auto update_table_index = GenerateTableIndex();
|
|
193207
|
+
update->table_index = update_table_index;
|
|
192714
193208
|
if (!stmt.returning_list.empty()) {
|
|
192715
|
-
auto update_table_index = GenerateTableIndex();
|
|
192716
|
-
update->table_index = update_table_index;
|
|
192717
193209
|
unique_ptr<LogicalOperator> update_as_logicaloperator = move(update);
|
|
192718
193210
|
|
|
192719
193211
|
return BindReturning(move(stmt.returning_list), table, update_table_index, move(update_as_logicaloperator),
|
|
192720
193212
|
move(result));
|
|
192721
193213
|
}
|
|
192722
193214
|
|
|
192723
|
-
update->table_index = 0;
|
|
192724
193215
|
result.names = {"Count"};
|
|
192725
193216
|
result.types = {LogicalType::BIGINT};
|
|
192726
193217
|
result.plan = move(update);
|
|
@@ -193046,6 +193537,9 @@ unique_ptr<BoundTableRef> Binder::Bind(BaseTableRef &ref) {
|
|
|
193046
193537
|
// bind the child subquery
|
|
193047
193538
|
view_binder->AddBoundView(view_catalog_entry);
|
|
193048
193539
|
auto bound_child = view_binder->Bind(subquery);
|
|
193540
|
+
if (!view_binder->correlated_columns.empty()) {
|
|
193541
|
+
throw BinderException("Contents of view were altered - view bound correlated columns");
|
|
193542
|
+
}
|
|
193049
193543
|
|
|
193050
193544
|
D_ASSERT(bound_child->type == TableReferenceType::SUBQUERY);
|
|
193051
193545
|
// verify that the types and names match up with the expected types and names
|
|
@@ -193557,6 +194051,33 @@ unique_ptr<BoundTableRef> Binder::Bind(SubqueryRef &ref, CommonTableExpressionIn
|
|
|
193557
194051
|
|
|
193558
194052
|
|
|
193559
194053
|
|
|
194054
|
+
//===----------------------------------------------------------------------===//
|
|
194055
|
+
// DuckDB
|
|
194056
|
+
//
|
|
194057
|
+
// duckdb/planner/expression_binder/table_function_binder.hpp
|
|
194058
|
+
//
|
|
194059
|
+
//
|
|
194060
|
+
//===----------------------------------------------------------------------===//
|
|
194061
|
+
|
|
194062
|
+
|
|
194063
|
+
|
|
194064
|
+
|
|
194065
|
+
|
|
194066
|
+
namespace duckdb {
|
|
194067
|
+
|
|
194068
|
+
//! The Table function binder can bind standard table function parameters (i.e. non-table-in-out functions)
|
|
194069
|
+
class TableFunctionBinder : public ExpressionBinder {
|
|
194070
|
+
public:
|
|
194071
|
+
TableFunctionBinder(Binder &binder, ClientContext &context);
|
|
194072
|
+
|
|
194073
|
+
protected:
|
|
194074
|
+
BindResult BindColumnReference(ColumnRefExpression &expr);
|
|
194075
|
+
BindResult BindExpression(unique_ptr<ParsedExpression> *expr, idx_t depth, bool root_expression = false) override;
|
|
194076
|
+
|
|
194077
|
+
string UnsupportedAggregateMessage() override;
|
|
194078
|
+
};
|
|
194079
|
+
|
|
194080
|
+
} // namespace duckdb
|
|
193560
194081
|
|
|
193561
194082
|
|
|
193562
194083
|
|
|
@@ -193633,17 +194154,17 @@ bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_functi
|
|
|
193633
194154
|
continue;
|
|
193634
194155
|
}
|
|
193635
194156
|
|
|
193636
|
-
|
|
194157
|
+
TableFunctionBinder binder(*this, context);
|
|
193637
194158
|
LogicalType sql_type;
|
|
193638
194159
|
auto expr = binder.Bind(child, &sql_type);
|
|
193639
194160
|
if (expr->HasParameter()) {
|
|
193640
194161
|
throw ParameterNotResolvedException();
|
|
193641
194162
|
}
|
|
193642
|
-
if (!expr->
|
|
194163
|
+
if (!expr->IsScalar()) {
|
|
193643
194164
|
error = "Table function requires a constant parameter";
|
|
193644
194165
|
return false;
|
|
193645
194166
|
}
|
|
193646
|
-
auto constant = ExpressionExecutor::EvaluateScalar(context, *expr);
|
|
194167
|
+
auto constant = ExpressionExecutor::EvaluateScalar(context, *expr, true);
|
|
193647
194168
|
if (parameter_name.empty()) {
|
|
193648
194169
|
// unnamed parameter
|
|
193649
194170
|
if (!named_parameters.empty()) {
|
|
@@ -196596,8 +197117,9 @@ BindResult GroupBinder::BindColumnRef(ColumnRefExpression &colref) {
|
|
|
196596
197117
|
namespace duckdb {
|
|
196597
197118
|
|
|
196598
197119
|
HavingBinder::HavingBinder(Binder &binder, ClientContext &context, BoundSelectNode &node, BoundGroupInformation &info,
|
|
196599
|
-
case_insensitive_map_t<idx_t> &alias_map)
|
|
196600
|
-
: SelectBinder(binder, context, node, info), column_alias_binder(node, alias_map)
|
|
197120
|
+
case_insensitive_map_t<idx_t> &alias_map, AggregateHandling aggregate_handling)
|
|
197121
|
+
: SelectBinder(binder, context, node, info), column_alias_binder(node, alias_map),
|
|
197122
|
+
aggregate_handling(aggregate_handling) {
|
|
196601
197123
|
target_type = LogicalType(LogicalTypeId::BOOLEAN);
|
|
196602
197124
|
}
|
|
196603
197125
|
|
|
@@ -196607,7 +197129,16 @@ BindResult HavingBinder::BindColumnRef(unique_ptr<ParsedExpression> *expr_ptr, i
|
|
|
196607
197129
|
if (!alias_result.HasError()) {
|
|
196608
197130
|
return alias_result;
|
|
196609
197131
|
}
|
|
196610
|
-
|
|
197132
|
+
if (aggregate_handling == AggregateHandling::FORCE_AGGREGATES) {
|
|
197133
|
+
auto expr = duckdb::SelectBinder::BindExpression(expr_ptr, depth);
|
|
197134
|
+
if (expr.HasError()) {
|
|
197135
|
+
return expr;
|
|
197136
|
+
}
|
|
197137
|
+
auto group_ref = make_unique<BoundColumnRefExpression>(
|
|
197138
|
+
expr.expression->return_type, ColumnBinding(node.group_index, node.groups.group_expressions.size()));
|
|
197139
|
+
node.groups.group_expressions.push_back(move(expr.expression));
|
|
197140
|
+
return BindResult(move(group_ref));
|
|
197141
|
+
}
|
|
196611
197142
|
return BindResult(StringUtil::Format(
|
|
196612
197143
|
"column %s must appear in the GROUP BY clause or be used in an aggregate function", expr.ToString()));
|
|
196613
197144
|
}
|
|
@@ -197089,6 +197620,42 @@ BindResult SelectBinder::BindGroup(ParsedExpression &expr, idx_t depth, idx_t gr
|
|
|
197089
197620
|
} // namespace duckdb
|
|
197090
197621
|
|
|
197091
197622
|
|
|
197623
|
+
|
|
197624
|
+
|
|
197625
|
+
namespace duckdb {
|
|
197626
|
+
|
|
197627
|
+
TableFunctionBinder::TableFunctionBinder(Binder &binder, ClientContext &context) : ExpressionBinder(binder, context) {
|
|
197628
|
+
}
|
|
197629
|
+
|
|
197630
|
+
BindResult TableFunctionBinder::BindColumnReference(ColumnRefExpression &expr) {
|
|
197631
|
+
auto result_name = StringUtil::Join(expr.column_names, ".");
|
|
197632
|
+
return BindResult(make_unique<BoundConstantExpression>(Value(result_name)));
|
|
197633
|
+
}
|
|
197634
|
+
|
|
197635
|
+
BindResult TableFunctionBinder::BindExpression(unique_ptr<ParsedExpression> *expr_ptr, idx_t depth,
|
|
197636
|
+
bool root_expression) {
|
|
197637
|
+
auto &expr = **expr_ptr;
|
|
197638
|
+
switch (expr.GetExpressionClass()) {
|
|
197639
|
+
case ExpressionClass::COLUMN_REF:
|
|
197640
|
+
return BindColumnReference((ColumnRefExpression &)expr);
|
|
197641
|
+
case ExpressionClass::SUBQUERY:
|
|
197642
|
+
throw BinderException("Table function cannot contain subqueries");
|
|
197643
|
+
case ExpressionClass::DEFAULT:
|
|
197644
|
+
return BindResult("Table function cannot contain DEFAULT clause");
|
|
197645
|
+
case ExpressionClass::WINDOW:
|
|
197646
|
+
return BindResult("Table function cannot contain window functions!");
|
|
197647
|
+
default:
|
|
197648
|
+
return ExpressionBinder::BindExpression(expr_ptr, depth);
|
|
197649
|
+
}
|
|
197650
|
+
}
|
|
197651
|
+
|
|
197652
|
+
string TableFunctionBinder::UnsupportedAggregateMessage() {
|
|
197653
|
+
return "Table function cannot contain aggregates!";
|
|
197654
|
+
}
|
|
197655
|
+
|
|
197656
|
+
} // namespace duckdb
|
|
197657
|
+
|
|
197658
|
+
|
|
197092
197659
|
namespace duckdb {
|
|
197093
197660
|
|
|
197094
197661
|
UpdateBinder::UpdateBinder(Binder &binder, ClientContext &context) : ExpressionBinder(binder, context) {
|
|
@@ -198023,6 +198590,7 @@ JoinSide JoinSide::GetJoinSide(const unordered_set<idx_t> &bindings, unordered_s
|
|
|
198023
198590
|
|
|
198024
198591
|
|
|
198025
198592
|
|
|
198593
|
+
|
|
198026
198594
|
namespace duckdb {
|
|
198027
198595
|
|
|
198028
198596
|
const uint64_t PLAN_SERIALIZATION_VERSION = 1;
|
|
@@ -198354,7 +198922,8 @@ unique_ptr<LogicalOperator> LogicalOperator::Deserialize(Deserializer &deseriali
|
|
|
198354
198922
|
result = LogicalSimple::Deserialize(state, reader);
|
|
198355
198923
|
break;
|
|
198356
198924
|
case LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR:
|
|
198357
|
-
|
|
198925
|
+
result = LogicalExtensionOperator::Deserialize(state, reader);
|
|
198926
|
+
break;
|
|
198358
198927
|
case LogicalOperatorType::LOGICAL_INVALID:
|
|
198359
198928
|
/* no default here to trigger a warning if we forget to implement deserialize for a new operator */
|
|
198360
198929
|
throw SerializationException("Invalid type for operator deserialization");
|
|
@@ -198366,6 +198935,10 @@ unique_ptr<LogicalOperator> LogicalOperator::Deserialize(Deserializer &deseriali
|
|
|
198366
198935
|
return result;
|
|
198367
198936
|
}
|
|
198368
198937
|
|
|
198938
|
+
vector<idx_t> LogicalOperator::GetTableIndex() const {
|
|
198939
|
+
return vector<idx_t> {};
|
|
198940
|
+
}
|
|
198941
|
+
|
|
198369
198942
|
unique_ptr<LogicalOperator> LogicalOperator::Copy(ClientContext &context) const {
|
|
198370
198943
|
BufferedSerializer logical_op_serializer;
|
|
198371
198944
|
try {
|
|
@@ -198757,6 +199330,14 @@ idx_t LogicalAggregate::EstimateCardinality(ClientContext &context) {
|
|
|
198757
199330
|
return LogicalOperator::EstimateCardinality(context);
|
|
198758
199331
|
}
|
|
198759
199332
|
|
|
199333
|
+
vector<idx_t> LogicalAggregate::GetTableIndex() const {
|
|
199334
|
+
vector<idx_t> result {group_index, aggregate_index};
|
|
199335
|
+
if (groupings_index != DConstants::INVALID_INDEX) {
|
|
199336
|
+
result.push_back(groupings_index);
|
|
199337
|
+
}
|
|
199338
|
+
return result;
|
|
199339
|
+
}
|
|
199340
|
+
|
|
198760
199341
|
} // namespace duckdb
|
|
198761
199342
|
|
|
198762
199343
|
|
|
@@ -198823,6 +199404,10 @@ unique_ptr<LogicalOperator> LogicalColumnDataGet::Deserialize(LogicalDeserializa
|
|
|
198823
199404
|
return make_unique<LogicalColumnDataGet>(table_index, move(chunk_types), move(collection));
|
|
198824
199405
|
}
|
|
198825
199406
|
|
|
199407
|
+
vector<idx_t> LogicalColumnDataGet::GetTableIndex() const {
|
|
199408
|
+
return vector<idx_t> {table_index};
|
|
199409
|
+
}
|
|
199410
|
+
|
|
198826
199411
|
} // namespace duckdb
|
|
198827
199412
|
|
|
198828
199413
|
|
|
@@ -199087,6 +199672,10 @@ unique_ptr<LogicalOperator> LogicalCTERef::Deserialize(LogicalDeserializationSta
|
|
|
199087
199672
|
return make_unique<LogicalCTERef>(table_index, cte_index, chunk_types, bound_columns);
|
|
199088
199673
|
}
|
|
199089
199674
|
|
|
199675
|
+
vector<idx_t> LogicalCTERef::GetTableIndex() const {
|
|
199676
|
+
return vector<idx_t> {table_index};
|
|
199677
|
+
}
|
|
199678
|
+
|
|
199090
199679
|
} // namespace duckdb
|
|
199091
199680
|
|
|
199092
199681
|
|
|
@@ -199107,8 +199696,8 @@ unique_ptr<LogicalOperator> LogicalDelete::Deserialize(LogicalDeserializationSta
|
|
|
199107
199696
|
|
|
199108
199697
|
TableCatalogEntry *table_catalog_entry = catalog.GetEntry<TableCatalogEntry>(context, info->schema, info->table);
|
|
199109
199698
|
|
|
199110
|
-
auto
|
|
199111
|
-
result
|
|
199699
|
+
auto table_index = reader.ReadRequired<idx_t>();
|
|
199700
|
+
auto result = make_unique<LogicalDelete>(table_catalog_entry, table_index);
|
|
199112
199701
|
result->return_chunk = reader.ReadRequired<bool>();
|
|
199113
199702
|
return move(result);
|
|
199114
199703
|
}
|
|
@@ -199117,6 +199706,10 @@ idx_t LogicalDelete::EstimateCardinality(ClientContext &context) {
|
|
|
199117
199706
|
return return_chunk ? LogicalOperator::EstimateCardinality(context) : 1;
|
|
199118
199707
|
}
|
|
199119
199708
|
|
|
199709
|
+
vector<idx_t> LogicalDelete::GetTableIndex() const {
|
|
199710
|
+
return vector<idx_t> {table_index};
|
|
199711
|
+
}
|
|
199712
|
+
|
|
199120
199713
|
} // namespace duckdb
|
|
199121
199714
|
|
|
199122
199715
|
|
|
@@ -199134,6 +199727,10 @@ unique_ptr<LogicalOperator> LogicalDelimGet::Deserialize(LogicalDeserializationS
|
|
|
199134
199727
|
return make_unique<LogicalDelimGet>(table_index, chunk_types);
|
|
199135
199728
|
}
|
|
199136
199729
|
|
|
199730
|
+
vector<idx_t> LogicalDelimGet::GetTableIndex() const {
|
|
199731
|
+
return vector<idx_t> {table_index};
|
|
199732
|
+
}
|
|
199733
|
+
|
|
199137
199734
|
} // namespace duckdb
|
|
199138
199735
|
|
|
199139
199736
|
|
|
@@ -199201,6 +199798,10 @@ unique_ptr<LogicalOperator> LogicalDummyScan::Deserialize(LogicalDeserialization
|
|
|
199201
199798
|
return make_unique<LogicalDummyScan>(table_index);
|
|
199202
199799
|
}
|
|
199203
199800
|
|
|
199801
|
+
vector<idx_t> LogicalDummyScan::GetTableIndex() const {
|
|
199802
|
+
return vector<idx_t> {table_index};
|
|
199803
|
+
}
|
|
199804
|
+
|
|
199204
199805
|
} // namespace duckdb
|
|
199205
199806
|
|
|
199206
199807
|
|
|
@@ -199309,6 +199910,28 @@ unique_ptr<LogicalOperator> LogicalExpressionGet::Deserialize(LogicalDeserializa
|
|
|
199309
199910
|
return make_unique<LogicalExpressionGet>(table_index, expr_types, move(expressions));
|
|
199310
199911
|
}
|
|
199311
199912
|
|
|
199913
|
+
vector<idx_t> LogicalExpressionGet::GetTableIndex() const {
|
|
199914
|
+
return vector<idx_t> {table_index};
|
|
199915
|
+
}
|
|
199916
|
+
|
|
199917
|
+
} // namespace duckdb
|
|
199918
|
+
|
|
199919
|
+
|
|
199920
|
+
|
|
199921
|
+
namespace duckdb {
|
|
199922
|
+
unique_ptr<LogicalExtensionOperator> LogicalExtensionOperator::Deserialize(LogicalDeserializationState &state,
|
|
199923
|
+
FieldReader &reader) {
|
|
199924
|
+
auto &config = DBConfig::GetConfig(state.gstate.context);
|
|
199925
|
+
|
|
199926
|
+
auto extension_name = reader.ReadRequired<std::string>();
|
|
199927
|
+
for (auto &extension : config.operator_extensions) {
|
|
199928
|
+
if (extension->GetName() == extension_name) {
|
|
199929
|
+
return extension->Deserialize(state, reader);
|
|
199930
|
+
}
|
|
199931
|
+
}
|
|
199932
|
+
|
|
199933
|
+
throw SerializationException("No serialization method exists for extension: " + extension_name);
|
|
199934
|
+
}
|
|
199312
199935
|
} // namespace duckdb
|
|
199313
199936
|
|
|
199314
199937
|
|
|
@@ -199546,6 +200169,10 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
|
|
|
199546
200169
|
return move(result);
|
|
199547
200170
|
}
|
|
199548
200171
|
|
|
200172
|
+
vector<idx_t> LogicalGet::GetTableIndex() const {
|
|
200173
|
+
return vector<idx_t> {table_index};
|
|
200174
|
+
}
|
|
200175
|
+
|
|
199549
200176
|
} // namespace duckdb
|
|
199550
200177
|
|
|
199551
200178
|
|
|
@@ -199591,10 +200218,9 @@ unique_ptr<LogicalOperator> LogicalInsert::Deserialize(LogicalDeserializationSta
|
|
|
199591
200218
|
throw InternalException("Cant find catalog entry for table %s", info->table);
|
|
199592
200219
|
}
|
|
199593
200220
|
|
|
199594
|
-
auto result = make_unique<LogicalInsert>(table_catalog_entry);
|
|
200221
|
+
auto result = make_unique<LogicalInsert>(table_catalog_entry, table_index);
|
|
199595
200222
|
result->type = state.type;
|
|
199596
200223
|
result->table = table_catalog_entry;
|
|
199597
|
-
result->table_index = table_index;
|
|
199598
200224
|
result->return_chunk = return_chunk;
|
|
199599
200225
|
result->insert_values = move(insert_values);
|
|
199600
200226
|
result->column_index_map = column_index_map;
|
|
@@ -199607,6 +200233,10 @@ idx_t LogicalInsert::EstimateCardinality(ClientContext &context) {
|
|
|
199607
200233
|
return return_chunk ? LogicalOperator::EstimateCardinality(context) : 1;
|
|
199608
200234
|
}
|
|
199609
200235
|
|
|
200236
|
+
vector<idx_t> LogicalInsert::GetTableIndex() const {
|
|
200237
|
+
return vector<idx_t> {table_index};
|
|
200238
|
+
}
|
|
200239
|
+
|
|
199610
200240
|
} // namespace duckdb
|
|
199611
200241
|
|
|
199612
200242
|
|
|
@@ -199843,6 +200473,10 @@ unique_ptr<LogicalOperator> LogicalProjection::Deserialize(LogicalDeserializatio
|
|
|
199843
200473
|
return make_unique<LogicalProjection>(table_index, move(expressions));
|
|
199844
200474
|
}
|
|
199845
200475
|
|
|
200476
|
+
vector<idx_t> LogicalProjection::GetTableIndex() const {
|
|
200477
|
+
return vector<idx_t> {table_index};
|
|
200478
|
+
}
|
|
200479
|
+
|
|
199846
200480
|
} // namespace duckdb
|
|
199847
200481
|
|
|
199848
200482
|
|
|
@@ -199863,6 +200497,10 @@ unique_ptr<LogicalOperator> LogicalRecursiveCTE::Deserialize(LogicalDeserializat
|
|
|
199863
200497
|
return unique_ptr<LogicalRecursiveCTE>(new LogicalRecursiveCTE(table_index, column_count, union_all, state.type));
|
|
199864
200498
|
}
|
|
199865
200499
|
|
|
200500
|
+
vector<idx_t> LogicalRecursiveCTE::GetTableIndex() const {
|
|
200501
|
+
return vector<idx_t> {table_index};
|
|
200502
|
+
}
|
|
200503
|
+
|
|
199866
200504
|
} // namespace duckdb
|
|
199867
200505
|
|
|
199868
200506
|
|
|
@@ -199881,7 +200519,12 @@ vector<ColumnBinding> LogicalSample::GetColumnBindings() {
|
|
|
199881
200519
|
idx_t LogicalSample::EstimateCardinality(ClientContext &context) {
|
|
199882
200520
|
auto child_cardinality = children[0]->EstimateCardinality(context);
|
|
199883
200521
|
if (sample_options->is_percentage) {
|
|
199884
|
-
|
|
200522
|
+
double sample_cardinality =
|
|
200523
|
+
double(child_cardinality) * (sample_options->sample_size.GetValue<double>() / 100.0);
|
|
200524
|
+
if (sample_cardinality > double(child_cardinality)) {
|
|
200525
|
+
return child_cardinality;
|
|
200526
|
+
}
|
|
200527
|
+
return idx_t(sample_cardinality);
|
|
199885
200528
|
} else {
|
|
199886
200529
|
auto sample_size = sample_options->sample_size.GetValue<uint64_t>();
|
|
199887
200530
|
if (sample_size < child_cardinality) {
|
|
@@ -199945,6 +200588,11 @@ unique_ptr<LogicalOperator> LogicalSetOperation::Deserialize(LogicalDeserializat
|
|
|
199945
200588
|
// TODO(stephwang): review if unique_ptr<LogicalOperator> plan is needed
|
|
199946
200589
|
return unique_ptr<LogicalSetOperation>(new LogicalSetOperation(table_index, column_count, state.type));
|
|
199947
200590
|
}
|
|
200591
|
+
|
|
200592
|
+
vector<idx_t> LogicalSetOperation::GetTableIndex() const {
|
|
200593
|
+
return vector<idx_t> {table_index};
|
|
200594
|
+
}
|
|
200595
|
+
|
|
199948
200596
|
} // namespace duckdb
|
|
199949
200597
|
|
|
199950
200598
|
|
|
@@ -200043,6 +200691,11 @@ unique_ptr<LogicalOperator> LogicalUnnest::Deserialize(LogicalDeserializationSta
|
|
|
200043
200691
|
result->expressions = move(expressions);
|
|
200044
200692
|
return move(result);
|
|
200045
200693
|
}
|
|
200694
|
+
|
|
200695
|
+
vector<idx_t> LogicalUnnest::GetTableIndex() const {
|
|
200696
|
+
return vector<idx_t> {unnest_index};
|
|
200697
|
+
}
|
|
200698
|
+
|
|
200046
200699
|
} // namespace duckdb
|
|
200047
200700
|
|
|
200048
200701
|
|
|
@@ -200117,6 +200770,10 @@ unique_ptr<LogicalOperator> LogicalWindow::Deserialize(LogicalDeserializationSta
|
|
|
200117
200770
|
return move(result);
|
|
200118
200771
|
}
|
|
200119
200772
|
|
|
200773
|
+
vector<idx_t> LogicalWindow::GetTableIndex() const {
|
|
200774
|
+
return vector<idx_t> {window_index};
|
|
200775
|
+
}
|
|
200776
|
+
|
|
200120
200777
|
} // namespace duckdb
|
|
200121
200778
|
|
|
200122
200779
|
|
|
@@ -200249,7 +200906,7 @@ void Planner::CreatePlan(SQLStatement &statement) {
|
|
|
200249
200906
|
this->plan = nullptr;
|
|
200250
200907
|
for (auto &extension_op : config.operator_extensions) {
|
|
200251
200908
|
auto bound_statement =
|
|
200252
|
-
extension_op
|
|
200909
|
+
extension_op->Bind(context, *this->binder, extension_op->operator_info.get(), statement);
|
|
200253
200910
|
if (bound_statement.plan != nullptr) {
|
|
200254
200911
|
this->names = bound_statement.names;
|
|
200255
200912
|
this->types = bound_statement.types;
|
|
@@ -200648,10 +201305,13 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
|
|
|
200648
201305
|
// we reached a node without correlated expressions
|
|
200649
201306
|
// we can eliminate the dependent join now and create a simple cross product
|
|
200650
201307
|
// now create the duplicate eliminated scan for this node
|
|
201308
|
+
auto left_columns = plan->GetColumnBindings().size();
|
|
200651
201309
|
auto delim_index = binder.GenerateTableIndex();
|
|
200652
201310
|
this->base_binding = ColumnBinding(delim_index, 0);
|
|
201311
|
+
this->delim_offset = 0;
|
|
201312
|
+
this->data_offset = left_columns;
|
|
200653
201313
|
auto delim_scan = make_unique<LogicalDelimGet>(delim_index, delim_types);
|
|
200654
|
-
return LogicalCrossProduct::Create(move(
|
|
201314
|
+
return LogicalCrossProduct::Create(move(plan), move(delim_scan));
|
|
200655
201315
|
}
|
|
200656
201316
|
switch (plan->type) {
|
|
200657
201317
|
case LogicalOperatorType::LOGICAL_UNNEST:
|
|
@@ -201015,8 +201675,19 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
|
|
|
201015
201675
|
case LogicalOperatorType::LOGICAL_UNION: {
|
|
201016
201676
|
auto &setop = (LogicalSetOperation &)*plan;
|
|
201017
201677
|
// set operator, push into both children
|
|
201678
|
+
#ifdef DEBUG
|
|
201679
|
+
plan->children[0]->ResolveOperatorTypes();
|
|
201680
|
+
plan->children[1]->ResolveOperatorTypes();
|
|
201681
|
+
D_ASSERT(plan->children[0]->types == plan->children[1]->types);
|
|
201682
|
+
#endif
|
|
201018
201683
|
plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
|
|
201019
201684
|
plan->children[1] = PushDownDependentJoin(move(plan->children[1]));
|
|
201685
|
+
#ifdef DEBUG
|
|
201686
|
+
D_ASSERT(plan->children[0]->GetColumnBindings().size() == plan->children[1]->GetColumnBindings().size());
|
|
201687
|
+
plan->children[0]->ResolveOperatorTypes();
|
|
201688
|
+
plan->children[1]->ResolveOperatorTypes();
|
|
201689
|
+
D_ASSERT(plan->children[0]->types == plan->children[1]->types);
|
|
201690
|
+
#endif
|
|
201020
201691
|
// we have to refer to the setop index now
|
|
201021
201692
|
base_binding.table_index = setop.table_index;
|
|
201022
201693
|
base_binding.column_index = setop.column_count;
|
|
@@ -201924,6 +202595,7 @@ BlockHandle::~BlockHandle() {
|
|
|
201924
202595
|
} else {
|
|
201925
202596
|
D_ASSERT(memory_charge.size == 0);
|
|
201926
202597
|
}
|
|
202598
|
+
buffer_manager.PurgeQueue();
|
|
201927
202599
|
block_manager.UnregisterBlock(block_id, can_destroy);
|
|
201928
202600
|
}
|
|
201929
202601
|
|
|
@@ -201950,7 +202622,7 @@ unique_ptr<FileBuffer> BufferManager::ConstructManagedBuffer(idx_t size, unique_
|
|
|
201950
202622
|
FileBufferType type) {
|
|
201951
202623
|
if (source) {
|
|
201952
202624
|
auto tmp = move(source);
|
|
201953
|
-
D_ASSERT(tmp->
|
|
202625
|
+
D_ASSERT(tmp->AllocSize() == BufferManager::GetAllocSize(size));
|
|
201954
202626
|
return make_unique<FileBuffer>(*tmp, type);
|
|
201955
202627
|
} else {
|
|
201956
202628
|
// no re-usable buffer: allocate a new buffer
|
|
@@ -202085,7 +202757,7 @@ void BufferManager::SetTemporaryDirectory(string new_dir) {
|
|
|
202085
202757
|
|
|
202086
202758
|
BufferManager::BufferManager(DatabaseInstance &db, string tmp, idx_t maximum_memory)
|
|
202087
202759
|
: db(db), current_memory(0), maximum_memory(maximum_memory), temp_directory(move(tmp)),
|
|
202088
|
-
queue(make_unique<EvictionQueue>()), temporary_id(MAXIMUM_BLOCK),
|
|
202760
|
+
queue(make_unique<EvictionQueue>()), temporary_id(MAXIMUM_BLOCK), queue_insertions(0),
|
|
202089
202761
|
buffer_allocator(BufferAllocatorAllocate, BufferAllocatorFree, BufferAllocatorRealloc,
|
|
202090
202762
|
make_unique<BufferAllocatorData>(*this)) {
|
|
202091
202763
|
temp_block_manager = make_unique<InMemoryBlockManager>(*this);
|
|
@@ -202161,6 +202833,7 @@ TempBufferPoolReservation BufferManager::EvictBlocksOrThrow(idx_t memory_delta,
|
|
|
202161
202833
|
}
|
|
202162
202834
|
|
|
202163
202835
|
shared_ptr<BlockHandle> BufferManager::RegisterSmallMemory(idx_t block_size) {
|
|
202836
|
+
D_ASSERT(block_size < Storage::BLOCK_SIZE);
|
|
202164
202837
|
auto res = EvictBlocksOrThrow(block_size, maximum_memory, nullptr,
|
|
202165
202838
|
"could not allocate block of %lld bytes (%lld/%lld used) %s", block_size,
|
|
202166
202839
|
GetUsedMemory(), GetMaxMemory());
|
|
@@ -202173,7 +202846,7 @@ shared_ptr<BlockHandle> BufferManager::RegisterSmallMemory(idx_t block_size) {
|
|
|
202173
202846
|
|
|
202174
202847
|
shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can_destroy) {
|
|
202175
202848
|
D_ASSERT(block_size >= Storage::BLOCK_SIZE);
|
|
202176
|
-
auto alloc_size =
|
|
202849
|
+
auto alloc_size = GetAllocSize(block_size);
|
|
202177
202850
|
// first evict blocks until we have enough memory to store this buffer
|
|
202178
202851
|
unique_ptr<FileBuffer> reusable_buffer;
|
|
202179
202852
|
auto res = EvictBlocksOrThrow(alloc_size, maximum_memory, &reusable_buffer,
|
|
@@ -202187,9 +202860,11 @@ shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can
|
|
|
202187
202860
|
move(res));
|
|
202188
202861
|
}
|
|
202189
202862
|
|
|
202190
|
-
BufferHandle BufferManager::Allocate(idx_t block_size) {
|
|
202191
|
-
|
|
202192
|
-
|
|
202863
|
+
BufferHandle BufferManager::Allocate(idx_t block_size, bool can_destroy, shared_ptr<BlockHandle> *block) {
|
|
202864
|
+
shared_ptr<BlockHandle> local_block;
|
|
202865
|
+
auto block_ptr = block ? block : &local_block;
|
|
202866
|
+
*block_ptr = RegisterMemory(block_size, can_destroy);
|
|
202867
|
+
return Pin(*block_ptr);
|
|
202193
202868
|
}
|
|
202194
202869
|
|
|
202195
202870
|
void BufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size) {
|
|
@@ -202219,6 +202894,7 @@ void BufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size
|
|
|
202219
202894
|
// resize and adjust current memory
|
|
202220
202895
|
handle->buffer->Resize(block_size);
|
|
202221
202896
|
handle->memory_usage += memory_delta;
|
|
202897
|
+
D_ASSERT(handle->memory_usage == handle->buffer->AllocSize());
|
|
202222
202898
|
}
|
|
202223
202899
|
|
|
202224
202900
|
BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
|
|
@@ -202259,6 +202935,7 @@ BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
|
|
|
202259
202935
|
handle->memory_usage += delta;
|
|
202260
202936
|
handle->memory_charge.Resize(current_memory, handle->memory_usage);
|
|
202261
202937
|
}
|
|
202938
|
+
D_ASSERT(handle->memory_usage == handle->buffer->AllocSize());
|
|
202262
202939
|
return buf;
|
|
202263
202940
|
}
|
|
202264
202941
|
|
|
@@ -202551,7 +203228,9 @@ private:
|
|
|
202551
203228
|
// as a result we can truncate the file
|
|
202552
203229
|
auto max_index = index_manager.GetMaxIndex();
|
|
202553
203230
|
auto &fs = FileSystem::GetFileSystem(db);
|
|
203231
|
+
#ifndef WIN32 // this ended up causing issues when sorting
|
|
202554
203232
|
fs.Truncate(*handle, GetPositionInFile(max_index + 1));
|
|
203233
|
+
#endif
|
|
202555
203234
|
}
|
|
202556
203235
|
}
|
|
202557
203236
|
|
|
@@ -202827,6 +203506,9 @@ void BufferManager::BufferAllocatorFree(PrivateAllocatorData *private_data, data
|
|
|
202827
203506
|
|
|
202828
203507
|
data_ptr_t BufferManager::BufferAllocatorRealloc(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t old_size,
|
|
202829
203508
|
idx_t size) {
|
|
203509
|
+
if (old_size == size) {
|
|
203510
|
+
return pointer;
|
|
203511
|
+
}
|
|
202830
203512
|
auto &data = (BufferAllocatorData &)*private_data;
|
|
202831
203513
|
BufferPoolReservation r;
|
|
202832
203514
|
r.size = old_size;
|
|
@@ -202840,6 +203522,10 @@ Allocator &BufferAllocator::Get(ClientContext &context) {
|
|
|
202840
203522
|
return manager.GetBufferAllocator();
|
|
202841
203523
|
}
|
|
202842
203524
|
|
|
203525
|
+
Allocator &BufferAllocator::Get(DatabaseInstance &db) {
|
|
203526
|
+
return BufferManager::GetBufferManager(db).GetBufferAllocator();
|
|
203527
|
+
}
|
|
203528
|
+
|
|
202843
203529
|
Allocator &BufferManager::GetBufferAllocator() {
|
|
202844
203530
|
return buffer_allocator;
|
|
202845
203531
|
}
|
|
@@ -208409,11 +209095,15 @@ public:
|
|
|
208409
209095
|
new_string = !LookupString(data[idx]);
|
|
208410
209096
|
}
|
|
208411
209097
|
|
|
208412
|
-
bool fits =
|
|
209098
|
+
bool fits = CalculateSpaceRequirements(new_string, string_size);
|
|
208413
209099
|
if (!fits) {
|
|
208414
209100
|
Flush();
|
|
208415
209101
|
new_string = true;
|
|
208416
|
-
|
|
209102
|
+
|
|
209103
|
+
fits = CalculateSpaceRequirements(new_string, string_size);
|
|
209104
|
+
if (!fits) {
|
|
209105
|
+
throw InternalException("Dictionary compression could not write to new segment");
|
|
209106
|
+
}
|
|
208417
209107
|
}
|
|
208418
209108
|
|
|
208419
209109
|
if (!row_is_valid) {
|
|
@@ -208441,8 +209131,8 @@ protected:
|
|
|
208441
209131
|
virtual void AddNewString(string_t str) = 0;
|
|
208442
209132
|
// Add a null value to the compression state
|
|
208443
209133
|
virtual void AddNull() = 0;
|
|
208444
|
-
//
|
|
208445
|
-
virtual bool
|
|
209134
|
+
// Needs to be called before adding a value. Will return false if a flush is required first.
|
|
209135
|
+
virtual bool CalculateSpaceRequirements(bool new_string, size_t string_size) = 0;
|
|
208446
209136
|
// Flush the segment to disk if compressing or reset the counters if analyzing
|
|
208447
209137
|
virtual void Flush(bool final = false) = 0;
|
|
208448
209138
|
};
|
|
@@ -208499,7 +209189,8 @@ struct DictionaryCompressionStorage {
|
|
|
208499
209189
|
// scanning the whole dictionary at once and then scanning the selection buffer for each emitted vector. Secondly, it
|
|
208500
209190
|
// allows for efficient bitpacking compression as the selection values should remain relatively small.
|
|
208501
209191
|
struct DictionaryCompressionCompressState : public DictionaryCompressionState {
|
|
208502
|
-
explicit DictionaryCompressionCompressState(ColumnDataCheckpointer &checkpointer)
|
|
209192
|
+
explicit DictionaryCompressionCompressState(ColumnDataCheckpointer &checkpointer)
|
|
209193
|
+
: checkpointer(checkpointer), heap(BufferAllocator::Get(checkpointer.GetDatabase())) {
|
|
208503
209194
|
auto &db = checkpointer.GetDatabase();
|
|
208504
209195
|
auto &config = DBConfig::GetConfig(db);
|
|
208505
209196
|
function = config.GetCompressionFunction(CompressionType::COMPRESSION_DICTIONARY, PhysicalType::VARCHAR);
|
|
@@ -208605,7 +209296,7 @@ public:
|
|
|
208605
209296
|
current_segment->count++;
|
|
208606
209297
|
}
|
|
208607
209298
|
|
|
208608
|
-
bool
|
|
209299
|
+
bool CalculateSpaceRequirements(bool new_string, size_t string_size) override {
|
|
208609
209300
|
if (new_string) {
|
|
208610
209301
|
next_width = BitpackingPrimitives::MinimumBitWidth(index_buffer.size() - 1 + new_string);
|
|
208611
209302
|
return DictionaryCompressionStorage::HasEnoughSpace(current_segment->count.load() + 1,
|
|
@@ -208726,7 +209417,7 @@ struct DictionaryAnalyzeState : public DictionaryCompressionState {
|
|
|
208726
209417
|
current_tuple_count++;
|
|
208727
209418
|
}
|
|
208728
209419
|
|
|
208729
|
-
bool
|
|
209420
|
+
bool CalculateSpaceRequirements(bool new_string, size_t string_size) override {
|
|
208730
209421
|
if (new_string) {
|
|
208731
209422
|
next_width =
|
|
208732
209423
|
BitpackingPrimitives::MinimumBitWidth(current_unique_count + 2); // 1 for null, one for new string
|
|
@@ -211977,8 +212668,7 @@ void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string
|
|
|
211977
212668
|
new_block->offset = 0;
|
|
211978
212669
|
new_block->size = alloc_size;
|
|
211979
212670
|
// allocate an in-memory buffer for it
|
|
211980
|
-
|
|
211981
|
-
handle = buffer_manager.Pin(block);
|
|
212671
|
+
handle = buffer_manager.Allocate(alloc_size, false, &block);
|
|
211982
212672
|
state.overflow_blocks[block->BlockId()] = new_block.get();
|
|
211983
212673
|
new_block->block = move(block);
|
|
211984
212674
|
new_block->next = move(state.head);
|
|
@@ -213413,7 +214103,12 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t
|
|
|
213413
214103
|
bool append_failed = false;
|
|
213414
214104
|
// now append the entries to the indices
|
|
213415
214105
|
indexes.Scan([&](Index &index) {
|
|
213416
|
-
|
|
214106
|
+
try {
|
|
214107
|
+
if (!index.Append(chunk, row_identifiers)) {
|
|
214108
|
+
append_failed = true;
|
|
214109
|
+
return true;
|
|
214110
|
+
}
|
|
214111
|
+
} catch (...) {
|
|
213417
214112
|
append_failed = true;
|
|
213418
214113
|
return true;
|
|
213419
214114
|
}
|
|
@@ -213427,7 +214122,6 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t
|
|
|
213427
214122
|
for (auto *index : already_appended) {
|
|
213428
214123
|
index->Delete(chunk, row_identifiers);
|
|
213429
214124
|
}
|
|
213430
|
-
|
|
213431
214125
|
return false;
|
|
213432
214126
|
}
|
|
213433
214127
|
return true;
|
|
@@ -214070,12 +214764,21 @@ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendSta
|
|
|
214070
214764
|
append_state.current_row);
|
|
214071
214765
|
}
|
|
214072
214766
|
if (constraint_violated) {
|
|
214767
|
+
PreservedError error;
|
|
214073
214768
|
// need to revert the append
|
|
214074
214769
|
row_t current_row = append_state.row_start;
|
|
214075
214770
|
// remove the data from the indexes, if there are any indexes
|
|
214076
214771
|
row_groups->Scan(transaction, [&](DataChunk &chunk) -> bool {
|
|
214077
214772
|
// append this chunk to the indexes of the table
|
|
214078
|
-
|
|
214773
|
+
try {
|
|
214774
|
+
table->RemoveFromIndexes(append_state, chunk, current_row);
|
|
214775
|
+
} catch (Exception &ex) {
|
|
214776
|
+
error = PreservedError(ex);
|
|
214777
|
+
return false;
|
|
214778
|
+
} catch (std::exception &ex) {
|
|
214779
|
+
error = PreservedError(ex);
|
|
214780
|
+
return false;
|
|
214781
|
+
}
|
|
214079
214782
|
|
|
214080
214783
|
current_row += chunk.size();
|
|
214081
214784
|
if (current_row >= append_state.current_row) {
|
|
@@ -214087,6 +214790,9 @@ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendSta
|
|
|
214087
214790
|
if (append_to_table) {
|
|
214088
214791
|
table->RevertAppendInternal(append_state.row_start, append_count);
|
|
214089
214792
|
}
|
|
214793
|
+
if (error) {
|
|
214794
|
+
error.Throw();
|
|
214795
|
+
}
|
|
214090
214796
|
throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
|
|
214091
214797
|
}
|
|
214092
214798
|
}
|
|
@@ -214218,7 +214924,7 @@ void LocalStorage::InitializeAppend(LocalAppendState &state, DataTable *table) {
|
|
|
214218
214924
|
void LocalStorage::Append(LocalAppendState &state, DataChunk &chunk) {
|
|
214219
214925
|
// append to unique indices (if any)
|
|
214220
214926
|
auto storage = state.storage;
|
|
214221
|
-
idx_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows();
|
|
214927
|
+
idx_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows() + state.append_state.total_append_count;
|
|
214222
214928
|
if (!DataTable::AppendToIndexes(storage->indexes, chunk, base_id)) {
|
|
214223
214929
|
throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
|
|
214224
214930
|
}
|
|
@@ -215000,6 +215706,7 @@ block_id_t SingleFileBlockManager::GetFreeBlockId() {
|
|
|
215000
215706
|
void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) {
|
|
215001
215707
|
lock_guard<mutex> lock(block_lock);
|
|
215002
215708
|
D_ASSERT(block_id >= 0);
|
|
215709
|
+
D_ASSERT(block_id < max_block);
|
|
215003
215710
|
D_ASSERT(free_list.find(block_id) == free_list.end());
|
|
215004
215711
|
multi_use_blocks.erase(block_id);
|
|
215005
215712
|
free_list.insert(block_id);
|
|
@@ -215008,6 +215715,7 @@ void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) {
|
|
|
215008
215715
|
void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) {
|
|
215009
215716
|
lock_guard<mutex> lock(block_lock);
|
|
215010
215717
|
D_ASSERT(block_id >= 0);
|
|
215718
|
+
D_ASSERT(block_id < max_block);
|
|
215011
215719
|
|
|
215012
215720
|
// check if the block is a multi-use block
|
|
215013
215721
|
auto entry = multi_use_blocks.find(block_id);
|
|
@@ -215030,6 +215738,8 @@ void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) {
|
|
|
215030
215738
|
|
|
215031
215739
|
void SingleFileBlockManager::IncreaseBlockReferenceCount(block_id_t block_id) {
|
|
215032
215740
|
lock_guard<mutex> lock(block_lock);
|
|
215741
|
+
D_ASSERT(block_id >= 0);
|
|
215742
|
+
D_ASSERT(block_id < max_block);
|
|
215033
215743
|
D_ASSERT(free_list.find(block_id) == free_list.end());
|
|
215034
215744
|
auto entry = multi_use_blocks.find(block_id);
|
|
215035
215745
|
if (entry != multi_use_blocks.end()) {
|
|
@@ -218111,7 +218821,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
|
|
|
218111
218821
|
block = block_manager.RegisterBlock(block_id);
|
|
218112
218822
|
}
|
|
218113
218823
|
auto segment_size = Storage::BLOCK_SIZE;
|
|
218114
|
-
return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::PERSISTENT, start, count, function,
|
|
218824
|
+
return make_unique<ColumnSegment>(db, move(block), type, ColumnSegmentType::PERSISTENT, start, count, function,
|
|
218115
218825
|
move(statistics), block_id, offset, segment_size);
|
|
218116
218826
|
}
|
|
218117
218827
|
|
|
@@ -218125,9 +218835,9 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance
|
|
|
218125
218835
|
if (segment_size < Storage::BLOCK_SIZE) {
|
|
218126
218836
|
block = buffer_manager.RegisterSmallMemory(segment_size);
|
|
218127
218837
|
} else {
|
|
218128
|
-
|
|
218838
|
+
buffer_manager.Allocate(segment_size, false, &block);
|
|
218129
218839
|
}
|
|
218130
|
-
return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr,
|
|
218840
|
+
return make_unique<ColumnSegment>(db, move(block), type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr,
|
|
218131
218841
|
INVALID_BLOCK, 0, segment_size);
|
|
218132
218842
|
}
|
|
218133
218843
|
|
|
@@ -218208,9 +218918,9 @@ void ColumnSegment::Resize(idx_t new_size) {
|
|
|
218208
218918
|
D_ASSERT(new_size > this->segment_size);
|
|
218209
218919
|
D_ASSERT(offset == 0);
|
|
218210
218920
|
auto &buffer_manager = BufferManager::GetBufferManager(db);
|
|
218211
|
-
auto new_block = buffer_manager.RegisterMemory(Storage::BLOCK_SIZE, false);
|
|
218212
218921
|
auto old_handle = buffer_manager.Pin(block);
|
|
218213
|
-
|
|
218922
|
+
shared_ptr<BlockHandle> new_block;
|
|
218923
|
+
auto new_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block);
|
|
218214
218924
|
memcpy(new_handle.Ptr(), old_handle.Ptr(), segment_size);
|
|
218215
218925
|
this->block_id = new_block->BlockId();
|
|
218216
218926
|
this->block = move(new_block);
|
|
@@ -221658,7 +222368,8 @@ static UpdateSegment::rollback_update_function_t GetRollbackUpdateFunction(Physi
|
|
|
221658
222368
|
static UpdateSegment::statistics_update_function_t GetStatisticsUpdateFunction(PhysicalType type);
|
|
221659
222369
|
static UpdateSegment::fetch_row_function_t GetFetchRowFunction(PhysicalType type);
|
|
221660
222370
|
|
|
221661
|
-
UpdateSegment::UpdateSegment(ColumnData &column_data)
|
|
222371
|
+
UpdateSegment::UpdateSegment(ColumnData &column_data)
|
|
222372
|
+
: column_data(column_data), stats(column_data.type), heap(BufferAllocator::Get(column_data.GetDatabase())) {
|
|
221662
222373
|
auto physical_type = column_data.type.InternalType();
|
|
221663
222374
|
|
|
221664
222375
|
this->type_size = GetTypeIdSize(physical_type);
|
|
@@ -223877,7 +224588,10 @@ void CleanupState::Flush() {
|
|
|
223877
224588
|
Vector row_identifiers(LogicalType::ROW_TYPE, (data_ptr_t)row_numbers);
|
|
223878
224589
|
|
|
223879
224590
|
// delete the tuples from all the indexes
|
|
223880
|
-
|
|
224591
|
+
try {
|
|
224592
|
+
current_table->RemoveFromIndexes(row_identifiers, count);
|
|
224593
|
+
} catch (...) {
|
|
224594
|
+
}
|
|
223881
224595
|
|
|
223882
224596
|
count = 0;
|
|
223883
224597
|
}
|
|
@@ -228169,626 +228883,628 @@ size_t duckdb_fsst_compressAVX512(SymbolTable &symbolTable, u8* codeBase, u8* sy
|
|
|
228169
228883
|
|
|
228170
228884
|
|
|
228171
228885
|
Symbol concat(Symbol a, Symbol b) {
|
|
228172
|
-
|
|
228173
|
-
|
|
228174
|
-
|
|
228175
|
-
|
|
228176
|
-
|
|
228177
|
-
|
|
228886
|
+
Symbol s;
|
|
228887
|
+
u32 length = a.length()+b.length();
|
|
228888
|
+
if (length > Symbol::maxLength) length = Symbol::maxLength;
|
|
228889
|
+
s.set_code_len(FSST_CODE_MASK, length);
|
|
228890
|
+
s.val.num = (b.val.num << (8*a.length())) | a.val.num;
|
|
228891
|
+
return s;
|
|
228178
228892
|
}
|
|
228179
228893
|
|
|
228180
228894
|
namespace std {
|
|
228181
228895
|
template <>
|
|
228182
228896
|
class hash<QSymbol> {
|
|
228183
|
-
|
|
228184
|
-
|
|
228185
|
-
|
|
228186
|
-
|
|
228187
|
-
|
|
228188
|
-
|
|
228189
|
-
|
|
228190
|
-
|
|
228191
|
-
|
|
228192
|
-
|
|
228193
|
-
|
|
228194
|
-
|
|
228195
|
-
|
|
228196
|
-
|
|
228197
|
-
|
|
228198
|
-
|
|
228897
|
+
public:
|
|
228898
|
+
size_t operator()(const QSymbol& q) const {
|
|
228899
|
+
uint64_t k = q.symbol.val.num;
|
|
228900
|
+
const uint64_t m = 0xc6a4a7935bd1e995;
|
|
228901
|
+
const int r = 47;
|
|
228902
|
+
uint64_t h = 0x8445d61a4e774912 ^ (8*m);
|
|
228903
|
+
k *= m;
|
|
228904
|
+
k ^= k >> r;
|
|
228905
|
+
k *= m;
|
|
228906
|
+
h ^= k;
|
|
228907
|
+
h *= m;
|
|
228908
|
+
h ^= h >> r;
|
|
228909
|
+
h *= m;
|
|
228910
|
+
h ^= h >> r;
|
|
228911
|
+
return h;
|
|
228912
|
+
}
|
|
228199
228913
|
};
|
|
228200
228914
|
}
|
|
228201
228915
|
|
|
228202
228916
|
bool isEscapeCode(u16 pos) { return pos < FSST_CODE_BASE; }
|
|
228203
228917
|
|
|
228204
228918
|
std::ostream& operator<<(std::ostream& out, const Symbol& s) {
|
|
228205
|
-
|
|
228206
|
-
|
|
228207
|
-
|
|
228919
|
+
for (u32 i=0; i<s.length(); i++)
|
|
228920
|
+
out << s.val.str[i];
|
|
228921
|
+
return out;
|
|
228208
228922
|
}
|
|
228209
|
-
//static u64 iter = 0;
|
|
228210
228923
|
|
|
228211
228924
|
SymbolTable *buildSymbolTable(Counters& counters, vector<u8*> line, size_t len[], bool zeroTerminated=false) {
|
|
228212
|
-
|
|
228213
|
-
|
|
228214
|
-
|
|
228215
|
-
|
|
228216
|
-
|
|
228217
|
-
|
|
228218
|
-
|
|
228219
|
-
|
|
228220
|
-
|
|
228221
|
-
|
|
228222
|
-
|
|
228223
|
-
|
|
228224
|
-
|
|
228225
|
-
|
|
228226
|
-
|
|
228227
|
-
|
|
228228
|
-
|
|
228229
|
-
|
|
228230
|
-
|
|
228231
|
-
|
|
228232
|
-
|
|
228233
|
-
|
|
228234
|
-
|
|
228235
|
-
|
|
228925
|
+
SymbolTable *st = new SymbolTable(), *bestTable = new SymbolTable();
|
|
228926
|
+
int bestGain = (int) -FSST_SAMPLEMAXSZ; // worst case (everything exception)
|
|
228927
|
+
size_t sampleFrac = 128;
|
|
228928
|
+
|
|
228929
|
+
// start by determining the terminator. We use the (lowest) most infrequent byte as terminator
|
|
228930
|
+
st->zeroTerminated = zeroTerminated;
|
|
228931
|
+
if (zeroTerminated) {
|
|
228932
|
+
st->terminator = 0; // except in case of zeroTerminated mode, then byte 0 is terminator regardless frequency
|
|
228933
|
+
} else {
|
|
228934
|
+
u16 byteHisto[256];
|
|
228935
|
+
memset(byteHisto, 0, sizeof(byteHisto));
|
|
228936
|
+
for(size_t i=0; i<line.size(); i++) {
|
|
228937
|
+
u8* cur = line[i];
|
|
228938
|
+
u8* end = cur + len[i];
|
|
228939
|
+
while(cur < end) byteHisto[*cur++]++;
|
|
228940
|
+
}
|
|
228941
|
+
u32 minSize = FSST_SAMPLEMAXSZ, i = st->terminator = 256;
|
|
228942
|
+
while(i-- > 0) {
|
|
228943
|
+
if (byteHisto[i] > minSize) continue;
|
|
228944
|
+
st->terminator = i;
|
|
228945
|
+
minSize = byteHisto[i];
|
|
228946
|
+
}
|
|
228947
|
+
}
|
|
228948
|
+
assert(st->terminator != 256);
|
|
228949
|
+
|
|
228950
|
+
// a random number between 0 and 128
|
|
228951
|
+
auto rnd128 = [&](size_t i) { return 1 + (FSST_HASH((i+1UL)*sampleFrac)&127); };
|
|
228952
|
+
|
|
228953
|
+
// compress sample, and compute (pair-)frequencies
|
|
228954
|
+
auto compressCount = [&](SymbolTable *st, Counters &counters) { // returns gain
|
|
228955
|
+
int gain = 0;
|
|
228956
|
+
|
|
228957
|
+
for(size_t i=0; i<line.size(); i++) {
|
|
228958
|
+
u8* cur = line[i];
|
|
228959
|
+
u8* end = cur + len[i];
|
|
228960
|
+
|
|
228961
|
+
if (sampleFrac < 128) {
|
|
228962
|
+
// in earlier rounds (sampleFrac < 128) we skip data in the sample (reduces overall work ~2x)
|
|
228963
|
+
if (rnd128(i) > sampleFrac) continue;
|
|
228964
|
+
}
|
|
228965
|
+
if (cur < end) {
|
|
228966
|
+
u8* start = cur;
|
|
228967
|
+
u16 code2 = 255, code1 = st->findLongestSymbol(cur, end);
|
|
228968
|
+
cur += st->symbols[code1].length();
|
|
228969
|
+
gain += (int) (st->symbols[code1].length()-(1+isEscapeCode(code1)));
|
|
228970
|
+
while (true) {
|
|
228971
|
+
// count single symbol (i.e. an option is not extending it)
|
|
228972
|
+
counters.count1Inc(code1);
|
|
228236
228973
|
|
|
228237
|
-
|
|
228238
|
-
|
|
228974
|
+
// as an alternative, consider just using the next byte..
|
|
228975
|
+
if (st->symbols[code1].length() != 1) // .. but do not count single byte symbols doubly
|
|
228976
|
+
counters.count1Inc(*start);
|
|
228239
228977
|
|
|
228240
|
-
|
|
228241
|
-
|
|
228242
|
-
|
|
228978
|
+
if (cur==end) {
|
|
228979
|
+
break;
|
|
228980
|
+
}
|
|
228243
228981
|
|
|
228244
|
-
|
|
228245
|
-
|
|
228246
|
-
|
|
228982
|
+
// now match a new symbol
|
|
228983
|
+
start = cur;
|
|
228984
|
+
if (cur<end-7) {
|
|
228985
|
+
u64 word = fsst_unaligned_load(cur);
|
|
228986
|
+
size_t code = word & 0xFFFFFF;
|
|
228987
|
+
size_t idx = FSST_HASH(code)&(st->hashTabSize-1);
|
|
228988
|
+
Symbol s = st->hashTab[idx];
|
|
228989
|
+
code2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK;
|
|
228990
|
+
word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
|
|
228991
|
+
if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) {
|
|
228992
|
+
code2 = s.code();
|
|
228993
|
+
cur += s.length();
|
|
228994
|
+
} else if (code2 >= FSST_CODE_BASE) {
|
|
228995
|
+
cur += 2;
|
|
228996
|
+
} else {
|
|
228997
|
+
code2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK;
|
|
228998
|
+
cur += 1;
|
|
228999
|
+
}
|
|
229000
|
+
} else {
|
|
229001
|
+
code2 = st->findLongestSymbol(cur, end);
|
|
229002
|
+
cur += st->symbols[code2].length();
|
|
229003
|
+
}
|
|
228247
229004
|
|
|
228248
|
-
|
|
228249
|
-
|
|
228250
|
-
if (rnd128(i) > sampleFrac) continue;
|
|
228251
|
-
}
|
|
228252
|
-
if (cur < end) {
|
|
228253
|
-
u16 pos2 = 255, pos1 = st->findLongestSymbol(cur, end);
|
|
228254
|
-
cur += st->symbols[pos1].length();
|
|
228255
|
-
gain += (int) (st->symbols[pos1].length()-(1+isEscapeCode(pos1)));
|
|
228256
|
-
while (true) {
|
|
228257
|
-
u8* old = cur;
|
|
228258
|
-
counters.count1Inc(pos1);
|
|
228259
|
-
// count single symbol (i.e. an option is not extending it)
|
|
228260
|
-
if (cur>=end)
|
|
228261
|
-
break;
|
|
228262
|
-
if (st->symbols[pos1].length() != 1)
|
|
228263
|
-
counters.count1Inc(*cur);
|
|
228264
|
-
if (cur<end-7) {
|
|
228265
|
-
u64 word = fsst_unaligned_load(cur);
|
|
228266
|
-
size_t pos = word & 0xFFFFFF;
|
|
228267
|
-
size_t idx = FSST_HASH(pos)&(st->hashTabSize-1);
|
|
228268
|
-
Symbol s = st->hashTab[idx];
|
|
228269
|
-
pos2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK;
|
|
228270
|
-
word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
|
|
228271
|
-
if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) {
|
|
228272
|
-
pos2 = s.code();
|
|
228273
|
-
cur += s.length();
|
|
228274
|
-
} else if (pos2 >= FSST_CODE_BASE) {
|
|
228275
|
-
cur += 2;
|
|
228276
|
-
} else {
|
|
228277
|
-
pos2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK;
|
|
228278
|
-
cur += 1;
|
|
228279
|
-
}
|
|
228280
|
-
} else {
|
|
228281
|
-
assert(cur<end);
|
|
228282
|
-
pos2 = st->findLongestSymbol(cur, end);
|
|
228283
|
-
cur += st->symbols[pos2].length();
|
|
228284
|
-
}
|
|
228285
|
-
|
|
228286
|
-
// compute compressed output size
|
|
228287
|
-
gain += ((int) (cur-old))-(1+isEscapeCode(pos2));
|
|
228288
|
-
|
|
228289
|
-
// now count the subsequent two symbols we encode as an extension possibility
|
|
228290
|
-
if (sampleFrac < 128) { // no need to count pairs in final round
|
|
228291
|
-
counters.count2Inc(pos1, pos2);
|
|
228292
|
-
if ((cur-old) > 1) // do not count escaped bytes doubly
|
|
228293
|
-
counters.count2Inc(pos1, *old);
|
|
228294
|
-
}
|
|
228295
|
-
pos1 = pos2;
|
|
228296
|
-
}
|
|
228297
|
-
}
|
|
228298
|
-
}
|
|
228299
|
-
return gain;
|
|
228300
|
-
};
|
|
229005
|
+
// compute compressed output size
|
|
229006
|
+
gain += ((int) (cur-start))-(1+isEscapeCode(code2));
|
|
228301
229007
|
|
|
228302
|
-
|
|
228303
|
-
|
|
228304
|
-
|
|
228305
|
-
|
|
228306
|
-
// artificially make terminater the most frequent symbol so it gets included
|
|
228307
|
-
u16 terminator = st->nSymbols?FSST_CODE_BASE:st->terminator;
|
|
228308
|
-
counters.count1Set(terminator,65535);
|
|
228309
|
-
|
|
228310
|
-
auto addOrInc = [&](unordered_set<QSymbol> &cands, Symbol s, u64 count) {
|
|
228311
|
-
if (count < (5*sampleFrac)/128) return; // improves both compression speed (less candidates), but also quality!!
|
|
228312
|
-
QSymbol q;
|
|
228313
|
-
q.symbol = s;
|
|
228314
|
-
q.gain = count * s.length();
|
|
228315
|
-
auto it = cands.find(q);
|
|
228316
|
-
if (it != cands.end()) {
|
|
228317
|
-
q.gain += (*it).gain;
|
|
228318
|
-
cands.erase(*it);
|
|
228319
|
-
}
|
|
228320
|
-
cands.insert(q);
|
|
228321
|
-
};
|
|
229008
|
+
// now count the subsequent two symbols we encode as an extension codesibility
|
|
229009
|
+
if (sampleFrac < 128) { // no need to count pairs in final round
|
|
229010
|
+
// consider the symbol that is the concatenation of the two last symbols
|
|
229011
|
+
counters.count2Inc(code1, code2);
|
|
228322
229012
|
|
|
228323
|
-
|
|
228324
|
-
|
|
228325
|
-
|
|
228326
|
-
|
|
229013
|
+
// as an alternative, consider just extending with the next byte..
|
|
229014
|
+
if ((cur-start) > 1) // ..but do not count single byte extensions doubly
|
|
229015
|
+
counters.count2Inc(code1, *start);
|
|
229016
|
+
}
|
|
229017
|
+
code1 = code2;
|
|
229018
|
+
}
|
|
229019
|
+
}
|
|
229020
|
+
}
|
|
229021
|
+
return gain;
|
|
229022
|
+
};
|
|
228327
229023
|
|
|
228328
|
-
|
|
228329
|
-
|
|
228330
|
-
|
|
229024
|
+
auto makeTable = [&](SymbolTable *st, Counters &counters) {
|
|
229025
|
+
// hashmap of c (needed because we can generate duplicate candidates)
|
|
229026
|
+
unordered_set<QSymbol> cands;
|
|
229027
|
+
|
|
229028
|
+
// artificially make terminater the most frequent symbol so it gets included
|
|
229029
|
+
u16 terminator = st->nSymbols?FSST_CODE_BASE:st->terminator;
|
|
229030
|
+
counters.count1Set(terminator,65535);
|
|
229031
|
+
|
|
229032
|
+
auto addOrInc = [&](unordered_set<QSymbol> &cands, Symbol s, u64 count) {
|
|
229033
|
+
if (count < (5*sampleFrac)/128) return; // improves both compression speed (less candidates), but also quality!!
|
|
229034
|
+
QSymbol q;
|
|
229035
|
+
q.symbol = s;
|
|
229036
|
+
q.gain = count * s.length();
|
|
229037
|
+
auto it = cands.find(q);
|
|
229038
|
+
if (it != cands.end()) {
|
|
229039
|
+
q.gain += (*it).gain;
|
|
229040
|
+
cands.erase(*it);
|
|
229041
|
+
}
|
|
229042
|
+
cands.insert(q);
|
|
229043
|
+
};
|
|
228331
229044
|
|
|
228332
|
-
|
|
228333
|
-
|
|
228334
|
-
|
|
228335
|
-
|
|
228336
|
-
}
|
|
228337
|
-
for (u32 pos2=0; pos2<FSST_CODE_BASE+(size_t)st->nSymbols; pos2++) {
|
|
228338
|
-
u32 cnt2 = counters.count2GetNext(pos1, pos2); // may advance pos2!!
|
|
228339
|
-
if (!cnt2) continue;
|
|
228340
|
-
|
|
228341
|
-
// create a new symbol
|
|
228342
|
-
Symbol s2 = st->symbols[pos2];
|
|
228343
|
-
Symbol s3 = concat(s1, s2);
|
|
228344
|
-
if (s2.val.str[0] != st->terminator) // multi-byte symbols cannot contain the terminator byte
|
|
228345
|
-
addOrInc(cands, s3, cnt2);
|
|
228346
|
-
}
|
|
228347
|
-
}
|
|
229045
|
+
// add candidate symbols based on counted frequency
|
|
229046
|
+
for (u32 pos1=0; pos1<FSST_CODE_BASE+(size_t) st->nSymbols; pos1++) {
|
|
229047
|
+
u32 cnt1 = counters.count1GetNext(pos1); // may advance pos1!!
|
|
229048
|
+
if (!cnt1) continue;
|
|
228348
229049
|
|
|
228349
|
-
|
|
228350
|
-
|
|
228351
|
-
|
|
228352
|
-
|
|
228353
|
-
|
|
228354
|
-
|
|
228355
|
-
|
|
228356
|
-
|
|
228357
|
-
|
|
228358
|
-
|
|
228359
|
-
|
|
228360
|
-
|
|
228361
|
-
|
|
228362
|
-
|
|
229050
|
+
// heuristic: promoting single-byte symbols (*8) helps reduce exception rates and increases [de]compression speed
|
|
229051
|
+
Symbol s1 = st->symbols[pos1];
|
|
229052
|
+
addOrInc(cands, s1, ((s1.length()==1)?8LL:1LL)*cnt1);
|
|
229053
|
+
|
|
229054
|
+
if (sampleFrac >= 128 || // last round we do not create new (combined) symbols
|
|
229055
|
+
s1.length() == Symbol::maxLength || // symbol cannot be extended
|
|
229056
|
+
s1.val.str[0] == st->terminator) { // multi-byte symbols cannot contain the terminator byte
|
|
229057
|
+
continue;
|
|
229058
|
+
}
|
|
229059
|
+
for (u32 pos2=0; pos2<FSST_CODE_BASE+(size_t)st->nSymbols; pos2++) {
|
|
229060
|
+
u32 cnt2 = counters.count2GetNext(pos1, pos2); // may advance pos2!!
|
|
229061
|
+
if (!cnt2) continue;
|
|
229062
|
+
|
|
229063
|
+
// create a new symbol
|
|
229064
|
+
Symbol s2 = st->symbols[pos2];
|
|
229065
|
+
Symbol s3 = concat(s1, s2);
|
|
229066
|
+
if (s2.val.str[0] != st->terminator) // multi-byte symbols cannot contain the terminator byte
|
|
229067
|
+
addOrInc(cands, s3, cnt2);
|
|
229068
|
+
}
|
|
229069
|
+
}
|
|
228363
229070
|
|
|
228364
|
-
|
|
229071
|
+
// insert candidates into priority queue (by gain)
|
|
229072
|
+
auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.val.num > q2.symbol.val.num); };
|
|
229073
|
+
priority_queue<QSymbol,vector<QSymbol>,decltype(cmpGn)> pq(cmpGn);
|
|
229074
|
+
for (auto& q : cands)
|
|
229075
|
+
pq.push(q);
|
|
229076
|
+
|
|
229077
|
+
// Create new symbol map using best candidates
|
|
229078
|
+
st->clear();
|
|
229079
|
+
while (st->nSymbols < 255 && !pq.empty()) {
|
|
229080
|
+
QSymbol q = pq.top();
|
|
229081
|
+
pq.pop();
|
|
229082
|
+
st->add(q.symbol);
|
|
229083
|
+
}
|
|
229084
|
+
};
|
|
229085
|
+
|
|
229086
|
+
u8 bestCounters[512*sizeof(u16)];
|
|
228365
229087
|
#ifdef NONOPT_FSST
|
|
228366
|
-
|
|
228367
|
-
|
|
229088
|
+
for(size_t frac : {127, 127, 127, 127, 127, 127, 127, 127, 127, 128}) {
|
|
229089
|
+
sampleFrac = frac;
|
|
228368
229090
|
#else
|
|
228369
|
-
|
|
229091
|
+
for(sampleFrac=8; true; sampleFrac += 30) {
|
|
228370
229092
|
#endif
|
|
228371
|
-
|
|
228372
|
-
|
|
228373
|
-
|
|
228374
|
-
|
|
228375
|
-
|
|
228376
|
-
|
|
228377
|
-
|
|
228378
|
-
|
|
228379
|
-
|
|
228380
|
-
|
|
228381
|
-
|
|
228382
|
-
|
|
228383
|
-
|
|
228384
|
-
|
|
229093
|
+
memset(&counters, 0, sizeof(Counters));
|
|
229094
|
+
long gain = compressCount(st, counters);
|
|
229095
|
+
if (gain >= bestGain) { // a new best solution!
|
|
229096
|
+
counters.backup1(bestCounters);
|
|
229097
|
+
*bestTable = *st; bestGain = gain;
|
|
229098
|
+
}
|
|
229099
|
+
if (sampleFrac >= 128) break; // we do 5 rounds (sampleFrac=8,38,68,98,128)
|
|
229100
|
+
makeTable(st, counters);
|
|
229101
|
+
}
|
|
229102
|
+
delete st;
|
|
229103
|
+
counters.restore1(bestCounters);
|
|
229104
|
+
makeTable(bestTable, counters);
|
|
229105
|
+
bestTable->finalize(zeroTerminated); // renumber codes for more efficient compression
|
|
229106
|
+
return bestTable;
|
|
228385
229107
|
}
|
|
228386
229108
|
|
|
228387
229109
|
// Batch-oriented compression that hands work to duckdb_fsst_compressAVX512.
//
// Input strings are cut into chunks of at most 511 bytes. Chunks are copied (each followed by the
// terminator byte) into the scratch area starting at symbolBase and registered as jobs, up to 512
// jobs per batch. A batch is flushed when it holds 512 jobs, when the worst-case output estimate
// exceeds 1<<19 bytes, or when the last line has been registered. Flushing encodes the jobs into
// the area starting at codeBase (symbolBase + (1<<18)), finishes anything left over with the
// scalar encoder, and appends the encoded chunks to dst in input order.
//
// strOut[i]/lenOut[i] receive the start pointer and total encoded size for every line handled.
// The loop stops early when the remaining room in dst (tracked in `budget`) cannot hold the
// worst-case encoding of the next line. Returns curLine, i.e. how many lines were consumed.
static inline size_t compressSIMD(SymbolTable &symbolTable, u8* symbolBase, size_t nlines, size_t len[], u8* line[], size_t size, u8* dst, size_t lenOut[], u8* strOut[], int unroll) {
   const size_t kBatch = 512;                  // maximum number of jobs per batch
   const size_t kOutLimit = (size_t) 1 << 19;  // worst-case output bytes a batch may claim

   size_t curLine = 0, inOff = 0, outOff = 0, batchPos = 0, empty = 0, budget = size;
   u8 *lim = dst + size;
   u8 *codeBase = symbolBase + (1<<18); // 512KB temp space for compressing 512 strings
   SIMDjob input[512];   // combined offsets of input strings (cur,end), and string #id (pos) and output (dst) pointer
   SIMDjob output[512];  // output are (pos:9,dst:19) end pointers (compute compressed length from this)
   size_t jobLine[512];  // input line each job belongs to (a line may be split into multiple jobs)

   while (curLine < nlines && outOff <= kOutLimit) {
      size_t prevLine = curLine, chunk, curOff = 0;

      // bail out if the output buffer cannot hold the compressed next string fully
      const size_t worstCase = (len[curLine]-curOff)*2;
      if (worstCase + 7 > budget) break; // +7: the scatter may write 7 extra zero bytes
      budget -= worstCase;

      strOut[curLine] = (u8*) 0;
      lenOut[curLine] = 0;

      do {
         do {
            // large strings need to be chopped up into segments of 511 bytes
            chunk = len[curLine] - curOff;
            if (chunk > 511) {
               chunk = 511;
            }

            // describe the job for this chunk
            SIMDjob job;
            job.cur = inOff;
            job.end = job.cur + chunk;
            job.pos = batchPos;
            job.out = outOff;

            // worst case estimate for compressed size (+7 is for the scatter that writes extra 7 zeros)
            outOff += 7 + 2*(size_t)(job.end - job.cur); // note, total size needed is 512*(511*2+7) bytes.
            if (outOff > kOutLimit) break; // simdbuf may get full, stop before this chunk

            // register job in this batch
            input[batchPos] = job;
            jobLine[batchPos] = curLine;

            if (chunk == 0) {
               // the SIMD code cannot handle empty strings, so count them to filter them out later
               empty++;
            } else {
               // copy string chunk into temp buffer, followed by an extra char that will not be encoded
               memcpy(symbolBase + inOff, line[curLine] + curOff, chunk);
               inOff += chunk;
               curOff += chunk;
               symbolBase[inOff++] = (u8) symbolTable.terminator;
            }
            if (++batchPos == kBatch) break;
         } while(curOff < len[curLine]);

         // cannot accumulate more? (note: curLine only advances when the first two tests fail)
         if ((batchPos == kBatch) || (outOff > kOutLimit) || (++curLine >= nlines)) {
            if (batchPos-empty >= 32) { // enough work to fire off fsst_compressAVX512 (32 is due to max 4x8 unrolling)
               // radix-sort jobs on length (longest string first)
               // -- this provides best load balancing and allows to skip empty jobs at the end
               u16 sortpos[513];
               memset(sortpos, 0, sizeof(sortpos));

               // length histogram
               for(size_t i=0; i<batchPos; i++) {
                  size_t jobLen = input[i].end - input[i].cur;
                  sortpos[512UL - jobLen]++;
               }
               // running sum
               for(size_t i=1; i<=512; i++)
                  sortpos[i] += sortpos[i-1];

               // move jobs to their final destination
               SIMDjob inputOrdered[512];
               for(size_t i=0; i<batchPos; i++) {
                  size_t jobLen = input[i].end - input[i].cur;
                  size_t slot = sortpos[511UL - jobLen]++;
                  inputOrdered[slot] = input[i];
               }

               // SIMD compress max 256KB of simdbuf into (max) 512KB of simdbuf (but presumably much less..)
               size_t done = duckdb_fsst_compressAVX512(symbolTable, codeBase, symbolBase, inputOrdered, output, batchPos-empty, unroll);
               // jobs the SIMD routine did not report on are carried over untouched
               while (done < batchPos) {
                  output[done] = inputOrdered[done];
                  done++;
               }
            } else {
               memcpy(output, input, batchPos*sizeof(SIMDjob));
            }

            // finish encoding (unfinished strings in process, plus the few last strings not yet processed)
            for(size_t i=0; i<batchPos; i++) {
               SIMDjob job = output[i];
               if (job.cur < job.end) { // finish encoding this string with scalar code
                  u8* cur = symbolBase + job.cur;
                  u8* end = symbolBase + job.end;
                  u8* out = codeBase + job.out;
                  while (cur < end) {
                     u64 word = fsst_unaligned_load(cur);
                     size_t code = symbolTable.shortCodes[word & 0xFFFF];
                     size_t pos = word & 0xFFFFFF;
                     size_t idx = FSST_HASH(pos)&(symbolTable.hashTabSize-1);
                     Symbol s = symbolTable.hashTab[idx];
                     out[1] = (u8) word; // speculatively write out escaped byte
                     word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
                     if ((s.icl < FSST_ICL_FREE) && s.val.num == word) {
                        // hash table hit
                        *out++ = (u8) s.code();
                        cur += s.length();
                        continue;
                     }
                     // could be a 2-byte or 1-byte code, or miss: handle everything with predication
                     *out = (u8) code;
                     out += 1+((code&FSST_CODE_BASE)>>8);
                     cur += (code>>FSST_LEN_BITS);
                  }
                  job.out = out - codeBase;
               }
               // postprocess job info, stored back at the job's original batch position
               job.cur = 0;
               job.end = job.out - input[job.pos].out; // misuse .end field as compressed size
               job.out = input[job.pos].out;           // reset offset to start of encoded string
               input[job.pos] = job;
            }

            // copy out the result data
            for(size_t i=0; i<batchPos; i++) {
               size_t lineNr = jobLine[i]; // the sort must be order-preserving, as we concatenate results string in order
               size_t sz = input[i].end;   // had stored compressed lengths here
               if (!strOut[lineNr]) strOut[lineNr] = dst; // first segment will be the strOut pointer
               lenOut[lineNr] += sz;       // add segment (lenOut starts at 0 for this reason)
               memcpy(dst, codeBase+input[i].out, sz);
               dst += sz;
            }

            // go for the next batch of 512 chunks
            inOff = outOff = batchPos = empty = 0;
            budget = (size_t) (lim - dst);
         }
      } while (curLine == prevLine && outOff <= kOutLimit);
   }
   return curLine;
}
|
|
228519
229241
|
|
|
228520
229242
|
|
|
228521
229243
|
// optimized adaptive *scalar* compression method
|
|
228522
229244
|
// Scalar compression of nlines strings into the buffer [out, out+size).
//
// Each string is processed in chunks of at most 511 bytes. A chunk is first copied into a small
// local buffer, followed by the terminator byte, and then encoded by one of three loop variants
// selected through noSuffixOpt / avoidBranch. strOut[i] and lenOut[i] receive the start and the
// encoded size of line i. Returns the number of fully compressed lines; this is less than nlines
// when fewer than 2*chunk+7 bytes remain in the output buffer for the next chunk.
static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_t lenIn[], u8* strIn[], size_t size, u8* out, size_t lenOut[], u8* strOut[], bool noSuffixOpt, bool avoidBranch) {
   u8 *cur = NULL, *end = NULL, *lim = out + size;
   size_t curLine, suffixLim = symbolTable.suffixLim;
   u8 byteLim = symbolTable.nSymbols + symbolTable.zeroTerminated - symbolTable.lenHisto[0];

   u8 buf[512+7];         /* +7 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */
   memset(buf+511, 0, 8); /* and initialize the sentinel bytes */

   // three variants are possible. dead code falls away since the bool arguments are constants
   auto compressVariant = [&](bool skipSuffixCheck, bool predicated) {
      while (cur < end) {
         u64 word = fsst_unaligned_load(cur);
         size_t code = symbolTable.shortCodes[word & 0xFFFF];
         if (skipSuffixCheck && ((u8) code) < suffixLim) {
            // 2 byte code without having to worry about longer matches
            *out++ = (u8) code;
            cur += 2;
            continue;
         }

         size_t pos = word & 0xFFFFFF;
         size_t idx = FSST_HASH(pos)&(symbolTable.hashTabSize-1);
         Symbol s = symbolTable.hashTab[idx];
         out[1] = (u8) word; // speculatively write out escaped byte
         word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);

         if ((s.icl < FSST_ICL_FREE) && s.val.num == word) {
            // hash table hit
            *out++ = (u8) s.code();
            cur += s.length();
            continue;
         }
         if (predicated) {
            // could be a 2-byte or 1-byte code, or miss: handle everything with predication
            *out = (u8) code;
            out += 1+((code&FSST_CODE_BASE)>>8);
            cur += (code>>FSST_LEN_BITS);
            continue;
         }
         if ((u8) code < byteLim) {
            // 2 byte code after checking there is no longer pattern
            *out++ = (u8) code;
            cur += 2;
            continue;
         }
         // 1 byte code or miss.
         *out = (u8) code;
         out += 1+((code&FSST_CODE_BASE)>>8); // predicated - tested with a branch, that was always worse
         cur++;
      }
   };

   for(curLine=0; curLine<nlines; curLine++) {
      size_t chunk, curOff = 0;
      strOut[curLine] = out;
      do {
         cur = strIn[curLine] + curOff;
         chunk = lenIn[curLine] - curOff;
         if (chunk > 511) {
            chunk = 511; // we need to compress in chunks of 511 in order to be byte-compatible with simd-compressed FSST
         }
         if ((2*chunk+7) > (size_t) (lim-out)) {
            return curLine; // out of memory
         }

         // stage the chunk in the 511-byte buffer and encode from there
         memcpy(buf, cur, chunk);
         buf[chunk] = (u8) symbolTable.terminator;
         cur = buf;
         end = cur + chunk;

         // based on symboltable stats, choose a variant that is nice to the branch predictor
         if (noSuffixOpt) {
            compressVariant(true, false);
         } else if (avoidBranch) {
            compressVariant(false, true);
         } else {
            compressVariant(false, false);
         }
      } while((curOff += chunk) < lenIn[curLine]);
      lenOut[curLine] = (size_t) (out - strOut[curLine]);
   }
   return curLine;
}
|
|
228601
229318
|
|
|
228602
229319
|
#define FSST_SAMPLELINE ((size_t) 512)
|
|
228603
229320
|
|
|
228604
229321
|
// quickly select a uniformly random set of lines such that we have between [FSST_SAMPLETARGET,FSST_SAMPLEMAXSZ) string bytes
|
|
228605
229322
|
// Build the sample used to construct a symbol table.
//
// If the total input size is below FSST_SAMPLETARGET, every input line is used as-is: the result
// points into strIn and *lenRef is left untouched. Otherwise chunks of at most FSST_SAMPLELINE
// bytes are drawn pseudo-randomly (with a fixed seed) from non-empty lines and copied into
// sampleBuf until FSST_SAMPLETARGET bytes have been gathered; in that case *lenRef is replaced by
// a freshly new[]-allocated array holding the chunk lengths, which the caller must delete[].
//
// sampleBuf must have room for FSST_SAMPLETARGET + FSST_SAMPLELINE - 1 bytes.
// nlines must be non-zero (it is used as a modulus).
vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nlines) {
   size_t totSize = 0, *lenIn = *lenRef;
   vector<u8*> sample;

   for(size_t i=0; i<nlines; i++)
      totSize += lenIn[i];

   if (totSize < FSST_SAMPLETARGET) {
      // small input: the sample is simply the whole input
      for(size_t i=0; i<nlines; i++)
         sample.push_back(strIn[i]);
      return sample;
   }

   size_t sampleRnd = FSST_HASH(4637947);
   u8* sampleLim = sampleBuf + FSST_SAMPLETARGET;

   // Chunks are drawn with replacement and may be as short as one byte, so the number of samples
   // is bounded only by the byte target -- not by nlines. Collect the lengths in a growable
   // container instead of a fixed-size array so that many short picks cannot overrun it.
   vector<size_t> sampleLens;

   while(sampleBuf < sampleLim) {
      // choose a non-empty line (one exists, because totSize >= FSST_SAMPLETARGET)
      sampleRnd = FSST_HASH(sampleRnd);
      size_t linenr = sampleRnd % nlines;
      while (lenIn[linenr] == 0)
         if (++linenr == nlines) linenr = 0;

      // choose a chunk
      size_t chunks = 1 + ((lenIn[linenr]-1) / FSST_SAMPLELINE);
      sampleRnd = FSST_HASH(sampleRnd);
      size_t chunk = FSST_SAMPLELINE*(sampleRnd % chunks);

      // add the chunk to the sample
      size_t len = min(lenIn[linenr]-chunk,FSST_SAMPLELINE);
      memcpy(sampleBuf, strIn[linenr]+chunk, len);
      sample.push_back(sampleBuf);
      sampleLens.push_back(len);
      sampleBuf += len;
   }

   // hand the lengths to the caller as a new[] array (the caller releases it with delete[])
   size_t *lens = new size_t[sampleLens.size()];
   memcpy(lens, sampleLens.data(), sampleLens.size()*sizeof(size_t));
   *lenRef = lens;
   return sample;
}
|
|
228641
229358
|
|
|
228642
229359
|
// Create an encoder: draw a sample from the n input strings, build a symbol table from it and
// wrap the table in a newly allocated Encoder. The returned handle is heap-allocated with new.
extern "C" duckdb_fsst_encoder_t* duckdb_fsst_create(size_t n, size_t lenIn[], u8 *strIn[], int zeroTerminated) {
   u8* sampleBuf = new u8[FSST_SAMPLEMAXSZ];
   size_t *sampleLen = lenIn;

   // careful handling of input to get a right-size and representative sample
   // (makeSample may replace sampleLen with an array of its own)
   const size_t sampleLines = n ? n : 1;
   vector<u8*> sample = makeSample(sampleBuf, strIn, &sampleLen, sampleLines);

   Encoder *encoder = new Encoder();
   SymbolTable *table = buildSymbolTable(encoder->counters, sample, sampleLen, zeroTerminated);
   encoder->symbolTable = shared_ptr<SymbolTable>(table);

   // release the temporary sample storage
   if (sampleLen != lenIn) {
      delete[] sampleLen;
   }
   delete[] sampleBuf;
   return (duckdb_fsst_encoder_t*) encoder;
}
|
|
228652
229369
|
|
|
228653
229370
|
/* create another encoder instance, necessary to do multi-threaded encoding using the same symbol table */
|
|
228654
229371
|
// Create a second Encoder that shares the symbol table of `encoder` (the table is held through a
// shared_ptr, so only the pointer is copied). The new Encoder is heap-allocated with new.
extern "C" duckdb_fsst_encoder_t* duckdb_fsst_duplicate(duckdb_fsst_encoder_t *encoder) {
   Encoder *source = (Encoder*) encoder;
   Encoder *copy = new Encoder();
   copy->symbolTable = source->symbolTable; // it is a shared_ptr
   return (duckdb_fsst_encoder_t*) copy;
}
|
|
228659
229376
|
|
|
228660
|
-
// export a symbol table in compact format.
|
|
229377
|
+
// export a symbol table in compact format.
|
|
228661
229378
|
// Serialize the encoder's symbol table into buf and return the number of bytes written.
//
// Layout:
//   bytes 0..7   version word: FSST_VERSION in the upper 32 bits, then suffixLim, terminator,
//                nSymbols and FSST_ENDIAN_MARKER in the four low bytes
//   byte  8      zeroTerminated flag
//   bytes 9..16  lenHisto[0..7], one byte each
//   bytes 17..   the used bytes of every symbol from index zeroTerminated up to nSymbols-1
//
// The version word hides suffixLim/terminator/nSymbols, which would in principle be enough to
// reconstruct an encoder from a decoder (e.g. to append to an existing compressed block).
// However, the hash function in the encoder hash table is endian-sensitive, and its
// 'lossy perfect' hashing scheme cannot hold symbol tables produced on the other endianness,
// while converting endianness during hashing would be slow and self-defeating. Reconstruction
// would therefore have to enforce equal endianness; it is not implemented. The version field is
// there just for future-proofness, but not used yet.
extern "C" u32 duckdb_fsst_export(duckdb_fsst_encoder_t *encoder, u8 *buf) {
   Encoder *e = (Encoder*) encoder;
   SymbolTable &st = *e->symbolTable;

   // version allows keeping track of fsst versions, track endianness, and encoder reconstruction
   u64 version = (FSST_VERSION << 32) |          // version is 24 bits, most significant byte is 0
                 (((u64) st.suffixLim) << 24) |
                 (((u64) st.terminator) << 16) |
                 (((u64) st.nSymbols) << 8) |
                 FSST_ENDIAN_MARKER;             // least significant byte is nonzero

   /* do not assume unaligned reads here */
   memcpy(buf, &version, 8);
   buf[8] = st.zeroTerminated;
   for(u32 i=0; i<8; i++) {
      buf[9+i] = (u8) st.lenHisto[i];
   }

   // emit only the used bytes of the symbols
   u32 pos = 17;
   for(u32 i = st.zeroTerminated; i < st.nSymbols; i++) {
      Symbol &sym = st.symbols[i];
      const u32 symLen = sym.length();
      for(u32 j = 0; j < symLen; j++) {
         buf[pos++] = sym.val.str[j]; // serialize used symbol bytes
      }
   }

   return pos; // length of what was serialized
}
|
|
228697
229414
|
|
|
228698
229415
|
#define FSST_CORRUPT 32774747032022883 /* 7-byte number in little endian containing "corrupt" */
|
|
228699
229416
|
|
|
228700
229417
|
// Fill *decoder from a symbol table serialized by duckdb_fsst_export and return the number of
// bytes consumed from buf, or 0 when the buffer is rejected (version mismatch or an inconsistent
// length histogram).
//
// Header layout: 8-byte version word, 1 byte zeroTerminated flag, 8 histogram bytes where
// lenHisto[i] is the number of symbols of length i+1; the symbol bytes follow from offset 17.
// Codes are assigned in the order of lengths 2,3,4,5,6,7,8 and finally 1.
extern "C" u32 duckdb_fsst_import(duckdb_fsst_decoder_t *decoder, u8 *buf) {
   u64 version = 0;
   u32 code, pos = 17;
   u8 lenHisto[8];

   // version field (first 8 bytes) is now there just for future-proofness, unused still (skipped)
   memcpy(&version, buf, 8);
   if ((version>>32) != FSST_VERSION) return 0;
   memcpy(lenHisto, buf+9, 8);

   // Sanity-check the histogram before touching the decoder. The loops below assign one code per
   // histogram entry and only codes below 255 are ever filled in by this function, so a total
   // above 255 would index past that range. In zero-terminated mode lenHisto[0] is decremented,
   // so it must be non-zero or it would wrap around to 255.
   u32 totalSymbols = 0;
   for(u32 i=0; i<8; i++)
      totalSymbols += lenHisto[i];
   if (totalSymbols > 255 || ((buf[8]&1) && lenHisto[0] == 0)) return 0;

   decoder->zeroTerminated = buf[8]&1;

   // in case of zero-terminated, first symbol is "" (zero always, may be overwritten)
   decoder->len[0] = 1;
   decoder->symbol[0] = 0;

   // we use lenHisto[0] as 1-byte symbol run length (at the end)
   code = decoder->zeroTerminated;
   if (decoder->zeroTerminated) lenHisto[0]--; // if zeroTerminated, then symbol "" aka 1-byte code=0, is not stored at the end

   // now get all symbols from the buffer
   for(u32 l=1; l<=8; l++) { /* l = 1,2,3,4,5,6,7,8 */
      for(u32 i=0; i < lenHisto[(l&7) /* 1,2,3,4,5,6,7,0 */]; i++, code++) {
         decoder->len[code] = (l&7)+1; /* len = 2,3,4,5,6,7,8,1 */
         decoder->symbol[code] = 0;
         for(u32 j=0; j<decoder->len[code]; j++)
            ((u8*) &decoder->symbol[code])[j] = buf[pos++]; // note this enforces 'little endian' symbols
      }
   }
   if (decoder->zeroTerminated) lenHisto[0]++;

   // fill unused symbols with text "corrupt". Gives a chance to detect corrupted code sequences (if there are unused symbols).
   while(code<255) {
      decoder->symbol[code] = FSST_CORRUPT;
      decoder->len[code++] = 8;
   }
   return pos;
}
|
|
228737
229454
|
|
|
228738
229455
|
// runtime check for simd
|
|
228739
229456
|
// Dispatch to the SIMD or the scalar compressor. Unless NONOPT_FSST is defined, the SIMD path is
// taken when `simd` is non-zero and duckdb_fsst_hasAVX512() reports support; `simd` is then
// passed on as the unroll argument. In every other case compressBulk does the work.
inline size_t _compressImpl(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) {
   SymbolTable &table = *e->symbolTable;
#ifndef NONOPT_FSST
   if (simd && duckdb_fsst_hasAVX512()) {
      return compressSIMD(table, e->simdbuf, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
   }
#endif
   (void) simd; // unused when the SIMD path is compiled out
   return compressBulk(table, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch);
}
|
|
228747
229464
|
// Non-inline entry point that forwards all arguments unchanged to _compressImpl.
size_t compressImpl(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) {
   const size_t linesDone = _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
   return linesDone;
}
|
|
228750
229467
|
|
|
228751
|
-
// adaptive choosing of scalar compression method based on symbol length histogram
|
|
229468
|
+
// adaptive choosing of scalar compression method based on symbol length histogram
|
|
228752
229469
|
// Pick the scalar compression variant from the symbol length histogram (lenHisto[i] counts the
// symbols of length i+1) and forward to _compressImpl. At most one of noSuffixOpt / avoidBranch
// ends up set; the thresholds are fixed heuristics.
inline size_t _compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) {
   SymbolTable &st = *e->symbolTable;

   // noSuffixOpt: more than 65% of the symbols are 2-byte symbols, and suffixLim is above 95% of
   // the 2-byte symbol count
   const bool mostlyTwoByte = 100*st.lenHisto[1] > 65*st.nSymbols;
   const bool highSuffixLim = 100*st.suffixLim > 95*st.lenHisto[1];
   const bool noSuffixOpt = mostlyTwoByte && highSuffixLim;

   // avoidBranch: only considered when noSuffixOpt was not chosen
   bool avoidBranch = false;
   if (!noSuffixOpt) {
      const bool oneByteInRange = st.lenHisto[0] > 24 && st.lenHisto[0] < 92;
      const bool fewLongSymbols = st.lenHisto[0] < 43 || st.lenHisto[6] + st.lenHisto[7] < 29;
      const bool fewThreeByte   = st.lenHisto[0] < 72 || st.lenHisto[2] < 72;
      avoidBranch = oneByteInRange && fewLongSymbols && fewThreeByte;
   }

   return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
}
|
|
228763
229480
|
// Non-inline entry point that forwards all arguments unchanged to _compressAuto.
size_t compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) {
   const size_t linesDone = _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
   return linesDone;
}
|
|
228766
229483
|
|
|
228767
229484
|
// the main compression function (everything automatic)
|
|
228768
229485
|
// The main compression function (everything automatic): decides whether the SIMD path is worth
// trying and forwards to _compressAuto, whose result is returned.
extern "C" size_t duckdb_fsst_compress(duckdb_fsst_encoder_t *encoder, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[]) {
   // to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB)
   // The initial value must be a size_t: std::accumulate sums in the type of its init argument,
   // so a plain 0 would add the lengths up in an int (truncation / signed overflow on large input).
   size_t totLen = accumulate(lenIn, lenIn+nlines, (size_t) 0);
   int simd = totLen > nlines*12 && (nlines > 64 || totLen > (size_t) 1<<15);
   return _compressAuto((Encoder*) encoder, nlines, lenIn, strIn, size, output, lenOut, strOut, 3*simd);
}
|
|
228774
229491
|
|
|
228775
229492
|
/* deallocate encoder */
|
|
228776
229493
|
// Release an encoder handle by deleting the underlying Encoder object.
extern "C" void duckdb_fsst_destroy(duckdb_fsst_encoder_t* encoder) {
   delete (Encoder*) encoder;
}
|
|
228780
229497
|
|
|
228781
229498
|
/* very lazy implementation relying on export and import */
|
|
228782
229499
|
// Build a decoder for the given encoder by exporting its symbol table into a temporary buffer
// and importing it again (very lazy implementation relying on export and import).
extern "C" duckdb_fsst_decoder_t duckdb_fsst_decoder(duckdb_fsst_encoder_t *encoder) {
   u8 serialized[sizeof(duckdb_fsst_decoder_t)];
   duckdb_fsst_decoder_t decoder;

   u32 written = duckdb_fsst_export(encoder, serialized);
   u32 consumed = duckdb_fsst_import(&decoder, serialized);

   // both directions must agree on the serialized size
   assert(written == consumed);
   (void) written;  // silence unused-variable warnings when assert is compiled out
   (void) consumed;
   return decoder;
}
|
|
228790
229507
|
|
|
228791
|
-
|
|
228792
229508
|
// LICENSE_CHANGE_END
|
|
228793
229509
|
|
|
228794
229510
|
|