duckdb 0.6.1-dev86.0 → 0.6.2-dev13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/connection.cpp +100 -99
- package/src/duckdb.cpp +1567 -811
- package/src/duckdb.hpp +100 -35
- package/src/duckdb_node.hpp +0 -1
- package/src/parquet-amalgamation.cpp +13204 -13194
- package/test/arrow.test.js +36 -45
package/src/duckdb.cpp
CHANGED
|
@@ -652,6 +652,7 @@ static constexpr ExtensionFunction EXTENSION_FUNCTIONS[] = {
|
|
|
652
652
|
{"from_substrait", "substrait"},
|
|
653
653
|
{"get_substrait", "substrait"},
|
|
654
654
|
{"get_substrait_json", "substrait"},
|
|
655
|
+
{"from_substrait_json", "substrait"},
|
|
655
656
|
{"icu_calendar_names", "icu"},
|
|
656
657
|
{"icu_sort_key", "icu"},
|
|
657
658
|
{"json", "json"},
|
|
@@ -1405,7 +1406,7 @@ CopyFunctionCatalogEntry::CopyFunctionCatalogEntry(Catalog *catalog, SchemaCatal
|
|
|
1405
1406
|
//===----------------------------------------------------------------------===//
|
|
1406
1407
|
// DuckDB
|
|
1407
1408
|
//
|
|
1408
|
-
// duckdb/common/
|
|
1409
|
+
// duckdb/common/radix.hpp
|
|
1409
1410
|
//
|
|
1410
1411
|
//
|
|
1411
1412
|
//===----------------------------------------------------------------------===//
|
|
@@ -4121,6 +4122,20 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, A
|
|
|
4121
4122
|
}
|
|
4122
4123
|
}
|
|
4123
4124
|
|
|
4125
|
+
void TableCatalogEntry::UndoAlter(ClientContext &context, AlterInfo *info) {
|
|
4126
|
+
D_ASSERT(!internal);
|
|
4127
|
+
D_ASSERT(info->type == AlterType::ALTER_TABLE);
|
|
4128
|
+
auto table_info = (AlterTableInfo *)info;
|
|
4129
|
+
switch (table_info->alter_table_type) {
|
|
4130
|
+
case AlterTableType::RENAME_TABLE: {
|
|
4131
|
+
storage->info->table = this->name;
|
|
4132
|
+
break;
|
|
4133
|
+
default:
|
|
4134
|
+
break;
|
|
4135
|
+
}
|
|
4136
|
+
}
|
|
4137
|
+
}
|
|
4138
|
+
|
|
4124
4139
|
static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) {
|
|
4125
4140
|
if (expr.type == ExpressionType::COLUMN_REF) {
|
|
4126
4141
|
auto &colref = (ColumnRefExpression &)expr;
|
|
@@ -4219,6 +4234,8 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AddColumn(ClientContext &context, Ad
|
|
|
4219
4234
|
create_info->constraints.push_back(constraint->Copy());
|
|
4220
4235
|
}
|
|
4221
4236
|
Binder::BindLogicalType(context, info.new_column.TypeMutable(), schema->name);
|
|
4237
|
+
info.new_column.SetOid(columns.LogicalColumnCount());
|
|
4238
|
+
info.new_column.SetStorageOid(columns.PhysicalColumnCount());
|
|
4222
4239
|
auto col = info.new_column.Copy();
|
|
4223
4240
|
|
|
4224
4241
|
create_info->columns.AddColumn(move(col));
|
|
@@ -4966,6 +4983,9 @@ unique_ptr<CatalogEntry> CatalogEntry::AlterEntry(ClientContext &context, AlterI
|
|
|
4966
4983
|
throw InternalException("Unsupported alter type for catalog entry!");
|
|
4967
4984
|
}
|
|
4968
4985
|
|
|
4986
|
+
void CatalogEntry::UndoAlter(ClientContext &context, AlterInfo *info) {
|
|
4987
|
+
}
|
|
4988
|
+
|
|
4969
4989
|
unique_ptr<CatalogEntry> CatalogEntry::Copy(ClientContext &context) {
|
|
4970
4990
|
throw InternalException("Unsupported copy type for catalog entry!");
|
|
4971
4991
|
}
|
|
@@ -5144,6 +5164,98 @@ private:
|
|
|
5144
5164
|
|
|
5145
5165
|
|
|
5146
5166
|
|
|
5167
|
+
//===----------------------------------------------------------------------===//
|
|
5168
|
+
// DuckDB
|
|
5169
|
+
//
|
|
5170
|
+
// duckdb/catalog/mapping_value.hpp
|
|
5171
|
+
//
|
|
5172
|
+
//
|
|
5173
|
+
//===----------------------------------------------------------------------===//
|
|
5174
|
+
|
|
5175
|
+
|
|
5176
|
+
|
|
5177
|
+
|
|
5178
|
+
|
|
5179
|
+
|
|
5180
|
+
namespace duckdb {
|
|
5181
|
+
struct AlterInfo;
|
|
5182
|
+
|
|
5183
|
+
class ClientContext;
|
|
5184
|
+
|
|
5185
|
+
struct EntryIndex {
|
|
5186
|
+
EntryIndex() : catalog(nullptr), index(DConstants::INVALID_INDEX) {
|
|
5187
|
+
}
|
|
5188
|
+
EntryIndex(CatalogSet &catalog, idx_t index) : catalog(&catalog), index(index) {
|
|
5189
|
+
auto entry = catalog.entries.find(index);
|
|
5190
|
+
if (entry == catalog.entries.end()) {
|
|
5191
|
+
throw InternalException("EntryIndex - Catalog entry not found in constructor!?");
|
|
5192
|
+
}
|
|
5193
|
+
catalog.entries[index].reference_count++;
|
|
5194
|
+
}
|
|
5195
|
+
~EntryIndex() {
|
|
5196
|
+
if (!catalog) {
|
|
5197
|
+
return;
|
|
5198
|
+
}
|
|
5199
|
+
auto entry = catalog->entries.find(index);
|
|
5200
|
+
D_ASSERT(entry != catalog->entries.end());
|
|
5201
|
+
auto remaining_ref = --entry->second.reference_count;
|
|
5202
|
+
if (remaining_ref == 0) {
|
|
5203
|
+
catalog->entries.erase(index);
|
|
5204
|
+
}
|
|
5205
|
+
catalog = nullptr;
|
|
5206
|
+
}
|
|
5207
|
+
// disable copy constructors
|
|
5208
|
+
EntryIndex(const EntryIndex &other) = delete;
|
|
5209
|
+
EntryIndex &operator=(const EntryIndex &) = delete;
|
|
5210
|
+
//! enable move constructors
|
|
5211
|
+
EntryIndex(EntryIndex &&other) noexcept {
|
|
5212
|
+
catalog = nullptr;
|
|
5213
|
+
index = DConstants::INVALID_INDEX;
|
|
5214
|
+
std::swap(catalog, other.catalog);
|
|
5215
|
+
std::swap(index, other.index);
|
|
5216
|
+
}
|
|
5217
|
+
EntryIndex &operator=(EntryIndex &&other) noexcept {
|
|
5218
|
+
std::swap(catalog, other.catalog);
|
|
5219
|
+
std::swap(index, other.index);
|
|
5220
|
+
return *this;
|
|
5221
|
+
}
|
|
5222
|
+
|
|
5223
|
+
unique_ptr<CatalogEntry> &GetEntry() {
|
|
5224
|
+
auto entry = catalog->entries.find(index);
|
|
5225
|
+
if (entry == catalog->entries.end()) {
|
|
5226
|
+
throw InternalException("EntryIndex - Catalog entry not found!?");
|
|
5227
|
+
}
|
|
5228
|
+
return entry->second.entry;
|
|
5229
|
+
}
|
|
5230
|
+
idx_t GetIndex() {
|
|
5231
|
+
return index;
|
|
5232
|
+
}
|
|
5233
|
+
EntryIndex Copy() {
|
|
5234
|
+
if (catalog) {
|
|
5235
|
+
return EntryIndex(*catalog, index);
|
|
5236
|
+
} else {
|
|
5237
|
+
return EntryIndex();
|
|
5238
|
+
}
|
|
5239
|
+
}
|
|
5240
|
+
|
|
5241
|
+
private:
|
|
5242
|
+
CatalogSet *catalog;
|
|
5243
|
+
idx_t index;
|
|
5244
|
+
};
|
|
5245
|
+
|
|
5246
|
+
struct MappingValue {
|
|
5247
|
+
explicit MappingValue(EntryIndex index_p) : index(move(index_p)), timestamp(0), deleted(false), parent(nullptr) {
|
|
5248
|
+
}
|
|
5249
|
+
|
|
5250
|
+
EntryIndex index;
|
|
5251
|
+
transaction_t timestamp;
|
|
5252
|
+
bool deleted;
|
|
5253
|
+
unique_ptr<MappingValue> child;
|
|
5254
|
+
MappingValue *parent;
|
|
5255
|
+
};
|
|
5256
|
+
|
|
5257
|
+
} // namespace duckdb
|
|
5258
|
+
|
|
5147
5259
|
|
|
5148
5260
|
namespace duckdb {
|
|
5149
5261
|
|
|
@@ -5157,27 +5269,44 @@ namespace duckdb {
|
|
|
5157
5269
|
class EntryDropper {
|
|
5158
5270
|
public:
|
|
5159
5271
|
//! Both constructor and destructor are privates because they should only be called by DropEntryDependencies
|
|
5160
|
-
explicit EntryDropper(
|
|
5161
|
-
|
|
5162
|
-
old_deleted = catalog_set.entries[entry_index].get()->deleted;
|
|
5272
|
+
explicit EntryDropper(EntryIndex &entry_index_p) : entry_index(entry_index_p) {
|
|
5273
|
+
old_deleted = entry_index.GetEntry()->deleted;
|
|
5163
5274
|
}
|
|
5164
5275
|
|
|
5165
5276
|
~EntryDropper() {
|
|
5166
|
-
|
|
5277
|
+
entry_index.GetEntry()->deleted = old_deleted;
|
|
5167
5278
|
}
|
|
5168
5279
|
|
|
5169
5280
|
private:
|
|
5170
|
-
//! The current catalog_set
|
|
5171
|
-
CatalogSet &catalog_set;
|
|
5172
5281
|
//! Keeps track of the state of the entry before starting the delete
|
|
5173
5282
|
bool old_deleted;
|
|
5174
5283
|
//! Index of entry to be deleted
|
|
5175
|
-
|
|
5284
|
+
EntryIndex &entry_index;
|
|
5176
5285
|
};
|
|
5177
5286
|
|
|
5178
5287
|
CatalogSet::CatalogSet(Catalog &catalog, unique_ptr<DefaultGenerator> defaults)
|
|
5179
5288
|
: catalog(catalog), defaults(move(defaults)) {
|
|
5180
5289
|
}
|
|
5290
|
+
CatalogSet::~CatalogSet() {
|
|
5291
|
+
}
|
|
5292
|
+
|
|
5293
|
+
EntryIndex CatalogSet::PutEntry(idx_t entry_index, unique_ptr<CatalogEntry> entry) {
|
|
5294
|
+
if (entries.find(entry_index) != entries.end()) {
|
|
5295
|
+
throw InternalException("Entry with entry index \"%llu\" already exists", entry_index);
|
|
5296
|
+
}
|
|
5297
|
+
entries.insert(make_pair(entry_index, EntryValue(move(entry))));
|
|
5298
|
+
return EntryIndex(*this, entry_index);
|
|
5299
|
+
}
|
|
5300
|
+
|
|
5301
|
+
void CatalogSet::PutEntry(EntryIndex index, unique_ptr<CatalogEntry> catalog_entry) {
|
|
5302
|
+
auto entry = entries.find(index.GetIndex());
|
|
5303
|
+
if (entry == entries.end()) {
|
|
5304
|
+
throw InternalException("Entry with entry index \"%llu\" does not exist", index.GetIndex());
|
|
5305
|
+
}
|
|
5306
|
+
catalog_entry->child = move(entry->second.entry);
|
|
5307
|
+
catalog_entry->child->parent = catalog_entry.get();
|
|
5308
|
+
entry->second.entry = move(catalog_entry);
|
|
5309
|
+
}
|
|
5181
5310
|
|
|
5182
5311
|
bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_ptr<CatalogEntry> value,
|
|
5183
5312
|
unordered_set<CatalogEntry *> &dependencies) {
|
|
@@ -5188,7 +5317,7 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_
|
|
|
5188
5317
|
unique_lock<mutex> read_lock(catalog_lock);
|
|
5189
5318
|
|
|
5190
5319
|
// first check if the entry exists in the unordered set
|
|
5191
|
-
idx_t
|
|
5320
|
+
idx_t index;
|
|
5192
5321
|
auto mapping_value = GetMapping(context, name);
|
|
5193
5322
|
if (mapping_value == nullptr || mapping_value->deleted) {
|
|
5194
5323
|
// if it does not: entry has never been created
|
|
@@ -5202,17 +5331,17 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_
|
|
|
5202
5331
|
// first create a dummy deleted entry for this entry
|
|
5203
5332
|
// so transactions started before the commit of this transaction don't
|
|
5204
5333
|
// see it yet
|
|
5205
|
-
entry_index = current_entry++;
|
|
5206
5334
|
auto dummy_node = make_unique<CatalogEntry>(CatalogType::INVALID, value->catalog, name);
|
|
5207
5335
|
dummy_node->timestamp = 0;
|
|
5208
5336
|
dummy_node->deleted = true;
|
|
5209
5337
|
dummy_node->set = this;
|
|
5210
5338
|
|
|
5211
|
-
|
|
5212
|
-
|
|
5339
|
+
auto entry_index = PutEntry(current_entry++, move(dummy_node));
|
|
5340
|
+
index = entry_index.GetIndex();
|
|
5341
|
+
PutMapping(context, name, move(entry_index));
|
|
5213
5342
|
} else {
|
|
5214
|
-
|
|
5215
|
-
auto &current = *
|
|
5343
|
+
index = mapping_value->index.GetIndex();
|
|
5344
|
+
auto &current = *mapping_value->index.GetEntry();
|
|
5216
5345
|
// if it does, we have to check version numbers
|
|
5217
5346
|
if (HasConflict(context, current.timestamp)) {
|
|
5218
5347
|
// current version has been written to by a currently active
|
|
@@ -5234,16 +5363,16 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_
|
|
|
5234
5363
|
// now add the dependency set of this object to the dependency manager
|
|
5235
5364
|
catalog.dependency_manager->AddObject(context, value.get(), dependencies);
|
|
5236
5365
|
|
|
5237
|
-
|
|
5238
|
-
|
|
5366
|
+
auto value_ptr = value.get();
|
|
5367
|
+
EntryIndex entry_index(*this, index);
|
|
5368
|
+
PutEntry(move(entry_index), move(value));
|
|
5239
5369
|
// push the old entry in the undo buffer for this transaction
|
|
5240
|
-
transaction.PushCatalogEntry(
|
|
5241
|
-
entries[entry_index] = move(value);
|
|
5370
|
+
transaction.PushCatalogEntry(value_ptr->child.get());
|
|
5242
5371
|
return true;
|
|
5243
5372
|
}
|
|
5244
5373
|
|
|
5245
|
-
bool CatalogSet::GetEntryInternal(ClientContext &context,
|
|
5246
|
-
catalog_entry =
|
|
5374
|
+
bool CatalogSet::GetEntryInternal(ClientContext &context, EntryIndex &entry_index, CatalogEntry *&catalog_entry) {
|
|
5375
|
+
catalog_entry = entry_index.GetEntry().get();
|
|
5247
5376
|
// if it does: we have to retrieve the entry and to check version numbers
|
|
5248
5377
|
if (HasConflict(context, catalog_entry->timestamp)) {
|
|
5249
5378
|
// current version has been written to by a currently active
|
|
@@ -5259,21 +5388,22 @@ bool CatalogSet::GetEntryInternal(ClientContext &context, idx_t entry_index, Cat
|
|
|
5259
5388
|
return true;
|
|
5260
5389
|
}
|
|
5261
5390
|
|
|
5262
|
-
bool CatalogSet::GetEntryInternal(ClientContext &context, const string &name,
|
|
5391
|
+
bool CatalogSet::GetEntryInternal(ClientContext &context, const string &name, EntryIndex *entry_index,
|
|
5263
5392
|
CatalogEntry *&catalog_entry) {
|
|
5264
5393
|
auto mapping_value = GetMapping(context, name);
|
|
5265
5394
|
if (mapping_value == nullptr || mapping_value->deleted) {
|
|
5266
5395
|
// the entry does not exist, check if we can create a default entry
|
|
5267
5396
|
return false;
|
|
5268
5397
|
}
|
|
5269
|
-
entry_index
|
|
5270
|
-
|
|
5398
|
+
if (entry_index) {
|
|
5399
|
+
*entry_index = mapping_value->index.Copy();
|
|
5400
|
+
}
|
|
5401
|
+
return GetEntryInternal(context, mapping_value->index, catalog_entry);
|
|
5271
5402
|
}
|
|
5272
5403
|
|
|
5273
5404
|
bool CatalogSet::AlterOwnership(ClientContext &context, ChangeOwnershipInfo *info) {
|
|
5274
|
-
idx_t entry_index;
|
|
5275
5405
|
CatalogEntry *entry;
|
|
5276
|
-
if (!GetEntryInternal(context, info->name,
|
|
5406
|
+
if (!GetEntryInternal(context, info->name, nullptr, entry)) {
|
|
5277
5407
|
return false;
|
|
5278
5408
|
}
|
|
5279
5409
|
|
|
@@ -5293,9 +5423,9 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
5293
5423
|
lock_guard<mutex> write_lock(catalog.write_lock);
|
|
5294
5424
|
|
|
5295
5425
|
// first check if the entry exists in the unordered set
|
|
5296
|
-
|
|
5426
|
+
EntryIndex entry_index;
|
|
5297
5427
|
CatalogEntry *entry;
|
|
5298
|
-
if (!GetEntryInternal(context, name, entry_index, entry)) {
|
|
5428
|
+
if (!GetEntryInternal(context, name, &entry_index, entry)) {
|
|
5299
5429
|
return false;
|
|
5300
5430
|
}
|
|
5301
5431
|
if (entry->internal) {
|
|
@@ -5318,8 +5448,9 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
5318
5448
|
if (value->name != original_name) {
|
|
5319
5449
|
auto mapping_value = GetMapping(context, value->name);
|
|
5320
5450
|
if (mapping_value && !mapping_value->deleted) {
|
|
5321
|
-
auto
|
|
5322
|
-
if (!
|
|
5451
|
+
auto original_entry = GetEntryForTransaction(context, mapping_value->index.GetEntry().get());
|
|
5452
|
+
if (!original_entry->deleted) {
|
|
5453
|
+
entry->UndoAlter(context, alter_info);
|
|
5323
5454
|
string rename_err_msg =
|
|
5324
5455
|
"Could not rename \"%s\" to \"%s\": another entry with this name already exists!";
|
|
5325
5456
|
throw CatalogException(rename_err_msg, original_name, value->name);
|
|
@@ -5329,25 +5460,22 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
5329
5460
|
|
|
5330
5461
|
if (value->name != original_name) {
|
|
5331
5462
|
// Do PutMapping and DeleteMapping after dependency check
|
|
5332
|
-
PutMapping(context, value->name, entry_index);
|
|
5463
|
+
PutMapping(context, value->name, entry_index.Copy());
|
|
5333
5464
|
DeleteMapping(context, original_name);
|
|
5334
5465
|
}
|
|
5335
5466
|
|
|
5336
5467
|
value->timestamp = transaction.transaction_id;
|
|
5337
|
-
value->child = move(entries[entry_index]);
|
|
5338
|
-
value->child->parent = value.get();
|
|
5339
5468
|
value->set = this;
|
|
5469
|
+
auto new_entry = value.get();
|
|
5470
|
+
PutEntry(move(entry_index), move(value));
|
|
5340
5471
|
|
|
5341
5472
|
// serialize the AlterInfo into a temporary buffer
|
|
5342
5473
|
BufferedSerializer serializer;
|
|
5343
5474
|
alter_info->Serialize(serializer);
|
|
5344
5475
|
BinaryData serialized_alter = serializer.GetData();
|
|
5345
5476
|
|
|
5346
|
-
auto new_entry = value.get();
|
|
5347
|
-
|
|
5348
5477
|
// push the old entry in the undo buffer for this transaction
|
|
5349
|
-
transaction.PushCatalogEntry(
|
|
5350
|
-
entries[entry_index] = move(value);
|
|
5478
|
+
transaction.PushCatalogEntry(new_entry->child.get(), serialized_alter.data.get(), serialized_alter.size);
|
|
5351
5479
|
|
|
5352
5480
|
// Check the dependency manager to verify that there are no conflicting dependencies with this alter
|
|
5353
5481
|
// Note that we do this AFTER the new entry has been entirely set up in the catalog set
|
|
@@ -5358,13 +5486,13 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
5358
5486
|
return true;
|
|
5359
5487
|
}
|
|
5360
5488
|
|
|
5361
|
-
void CatalogSet::DropEntryDependencies(ClientContext &context,
|
|
5362
|
-
|
|
5489
|
+
void CatalogSet::DropEntryDependencies(ClientContext &context, EntryIndex &entry_index, CatalogEntry &entry,
|
|
5490
|
+
bool cascade) {
|
|
5363
5491
|
// Stores the deleted value of the entry before starting the process
|
|
5364
|
-
EntryDropper dropper(
|
|
5492
|
+
EntryDropper dropper(entry_index);
|
|
5365
5493
|
|
|
5366
5494
|
// To correctly delete the object and its dependencies, it temporarily is set to deleted.
|
|
5367
|
-
|
|
5495
|
+
entry_index.GetEntry()->deleted = true;
|
|
5368
5496
|
|
|
5369
5497
|
// check any dependencies of this object
|
|
5370
5498
|
entry.catalog->dependency_manager->DropObject(context, &entry, cascade);
|
|
@@ -5374,7 +5502,7 @@ void CatalogSet::DropEntryDependencies(ClientContext &context, idx_t entry_index
|
|
|
5374
5502
|
// dropper.~EntryDropper()
|
|
5375
5503
|
}
|
|
5376
5504
|
|
|
5377
|
-
void CatalogSet::DropEntryInternal(ClientContext &context,
|
|
5505
|
+
void CatalogSet::DropEntryInternal(ClientContext &context, EntryIndex entry_index, CatalogEntry &entry, bool cascade) {
|
|
5378
5506
|
auto &transaction = Transaction::GetTransaction(context);
|
|
5379
5507
|
|
|
5380
5508
|
DropEntryDependencies(context, entry_index, entry, cascade);
|
|
@@ -5384,31 +5512,30 @@ void CatalogSet::DropEntryInternal(ClientContext &context, idx_t entry_index, Ca
|
|
|
5384
5512
|
// and point it at the dummy node
|
|
5385
5513
|
auto value = make_unique<CatalogEntry>(CatalogType::DELETED_ENTRY, entry.catalog, entry.name);
|
|
5386
5514
|
value->timestamp = transaction.transaction_id;
|
|
5387
|
-
value->child = move(entries[entry_index]);
|
|
5388
|
-
value->child->parent = value.get();
|
|
5389
5515
|
value->set = this;
|
|
5390
5516
|
value->deleted = true;
|
|
5517
|
+
auto value_ptr = value.get();
|
|
5518
|
+
PutEntry(move(entry_index), move(value));
|
|
5391
5519
|
|
|
5392
5520
|
// push the old entry in the undo buffer for this transaction
|
|
5393
|
-
transaction.PushCatalogEntry(
|
|
5394
|
-
|
|
5395
|
-
entries[entry_index] = move(value);
|
|
5521
|
+
transaction.PushCatalogEntry(value_ptr->child.get());
|
|
5396
5522
|
}
|
|
5397
5523
|
|
|
5398
5524
|
bool CatalogSet::DropEntry(ClientContext &context, const string &name, bool cascade) {
|
|
5399
5525
|
// lock the catalog for writing
|
|
5400
5526
|
lock_guard<mutex> write_lock(catalog.write_lock);
|
|
5401
5527
|
// we can only delete an entry that exists
|
|
5402
|
-
|
|
5528
|
+
EntryIndex entry_index;
|
|
5403
5529
|
CatalogEntry *entry;
|
|
5404
|
-
if (!GetEntryInternal(context, name, entry_index, entry)) {
|
|
5530
|
+
if (!GetEntryInternal(context, name, &entry_index, entry)) {
|
|
5405
5531
|
return false;
|
|
5406
5532
|
}
|
|
5407
5533
|
if (entry->internal) {
|
|
5408
5534
|
throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name);
|
|
5409
5535
|
}
|
|
5410
5536
|
|
|
5411
|
-
|
|
5537
|
+
lock_guard<mutex> read_lock(catalog_lock);
|
|
5538
|
+
DropEntryInternal(context, move(entry_index), *entry, cascade);
|
|
5412
5539
|
return true;
|
|
5413
5540
|
}
|
|
5414
5541
|
|
|
@@ -5426,12 +5553,10 @@ void CatalogSet::CleanupEntry(CatalogEntry *catalog_entry) {
|
|
|
5426
5553
|
if (parent->deleted && !parent->child && !parent->parent) {
|
|
5427
5554
|
auto mapping_entry = mapping.find(parent->name);
|
|
5428
5555
|
D_ASSERT(mapping_entry != mapping.end());
|
|
5429
|
-
auto
|
|
5430
|
-
|
|
5431
|
-
|
|
5432
|
-
if (entry->second.get() == parent) {
|
|
5556
|
+
auto entry = mapping_entry->second->index.GetEntry().get();
|
|
5557
|
+
D_ASSERT(entry);
|
|
5558
|
+
if (entry == parent) {
|
|
5433
5559
|
mapping.erase(mapping_entry);
|
|
5434
|
-
entries.erase(entry);
|
|
5435
5560
|
}
|
|
5436
5561
|
}
|
|
5437
5562
|
}
|
|
@@ -5465,9 +5590,9 @@ MappingValue *CatalogSet::GetMapping(ClientContext &context, const string &name,
|
|
|
5465
5590
|
return mapping_value;
|
|
5466
5591
|
}
|
|
5467
5592
|
|
|
5468
|
-
void CatalogSet::PutMapping(ClientContext &context, const string &name,
|
|
5593
|
+
void CatalogSet::PutMapping(ClientContext &context, const string &name, EntryIndex entry_index) {
|
|
5469
5594
|
auto entry = mapping.find(name);
|
|
5470
|
-
auto new_value = make_unique<MappingValue>(entry_index);
|
|
5595
|
+
auto new_value = make_unique<MappingValue>(move(entry_index));
|
|
5471
5596
|
new_value->timestamp = Transaction::GetTransaction(context).transaction_id;
|
|
5472
5597
|
if (entry != mapping.end()) {
|
|
5473
5598
|
if (HasConflict(context, entry->second->timestamp)) {
|
|
@@ -5482,7 +5607,7 @@ void CatalogSet::PutMapping(ClientContext &context, const string &name, idx_t en
|
|
|
5482
5607
|
void CatalogSet::DeleteMapping(ClientContext &context, const string &name) {
|
|
5483
5608
|
auto entry = mapping.find(name);
|
|
5484
5609
|
D_ASSERT(entry != mapping.end());
|
|
5485
|
-
auto delete_marker = make_unique<MappingValue>(entry->second->index);
|
|
5610
|
+
auto delete_marker = make_unique<MappingValue>(entry->second->index.Copy());
|
|
5486
5611
|
delete_marker->deleted = true;
|
|
5487
5612
|
delete_marker->timestamp = Transaction::GetTransaction(context).transaction_id;
|
|
5488
5613
|
delete_marker->child = move(entry->second);
|
|
@@ -5550,15 +5675,14 @@ CatalogEntry *CatalogSet::CreateEntryInternal(ClientContext &context, unique_ptr
|
|
|
5550
5675
|
return nullptr;
|
|
5551
5676
|
}
|
|
5552
5677
|
auto &name = entry->name;
|
|
5553
|
-
auto entry_index = current_entry++;
|
|
5554
5678
|
auto catalog_entry = entry.get();
|
|
5555
5679
|
|
|
5556
5680
|
entry->set = this;
|
|
5557
5681
|
entry->timestamp = 0;
|
|
5558
5682
|
|
|
5559
|
-
|
|
5683
|
+
auto entry_index = PutEntry(current_entry++, move(entry));
|
|
5684
|
+
PutMapping(context, name, move(entry_index));
|
|
5560
5685
|
mapping[name]->timestamp = 0;
|
|
5561
|
-
entries[entry_index] = move(entry);
|
|
5562
5686
|
return catalog_entry;
|
|
5563
5687
|
}
|
|
5564
5688
|
|
|
@@ -5597,7 +5721,7 @@ CatalogEntry *CatalogSet::GetEntry(ClientContext &context, const string &name) {
|
|
|
5597
5721
|
// we found an entry for this name
|
|
5598
5722
|
// check the version numbers
|
|
5599
5723
|
|
|
5600
|
-
auto catalog_entry =
|
|
5724
|
+
auto catalog_entry = mapping_value->index.GetEntry().get();
|
|
5601
5725
|
CatalogEntry *current = GetEntryForTransaction(context, catalog_entry);
|
|
5602
5726
|
if (current->deleted || (current->name != name && !UseTimestamp(context, mapping_value->timestamp))) {
|
|
5603
5727
|
return nullptr;
|
|
@@ -5706,7 +5830,7 @@ void CatalogSet::Undo(CatalogEntry *entry) {
|
|
|
5706
5830
|
// otherwise we need to update the base entry tables
|
|
5707
5831
|
auto &name = entry->name;
|
|
5708
5832
|
to_be_removed_node->child->SetAsRoot();
|
|
5709
|
-
|
|
5833
|
+
mapping[name]->index.GetEntry() = move(to_be_removed_node->child);
|
|
5710
5834
|
entry->parent = nullptr;
|
|
5711
5835
|
}
|
|
5712
5836
|
|
|
@@ -5721,7 +5845,7 @@ void CatalogSet::Undo(CatalogEntry *entry) {
|
|
|
5721
5845
|
}
|
|
5722
5846
|
}
|
|
5723
5847
|
// we mark the catalog as being modified, since this action can lead to e.g. tables being dropped
|
|
5724
|
-
|
|
5848
|
+
catalog.ModifyCatalog();
|
|
5725
5849
|
}
|
|
5726
5850
|
|
|
5727
5851
|
void CatalogSet::CreateDefaultEntries(ClientContext &context, unique_lock<mutex> &lock) {
|
|
@@ -5754,7 +5878,7 @@ void CatalogSet::Scan(ClientContext &context, const std::function<void(CatalogEn
|
|
|
5754
5878
|
CreateDefaultEntries(context, lock);
|
|
5755
5879
|
|
|
5756
5880
|
for (auto &kv : entries) {
|
|
5757
|
-
auto entry = kv.second.get();
|
|
5881
|
+
auto entry = kv.second.entry.get();
|
|
5758
5882
|
entry = GetEntryForTransaction(context, entry);
|
|
5759
5883
|
if (!entry->deleted) {
|
|
5760
5884
|
callback(entry);
|
|
@@ -5766,7 +5890,7 @@ void CatalogSet::Scan(const std::function<void(CatalogEntry *)> &callback) {
|
|
|
5766
5890
|
// lock the catalog set
|
|
5767
5891
|
lock_guard<mutex> lock(catalog_lock);
|
|
5768
5892
|
for (auto &kv : entries) {
|
|
5769
|
-
auto entry = kv.second.get();
|
|
5893
|
+
auto entry = kv.second.entry.get();
|
|
5770
5894
|
entry = GetCommittedEntry(entry);
|
|
5771
5895
|
if (!entry->deleted) {
|
|
5772
5896
|
callback(entry);
|
|
@@ -6182,14 +6306,17 @@ static DefaultView internal_views[] = {
|
|
|
6182
6306
|
{"pg_catalog", "pg_attrdef", "SELECT column_index oid, table_oid adrelid, column_index adnum, column_default adbin from duckdb_columns() where column_default is not null;"},
|
|
6183
6307
|
{"pg_catalog", "pg_class", "SELECT table_oid oid, table_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, estimated_size::real reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, index_count > 0 relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'r' relkind, column_count relnatts, check_constraint_count relchecks, false relhasoids, has_primary_key relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_tables() UNION ALL SELECT view_oid oid, view_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'v' relkind, column_count relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_views() UNION ALL SELECT sequence_oid oid, sequence_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'S' relkind, 0 relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 
relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_sequences() UNION ALL SELECT index_oid oid, index_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, 't' relpersistence, 'i' relkind, NULL relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_indexes()"},
|
|
6184
6308
|
{"pg_catalog", "pg_constraint", "SELECT table_oid*1000000+constraint_index oid, constraint_text conname, schema_oid connamespace, CASE constraint_type WHEN 'CHECK' then 'c' WHEN 'UNIQUE' then 'u' WHEN 'PRIMARY KEY' THEN 'p' WHEN 'FOREIGN KEY' THEN 'f' ELSE 'x' END contype, false condeferrable, false condeferred, true convalidated, table_oid conrelid, 0 contypid, 0 conindid, 0 conparentid, 0 confrelid, NULL confupdtype, NULL confdeltype, NULL confmatchtype, true conislocal, 0 coninhcount, false connoinherit, constraint_column_indexes conkey, NULL confkey, NULL conpfeqop, NULL conppeqop, NULL conffeqop, NULL conexclop, expression conbin FROM duckdb_constraints()"},
|
|
6309
|
+
{"pg_catalog", "pg_database", "SELECT 0 oid, 'main' datname"},
|
|
6185
6310
|
{"pg_catalog", "pg_depend", "SELECT * FROM duckdb_dependencies()"},
|
|
6186
6311
|
{"pg_catalog", "pg_description", "SELECT NULL objoid, NULL classoid, NULL objsubid, NULL description WHERE 1=0"},
|
|
6187
6312
|
{"pg_catalog", "pg_enum", "SELECT NULL oid, NULL enumtypid, NULL enumsortorder, NULL enumlabel WHERE 1=0"},
|
|
6188
6313
|
{"pg_catalog", "pg_index", "SELECT index_oid indexrelid, table_oid indrelid, 0 indnatts, 0 indnkeyatts, is_unique indisunique, is_primary indisprimary, false indisexclusion, true indimmediate, false indisclustered, true indisvalid, false indcheckxmin, true indisready, true indislive, false indisreplident, NULL::INT[] indkey, NULL::OID[] indcollation, NULL::OID[] indclass, NULL::INT[] indoption, expressions indexprs, NULL indpred FROM duckdb_indexes()"},
|
|
6189
6314
|
{"pg_catalog", "pg_indexes", "SELECT schema_name schemaname, table_name tablename, index_name indexname, NULL \"tablespace\", sql indexdef FROM duckdb_indexes()"},
|
|
6190
6315
|
{"pg_catalog", "pg_namespace", "SELECT oid, schema_name nspname, 0 nspowner, NULL nspacl FROM duckdb_schemas()"},
|
|
6316
|
+
{"pg_catalog", "pg_proc", "SELECT f.function_oid oid, function_name proname, s.oid pronamespace FROM duckdb_functions() f LEFT JOIN duckdb_schemas() s USING (schema_name)"},
|
|
6191
6317
|
{"pg_catalog", "pg_sequence", "SELECT sequence_oid seqrelid, 0 seqtypid, start_value seqstart, increment_by seqincrement, max_value seqmax, min_value seqmin, 0 seqcache, cycle seqcycle FROM duckdb_sequences()"},
|
|
6192
6318
|
{"pg_catalog", "pg_sequences", "SELECT schema_name schemaname, sequence_name sequencename, 'duckdb' sequenceowner, 0 data_type, start_value, min_value, max_value, increment_by, cycle, 0 cache_size, last_value FROM duckdb_sequences()"},
|
|
6319
|
+
{"pg_catalog", "pg_settings", "SELECT name, value setting, description short_desc, CASE WHEN input_type = 'VARCHAR' THEN 'string' WHEN input_type = 'BOOLEAN' THEN 'bool' WHEN input_type IN ('BIGINT', 'UBIGINT') THEN 'integer' ELSE input_type END vartype FROM duckdb_settings()"},
|
|
6193
6320
|
{"pg_catalog", "pg_tables", "SELECT schema_name schemaname, table_name tablename, 'duckdb' tableowner, NULL \"tablespace\", index_count > 0 hasindexes, false hasrules, false hastriggers FROM duckdb_tables()"},
|
|
6194
6321
|
{"pg_catalog", "pg_tablespace", "SELECT 0 oid, 'pg_default' spcname, 0 spcowner, NULL spcacl, NULL spcoptions"},
|
|
6195
6322
|
{"pg_catalog", "pg_type", "SELECT type_oid oid, format_pg_type(type_name) typname, schema_oid typnamespace, 0 typowner, type_size typlen, false typbyval, 'b' typtype, CASE WHEN type_category='NUMERIC' THEN 'N' WHEN type_category='STRING' THEN 'S' WHEN type_category='DATETIME' THEN 'D' WHEN type_category='BOOLEAN' THEN 'B' WHEN type_category='COMPOSITE' THEN 'C' WHEN type_category='USER' THEN 'U' ELSE 'X' END typcategory, false typispreferred, true typisdefined, NULL typdelim, NULL typrelid, NULL typsubscript, NULL typelem, NULL typarray, NULL typinput, NULL typoutput, NULL typreceive, NULL typsend, NULL typmodin, NULL typmodout, NULL typanalyze, 'd' typalign, 'p' typstorage, NULL typnotnull, NULL typbasetype, NULL typtypmod, NULL typndims, NULL typcollation, NULL typdefaultbin, NULL typdefault, NULL typacl FROM duckdb_types();"},
|
|
@@ -6256,6 +6383,7 @@ vector<string> DefaultViewGenerator::GetDefaultEntries() {
|
|
|
6256
6383
|
|
|
6257
6384
|
|
|
6258
6385
|
|
|
6386
|
+
|
|
6259
6387
|
namespace duckdb {
|
|
6260
6388
|
|
|
6261
6389
|
DependencyManager::DependencyManager(Catalog &catalog) : catalog(catalog) {
|
|
@@ -6265,12 +6393,11 @@ void DependencyManager::AddObject(ClientContext &context, CatalogEntry *object,
|
|
|
6265
6393
|
unordered_set<CatalogEntry *> &dependencies) {
|
|
6266
6394
|
// check for each object in the sources if they were not deleted yet
|
|
6267
6395
|
for (auto &dependency : dependencies) {
|
|
6268
|
-
idx_t entry_index;
|
|
6269
6396
|
CatalogEntry *catalog_entry;
|
|
6270
6397
|
if (!dependency->set) {
|
|
6271
6398
|
throw InternalException("Dependency has no set");
|
|
6272
6399
|
}
|
|
6273
|
-
if (!dependency->set->GetEntryInternal(context, dependency->name,
|
|
6400
|
+
if (!dependency->set->GetEntryInternal(context, dependency->name, nullptr, catalog_entry)) {
|
|
6274
6401
|
throw InternalException("Dependency has already been deleted?");
|
|
6275
6402
|
}
|
|
6276
6403
|
}
|
|
@@ -6298,10 +6425,9 @@ void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object,
|
|
|
6298
6425
|
if (mapping_value == nullptr) {
|
|
6299
6426
|
continue;
|
|
6300
6427
|
}
|
|
6301
|
-
idx_t entry_index = mapping_value->index;
|
|
6302
6428
|
CatalogEntry *dependency_entry;
|
|
6303
6429
|
|
|
6304
|
-
if (!catalog_set.GetEntryInternal(context,
|
|
6430
|
+
if (!catalog_set.GetEntryInternal(context, mapping_value->index, dependency_entry)) {
|
|
6305
6431
|
// the dependent object was already deleted, no conflict
|
|
6306
6432
|
continue;
|
|
6307
6433
|
}
|
|
@@ -6309,7 +6435,7 @@ void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object,
|
|
|
6309
6435
|
if (cascade || dep.dependency_type == DependencyType::DEPENDENCY_AUTOMATIC ||
|
|
6310
6436
|
dep.dependency_type == DependencyType::DEPENDENCY_OWNS) {
|
|
6311
6437
|
// cascade: drop the dependent object
|
|
6312
|
-
catalog_set.DropEntryInternal(context,
|
|
6438
|
+
catalog_set.DropEntryInternal(context, mapping_value->index.Copy(), *dependency_entry, cascade);
|
|
6313
6439
|
} else {
|
|
6314
6440
|
// no cascade and there are objects that depend on this object: throw error
|
|
6315
6441
|
throw DependencyException("Cannot drop entry \"%s\" because there are entries that "
|
|
@@ -6329,9 +6455,8 @@ void DependencyManager::AlterObject(ClientContext &context, CatalogEntry *old_ob
|
|
|
6329
6455
|
for (auto &dep : dependent_objects) {
|
|
6330
6456
|
// look up the entry in the catalog set
|
|
6331
6457
|
auto &catalog_set = *dep.entry->set;
|
|
6332
|
-
idx_t entry_index;
|
|
6333
6458
|
CatalogEntry *dependency_entry;
|
|
6334
|
-
if (!catalog_set.GetEntryInternal(context, dep.entry->name,
|
|
6459
|
+
if (!catalog_set.GetEntryInternal(context, dep.entry->name, nullptr, dependency_entry)) {
|
|
6335
6460
|
// the dependent object was already deleted, no conflict
|
|
6336
6461
|
continue;
|
|
6337
6462
|
}
|
|
@@ -9213,6 +9338,13 @@ void BoxRenderer::Render(ClientContext &context, const vector<string> &names, co
|
|
|
9213
9338
|
// figure out how many/which rows to render
|
|
9214
9339
|
idx_t row_count = result.Count();
|
|
9215
9340
|
idx_t rows_to_render = MinValue<idx_t>(row_count, config.max_rows);
|
|
9341
|
+
if (row_count <= config.max_rows + 3) {
|
|
9342
|
+
// hiding rows adds 3 extra rows
|
|
9343
|
+
// so hiding rows makes no sense if we are only slightly over the limit
|
|
9344
|
+
// if we are 1 row over the limit hiding rows will actually increase the number of lines we display!
|
|
9345
|
+
// in this case render all the rows
|
|
9346
|
+
rows_to_render = row_count;
|
|
9347
|
+
}
|
|
9216
9348
|
idx_t top_rows;
|
|
9217
9349
|
idx_t bottom_rows;
|
|
9218
9350
|
if (rows_to_render == row_count) {
|
|
@@ -30473,7 +30605,7 @@ public:
|
|
|
30473
30605
|
|
|
30474
30606
|
private:
|
|
30475
30607
|
void AllocateEmptyBlock(idx_t size);
|
|
30476
|
-
|
|
30608
|
+
BufferHandle AllocateBlock();
|
|
30477
30609
|
BufferHandle Pin(uint32_t block_id);
|
|
30478
30610
|
BufferHandle PinInternal(uint32_t block_id);
|
|
30479
30611
|
|
|
@@ -30587,11 +30719,7 @@ protected:
|
|
|
30587
30719
|
return make_unique<ColumnDataCollection>(allocators->allocators[partition_index], types);
|
|
30588
30720
|
}
|
|
30589
30721
|
//! Create a DataChunk used for buffering appends to the partition
|
|
30590
|
-
unique_ptr<DataChunk> CreatePartitionBuffer() const
|
|
30591
|
-
auto result = make_unique<DataChunk>();
|
|
30592
|
-
result->Initialize(Allocator::Get(context), types, BufferSize());
|
|
30593
|
-
return result;
|
|
30594
|
-
}
|
|
30722
|
+
unique_ptr<DataChunk> CreatePartitionBuffer() const;
|
|
30595
30723
|
|
|
30596
30724
|
protected:
|
|
30597
30725
|
PartitionedColumnDataType type;
|
|
@@ -30968,6 +31096,9 @@ struct PartitionFunctor {
|
|
|
30968
31096
|
const auto row_width = layout.GetRowWidth();
|
|
30969
31097
|
const auto has_heap = !layout.AllConstant();
|
|
30970
31098
|
|
|
31099
|
+
block_collection.VerifyBlockSizes();
|
|
31100
|
+
string_heap.VerifyBlockSizes();
|
|
31101
|
+
|
|
30971
31102
|
// Fixed-size data
|
|
30972
31103
|
RowDataBlock *partition_data_blocks[CONSTANTS::NUM_PARTITIONS];
|
|
30973
31104
|
vector<BufferHandle> partition_data_handles;
|
|
@@ -31102,6 +31233,10 @@ struct PartitionFunctor {
|
|
|
31102
31233
|
#ifdef DEBUG
|
|
31103
31234
|
for (idx_t bin = 0; bin < CONSTANTS::NUM_PARTITIONS; bin++) {
|
|
31104
31235
|
auto &p_block_collection = *partition_block_collections[bin];
|
|
31236
|
+
p_block_collection.VerifyBlockSizes();
|
|
31237
|
+
if (!layout.AllConstant()) {
|
|
31238
|
+
partition_string_heaps[bin]->VerifyBlockSizes();
|
|
31239
|
+
}
|
|
31105
31240
|
idx_t p_count = 0;
|
|
31106
31241
|
for (idx_t b = 0; b < p_block_collection.blocks.size(); b++) {
|
|
31107
31242
|
auto &data_block = *p_block_collection.blocks[b];
|
|
@@ -39011,14 +39146,13 @@ static void SortTiedBlobs(BufferManager &buffer_manager, const data_ptr_t datapt
|
|
|
39011
39146
|
return order * Comparators::CompareVal(left_ptr, right_ptr, logical_type) < 0;
|
|
39012
39147
|
});
|
|
39013
39148
|
// Re-order
|
|
39014
|
-
auto temp_block =
|
|
39015
|
-
|
|
39016
|
-
data_ptr_t temp_ptr = temp_block.Ptr();
|
|
39149
|
+
auto temp_block = buffer_manager.GetBufferAllocator().Allocate((end - start) * sort_layout.entry_size);
|
|
39150
|
+
data_ptr_t temp_ptr = temp_block.get();
|
|
39017
39151
|
for (idx_t i = 0; i < end - start; i++) {
|
|
39018
39152
|
FastMemcpy(temp_ptr, entry_ptrs[i], sort_layout.entry_size);
|
|
39019
39153
|
temp_ptr += sort_layout.entry_size;
|
|
39020
39154
|
}
|
|
39021
|
-
memcpy(dataptr + start * sort_layout.entry_size, temp_block.
|
|
39155
|
+
memcpy(dataptr + start * sort_layout.entry_size, temp_block.get(), (end - start) * sort_layout.entry_size);
|
|
39022
39156
|
// Determine if there are still ties (if this is not the last column)
|
|
39023
39157
|
if (tie_col < sort_layout.column_count - 1) {
|
|
39024
39158
|
data_ptr_t idx_ptr = dataptr + start * sort_layout.entry_size + sort_layout.comparison_size;
|
|
@@ -39083,7 +39217,7 @@ static void ComputeTies(data_ptr_t dataptr, const idx_t &count, const idx_t &col
|
|
|
39083
39217
|
//! Textbook LSD radix sort
|
|
39084
39218
|
void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, const idx_t &count, const idx_t &col_offset,
|
|
39085
39219
|
const idx_t &row_width, const idx_t &sorting_size) {
|
|
39086
|
-
auto temp_block = buffer_manager.Allocate(
|
|
39220
|
+
auto temp_block = buffer_manager.GetBufferAllocator().Allocate(count * row_width);
|
|
39087
39221
|
bool swap = false;
|
|
39088
39222
|
|
|
39089
39223
|
idx_t counts[SortConstants::VALUES_PER_RADIX];
|
|
@@ -39091,8 +39225,8 @@ void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, cons
|
|
|
39091
39225
|
// Init counts to 0
|
|
39092
39226
|
memset(counts, 0, sizeof(counts));
|
|
39093
39227
|
// Const some values for convenience
|
|
39094
|
-
const data_ptr_t source_ptr = swap ? temp_block.
|
|
39095
|
-
const data_ptr_t target_ptr = swap ? dataptr : temp_block.
|
|
39228
|
+
const data_ptr_t source_ptr = swap ? temp_block.get() : dataptr;
|
|
39229
|
+
const data_ptr_t target_ptr = swap ? dataptr : temp_block.get();
|
|
39096
39230
|
const idx_t offset = col_offset + sorting_size - r;
|
|
39097
39231
|
// Collect counts
|
|
39098
39232
|
data_ptr_t offset_ptr = source_ptr + offset;
|
|
@@ -39120,7 +39254,7 @@ void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, cons
|
|
|
39120
39254
|
}
|
|
39121
39255
|
// Move data back to original buffer (if it was swapped)
|
|
39122
39256
|
if (swap) {
|
|
39123
|
-
memcpy(dataptr, temp_block.
|
|
39257
|
+
memcpy(dataptr, temp_block.get(), count * row_width);
|
|
39124
39258
|
}
|
|
39125
39259
|
}
|
|
39126
39260
|
|
|
@@ -39468,6 +39602,9 @@ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
|
|
|
39468
39602
|
}
|
|
39469
39603
|
|
|
39470
39604
|
LocalSortState::LocalSortState() : initialized(false) {
|
|
39605
|
+
if (!Radix::IsLittleEndian()) {
|
|
39606
|
+
throw NotImplementedException("Sorting is not supported on big endian architectures");
|
|
39607
|
+
}
|
|
39471
39608
|
}
|
|
39472
39609
|
|
|
39473
39610
|
void LocalSortState::Initialize(GlobalSortState &global_sort_state, BufferManager &buffer_manager_p) {
|
|
@@ -43143,13 +43280,14 @@ BufferHandle ColumnDataAllocator::PinInternal(uint32_t block_id) {
|
|
|
43143
43280
|
return alloc.buffer_manager->Pin(blocks[block_id].handle);
|
|
43144
43281
|
}
|
|
43145
43282
|
|
|
43146
|
-
|
|
43283
|
+
BufferHandle ColumnDataAllocator::AllocateBlock() {
|
|
43147
43284
|
D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR);
|
|
43148
43285
|
BlockMetaData data;
|
|
43149
43286
|
data.size = 0;
|
|
43150
43287
|
data.capacity = Storage::BLOCK_SIZE;
|
|
43151
|
-
|
|
43288
|
+
auto pin = alloc.buffer_manager->Allocate(Storage::BLOCK_SIZE, false, &data.handle);
|
|
43152
43289
|
blocks.push_back(move(data));
|
|
43290
|
+
return pin;
|
|
43153
43291
|
}
|
|
43154
43292
|
|
|
43155
43293
|
void ColumnDataAllocator::AllocateEmptyBlock(idx_t size) {
|
|
@@ -43183,11 +43321,10 @@ void ColumnDataAllocator::AllocateBuffer(idx_t size, uint32_t &block_id, uint32_
|
|
|
43183
43321
|
ChunkManagementState *chunk_state) {
|
|
43184
43322
|
D_ASSERT(allocated_data.empty());
|
|
43185
43323
|
if (blocks.empty() || blocks.back().Capacity() < size) {
|
|
43186
|
-
AllocateBlock();
|
|
43187
|
-
if (chunk_state
|
|
43188
|
-
|
|
43324
|
+
auto pinned_block = AllocateBlock();
|
|
43325
|
+
if (chunk_state) {
|
|
43326
|
+
D_ASSERT(!blocks.empty());
|
|
43189
43327
|
auto new_block_id = blocks.size() - 1;
|
|
43190
|
-
auto pinned_block = alloc.buffer_manager->Pin(last_block.handle);
|
|
43191
43328
|
chunk_state->handles[new_block_id] = move(pinned_block);
|
|
43192
43329
|
}
|
|
43193
43330
|
}
|
|
@@ -44132,7 +44269,7 @@ namespace duckdb {
|
|
|
44132
44269
|
|
|
44133
44270
|
ColumnDataCollectionSegment::ColumnDataCollectionSegment(shared_ptr<ColumnDataAllocator> allocator_p,
|
|
44134
44271
|
vector<LogicalType> types_p)
|
|
44135
|
-
: allocator(move(allocator_p)), types(move(types_p)), count(0) {
|
|
44272
|
+
: allocator(move(allocator_p)), types(move(types_p)), count(0), heap(allocator->GetAllocator()) {
|
|
44136
44273
|
}
|
|
44137
44274
|
|
|
44138
44275
|
idx_t ColumnDataCollectionSegment::GetDataSize(idx_t type_size) {
|
|
@@ -47530,6 +47667,12 @@ void PartitionedColumnData::InitializeAppendState(PartitionedColumnDataAppendSta
|
|
|
47530
47667
|
InitializeAppendStateInternal(state);
|
|
47531
47668
|
}
|
|
47532
47669
|
|
|
47670
|
+
unique_ptr<DataChunk> PartitionedColumnData::CreatePartitionBuffer() const {
|
|
47671
|
+
auto result = make_unique<DataChunk>();
|
|
47672
|
+
result->Initialize(BufferManager::GetBufferManager(context).GetBufferAllocator(), types, BufferSize());
|
|
47673
|
+
return result;
|
|
47674
|
+
}
|
|
47675
|
+
|
|
47533
47676
|
void PartitionedColumnData::Append(PartitionedColumnDataAppendState &state, DataChunk &input) {
|
|
47534
47677
|
// Compute partition indices and store them in state.partition_indices
|
|
47535
47678
|
ComputePartitionIndices(state, input);
|
|
@@ -48216,7 +48359,7 @@ buffer_ptr<SelectionData> SelectionVector::Slice(const SelectionVector &sel, idx
|
|
|
48216
48359
|
|
|
48217
48360
|
namespace duckdb {
|
|
48218
48361
|
|
|
48219
|
-
StringHeap::StringHeap() : allocator(
|
|
48362
|
+
StringHeap::StringHeap(Allocator &allocator) : allocator(allocator) {
|
|
48220
48363
|
}
|
|
48221
48364
|
|
|
48222
48365
|
void StringHeap::Destroy() {
|
|
@@ -53780,7 +53923,7 @@ string LogicalType::ToString() const {
|
|
|
53780
53923
|
string ret = "UNION(";
|
|
53781
53924
|
size_t count = UnionType::GetMemberCount(*this);
|
|
53782
53925
|
for (size_t i = 0; i < count; i++) {
|
|
53783
|
-
ret += UnionType::GetMemberType(*this, i).ToString();
|
|
53926
|
+
ret += UnionType::GetMemberName(*this, i) + " " + UnionType::GetMemberType(*this, i).ToString();
|
|
53784
53927
|
if (i < count - 1) {
|
|
53785
53928
|
ret += ", ";
|
|
53786
53929
|
}
|
|
@@ -57240,7 +57383,9 @@ static inline void ListLoopHash(Vector &input, Vector &hashes, const SelectionVe
|
|
|
57240
57383
|
const auto child_count = ListVector::GetListSize(input);
|
|
57241
57384
|
|
|
57242
57385
|
Vector child_hashes(LogicalType::HASH, child_count);
|
|
57243
|
-
|
|
57386
|
+
if (child_count > 0) {
|
|
57387
|
+
VectorOperations::Hash(child, child_hashes, child_count);
|
|
57388
|
+
}
|
|
57244
57389
|
auto chdata = FlatVector::GetData<hash_t>(child_hashes);
|
|
57245
57390
|
|
|
57246
57391
|
// Reduce the number of entries to check to the non-empty ones
|
|
@@ -58640,11 +58785,13 @@ public:
|
|
|
58640
58785
|
ColumnBindingResolver();
|
|
58641
58786
|
|
|
58642
58787
|
void VisitOperator(LogicalOperator &op) override;
|
|
58788
|
+
static void Verify(LogicalOperator &op);
|
|
58643
58789
|
|
|
58644
58790
|
protected:
|
|
58645
58791
|
vector<ColumnBinding> bindings;
|
|
58646
58792
|
|
|
58647
58793
|
unique_ptr<Expression> VisitReplace(BoundColumnRefExpression &expr, unique_ptr<Expression> *expr_ptr) override;
|
|
58794
|
+
static unordered_set<idx_t> VerifyInternal(LogicalOperator &op);
|
|
58648
58795
|
};
|
|
58649
58796
|
} // namespace duckdb
|
|
58650
58797
|
|
|
@@ -58986,6 +59133,35 @@ unique_ptr<Expression> ColumnBindingResolver::VisitReplace(BoundColumnRefExpress
|
|
|
58986
59133
|
// LCOV_EXCL_STOP
|
|
58987
59134
|
}
|
|
58988
59135
|
|
|
59136
|
+
unordered_set<idx_t> ColumnBindingResolver::VerifyInternal(LogicalOperator &op) {
|
|
59137
|
+
unordered_set<idx_t> result;
|
|
59138
|
+
for (auto &child : op.children) {
|
|
59139
|
+
auto child_indexes = VerifyInternal(*child);
|
|
59140
|
+
for (auto index : child_indexes) {
|
|
59141
|
+
D_ASSERT(index != DConstants::INVALID_INDEX);
|
|
59142
|
+
if (result.find(index) != result.end()) {
|
|
59143
|
+
throw InternalException("Duplicate table index \"%lld\" found", index);
|
|
59144
|
+
}
|
|
59145
|
+
result.insert(index);
|
|
59146
|
+
}
|
|
59147
|
+
}
|
|
59148
|
+
auto indexes = op.GetTableIndex();
|
|
59149
|
+
for (auto index : indexes) {
|
|
59150
|
+
D_ASSERT(index != DConstants::INVALID_INDEX);
|
|
59151
|
+
if (result.find(index) != result.end()) {
|
|
59152
|
+
throw InternalException("Duplicate table index \"%lld\" found", index);
|
|
59153
|
+
}
|
|
59154
|
+
result.insert(index);
|
|
59155
|
+
}
|
|
59156
|
+
return result;
|
|
59157
|
+
}
|
|
59158
|
+
|
|
59159
|
+
void ColumnBindingResolver::Verify(LogicalOperator &op) {
|
|
59160
|
+
#ifdef DEBUG
|
|
59161
|
+
VerifyInternal(op);
|
|
59162
|
+
#endif
|
|
59163
|
+
}
|
|
59164
|
+
|
|
58989
59165
|
} // namespace duckdb
|
|
58990
59166
|
|
|
58991
59167
|
|
|
@@ -60516,6 +60692,9 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
|
60516
60692
|
DatabaseInstance &db, idx_t block_id, idx_t block_offset)
|
|
60517
60693
|
: Index(IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type), db(db),
|
|
60518
60694
|
estimated_art_size(0), estimated_key_size(16) {
|
|
60695
|
+
if (!Radix::IsLittleEndian()) {
|
|
60696
|
+
throw NotImplementedException("ART indexes are not supported on big endian architectures");
|
|
60697
|
+
}
|
|
60519
60698
|
if (block_id != DConstants::INVALID_INDEX) {
|
|
60520
60699
|
tree = Node::Deserialize(*this, block_id, block_offset);
|
|
60521
60700
|
} else {
|
|
@@ -60799,7 +60978,7 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
|
|
|
60799
60978
|
auto payload_types = logical_types;
|
|
60800
60979
|
payload_types.emplace_back(LogicalType::ROW_TYPE);
|
|
60801
60980
|
|
|
60802
|
-
ArenaAllocator arena_allocator(
|
|
60981
|
+
ArenaAllocator arena_allocator(BufferAllocator::Get(db));
|
|
60803
60982
|
vector<Key> keys(STANDARD_VECTOR_SIZE);
|
|
60804
60983
|
|
|
60805
60984
|
auto temp_art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
|
|
@@ -60856,7 +61035,7 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
|
60856
61035
|
D_ASSERT(logical_types[0] == input.data[0].GetType());
|
|
60857
61036
|
|
|
60858
61037
|
// generate the keys for the given input
|
|
60859
|
-
ArenaAllocator arena_allocator(
|
|
61038
|
+
ArenaAllocator arena_allocator(BufferAllocator::Get(db));
|
|
60860
61039
|
vector<Key> keys(input.size());
|
|
60861
61040
|
GenerateKeys(arena_allocator, input, keys);
|
|
60862
61041
|
|
|
@@ -61016,7 +61195,7 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
|
61016
61195
|
estimated_art_size -= released_memory;
|
|
61017
61196
|
|
|
61018
61197
|
// then generate the keys for the given input
|
|
61019
|
-
ArenaAllocator arena_allocator(
|
|
61198
|
+
ArenaAllocator arena_allocator(BufferAllocator::Get(db));
|
|
61020
61199
|
vector<Key> keys(expression.size());
|
|
61021
61200
|
GenerateKeys(arena_allocator, expression, keys);
|
|
61022
61201
|
|
|
@@ -61260,7 +61439,7 @@ bool ART::Scan(Transaction &transaction, DataTable &table, IndexScanState &table
|
|
|
61260
61439
|
|
|
61261
61440
|
// FIXME: the key directly owning the data for a single key might be more efficient
|
|
61262
61441
|
D_ASSERT(state->values[0].type().InternalType() == types[0]);
|
|
61263
|
-
ArenaAllocator arena_allocator(Allocator::
|
|
61442
|
+
ArenaAllocator arena_allocator(Allocator::Get(db));
|
|
61264
61443
|
auto key = CreateKey(arena_allocator, types[0], state->values[0]);
|
|
61265
61444
|
|
|
61266
61445
|
if (state->values[1].IsNull()) {
|
|
@@ -61335,7 +61514,7 @@ void ART::VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, str
|
|
|
61335
61514
|
ExecuteExpressions(chunk, expression_chunk);
|
|
61336
61515
|
|
|
61337
61516
|
// generate the keys for the given input
|
|
61338
|
-
ArenaAllocator arena_allocator(
|
|
61517
|
+
ArenaAllocator arena_allocator(BufferAllocator::Get(db));
|
|
61339
61518
|
vector<Key> keys(expression_chunk.size());
|
|
61340
61519
|
GenerateKeys(arena_allocator, expression_chunk, keys);
|
|
61341
61520
|
|
|
@@ -63513,7 +63692,7 @@ private:
|
|
|
63513
63692
|
mutex pinned_handles_lock;
|
|
63514
63693
|
vector<BufferHandle> pinned_handles;
|
|
63515
63694
|
//! The hash map of the HT, created after finalization
|
|
63516
|
-
|
|
63695
|
+
AllocatedData hash_map;
|
|
63517
63696
|
//! Whether or not NULL values are considered equal in each of the comparisons
|
|
63518
63697
|
vector<bool> null_values_are_equal;
|
|
63519
63698
|
|
|
@@ -63597,9 +63776,10 @@ public:
|
|
|
63597
63776
|
idx_t SwizzledSize() const {
|
|
63598
63777
|
return swizzled_block_collection->SizeInBytes() + swizzled_string_heap->SizeInBytes();
|
|
63599
63778
|
}
|
|
63600
|
-
//! Capacity of the pointer table given the
|
|
63779
|
+
//! Capacity of the pointer table given the ht count
|
|
63780
|
+
//! (minimum of 1024 to prevent collision chance for small HT's)
|
|
63601
63781
|
static idx_t PointerTableCapacity(idx_t count) {
|
|
63602
|
-
return
|
|
63782
|
+
return MaxValue<idx_t>(NextPowerOfTwo(count * 2), 1 << 10);
|
|
63603
63783
|
}
|
|
63604
63784
|
|
|
63605
63785
|
//! Swizzle the blocks in this HT (moves from block_collection and string_heap to swizzled_...)
|
|
@@ -63770,7 +63950,7 @@ void JoinHashTable::ApplyBitmask(Vector &hashes, const SelectionVector &sel, idx
|
|
|
63770
63950
|
|
|
63771
63951
|
auto hash_data = (hash_t *)hdata.data;
|
|
63772
63952
|
auto result_data = FlatVector::GetData<data_ptr_t *>(pointers);
|
|
63773
|
-
auto main_ht = (data_ptr_t *)hash_map.
|
|
63953
|
+
auto main_ht = (data_ptr_t *)hash_map.get();
|
|
63774
63954
|
for (idx_t i = 0; i < count; i++) {
|
|
63775
63955
|
auto rindex = sel.get_index(i);
|
|
63776
63956
|
auto hindex = hdata.sel->get_index(rindex);
|
|
@@ -63952,7 +64132,7 @@ void JoinHashTable::InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_loc
|
|
|
63952
64132
|
hashes.Flatten(count);
|
|
63953
64133
|
D_ASSERT(hashes.GetVectorType() == VectorType::FLAT_VECTOR);
|
|
63954
64134
|
|
|
63955
|
-
auto pointers = (atomic<data_ptr_t> *)hash_map.
|
|
64135
|
+
auto pointers = (atomic<data_ptr_t> *)hash_map.get();
|
|
63956
64136
|
auto indices = FlatVector::GetData<hash_t>(hashes);
|
|
63957
64137
|
|
|
63958
64138
|
if (parallel) {
|
|
@@ -63969,19 +64149,19 @@ void JoinHashTable::InitializePointerTable() {
|
|
|
63969
64149
|
D_ASSERT((capacity & (capacity - 1)) == 0);
|
|
63970
64150
|
bitmask = capacity - 1;
|
|
63971
64151
|
|
|
63972
|
-
if (!hash_map.
|
|
64152
|
+
if (!hash_map.get()) {
|
|
63973
64153
|
// allocate the HT if not yet done
|
|
63974
|
-
hash_map = buffer_manager.Allocate(capacity * sizeof(data_ptr_t));
|
|
64154
|
+
hash_map = buffer_manager.GetBufferAllocator().Allocate(capacity * sizeof(data_ptr_t));
|
|
63975
64155
|
}
|
|
63976
|
-
D_ASSERT(hash_map.
|
|
64156
|
+
D_ASSERT(hash_map.GetSize() == capacity * sizeof(data_ptr_t));
|
|
63977
64157
|
|
|
63978
64158
|
// initialize HT with all-zero entries
|
|
63979
|
-
memset(hash_map.
|
|
64159
|
+
memset(hash_map.get(), 0, capacity * sizeof(data_ptr_t));
|
|
63980
64160
|
}
|
|
63981
64161
|
|
|
63982
64162
|
void JoinHashTable::Finalize(idx_t block_idx_start, idx_t block_idx_end, bool parallel) {
|
|
63983
64163
|
// Pointer table should be allocated
|
|
63984
|
-
D_ASSERT(hash_map.
|
|
64164
|
+
D_ASSERT(hash_map.get());
|
|
63985
64165
|
|
|
63986
64166
|
vector<BufferHandle> local_pinned_handles;
|
|
63987
64167
|
|
|
@@ -64863,7 +65043,8 @@ ProbeSpillLocalState ProbeSpill::RegisterThread() {
|
|
|
64863
65043
|
result.local_partition = local_partitions.back().get();
|
|
64864
65044
|
result.local_partition_append_state = local_partition_append_states.back().get();
|
|
64865
65045
|
} else {
|
|
64866
|
-
local_spill_collections.emplace_back(
|
|
65046
|
+
local_spill_collections.emplace_back(
|
|
65047
|
+
make_unique<ColumnDataCollection>(BufferManager::GetBufferManager(context), probe_types));
|
|
64867
65048
|
local_spill_append_states.emplace_back(make_unique<ColumnDataAppendState>());
|
|
64868
65049
|
local_spill_collections.back()->InitializeAppend(*local_spill_append_states.back());
|
|
64869
65050
|
|
|
@@ -64894,7 +65075,8 @@ void ProbeSpill::Finalize() {
|
|
|
64894
65075
|
local_partition_append_states.clear();
|
|
64895
65076
|
} else {
|
|
64896
65077
|
if (local_spill_collections.empty()) {
|
|
64897
|
-
global_spill_collection =
|
|
65078
|
+
global_spill_collection =
|
|
65079
|
+
make_unique<ColumnDataCollection>(BufferManager::GetBufferManager(context), probe_types);
|
|
64898
65080
|
} else {
|
|
64899
65081
|
global_spill_collection = move(local_spill_collections[0]);
|
|
64900
65082
|
for (idx_t i = 1; i < local_spill_collections.size(); i++) {
|
|
@@ -64911,7 +65093,8 @@ void ProbeSpill::PrepareNextProbe() {
|
|
|
64911
65093
|
auto &partitions = global_partitions->GetPartitions();
|
|
64912
65094
|
if (partitions.empty() || ht.partition_start == partitions.size()) {
|
|
64913
65095
|
// Can't probe, just make an empty one
|
|
64914
|
-
global_spill_collection =
|
|
65096
|
+
global_spill_collection =
|
|
65097
|
+
make_unique<ColumnDataCollection>(BufferManager::GetBufferManager(context), probe_types);
|
|
64915
65098
|
} else {
|
|
64916
65099
|
// Move specific partitions to the global spill collection
|
|
64917
65100
|
global_spill_collection = move(partitions[ht.partition_start]);
|
|
@@ -65185,6 +65368,44 @@ static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t r
|
|
|
65185
65368
|
}
|
|
65186
65369
|
}
|
|
65187
65370
|
|
|
65371
|
+
static void MarkJoinNested(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[],
|
|
65372
|
+
ExpressionType comparison_type) {
|
|
65373
|
+
Vector left_reference(left.GetType());
|
|
65374
|
+
SelectionVector true_sel(rcount);
|
|
65375
|
+
for (idx_t i = 0; i < lcount; i++) {
|
|
65376
|
+
if (found_match[i]) {
|
|
65377
|
+
continue;
|
|
65378
|
+
}
|
|
65379
|
+
ConstantVector::Reference(left_reference, left, i, rcount);
|
|
65380
|
+
idx_t count;
|
|
65381
|
+
switch (comparison_type) {
|
|
65382
|
+
case ExpressionType::COMPARE_EQUAL:
|
|
65383
|
+
count = VectorOperations::Equals(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65384
|
+
break;
|
|
65385
|
+
case ExpressionType::COMPARE_NOTEQUAL:
|
|
65386
|
+
count = VectorOperations::NotEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65387
|
+
break;
|
|
65388
|
+
case ExpressionType::COMPARE_LESSTHAN:
|
|
65389
|
+
count = VectorOperations::LessThan(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65390
|
+
break;
|
|
65391
|
+
case ExpressionType::COMPARE_GREATERTHAN:
|
|
65392
|
+
count = VectorOperations::GreaterThan(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65393
|
+
break;
|
|
65394
|
+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
65395
|
+
count = VectorOperations::LessThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65396
|
+
break;
|
|
65397
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
65398
|
+
count = VectorOperations::GreaterThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr);
|
|
65399
|
+
break;
|
|
65400
|
+
default:
|
|
65401
|
+
throw InternalException("Unsupported comparison type for MarkJoinNested");
|
|
65402
|
+
}
|
|
65403
|
+
if (count > 0) {
|
|
65404
|
+
found_match[i] = true;
|
|
65405
|
+
}
|
|
65406
|
+
}
|
|
65407
|
+
}
|
|
65408
|
+
|
|
65188
65409
|
template <class OP>
|
|
65189
65410
|
static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) {
|
|
65190
65411
|
switch (left.GetType().InternalType()) {
|
|
@@ -65220,6 +65441,13 @@ static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcou
|
|
|
65220
65441
|
|
|
65221
65442
|
static void MarkJoinComparisonSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[],
|
|
65222
65443
|
ExpressionType comparison_type) {
|
|
65444
|
+
switch (left.GetType().InternalType()) {
|
|
65445
|
+
case PhysicalType::STRUCT:
|
|
65446
|
+
case PhysicalType::LIST:
|
|
65447
|
+
return MarkJoinNested(left, right, lcount, rcount, found_match, comparison_type);
|
|
65448
|
+
default:
|
|
65449
|
+
break;
|
|
65450
|
+
}
|
|
65223
65451
|
D_ASSERT(left.GetType() == right.GetType());
|
|
65224
65452
|
switch (comparison_type) {
|
|
65225
65453
|
case ExpressionType::COMPARE_EQUAL:
|
|
@@ -71250,6 +71478,7 @@ class LimitPercentOperatorState : public GlobalSourceState {
|
|
|
71250
71478
|
public:
|
|
71251
71479
|
explicit LimitPercentOperatorState(const PhysicalLimitPercent &op)
|
|
71252
71480
|
: limit(DConstants::INVALID_INDEX), current_offset(0) {
|
|
71481
|
+
D_ASSERT(op.sink_state);
|
|
71253
71482
|
auto &gstate = (LimitPercentGlobalState &)*op.sink_state;
|
|
71254
71483
|
gstate.data.InitializeScan(scan_state);
|
|
71255
71484
|
}
|
|
@@ -72271,7 +72500,12 @@ void PhysicalTransaction::GetData(ExecutionContext &context, DataChunk &chunk, G
|
|
|
72271
72500
|
LocalSourceState &lstate) const {
|
|
72272
72501
|
auto &client = context.client;
|
|
72273
72502
|
|
|
72274
|
-
|
|
72503
|
+
auto type = info->type;
|
|
72504
|
+
if (type == TransactionType::COMMIT && ValidChecker::IsInvalidated(client.ActiveTransaction())) {
|
|
72505
|
+
// transaction is invalidated - turn COMMIT into ROLLBACK
|
|
72506
|
+
type = TransactionType::ROLLBACK;
|
|
72507
|
+
}
|
|
72508
|
+
switch (type) {
|
|
72275
72509
|
case TransactionType::BEGIN_TRANSACTION: {
|
|
72276
72510
|
if (client.transaction.IsAutoCommit()) {
|
|
72277
72511
|
// start the active transaction
|
|
@@ -72493,6 +72727,7 @@ public:
|
|
|
72493
72727
|
public:
|
|
72494
72728
|
bool EmptyResultIfRHSIsEmpty() const;
|
|
72495
72729
|
|
|
72730
|
+
static bool HasNullValues(DataChunk &chunk);
|
|
72496
72731
|
static void ConstructSemiJoinResult(DataChunk &left, DataChunk &result, bool found_match[]);
|
|
72497
72732
|
static void ConstructAntiJoinResult(DataChunk &left, DataChunk &result, bool found_match[]);
|
|
72498
72733
|
static void ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left, DataChunk &result, bool found_match[],
|
|
@@ -74496,6 +74731,10 @@ public:
|
|
|
74496
74731
|
};
|
|
74497
74732
|
|
|
74498
74733
|
void HashJoinGlobalSinkState::ScheduleFinalize(Pipeline &pipeline, Event &event) {
|
|
74734
|
+
if (hash_table->Count() == 0) {
|
|
74735
|
+
hash_table->finalized = true;
|
|
74736
|
+
return;
|
|
74737
|
+
}
|
|
74499
74738
|
hash_table->InitializePointerTable();
|
|
74500
74739
|
auto new_event = make_shared<HashJoinFinalizeEvent>(pipeline, *this);
|
|
74501
74740
|
event.InsertEvent(move(new_event));
|
|
@@ -76494,7 +76733,7 @@ namespace duckdb {
|
|
|
76494
76733
|
class IndexJoinOperatorState : public CachingOperatorState {
|
|
76495
76734
|
public:
|
|
76496
76735
|
IndexJoinOperatorState(ClientContext &context, const PhysicalIndexJoin &op)
|
|
76497
|
-
: probe_executor(context), arena_allocator(
|
|
76736
|
+
: probe_executor(context), arena_allocator(BufferAllocator::Get(context)), keys(STANDARD_VECTOR_SIZE) {
|
|
76498
76737
|
auto &allocator = Allocator::Get(context);
|
|
76499
76738
|
rhs_rows.resize(STANDARD_VECTOR_SIZE);
|
|
76500
76739
|
result_sizes.resize(STANDARD_VECTOR_SIZE);
|
|
@@ -76862,7 +77101,7 @@ public:
|
|
|
76862
77101
|
return true;
|
|
76863
77102
|
}
|
|
76864
77103
|
|
|
76865
|
-
static bool IsSupported(const vector<JoinCondition> &conditions);
|
|
77104
|
+
static bool IsSupported(const vector<JoinCondition> &conditions, JoinType join_type);
|
|
76866
77105
|
|
|
76867
77106
|
public:
|
|
76868
77107
|
//! Returns a list of the types of the join conditions
|
|
@@ -76896,7 +77135,7 @@ PhysicalNestedLoopJoin::PhysicalNestedLoopJoin(LogicalOperator &op, unique_ptr<P
|
|
|
76896
77135
|
children.push_back(move(right));
|
|
76897
77136
|
}
|
|
76898
77137
|
|
|
76899
|
-
|
|
77138
|
+
bool PhysicalJoin::HasNullValues(DataChunk &chunk) {
|
|
76900
77139
|
for (idx_t col_idx = 0; col_idx < chunk.ColumnCount(); col_idx++) {
|
|
76901
77140
|
UnifiedVectorFormat vdata;
|
|
76902
77141
|
chunk.data[col_idx].ToUnifiedFormat(chunk.size(), vdata);
|
|
@@ -76985,7 +77224,10 @@ void PhysicalJoin::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left
|
|
|
76985
77224
|
}
|
|
76986
77225
|
}
|
|
76987
77226
|
|
|
76988
|
-
bool PhysicalNestedLoopJoin::IsSupported(const vector<JoinCondition> &conditions) {
|
|
77227
|
+
bool PhysicalNestedLoopJoin::IsSupported(const vector<JoinCondition> &conditions, JoinType join_type) {
|
|
77228
|
+
if (join_type == JoinType::MARK) {
|
|
77229
|
+
return true;
|
|
77230
|
+
}
|
|
76989
77231
|
for (auto &cond : conditions) {
|
|
76990
77232
|
if (cond.left->return_type.InternalType() == PhysicalType::STRUCT ||
|
|
76991
77233
|
cond.left->return_type.InternalType() == PhysicalType::LIST) {
|
|
@@ -77029,7 +77271,7 @@ public:
|
|
|
77029
77271
|
//! Materialized join condition of the RHS
|
|
77030
77272
|
ColumnDataCollection right_condition_data;
|
|
77031
77273
|
//! Whether or not the RHS of the nested loop join has NULL values
|
|
77032
|
-
bool has_null;
|
|
77274
|
+
atomic<bool> has_null;
|
|
77033
77275
|
//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
|
|
77034
77276
|
OuterJoinMarker right_outer;
|
|
77035
77277
|
};
|
|
@@ -85827,15 +86069,14 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
|
|
|
85827
86069
|
for (auto &pipeline : pipelines) {
|
|
85828
86070
|
auto sink = pipeline->GetSink();
|
|
85829
86071
|
if (sink != this) {
|
|
85830
|
-
|
|
85831
|
-
sink->sink_state = sink->GetGlobalSinkState(context.client);
|
|
86072
|
+
sink->sink_state.reset();
|
|
85832
86073
|
}
|
|
85833
86074
|
for (auto &op : pipeline->GetOperators()) {
|
|
85834
86075
|
if (op) {
|
|
85835
|
-
op->op_state
|
|
86076
|
+
op->op_state.reset();
|
|
85836
86077
|
}
|
|
85837
86078
|
}
|
|
85838
|
-
pipeline->
|
|
86079
|
+
pipeline->ClearSource();
|
|
85839
86080
|
}
|
|
85840
86081
|
|
|
85841
86082
|
// get the MetaPipelines in the recursive_meta_pipeline and reschedule them
|
|
@@ -86810,6 +87051,7 @@ public:
|
|
|
86810
87051
|
void Serialize(FieldWriter &writer) const override;
|
|
86811
87052
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
86812
87053
|
idx_t EstimateCardinality(ClientContext &context) override;
|
|
87054
|
+
vector<idx_t> GetTableIndex() const override;
|
|
86813
87055
|
|
|
86814
87056
|
protected:
|
|
86815
87057
|
void ResolveTypes() override;
|
|
@@ -87092,6 +87334,7 @@ public:
|
|
|
87092
87334
|
|
|
87093
87335
|
void Serialize(FieldWriter &writer) const override;
|
|
87094
87336
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
87337
|
+
vector<idx_t> GetTableIndex() const override;
|
|
87095
87338
|
|
|
87096
87339
|
protected:
|
|
87097
87340
|
void ResolveTypes() override {
|
|
@@ -87361,6 +87604,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
|
|
|
87361
87604
|
op.estimated_cardinality, perfect_join_stats);
|
|
87362
87605
|
|
|
87363
87606
|
} else {
|
|
87607
|
+
static constexpr const idx_t NESTED_LOOP_JOIN_THRESHOLD = 5;
|
|
87364
87608
|
bool can_merge = has_range > 0;
|
|
87365
87609
|
bool can_iejoin = has_range >= 2 && recursive_cte_tables.empty();
|
|
87366
87610
|
switch (op.join_type) {
|
|
@@ -87373,6 +87617,11 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
|
|
|
87373
87617
|
default:
|
|
87374
87618
|
break;
|
|
87375
87619
|
}
|
|
87620
|
+
if (left->estimated_cardinality <= NESTED_LOOP_JOIN_THRESHOLD ||
|
|
87621
|
+
right->estimated_cardinality <= NESTED_LOOP_JOIN_THRESHOLD) {
|
|
87622
|
+
can_iejoin = false;
|
|
87623
|
+
can_merge = false;
|
|
87624
|
+
}
|
|
87376
87625
|
if (can_iejoin) {
|
|
87377
87626
|
plan = make_unique<PhysicalIEJoin>(op, move(left), move(right), move(op.conditions), op.join_type,
|
|
87378
87627
|
op.estimated_cardinality);
|
|
@@ -87380,7 +87629,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalComparison
|
|
|
87380
87629
|
// range join: use piecewise merge join
|
|
87381
87630
|
plan = make_unique<PhysicalPiecewiseMergeJoin>(op, move(left), move(right), move(op.conditions),
|
|
87382
87631
|
op.join_type, op.estimated_cardinality);
|
|
87383
|
-
} else if (PhysicalNestedLoopJoin::IsSupported(op.conditions)) {
|
|
87632
|
+
} else if (PhysicalNestedLoopJoin::IsSupported(op.conditions, op.join_type)) {
|
|
87384
87633
|
// inequality join: use nested loop
|
|
87385
87634
|
plan = make_unique<PhysicalNestedLoopJoin>(op, move(left), move(right), move(op.conditions), op.join_type,
|
|
87386
87635
|
op.estimated_cardinality);
|
|
@@ -87604,7 +87853,6 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
|
|
|
87604
87853
|
|
|
87605
87854
|
|
|
87606
87855
|
|
|
87607
|
-
|
|
87608
87856
|
//===----------------------------------------------------------------------===//
|
|
87609
87857
|
// DuckDB
|
|
87610
87858
|
//
|
|
@@ -87646,25 +87894,11 @@ protected:
|
|
|
87646
87894
|
|
|
87647
87895
|
|
|
87648
87896
|
|
|
87649
|
-
namespace duckdb {
|
|
87650
87897
|
|
|
87651
|
-
|
|
87652
|
-
|
|
87653
|
-
auto &function = (BoundFunctionExpression &)expr;
|
|
87654
|
-
if (function.function.dependency) {
|
|
87655
|
-
function.function.dependency(function, dependencies);
|
|
87656
|
-
}
|
|
87657
|
-
}
|
|
87658
|
-
ExpressionIterator::EnumerateChildren(expr, [&](Expression &child) { ExtractDependencies(child, dependencies); });
|
|
87659
|
-
}
|
|
87898
|
+
|
|
87899
|
+
namespace duckdb {
|
|
87660
87900
|
|
|
87661
87901
|
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTable &op) {
|
|
87662
|
-
// extract dependencies from any default values
|
|
87663
|
-
for (auto &default_value : op.info->bound_defaults) {
|
|
87664
|
-
if (default_value) {
|
|
87665
|
-
ExtractDependencies(*default_value, op.info->dependencies);
|
|
87666
|
-
}
|
|
87667
|
-
}
|
|
87668
87902
|
auto &create_info = (CreateTableInfo &)*op.info->base;
|
|
87669
87903
|
auto &catalog = Catalog::GetCatalog(context);
|
|
87670
87904
|
auto existing_entry =
|
|
@@ -87675,13 +87909,14 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTabl
|
|
|
87675
87909
|
|
|
87676
87910
|
bool parallel_streaming_insert = !PreserveInsertionOrder(*plan);
|
|
87677
87911
|
bool use_batch_index = UseBatchIndex(*plan);
|
|
87912
|
+
auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
|
87678
87913
|
unique_ptr<PhysicalOperator> create;
|
|
87679
87914
|
if (!parallel_streaming_insert && use_batch_index) {
|
|
87680
87915
|
create = make_unique<PhysicalBatchInsert>(op, op.schema, move(op.info), op.estimated_cardinality);
|
|
87681
87916
|
|
|
87682
87917
|
} else {
|
|
87683
87918
|
create = make_unique<PhysicalInsert>(op, op.schema, move(op.info), op.estimated_cardinality,
|
|
87684
|
-
parallel_streaming_insert);
|
|
87919
|
+
parallel_streaming_insert && num_threads > 1);
|
|
87685
87920
|
}
|
|
87686
87921
|
|
|
87687
87922
|
D_ASSERT(op.children.size() == 1);
|
|
@@ -87763,8 +87998,9 @@ namespace duckdb {
|
|
|
87763
87998
|
|
|
87764
87999
|
class LogicalDelete : public LogicalOperator {
|
|
87765
88000
|
public:
|
|
87766
|
-
explicit LogicalDelete(TableCatalogEntry *table)
|
|
87767
|
-
: LogicalOperator(LogicalOperatorType::LOGICAL_DELETE), table(table), table_index(
|
|
88001
|
+
explicit LogicalDelete(TableCatalogEntry *table, idx_t table_index)
|
|
88002
|
+
: LogicalOperator(LogicalOperatorType::LOGICAL_DELETE), table(table), table_index(table_index),
|
|
88003
|
+
return_chunk(false) {
|
|
87768
88004
|
}
|
|
87769
88005
|
|
|
87770
88006
|
TableCatalogEntry *table;
|
|
@@ -87775,6 +88011,7 @@ public:
|
|
|
87775
88011
|
void Serialize(FieldWriter &writer) const override;
|
|
87776
88012
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
87777
88013
|
idx_t EstimateCardinality(ClientContext &context) override;
|
|
88014
|
+
vector<idx_t> GetTableIndex() const override;
|
|
87778
88015
|
|
|
87779
88016
|
protected:
|
|
87780
88017
|
vector<ColumnBinding> GetColumnBindings() override {
|
|
@@ -87851,6 +88088,7 @@ public:
|
|
|
87851
88088
|
}
|
|
87852
88089
|
void Serialize(FieldWriter &writer) const override;
|
|
87853
88090
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
88091
|
+
vector<idx_t> GetTableIndex() const override;
|
|
87854
88092
|
|
|
87855
88093
|
protected:
|
|
87856
88094
|
void ResolveTypes() override {
|
|
@@ -88091,6 +88329,7 @@ public:
|
|
|
88091
88329
|
}
|
|
88092
88330
|
void Serialize(FieldWriter &writer) const override;
|
|
88093
88331
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
88332
|
+
vector<idx_t> GetTableIndex() const override;
|
|
88094
88333
|
|
|
88095
88334
|
protected:
|
|
88096
88335
|
void ResolveTypes() override {
|
|
@@ -88474,6 +88713,7 @@ public:
|
|
|
88474
88713
|
idx_t EstimateCardinality(ClientContext &context) override {
|
|
88475
88714
|
return expressions.size();
|
|
88476
88715
|
}
|
|
88716
|
+
vector<idx_t> GetTableIndex() const override;
|
|
88477
88717
|
|
|
88478
88718
|
protected:
|
|
88479
88719
|
void ResolveTypes() override {
|
|
@@ -89145,8 +89385,9 @@ namespace duckdb {
|
|
|
89145
89385
|
//! LogicalInsert represents an insertion of data into a base table
|
|
89146
89386
|
class LogicalInsert : public LogicalOperator {
|
|
89147
89387
|
public:
|
|
89148
|
-
|
|
89149
|
-
: LogicalOperator(LogicalOperatorType::LOGICAL_INSERT), table(table), table_index(
|
|
89388
|
+
LogicalInsert(TableCatalogEntry *table, idx_t table_index)
|
|
89389
|
+
: LogicalOperator(LogicalOperatorType::LOGICAL_INSERT), table(table), table_index(table_index),
|
|
89390
|
+
return_chunk(false) {
|
|
89150
89391
|
}
|
|
89151
89392
|
|
|
89152
89393
|
vector<vector<unique_ptr<Expression>>> insert_values;
|
|
@@ -89183,6 +89424,7 @@ protected:
|
|
|
89183
89424
|
}
|
|
89184
89425
|
|
|
89185
89426
|
idx_t EstimateCardinality(ClientContext &context) override;
|
|
89427
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89186
89428
|
};
|
|
89187
89429
|
} // namespace duckdb
|
|
89188
89430
|
|
|
@@ -89237,6 +89479,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
|
|
|
89237
89479
|
|
|
89238
89480
|
bool parallel_streaming_insert = !PreserveInsertionOrder(*plan);
|
|
89239
89481
|
bool use_batch_index = UseBatchIndex(*plan);
|
|
89482
|
+
auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
|
89240
89483
|
if (op.return_chunk) {
|
|
89241
89484
|
// not supported for RETURNING (yet?)
|
|
89242
89485
|
parallel_streaming_insert = false;
|
|
@@ -89248,7 +89491,8 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalInsert &op
|
|
|
89248
89491
|
op.estimated_cardinality);
|
|
89249
89492
|
} else {
|
|
89250
89493
|
insert = make_unique<PhysicalInsert>(op.types, op.table, op.column_index_map, move(op.bound_defaults),
|
|
89251
|
-
op.estimated_cardinality, op.return_chunk,
|
|
89494
|
+
op.estimated_cardinality, op.return_chunk,
|
|
89495
|
+
parallel_streaming_insert && num_threads > 1);
|
|
89252
89496
|
}
|
|
89253
89497
|
if (plan) {
|
|
89254
89498
|
insert->children.push_back(move(plan));
|
|
@@ -89591,6 +89835,7 @@ public:
|
|
|
89591
89835
|
vector<ColumnBinding> GetColumnBindings() override;
|
|
89592
89836
|
void Serialize(FieldWriter &writer) const override;
|
|
89593
89837
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
89838
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89594
89839
|
|
|
89595
89840
|
protected:
|
|
89596
89841
|
void ResolveTypes() override;
|
|
@@ -89680,6 +89925,7 @@ public:
|
|
|
89680
89925
|
}
|
|
89681
89926
|
void Serialize(FieldWriter &writer) const override;
|
|
89682
89927
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
89928
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89683
89929
|
|
|
89684
89930
|
protected:
|
|
89685
89931
|
void ResolveTypes() override {
|
|
@@ -89727,6 +89973,7 @@ public:
|
|
|
89727
89973
|
}
|
|
89728
89974
|
void Serialize(FieldWriter &writer) const override;
|
|
89729
89975
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
89976
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89730
89977
|
|
|
89731
89978
|
protected:
|
|
89732
89979
|
void ResolveTypes() override {
|
|
@@ -89939,6 +90186,7 @@ public:
|
|
|
89939
90186
|
|
|
89940
90187
|
void Serialize(FieldWriter &writer) const override;
|
|
89941
90188
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
90189
|
+
vector<idx_t> GetTableIndex() const override;
|
|
89942
90190
|
|
|
89943
90191
|
protected:
|
|
89944
90192
|
void ResolveTypes() override {
|
|
@@ -90243,6 +90491,7 @@ public:
|
|
|
90243
90491
|
vector<ColumnBinding> GetColumnBindings() override;
|
|
90244
90492
|
void Serialize(FieldWriter &writer) const override;
|
|
90245
90493
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
90494
|
+
vector<idx_t> GetTableIndex() const override;
|
|
90246
90495
|
|
|
90247
90496
|
protected:
|
|
90248
90497
|
void ResolveTypes() override;
|
|
@@ -90370,6 +90619,7 @@ public:
|
|
|
90370
90619
|
vector<ColumnBinding> GetColumnBindings() override;
|
|
90371
90620
|
void Serialize(FieldWriter &writer) const override;
|
|
90372
90621
|
static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
90622
|
+
vector<idx_t> GetTableIndex() const override;
|
|
90373
90623
|
|
|
90374
90624
|
protected:
|
|
90375
90625
|
void ResolveTypes() override;
|
|
@@ -90528,6 +90778,8 @@ struct LogicalExtensionOperator : public LogicalOperator {
|
|
|
90528
90778
|
: LogicalOperator(LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR, move(expressions)) {
|
|
90529
90779
|
}
|
|
90530
90780
|
|
|
90781
|
+
static unique_ptr<LogicalExtensionOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
|
|
90782
|
+
|
|
90531
90783
|
virtual unique_ptr<PhysicalOperator> CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) = 0;
|
|
90532
90784
|
};
|
|
90533
90785
|
} // namespace duckdb
|
|
@@ -98818,13 +99070,16 @@ struct LinkedList {
|
|
|
98818
99070
|
// forward declarations
|
|
98819
99071
|
struct WriteDataToSegment;
|
|
98820
99072
|
struct ReadDataFromSegment;
|
|
99073
|
+
struct CopyDataFromSegment;
|
|
98821
99074
|
typedef ListSegment *(*create_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
|
|
98822
|
-
vector<AllocatedData> &owning_vector, uint16_t &capacity);
|
|
99075
|
+
vector<AllocatedData> &owning_vector, const uint16_t &capacity);
|
|
98823
99076
|
typedef void (*write_data_to_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
|
|
98824
99077
|
vector<AllocatedData> &owning_vector, ListSegment *segment, Vector &input,
|
|
98825
99078
|
idx_t &entry_idx, idx_t &count);
|
|
98826
|
-
typedef void (*read_data_from_segment_t)(ReadDataFromSegment &read_data_from_segment, ListSegment *segment,
|
|
99079
|
+
typedef void (*read_data_from_segment_t)(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
|
|
98827
99080
|
Vector &result, idx_t &total_count);
|
|
99081
|
+
typedef ListSegment *(*copy_data_from_segment_t)(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
|
|
99082
|
+
Allocator &allocator, vector<AllocatedData> &owning_vector);
|
|
98828
99083
|
|
|
98829
99084
|
struct WriteDataToSegment {
|
|
98830
99085
|
create_segment_t create_segment;
|
|
@@ -98835,6 +99090,10 @@ struct ReadDataFromSegment {
|
|
|
98835
99090
|
read_data_from_segment_t segment_function;
|
|
98836
99091
|
vector<ReadDataFromSegment> child_functions;
|
|
98837
99092
|
};
|
|
99093
|
+
struct CopyDataFromSegment {
|
|
99094
|
+
copy_data_from_segment_t segment_function;
|
|
99095
|
+
vector<CopyDataFromSegment> child_functions;
|
|
99096
|
+
};
|
|
98838
99097
|
|
|
98839
99098
|
// forward declarations
|
|
98840
99099
|
static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
|
|
@@ -98842,24 +99101,27 @@ static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allo
|
|
|
98842
99101
|
idx_t &count);
|
|
98843
99102
|
static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedList *linked_list, Vector &result,
|
|
98844
99103
|
idx_t &initial_total_count);
|
|
99104
|
+
static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list,
|
|
99105
|
+
LinkedList &target_list, Allocator &allocator, vector<AllocatedData> &owning_vector);
|
|
98845
99106
|
|
|
98846
99107
|
template <class T>
|
|
98847
99108
|
static data_ptr_t AllocatePrimitiveData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
98848
|
-
uint16_t &capacity) {
|
|
99109
|
+
const uint16_t &capacity) {
|
|
98849
99110
|
|
|
98850
99111
|
owning_vector.emplace_back(allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(T))));
|
|
98851
99112
|
return owning_vector.back().get();
|
|
98852
99113
|
}
|
|
98853
99114
|
|
|
98854
|
-
static data_ptr_t AllocateListData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
99115
|
+
static data_ptr_t AllocateListData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
99116
|
+
const uint16_t &capacity) {
|
|
98855
99117
|
|
|
98856
99118
|
owning_vector.emplace_back(
|
|
98857
99119
|
allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList)));
|
|
98858
99120
|
return owning_vector.back().get();
|
|
98859
99121
|
}
|
|
98860
99122
|
|
|
98861
|
-
static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
98862
|
-
idx_t child_count) {
|
|
99123
|
+
static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
99124
|
+
const uint16_t &capacity, const idx_t &child_count) {
|
|
98863
99125
|
|
|
98864
99126
|
owning_vector.emplace_back(
|
|
98865
99127
|
allocator.Allocate(sizeof(ListSegment) + capacity * sizeof(bool) + child_count * sizeof(ListSegment *)));
|
|
@@ -98867,28 +99129,28 @@ static data_ptr_t AllocateStructData(Allocator &allocator, vector<AllocatedData>
|
|
|
98867
99129
|
}
|
|
98868
99130
|
|
|
98869
99131
|
template <class T>
|
|
98870
|
-
static T *GetPrimitiveData(ListSegment *segment) {
|
|
99132
|
+
static T *GetPrimitiveData(const ListSegment *segment) {
|
|
98871
99133
|
return (T *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
|
|
98872
99134
|
}
|
|
98873
99135
|
|
|
98874
|
-
static uint64_t *GetListLengthData(ListSegment *segment) {
|
|
99136
|
+
static uint64_t *GetListLengthData(const ListSegment *segment) {
|
|
98875
99137
|
return (uint64_t *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
|
|
98876
99138
|
}
|
|
98877
99139
|
|
|
98878
|
-
static LinkedList *GetListChildData(ListSegment *segment) {
|
|
99140
|
+
static LinkedList *GetListChildData(const ListSegment *segment) {
|
|
98879
99141
|
return (LinkedList *)(((char *)segment) + sizeof(ListSegment) +
|
|
98880
99142
|
segment->capacity * (sizeof(bool) + sizeof(uint64_t)));
|
|
98881
99143
|
}
|
|
98882
99144
|
|
|
98883
|
-
static ListSegment **GetStructData(ListSegment *segment) {
|
|
99145
|
+
static ListSegment **GetStructData(const ListSegment *segment) {
|
|
98884
99146
|
return (ListSegment **)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool));
|
|
98885
99147
|
}
|
|
98886
99148
|
|
|
98887
|
-
static bool *GetNullMask(ListSegment *segment) {
|
|
99149
|
+
static bool *GetNullMask(const ListSegment *segment) {
|
|
98888
99150
|
return (bool *)(((char *)segment) + sizeof(ListSegment));
|
|
98889
99151
|
}
|
|
98890
99152
|
|
|
98891
|
-
static uint16_t GetCapacityForNewSegment(LinkedList *linked_list) {
|
|
99153
|
+
static uint16_t GetCapacityForNewSegment(const LinkedList *linked_list) {
|
|
98892
99154
|
|
|
98893
99155
|
// consecutive segments grow by the power of two
|
|
98894
99156
|
uint16_t capacity = 4;
|
|
@@ -98901,7 +99163,7 @@ static uint16_t GetCapacityForNewSegment(LinkedList *linked_list) {
|
|
|
98901
99163
|
|
|
98902
99164
|
template <class T>
|
|
98903
99165
|
static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allocator,
|
|
98904
|
-
vector<AllocatedData> &owning_vector, uint16_t &capacity) {
|
|
99166
|
+
vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
|
|
98905
99167
|
|
|
98906
99168
|
// allocate data and set the header
|
|
98907
99169
|
auto segment = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, capacity);
|
|
@@ -98912,7 +99174,7 @@ static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allo
|
|
|
98912
99174
|
}
|
|
98913
99175
|
|
|
98914
99176
|
static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator, vector<AllocatedData> &owning_vector,
|
|
98915
|
-
uint16_t &capacity) {
|
|
99177
|
+
const uint16_t &capacity) {
|
|
98916
99178
|
|
|
98917
99179
|
// allocate data and set the header
|
|
98918
99180
|
auto segment = (ListSegment *)AllocateListData(allocator, owning_vector, capacity);
|
|
@@ -98929,7 +99191,7 @@ static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator
|
|
|
98929
99191
|
}
|
|
98930
99192
|
|
|
98931
99193
|
static ListSegment *CreateStructSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator,
|
|
98932
|
-
vector<AllocatedData> &owning_vector, uint16_t &capacity) {
|
|
99194
|
+
vector<AllocatedData> &owning_vector, const uint16_t &capacity) {
|
|
98933
99195
|
|
|
98934
99196
|
// allocate data and set header
|
|
98935
99197
|
auto segment = (ListSegment *)AllocateStructData(allocator, owning_vector, capacity,
|
|
@@ -99123,7 +99385,7 @@ static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allo
|
|
|
99123
99385
|
}
|
|
99124
99386
|
|
|
99125
99387
|
template <class T>
|
|
99126
|
-
static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, ListSegment *segment, Vector &result,
|
|
99388
|
+
static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
|
|
99127
99389
|
idx_t &total_count) {
|
|
99128
99390
|
|
|
99129
99391
|
auto &aggr_vector_validity = FlatVector::Validity(result);
|
|
@@ -99147,7 +99409,7 @@ static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, ListSegment *seg
|
|
|
99147
99409
|
}
|
|
99148
99410
|
}
|
|
99149
99411
|
|
|
99150
|
-
static void ReadDataFromVarcharSegment(ReadDataFromSegment &, ListSegment *segment, Vector &result,
|
|
99412
|
+
static void ReadDataFromVarcharSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result,
|
|
99151
99413
|
idx_t &total_count) {
|
|
99152
99414
|
|
|
99153
99415
|
auto &aggr_vector_validity = FlatVector::Validity(result);
|
|
@@ -99188,8 +99450,8 @@ static void ReadDataFromVarcharSegment(ReadDataFromSegment &, ListSegment *segme
|
|
|
99188
99450
|
}
|
|
99189
99451
|
}
|
|
99190
99452
|
|
|
99191
|
-
static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, ListSegment *segment,
|
|
99192
|
-
idx_t &total_count) {
|
|
99453
|
+
static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
|
|
99454
|
+
Vector &result, idx_t &total_count) {
|
|
99193
99455
|
|
|
99194
99456
|
auto &aggr_vector_validity = FlatVector::Validity(result);
|
|
99195
99457
|
|
|
@@ -99228,8 +99490,8 @@ static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment,
|
|
|
99228
99490
|
BuildListVector(read_data_from_segment.child_functions[0], &linked_child_list, child_vector, starting_offset);
|
|
99229
99491
|
}
|
|
99230
99492
|
|
|
99231
|
-
static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, ListSegment *segment,
|
|
99232
|
-
idx_t &total_count) {
|
|
99493
|
+
static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment,
|
|
99494
|
+
Vector &result, idx_t &total_count) {
|
|
99233
99495
|
|
|
99234
99496
|
auto &aggr_vector_validity = FlatVector::Validity(result);
|
|
99235
99497
|
|
|
@@ -99268,6 +99530,86 @@ static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedL
|
|
|
99268
99530
|
linked_list->last_segment = nullptr;
|
|
99269
99531
|
}
|
|
99270
99532
|
|
|
99533
|
+
template <class T>
|
|
99534
|
+
static ListSegment *CopyDataFromPrimitiveSegment(CopyDataFromSegment &, const ListSegment *source, Allocator &allocator,
|
|
99535
|
+
vector<AllocatedData> &owning_vector) {
|
|
99536
|
+
|
|
99537
|
+
auto target = (ListSegment *)AllocatePrimitiveData<T>(allocator, owning_vector, source->capacity);
|
|
99538
|
+
memcpy(target, source, sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(T)));
|
|
99539
|
+
target->next = nullptr;
|
|
99540
|
+
return target;
|
|
99541
|
+
}
|
|
99542
|
+
|
|
99543
|
+
static ListSegment *CopyDataFromListSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
|
|
99544
|
+
Allocator &allocator, vector<AllocatedData> &owning_vector) {
|
|
99545
|
+
|
|
99546
|
+
// create an empty linked list for the child vector of target
|
|
99547
|
+
auto source_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(source));
|
|
99548
|
+
|
|
99549
|
+
// create the segment
|
|
99550
|
+
auto target = (ListSegment *)AllocateListData(allocator, owning_vector, source->capacity);
|
|
99551
|
+
memcpy(target, source,
|
|
99552
|
+
sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList));
|
|
99553
|
+
target->next = nullptr;
|
|
99554
|
+
|
|
99555
|
+
auto target_linked_list = GetListChildData(target);
|
|
99556
|
+
LinkedList linked_list(source_linked_child_list.total_capacity, nullptr, nullptr);
|
|
99557
|
+
Store<LinkedList>(linked_list, (data_ptr_t)target_linked_list);
|
|
99558
|
+
|
|
99559
|
+
// recurse to copy the linked child list
|
|
99560
|
+
auto target_linked_child_list = Load<LinkedList>((data_ptr_t)GetListChildData(target));
|
|
99561
|
+
D_ASSERT(copy_data_from_segment.child_functions.size() == 1);
|
|
99562
|
+
CopyLinkedList(copy_data_from_segment.child_functions[0], &source_linked_child_list, target_linked_child_list,
|
|
99563
|
+
allocator, owning_vector);
|
|
99564
|
+
|
|
99565
|
+
// store the updated linked list
|
|
99566
|
+
Store<LinkedList>(target_linked_child_list, (data_ptr_t)GetListChildData(target));
|
|
99567
|
+
return target;
|
|
99568
|
+
}
|
|
99569
|
+
|
|
99570
|
+
static ListSegment *CopyDataFromStructSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source,
|
|
99571
|
+
Allocator &allocator, vector<AllocatedData> &owning_vector) {
|
|
99572
|
+
|
|
99573
|
+
auto source_child_count = copy_data_from_segment.child_functions.size();
|
|
99574
|
+
auto target = (ListSegment *)AllocateStructData(allocator, owning_vector, source->capacity, source_child_count);
|
|
99575
|
+
memcpy(target, source,
|
|
99576
|
+
sizeof(ListSegment) + source->capacity * sizeof(bool) + source_child_count * sizeof(ListSegment *));
|
|
99577
|
+
target->next = nullptr;
|
|
99578
|
+
|
|
99579
|
+
// recurse and copy the children
|
|
99580
|
+
auto source_child_segments = GetStructData(source);
|
|
99581
|
+
auto target_child_segments = GetStructData(target);
|
|
99582
|
+
|
|
99583
|
+
for (idx_t i = 0; i < copy_data_from_segment.child_functions.size(); i++) {
|
|
99584
|
+
auto child_function = copy_data_from_segment.child_functions[i];
|
|
99585
|
+
auto source_child_segment = Load<ListSegment *>((data_ptr_t)(source_child_segments + i));
|
|
99586
|
+
auto target_child_segment =
|
|
99587
|
+
child_function.segment_function(child_function, source_child_segment, allocator, owning_vector);
|
|
99588
|
+
Store<ListSegment *>(target_child_segment, (data_ptr_t)(target_child_segments + i));
|
|
99589
|
+
}
|
|
99590
|
+
return target;
|
|
99591
|
+
}
|
|
99592
|
+
|
|
99593
|
+
static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list,
|
|
99594
|
+
LinkedList &target_list, Allocator &allocator, vector<AllocatedData> &owning_vector) {
|
|
99595
|
+
|
|
99596
|
+
auto source_segment = source_list->first_segment;
|
|
99597
|
+
|
|
99598
|
+
while (source_segment) {
|
|
99599
|
+
auto target_segment =
|
|
99600
|
+
copy_data_from_segment.segment_function(copy_data_from_segment, source_segment, allocator, owning_vector);
|
|
99601
|
+
source_segment = source_segment->next;
|
|
99602
|
+
|
|
99603
|
+
if (!target_list.first_segment) {
|
|
99604
|
+
target_list.first_segment = target_segment;
|
|
99605
|
+
}
|
|
99606
|
+
if (target_list.last_segment) {
|
|
99607
|
+
target_list.last_segment->next = target_segment;
|
|
99608
|
+
}
|
|
99609
|
+
target_list.last_segment = target_segment;
|
|
99610
|
+
}
|
|
99611
|
+
}
|
|
99612
|
+
|
|
99271
99613
|
static void InitializeValidities(Vector &vector, idx_t &capacity) {
|
|
99272
99614
|
|
|
99273
99615
|
auto &validity_mask = FlatVector::Validity(vector);
|
|
@@ -99311,6 +99653,7 @@ struct ListBindData : public FunctionData {
|
|
|
99311
99653
|
LogicalType stype;
|
|
99312
99654
|
WriteDataToSegment write_data_to_segment;
|
|
99313
99655
|
ReadDataFromSegment read_data_from_segment;
|
|
99656
|
+
CopyDataFromSegment copy_data_from_segment;
|
|
99314
99657
|
|
|
99315
99658
|
unique_ptr<FunctionData> Copy() const override {
|
|
99316
99659
|
return make_unique<ListBindData>(stype);
|
|
@@ -99323,7 +99666,8 @@ struct ListBindData : public FunctionData {
|
|
|
99323
99666
|
};
|
|
99324
99667
|
|
|
99325
99668
|
static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
|
|
99326
|
-
ReadDataFromSegment &read_data_from_segment,
|
|
99669
|
+
ReadDataFromSegment &read_data_from_segment,
|
|
99670
|
+
CopyDataFromSegment &copy_data_from_segment, const LogicalType &type) {
|
|
99327
99671
|
|
|
99328
99672
|
auto physical_type = type.InternalType();
|
|
99329
99673
|
switch (physical_type) {
|
|
@@ -99332,113 +99676,135 @@ static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
|
|
|
99332
99676
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<bool>;
|
|
99333
99677
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<bool>;
|
|
99334
99678
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<bool>;
|
|
99679
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<bool>;
|
|
99335
99680
|
break;
|
|
99336
99681
|
}
|
|
99337
99682
|
case PhysicalType::INT8: {
|
|
99338
99683
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<int8_t>;
|
|
99339
99684
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int8_t>;
|
|
99340
99685
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int8_t>;
|
|
99686
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int8_t>;
|
|
99341
99687
|
break;
|
|
99342
99688
|
}
|
|
99343
99689
|
case PhysicalType::INT16: {
|
|
99344
99690
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<int16_t>;
|
|
99345
99691
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int16_t>;
|
|
99346
99692
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int16_t>;
|
|
99693
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int16_t>;
|
|
99347
99694
|
break;
|
|
99348
99695
|
}
|
|
99349
99696
|
case PhysicalType::INT32: {
|
|
99350
99697
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<int32_t>;
|
|
99351
99698
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int32_t>;
|
|
99352
99699
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int32_t>;
|
|
99700
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int32_t>;
|
|
99353
99701
|
break;
|
|
99354
99702
|
}
|
|
99355
99703
|
case PhysicalType::INT64: {
|
|
99356
99704
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<int64_t>;
|
|
99357
99705
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<int64_t>;
|
|
99358
99706
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<int64_t>;
|
|
99707
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<int64_t>;
|
|
99359
99708
|
break;
|
|
99360
99709
|
}
|
|
99361
99710
|
case PhysicalType::UINT8: {
|
|
99362
99711
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<uint8_t>;
|
|
99363
99712
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint8_t>;
|
|
99364
99713
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint8_t>;
|
|
99714
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint8_t>;
|
|
99365
99715
|
break;
|
|
99366
99716
|
}
|
|
99367
99717
|
case PhysicalType::UINT16: {
|
|
99368
99718
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<uint16_t>;
|
|
99369
99719
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint16_t>;
|
|
99370
99720
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint16_t>;
|
|
99721
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint16_t>;
|
|
99371
99722
|
break;
|
|
99372
99723
|
}
|
|
99373
99724
|
case PhysicalType::UINT32: {
|
|
99374
99725
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<uint32_t>;
|
|
99375
99726
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint32_t>;
|
|
99376
99727
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint32_t>;
|
|
99728
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint32_t>;
|
|
99377
99729
|
break;
|
|
99378
99730
|
}
|
|
99379
99731
|
case PhysicalType::UINT64: {
|
|
99380
99732
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<uint64_t>;
|
|
99381
99733
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<uint64_t>;
|
|
99382
99734
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<uint64_t>;
|
|
99735
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<uint64_t>;
|
|
99383
99736
|
break;
|
|
99384
99737
|
}
|
|
99385
99738
|
case PhysicalType::FLOAT: {
|
|
99386
99739
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<float>;
|
|
99387
99740
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<float>;
|
|
99388
99741
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<float>;
|
|
99742
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<float>;
|
|
99389
99743
|
break;
|
|
99390
99744
|
}
|
|
99391
99745
|
case PhysicalType::DOUBLE: {
|
|
99392
99746
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<double>;
|
|
99393
99747
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<double>;
|
|
99394
99748
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<double>;
|
|
99749
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<double>;
|
|
99395
99750
|
break;
|
|
99396
99751
|
}
|
|
99397
99752
|
case PhysicalType::INT128: {
|
|
99398
99753
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<hugeint_t>;
|
|
99399
99754
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<hugeint_t>;
|
|
99400
99755
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<hugeint_t>;
|
|
99756
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<hugeint_t>;
|
|
99401
99757
|
break;
|
|
99402
99758
|
}
|
|
99403
99759
|
case PhysicalType::INTERVAL: {
|
|
99404
99760
|
write_data_to_segment.create_segment = CreatePrimitiveSegment<interval_t>;
|
|
99405
99761
|
write_data_to_segment.segment_function = WriteDataToPrimitiveSegment<interval_t>;
|
|
99406
99762
|
read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment<interval_t>;
|
|
99763
|
+
copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment<interval_t>;
|
|
99407
99764
|
break;
|
|
99408
99765
|
}
|
|
99409
99766
|
case PhysicalType::VARCHAR: {
|
|
99410
99767
|
write_data_to_segment.create_segment = CreateListSegment;
|
|
99411
99768
|
write_data_to_segment.segment_function = WriteDataToVarcharSegment;
|
|
99412
99769
|
read_data_from_segment.segment_function = ReadDataFromVarcharSegment;
|
|
99770
|
+
copy_data_from_segment.segment_function = CopyDataFromListSegment;
|
|
99413
99771
|
|
|
99414
99772
|
write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
|
|
99415
99773
|
write_data_to_segment.child_functions.back().create_segment = CreatePrimitiveSegment<char>;
|
|
99774
|
+
copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
|
|
99775
|
+
copy_data_from_segment.child_functions.back().segment_function = CopyDataFromPrimitiveSegment<char>;
|
|
99416
99776
|
break;
|
|
99417
99777
|
}
|
|
99418
99778
|
case PhysicalType::LIST: {
|
|
99419
99779
|
write_data_to_segment.create_segment = CreateListSegment;
|
|
99420
99780
|
write_data_to_segment.segment_function = WriteDataToListSegment;
|
|
99421
99781
|
read_data_from_segment.segment_function = ReadDataFromListSegment;
|
|
99782
|
+
copy_data_from_segment.segment_function = CopyDataFromListSegment;
|
|
99422
99783
|
|
|
99423
99784
|
// recurse
|
|
99424
99785
|
write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
|
|
99425
99786
|
read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment());
|
|
99787
|
+
copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
|
|
99426
99788
|
GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
|
|
99427
|
-
read_data_from_segment.child_functions.back(),
|
|
99789
|
+
read_data_from_segment.child_functions.back(),
|
|
99790
|
+
copy_data_from_segment.child_functions.back(), ListType::GetChildType(type));
|
|
99428
99791
|
break;
|
|
99429
99792
|
}
|
|
99430
99793
|
case PhysicalType::STRUCT: {
|
|
99431
99794
|
write_data_to_segment.create_segment = CreateStructSegment;
|
|
99432
99795
|
write_data_to_segment.segment_function = WriteDataToStructSegment;
|
|
99433
99796
|
read_data_from_segment.segment_function = ReadDataFromStructSegment;
|
|
99797
|
+
copy_data_from_segment.segment_function = CopyDataFromStructSegment;
|
|
99434
99798
|
|
|
99435
99799
|
// recurse
|
|
99436
99800
|
auto child_types = StructType::GetChildTypes(type);
|
|
99437
99801
|
for (idx_t i = 0; i < child_types.size(); i++) {
|
|
99438
99802
|
write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
|
|
99439
99803
|
read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment());
|
|
99804
|
+
copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
|
|
99440
99805
|
GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
|
|
99441
|
-
read_data_from_segment.child_functions.back(),
|
|
99806
|
+
read_data_from_segment.child_functions.back(),
|
|
99807
|
+
copy_data_from_segment.child_functions.back(), child_types[i].second);
|
|
99442
99808
|
}
|
|
99443
99809
|
break;
|
|
99444
99810
|
}
|
|
@@ -99451,7 +99817,7 @@ ListBindData::ListBindData(const LogicalType &stype_p) : stype(stype_p) {
|
|
|
99451
99817
|
|
|
99452
99818
|
// always unnest once because the result vector is of type LIST
|
|
99453
99819
|
auto type = ListType::GetChildType(stype_p);
|
|
99454
|
-
GetSegmentDataFunctions(write_data_to_segment, read_data_from_segment, type);
|
|
99820
|
+
GetSegmentDataFunctions(write_data_to_segment, read_data_from_segment, copy_data_from_segment, type);
|
|
99455
99821
|
}
|
|
99456
99822
|
|
|
99457
99823
|
ListBindData::~ListBindData() {
|
|
@@ -99519,11 +99885,13 @@ static void ListUpdateFunction(Vector inputs[], AggregateInputData &aggr_input_d
|
|
|
99519
99885
|
}
|
|
99520
99886
|
}
|
|
99521
99887
|
|
|
99522
|
-
static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData
|
|
99888
|
+
static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &aggr_input_data, idx_t count) {
|
|
99523
99889
|
UnifiedVectorFormat sdata;
|
|
99524
99890
|
state.ToUnifiedFormat(count, sdata);
|
|
99525
99891
|
auto states_ptr = (ListAggState **)sdata.data;
|
|
99526
99892
|
|
|
99893
|
+
auto &list_bind_data = (ListBindData &)*aggr_input_data.bind_data;
|
|
99894
|
+
|
|
99527
99895
|
auto combined_ptr = FlatVector::GetData<ListAggState *>(combined);
|
|
99528
99896
|
for (idx_t i = 0; i < count; i++) {
|
|
99529
99897
|
auto state = states_ptr[sdata.sel->get_index(i)];
|
|
@@ -99533,32 +99901,27 @@ static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputD
|
|
|
99533
99901
|
}
|
|
99534
99902
|
D_ASSERT(state->type);
|
|
99535
99903
|
D_ASSERT(state->owning_vector);
|
|
99536
|
-
if (!combined_ptr[i]->linked_list) {
|
|
99537
99904
|
|
|
99538
|
-
|
|
99905
|
+
if (!combined_ptr[i]->linked_list) {
|
|
99539
99906
|
combined_ptr[i]->linked_list = new LinkedList(0, nullptr, nullptr);
|
|
99540
|
-
combined_ptr[i]->
|
|
99541
|
-
combined_ptr[i]->linked_list->last_segment = state->linked_list->last_segment;
|
|
99542
|
-
combined_ptr[i]->linked_list->total_capacity = state->linked_list->total_capacity;
|
|
99543
|
-
|
|
99544
|
-
// copy the type
|
|
99907
|
+
combined_ptr[i]->owning_vector = new vector<AllocatedData>;
|
|
99545
99908
|
combined_ptr[i]->type = new LogicalType(*state->type);
|
|
99909
|
+
}
|
|
99910
|
+
auto owning_vector = combined_ptr[i]->owning_vector;
|
|
99546
99911
|
|
|
99547
|
-
|
|
99548
|
-
|
|
99912
|
+
// copy the linked list of the state
|
|
99913
|
+
auto copied_linked_list = LinkedList(state->linked_list->total_capacity, nullptr, nullptr);
|
|
99914
|
+
CopyLinkedList(list_bind_data.copy_data_from_segment, state->linked_list, copied_linked_list,
|
|
99915
|
+
aggr_input_data.allocator, *owning_vector);
|
|
99549
99916
|
|
|
99917
|
+
// append the copied linked list to the combined state
|
|
99918
|
+
if (combined_ptr[i]->linked_list->last_segment) {
|
|
99919
|
+
combined_ptr[i]->linked_list->last_segment->next = copied_linked_list.first_segment;
|
|
99550
99920
|
} else {
|
|
99551
|
-
combined_ptr[i]->linked_list->
|
|
99552
|
-
combined_ptr[i]->linked_list->last_segment = state->linked_list->last_segment;
|
|
99553
|
-
combined_ptr[i]->linked_list->total_capacity += state->linked_list->total_capacity;
|
|
99554
|
-
}
|
|
99555
|
-
|
|
99556
|
-
// copy the owning vector (and its unique pointers to the allocated data)
|
|
99557
|
-
// FIXME: more efficient way of copying the unique pointers?
|
|
99558
|
-
auto &owning_vector = *state->owning_vector;
|
|
99559
|
-
for (idx_t j = 0; j < state->owning_vector->size(); j++) {
|
|
99560
|
-
combined_ptr[i]->owning_vector->push_back(move(owning_vector[j]));
|
|
99921
|
+
combined_ptr[i]->linked_list->first_segment = copied_linked_list.first_segment;
|
|
99561
99922
|
}
|
|
99923
|
+
combined_ptr[i]->linked_list->last_segment = copied_linked_list.last_segment;
|
|
99924
|
+
combined_ptr[i]->linked_list->total_capacity += copied_linked_list.total_capacity;
|
|
99562
99925
|
}
|
|
99563
99926
|
}
|
|
99564
99927
|
|
|
@@ -99822,10 +100185,11 @@ struct RegrCountFunction {
|
|
|
99822
100185
|
namespace duckdb {
|
|
99823
100186
|
|
|
99824
100187
|
void RegrCountFun::RegisterFunction(BuiltinFunctions &set) {
|
|
99825
|
-
|
|
99826
|
-
|
|
99827
|
-
|
|
99828
|
-
|
|
100188
|
+
auto regr_count = AggregateFunction::BinaryAggregate<size_t, double, double, uint32_t, RegrCountFunction>(
|
|
100189
|
+
LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::UINTEGER);
|
|
100190
|
+
regr_count.name = "regr_count";
|
|
100191
|
+
regr_count.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
|
|
100192
|
+
set.AddFunction(regr_count);
|
|
99829
100193
|
}
|
|
99830
100194
|
|
|
99831
100195
|
} // namespace duckdb
|
|
@@ -102674,18 +103038,9 @@ unique_ptr<BoundCastData> BindUnionToUnionCast(BindCastInput &input, const Logic
|
|
|
102674
103038
|
for (idx_t target_idx = 0; target_idx < UnionType::GetMemberCount(target); target_idx++) {
|
|
102675
103039
|
auto &target_member_name = UnionType::GetMemberName(target, target_idx);
|
|
102676
103040
|
|
|
102677
|
-
// found a matching member
|
|
103041
|
+
// found a matching member
|
|
102678
103042
|
if (source_member_name == target_member_name) {
|
|
102679
103043
|
auto &target_member_type = UnionType::GetMemberType(target, target_idx);
|
|
102680
|
-
|
|
102681
|
-
if (input.function_set.ImplicitCastCost(source_member_type, target_member_type) < 0) {
|
|
102682
|
-
auto message = StringUtil::Format(
|
|
102683
|
-
"Type %s can't be cast as %s. The member '%s' can't be implicitly cast from %s to %s",
|
|
102684
|
-
source.ToString(), target.ToString(), source_member_name, source_member_type.ToString(),
|
|
102685
|
-
target_member_type.ToString());
|
|
102686
|
-
throw CastException(message);
|
|
102687
|
-
}
|
|
102688
|
-
|
|
102689
103044
|
tag_map[source_idx] = target_idx;
|
|
102690
103045
|
member_casts.push_back(input.GetCastFunction(source_member_type, target_member_type));
|
|
102691
103046
|
found = true;
|
|
@@ -102759,6 +103114,14 @@ static bool UnionToUnionCast(Vector &source, Vector &result, idx_t count, CastPa
|
|
|
102759
103114
|
}
|
|
102760
103115
|
} else {
|
|
102761
103116
|
// Otherwise, use the unified vector format to access the source vector.
|
|
103117
|
+
|
|
103118
|
+
// Ensure that all the result members are flat vectors
|
|
103119
|
+
// This is not always the case, e.g. when a member is cast using the default TryNullCast function
|
|
103120
|
+
// the resulting member vector will be a constant null vector.
|
|
103121
|
+
for (idx_t target_member_idx = 0; target_member_idx < target_member_count; target_member_idx++) {
|
|
103122
|
+
UnionVector::GetMember(result, target_member_idx).Flatten(count);
|
|
103123
|
+
}
|
|
103124
|
+
|
|
102762
103125
|
// We assume that a union tag vector validity matches the union vector validity.
|
|
102763
103126
|
UnifiedVectorFormat source_tag_format;
|
|
102764
103127
|
source_tag_vector.ToUnifiedFormat(count, source_tag_format);
|
|
@@ -102771,6 +103134,9 @@ static bool UnionToUnionCast(Vector &source, Vector &result, idx_t count, CastPa
|
|
|
102771
103134
|
auto target_tag = cast_data.tag_map[source_tag];
|
|
102772
103135
|
FlatVector::GetData<union_tag_t>(result_tag_vector)[row_idx] = target_tag;
|
|
102773
103136
|
} else {
|
|
103137
|
+
|
|
103138
|
+
// Issue: The members of the result is not always flatvectors
|
|
103139
|
+
// In the case of TryNullCast, the result member is constant.
|
|
102774
103140
|
FlatVector::SetNull(result, row_idx, true);
|
|
102775
103141
|
}
|
|
102776
103142
|
}
|
|
@@ -108931,15 +109297,16 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
|
|
|
108931
109297
|
}
|
|
108932
109298
|
|
|
108933
109299
|
struct StrfTimeBindData : public FunctionData {
|
|
108934
|
-
explicit StrfTimeBindData(StrfTimeFormat format_p, string format_string_p)
|
|
108935
|
-
: format(move(format_p)), format_string(move(format_string_p)) {
|
|
109300
|
+
explicit StrfTimeBindData(StrfTimeFormat format_p, string format_string_p, bool is_null)
|
|
109301
|
+
: format(move(format_p)), format_string(move(format_string_p)), is_null(is_null) {
|
|
108936
109302
|
}
|
|
108937
109303
|
|
|
108938
109304
|
StrfTimeFormat format;
|
|
108939
109305
|
string format_string;
|
|
109306
|
+
bool is_null;
|
|
108940
109307
|
|
|
108941
109308
|
unique_ptr<FunctionData> Copy() const override {
|
|
108942
|
-
return make_unique<StrfTimeBindData>(format, format_string);
|
|
109309
|
+
return make_unique<StrfTimeBindData>(format, format_string, is_null);
|
|
108943
109310
|
}
|
|
108944
109311
|
|
|
108945
109312
|
bool Equals(const FunctionData &other_p) const override {
|
|
@@ -108962,13 +109329,14 @@ static unique_ptr<FunctionData> StrfTimeBindFunction(ClientContext &context, Sca
|
|
|
108962
109329
|
Value options_str = ExpressionExecutor::EvaluateScalar(context, *format_arg);
|
|
108963
109330
|
auto format_string = options_str.GetValue<string>();
|
|
108964
109331
|
StrfTimeFormat format;
|
|
108965
|
-
|
|
109332
|
+
bool is_null = options_str.IsNull();
|
|
109333
|
+
if (!is_null) {
|
|
108966
109334
|
string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
|
|
108967
109335
|
if (!error.empty()) {
|
|
108968
109336
|
throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
|
|
108969
109337
|
}
|
|
108970
109338
|
}
|
|
108971
|
-
return make_unique<StrfTimeBindData>(format, format_string);
|
|
109339
|
+
return make_unique<StrfTimeBindData>(format, format_string, is_null);
|
|
108972
109340
|
}
|
|
108973
109341
|
|
|
108974
109342
|
void StrfTimeFormat::ConvertDateVector(Vector &input, Vector &result, idx_t count) {
|
|
@@ -108995,7 +109363,7 @@ static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector
|
|
|
108995
109363
|
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
108996
109364
|
auto &info = (StrfTimeBindData &)*func_expr.bind_info;
|
|
108997
109365
|
|
|
108998
|
-
if (
|
|
109366
|
+
if (info.is_null) {
|
|
108999
109367
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
109000
109368
|
ConstantVector::SetNull(result, true);
|
|
109001
109369
|
return;
|
|
@@ -109029,7 +109397,7 @@ static void StrfTimeFunctionTimestamp(DataChunk &args, ExpressionState &state, V
|
|
|
109029
109397
|
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
109030
109398
|
auto &info = (StrfTimeBindData &)*func_expr.bind_info;
|
|
109031
109399
|
|
|
109032
|
-
if (
|
|
109400
|
+
if (info.is_null) {
|
|
109033
109401
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
109034
109402
|
ConstantVector::SetNull(result, true);
|
|
109035
109403
|
return;
|
|
@@ -122527,7 +122895,9 @@ static unique_ptr<FunctionData> StructInsertBind(ClientContext &context, ScalarF
|
|
|
122527
122895
|
unique_ptr<BaseStatistics> StructInsertStats(ClientContext &context, FunctionStatisticsInput &input) {
|
|
122528
122896
|
auto &child_stats = input.child_stats;
|
|
122529
122897
|
auto &expr = input.expr;
|
|
122530
|
-
|
|
122898
|
+
if (child_stats.empty() || !child_stats[0]) {
|
|
122899
|
+
return nullptr;
|
|
122900
|
+
}
|
|
122531
122901
|
auto &existing_struct_stats = (StructStatistics &)*child_stats[0];
|
|
122532
122902
|
auto new_struct_stats = make_unique<StructStatistics>(expr.return_type);
|
|
122533
122903
|
|
|
@@ -127954,6 +128324,9 @@ static unique_ptr<FunctionData> DuckDBFunctionsBind(ClientContext &context, Tabl
|
|
|
127954
128324
|
names.emplace_back("has_side_effects");
|
|
127955
128325
|
return_types.emplace_back(LogicalType::BOOLEAN);
|
|
127956
128326
|
|
|
128327
|
+
names.emplace_back("function_oid");
|
|
128328
|
+
return_types.emplace_back(LogicalType::BIGINT);
|
|
128329
|
+
|
|
127957
128330
|
return nullptr;
|
|
127958
128331
|
}
|
|
127959
128332
|
|
|
@@ -128340,6 +128713,9 @@ bool ExtractFunctionData(StandardEntry *entry, idx_t function_idx, DataChunk &ou
|
|
|
128340
128713
|
// has_side_effects, LogicalType::BOOLEAN
|
|
128341
128714
|
output.SetValue(9, output_offset, OP::HasSideEffects(function, function_idx));
|
|
128342
128715
|
|
|
128716
|
+
// function_oid, LogicalType::BIGINT
|
|
128717
|
+
output.SetValue(10, output_offset, Value::BIGINT(entry->oid));
|
|
128718
|
+
|
|
128343
128719
|
return function_idx + 1 == OP::FunctionCount(function);
|
|
128344
128720
|
}
|
|
128345
128721
|
|
|
@@ -135347,6 +135723,10 @@ public:
|
|
|
135347
135723
|
|
|
135348
135724
|
private:
|
|
135349
135725
|
void RunOptimizer(OptimizerType type, const std::function<void()> &callback);
|
|
135726
|
+
void Verify(LogicalOperator &op);
|
|
135727
|
+
|
|
135728
|
+
private:
|
|
135729
|
+
unique_ptr<LogicalOperator> plan;
|
|
135350
135730
|
};
|
|
135351
135731
|
|
|
135352
135732
|
} // namespace duckdb
|
|
@@ -136051,6 +136431,7 @@ unique_ptr<LogicalOperator> ClientContext::ExtractPlan(const string &query) {
|
|
|
136051
136431
|
}
|
|
136052
136432
|
|
|
136053
136433
|
ColumnBindingResolver resolver;
|
|
136434
|
+
resolver.Verify(*plan);
|
|
136054
136435
|
resolver.VisitOperator(*plan);
|
|
136055
136436
|
|
|
136056
136437
|
plan->ResolveOperatorTypes();
|
|
@@ -137142,6 +137523,14 @@ struct MaximumMemorySetting {
|
|
|
137142
137523
|
static Value GetSetting(ClientContext &context);
|
|
137143
137524
|
};
|
|
137144
137525
|
|
|
137526
|
+
struct PasswordSetting {
|
|
137527
|
+
static constexpr const char *Name = "password";
|
|
137528
|
+
static constexpr const char *Description = "The password to use. Ignored for legacy compatibility.";
|
|
137529
|
+
static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
|
|
137530
|
+
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
|
|
137531
|
+
static Value GetSetting(ClientContext &context);
|
|
137532
|
+
};
|
|
137533
|
+
|
|
137145
137534
|
struct PerfectHashThresholdSetting {
|
|
137146
137535
|
static constexpr const char *Name = "perfect_ht_threshold";
|
|
137147
137536
|
static constexpr const char *Description = "Threshold in bytes for when to use a perfect hash table (default: 12)";
|
|
@@ -137237,6 +137626,14 @@ struct ThreadsSetting {
|
|
|
137237
137626
|
static Value GetSetting(ClientContext &context);
|
|
137238
137627
|
};
|
|
137239
137628
|
|
|
137629
|
+
struct UsernameSetting {
|
|
137630
|
+
static constexpr const char *Name = "username";
|
|
137631
|
+
static constexpr const char *Description = "The username to use. Ignored for legacy compatibility.";
|
|
137632
|
+
static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
|
|
137633
|
+
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
|
|
137634
|
+
static Value GetSetting(ClientContext &context);
|
|
137635
|
+
};
|
|
137636
|
+
|
|
137240
137637
|
} // namespace duckdb
|
|
137241
137638
|
|
|
137242
137639
|
|
|
@@ -137286,6 +137683,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
|
|
|
137286
137683
|
DUCKDB_GLOBAL(MaximumMemorySetting),
|
|
137287
137684
|
DUCKDB_GLOBAL_ALIAS("memory_limit", MaximumMemorySetting),
|
|
137288
137685
|
DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting),
|
|
137686
|
+
DUCKDB_GLOBAL(PasswordSetting),
|
|
137289
137687
|
DUCKDB_LOCAL(PerfectHashThresholdSetting),
|
|
137290
137688
|
DUCKDB_LOCAL(PreserveIdentifierCase),
|
|
137291
137689
|
DUCKDB_GLOBAL(PreserveInsertionOrder),
|
|
@@ -137298,6 +137696,8 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
|
|
|
137298
137696
|
DUCKDB_LOCAL(SearchPathSetting),
|
|
137299
137697
|
DUCKDB_GLOBAL(TempDirectorySetting),
|
|
137300
137698
|
DUCKDB_GLOBAL(ThreadsSetting),
|
|
137699
|
+
DUCKDB_GLOBAL(UsernameSetting),
|
|
137700
|
+
DUCKDB_GLOBAL_ALIAS("user", UsernameSetting),
|
|
137301
137701
|
DUCKDB_GLOBAL_ALIAS("wal_autocheckpoint", CheckpointThresholdSetting),
|
|
137302
137702
|
DUCKDB_GLOBAL_ALIAS("worker_threads", ThreadsSetting),
|
|
137303
137703
|
FINAL_SETTING};
|
|
@@ -137791,6 +138191,34 @@ unique_ptr<MaterializedQueryResult> Connection::Query(const string &query) {
|
|
|
137791
138191
|
return unique_ptr_cast<QueryResult, MaterializedQueryResult>(move(result));
|
|
137792
138192
|
}
|
|
137793
138193
|
|
|
138194
|
+
DUCKDB_API string Connection::GetSubstrait(const string &query) {
|
|
138195
|
+
vector<Value> params;
|
|
138196
|
+
params.emplace_back(query);
|
|
138197
|
+
auto result = TableFunction("get_substrait", params)->Execute();
|
|
138198
|
+
auto protobuf = result->FetchRaw()->GetValue(0, 0);
|
|
138199
|
+
return protobuf.GetValueUnsafe<string_t>().GetString();
|
|
138200
|
+
}
|
|
138201
|
+
|
|
138202
|
+
DUCKDB_API unique_ptr<QueryResult> Connection::FromSubstrait(const string &proto) {
|
|
138203
|
+
vector<Value> params;
|
|
138204
|
+
params.emplace_back(Value::BLOB_RAW(proto));
|
|
138205
|
+
return TableFunction("from_substrait", params)->Execute();
|
|
138206
|
+
}
|
|
138207
|
+
|
|
138208
|
+
DUCKDB_API string Connection::GetSubstraitJSON(const string &query) {
|
|
138209
|
+
vector<Value> params;
|
|
138210
|
+
params.emplace_back(query);
|
|
138211
|
+
auto result = TableFunction("get_substrait_json", params)->Execute();
|
|
138212
|
+
auto protobuf = result->FetchRaw()->GetValue(0, 0);
|
|
138213
|
+
return protobuf.GetValueUnsafe<string_t>().GetString();
|
|
138214
|
+
}
|
|
138215
|
+
|
|
138216
|
+
DUCKDB_API unique_ptr<QueryResult> Connection::FromSubstraitJSON(const string &json) {
|
|
138217
|
+
vector<Value> params;
|
|
138218
|
+
params.emplace_back(json);
|
|
138219
|
+
return TableFunction("from_substrait_json", params)->Execute();
|
|
138220
|
+
}
|
|
138221
|
+
|
|
137794
138222
|
unique_ptr<MaterializedQueryResult> Connection::Query(unique_ptr<SQLStatement> statement) {
|
|
137795
138223
|
auto result = context->Query(move(statement), false);
|
|
137796
138224
|
D_ASSERT(result->type == QueryResultType::MATERIALIZED_RESULT);
|
|
@@ -151761,6 +152189,17 @@ Value MaximumMemorySetting::GetSetting(ClientContext &context) {
|
|
|
151761
152189
|
return Value(StringUtil::BytesToHumanReadableString(config.options.maximum_memory));
|
|
151762
152190
|
}
|
|
151763
152191
|
|
|
152192
|
+
//===--------------------------------------------------------------------===//
|
|
152193
|
+
// Password Setting
|
|
152194
|
+
//===--------------------------------------------------------------------===//
|
|
152195
|
+
void PasswordSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
|
|
152196
|
+
// nop
|
|
152197
|
+
}
|
|
152198
|
+
|
|
152199
|
+
Value PasswordSetting::GetSetting(ClientContext &context) {
|
|
152200
|
+
return Value();
|
|
152201
|
+
}
|
|
152202
|
+
|
|
151764
152203
|
//===--------------------------------------------------------------------===//
|
|
151765
152204
|
// Perfect Hash Threshold
|
|
151766
152205
|
//===--------------------------------------------------------------------===//
|
|
@@ -151927,6 +152366,17 @@ Value ThreadsSetting::GetSetting(ClientContext &context) {
|
|
|
151927
152366
|
return Value::BIGINT(config.options.maximum_threads);
|
|
151928
152367
|
}
|
|
151929
152368
|
|
|
152369
|
+
//===--------------------------------------------------------------------===//
|
|
152370
|
+
// Username Setting
|
|
152371
|
+
//===--------------------------------------------------------------------===//
|
|
152372
|
+
void UsernameSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
|
|
152373
|
+
// nop
|
|
152374
|
+
}
|
|
152375
|
+
|
|
152376
|
+
Value UsernameSetting::GetSetting(ClientContext &context) {
|
|
152377
|
+
return Value();
|
|
152378
|
+
}
|
|
152379
|
+
|
|
151930
152380
|
} // namespace duckdb
|
|
151931
152381
|
|
|
151932
152382
|
|
|
@@ -153807,7 +154257,8 @@ bool Deliminator::RemoveInequalityCandidate(unique_ptr<LogicalOperator> *plan, u
|
|
|
153807
154257
|
}
|
|
153808
154258
|
parent_expr =
|
|
153809
154259
|
make_unique<BoundColumnRefExpression>(parent_expr->alias, parent_expr->return_type, it->first);
|
|
153810
|
-
parent_cond.comparison =
|
|
154260
|
+
parent_cond.comparison =
|
|
154261
|
+
parent_delim_get_side == 0 ? child_cond.comparison : FlipComparisionExpression(child_cond.comparison);
|
|
153811
154262
|
break;
|
|
153812
154263
|
}
|
|
153813
154264
|
}
|
|
@@ -154266,6 +154717,9 @@ idx_t FilterCombiner::GetEquivalenceSet(Expression *expr) {
|
|
|
154266
154717
|
|
|
154267
154718
|
FilterResult FilterCombiner::AddConstantComparison(vector<ExpressionValueInformation> &info_list,
|
|
154268
154719
|
ExpressionValueInformation info) {
|
|
154720
|
+
if (info.constant.IsNull()) {
|
|
154721
|
+
return FilterResult::UNSATISFIABLE;
|
|
154722
|
+
}
|
|
154269
154723
|
for (idx_t i = 0; i < info_list.size(); i++) {
|
|
154270
154724
|
auto comparison = CompareValueInformation(info_list[i], info);
|
|
154271
154725
|
switch (comparison) {
|
|
@@ -155730,7 +156184,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownJoin(unique_ptr<LogicalOpera
|
|
|
155730
156184
|
void FilterPushdown::PushFilters() {
|
|
155731
156185
|
for (auto &f : filters) {
|
|
155732
156186
|
auto result = combiner.AddFilter(move(f->filter));
|
|
155733
|
-
D_ASSERT(result
|
|
156187
|
+
D_ASSERT(result != FilterResult::UNSUPPORTED);
|
|
155734
156188
|
(void)result;
|
|
155735
156189
|
}
|
|
155736
156190
|
filters.clear();
|
|
@@ -157927,6 +158381,7 @@ public:
|
|
|
157927
158381
|
|
|
157928
158382
|
|
|
157929
158383
|
|
|
158384
|
+
|
|
157930
158385
|
namespace duckdb {
|
|
157931
158386
|
|
|
157932
158387
|
Optimizer::Optimizer(Binder &binder, ClientContext &context) : context(context), binder(binder), rewriter(context) {
|
|
@@ -157963,9 +158418,18 @@ void Optimizer::RunOptimizer(OptimizerType type, const std::function<void()> &ca
|
|
|
157963
158418
|
profiler.StartPhase(OptimizerTypeToString(type));
|
|
157964
158419
|
callback();
|
|
157965
158420
|
profiler.EndPhase();
|
|
158421
|
+
if (plan) {
|
|
158422
|
+
Verify(*plan);
|
|
158423
|
+
}
|
|
158424
|
+
}
|
|
158425
|
+
|
|
158426
|
+
void Optimizer::Verify(LogicalOperator &op) {
|
|
158427
|
+
ColumnBindingResolver::Verify(op);
|
|
157966
158428
|
}
|
|
157967
158429
|
|
|
157968
|
-
unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator>
|
|
158430
|
+
unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan_p) {
|
|
158431
|
+
Verify(*plan_p);
|
|
158432
|
+
this->plan = move(plan_p);
|
|
157969
158433
|
// first we perform expression rewrites using the ExpressionRewriter
|
|
157970
158434
|
// this does not change the logical plan structure, but only simplifies the expression trees
|
|
157971
158435
|
RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); });
|
|
@@ -158052,7 +158516,7 @@ unique_ptr<LogicalOperator> Optimizer::Optimize(unique_ptr<LogicalOperator> plan
|
|
|
158052
158516
|
|
|
158053
158517
|
Planner::VerifyPlan(context, plan);
|
|
158054
158518
|
|
|
158055
|
-
return plan;
|
|
158519
|
+
return move(plan);
|
|
158056
158520
|
}
|
|
158057
158521
|
|
|
158058
158522
|
} // namespace duckdb
|
|
@@ -158091,7 +158555,8 @@ namespace duckdb {
|
|
|
158091
158555
|
unique_ptr<LogicalOperator> FilterPullup::PullupFilter(unique_ptr<LogicalOperator> op) {
|
|
158092
158556
|
D_ASSERT(op->type == LogicalOperatorType::LOGICAL_FILTER);
|
|
158093
158557
|
|
|
158094
|
-
|
|
158558
|
+
auto &filter = (LogicalFilter &)*op;
|
|
158559
|
+
if (can_pullup && filter.projection_map.empty()) {
|
|
158095
158560
|
unique_ptr<LogicalOperator> child = move(op->children[0]);
|
|
158096
158561
|
child = Rewrite(move(child));
|
|
158097
158562
|
// moving filter's expressions
|
|
@@ -158398,6 +158863,9 @@ using Filter = FilterPushdown::Filter;
|
|
|
158398
158863
|
unique_ptr<LogicalOperator> FilterPushdown::PushdownFilter(unique_ptr<LogicalOperator> op) {
|
|
158399
158864
|
D_ASSERT(op->type == LogicalOperatorType::LOGICAL_FILTER);
|
|
158400
158865
|
auto &filter = (LogicalFilter &)*op;
|
|
158866
|
+
if (!filter.projection_map.empty()) {
|
|
158867
|
+
return FinishPushdown(move(op));
|
|
158868
|
+
}
|
|
158401
158869
|
// filter: gather the filters and remove the filter from the set of operations
|
|
158402
158870
|
for (auto &expression : filter.expressions) {
|
|
158403
158871
|
if (AddFilter(move(expression)) == FilterResult::UNSATISFIABLE) {
|
|
@@ -158659,7 +159127,16 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownLeftJoin(unique_ptr<LogicalO
|
|
|
158659
159127
|
right_pushdown.GenerateFilters();
|
|
158660
159128
|
op->children[0] = left_pushdown.Rewrite(move(op->children[0]));
|
|
158661
159129
|
op->children[1] = right_pushdown.Rewrite(move(op->children[1]));
|
|
158662
|
-
|
|
159130
|
+
if (filters.empty()) {
|
|
159131
|
+
// no filters to push
|
|
159132
|
+
return op;
|
|
159133
|
+
}
|
|
159134
|
+
auto filter = make_unique<LogicalFilter>();
|
|
159135
|
+
for (auto &f : filters) {
|
|
159136
|
+
filter->expressions.push_back(move(f->filter));
|
|
159137
|
+
}
|
|
159138
|
+
filter->children.push_back(move(op));
|
|
159139
|
+
return move(filter);
|
|
158663
159140
|
}
|
|
158664
159141
|
|
|
158665
159142
|
} // namespace duckdb
|
|
@@ -158701,8 +159178,8 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
|
|
|
158701
159178
|
|
|
158702
159179
|
right_bindings.insert(comp_join.mark_index);
|
|
158703
159180
|
FilterPushdown left_pushdown(optimizer), right_pushdown(optimizer);
|
|
158704
|
-
#
|
|
158705
|
-
bool
|
|
159181
|
+
#ifdef DEBUG
|
|
159182
|
+
bool simplified_mark_join = false;
|
|
158706
159183
|
#endif
|
|
158707
159184
|
// now check the set of filters
|
|
158708
159185
|
for (idx_t i = 0; i < filters.size(); i++) {
|
|
@@ -158714,15 +159191,16 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
|
|
|
158714
159191
|
filters.erase(filters.begin() + i);
|
|
158715
159192
|
i--;
|
|
158716
159193
|
} else if (side == JoinSide::RIGHT) {
|
|
158717
|
-
|
|
158718
|
-
|
|
158719
|
-
D_ASSERT(!found_mark_reference);
|
|
158720
|
-
found_mark_reference = true;
|
|
159194
|
+
#ifdef DEBUG
|
|
159195
|
+
D_ASSERT(!simplified_mark_join);
|
|
158721
159196
|
#endif
|
|
158722
159197
|
// this filter references the marker
|
|
158723
159198
|
// we can turn this into a SEMI join if the filter is on only the marker
|
|
158724
159199
|
if (filters[i]->filter->type == ExpressionType::BOUND_COLUMN_REF) {
|
|
158725
159200
|
// filter just references the marker: turn into semi join
|
|
159201
|
+
#ifdef DEBUG
|
|
159202
|
+
simplified_mark_join = true;
|
|
159203
|
+
#endif
|
|
158726
159204
|
join.join_type = JoinType::SEMI;
|
|
158727
159205
|
filters.erase(filters.begin() + i);
|
|
158728
159206
|
i--;
|
|
@@ -158745,6 +159223,9 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
|
|
|
158745
159223
|
}
|
|
158746
159224
|
}
|
|
158747
159225
|
if (all_null_values_are_equal) {
|
|
159226
|
+
#ifdef DEBUG
|
|
159227
|
+
simplified_mark_join = true;
|
|
159228
|
+
#endif
|
|
158748
159229
|
// all null values are equal, convert to ANTI join
|
|
158749
159230
|
join.join_type = JoinType::ANTI;
|
|
158750
159231
|
filters.erase(filters.begin() + i);
|
|
@@ -159507,7 +159988,8 @@ unique_ptr<Expression> ComparisonSimplificationRule::Apply(LogicalOperator &op,
|
|
|
159507
159988
|
}
|
|
159508
159989
|
|
|
159509
159990
|
// Is the constant cast invertible?
|
|
159510
|
-
if (!
|
|
159991
|
+
if (!cast_constant.IsNull() &&
|
|
159992
|
+
!BoundCastExpression::CastIsInvertible(cast_expression->return_type, target_type)) {
|
|
159511
159993
|
// Is it actually invertible?
|
|
159512
159994
|
Value uncast_constant;
|
|
159513
159995
|
if (!cast_constant.DefaultTryCastAs(constant_value.type(), uncast_constant, &error_message, true) ||
|
|
@@ -188507,13 +188989,14 @@ protected:
|
|
|
188507
188989
|
|
|
188508
188990
|
|
|
188509
188991
|
|
|
188992
|
+
|
|
188510
188993
|
namespace duckdb {
|
|
188511
188994
|
|
|
188512
188995
|
//! The HAVING binder is responsible for binding an expression within the HAVING clause of a SQL statement
|
|
188513
188996
|
class HavingBinder : public SelectBinder {
|
|
188514
188997
|
public:
|
|
188515
188998
|
HavingBinder(Binder &binder, ClientContext &context, BoundSelectNode &node, BoundGroupInformation &info,
|
|
188516
|
-
case_insensitive_map_t<idx_t> &alias_map);
|
|
188999
|
+
case_insensitive_map_t<idx_t> &alias_map, AggregateHandling aggregate_handling);
|
|
188517
189000
|
|
|
188518
189001
|
protected:
|
|
188519
189002
|
BindResult BindExpression(unique_ptr<ParsedExpression> *expr_ptr, idx_t depth,
|
|
@@ -188523,6 +189006,7 @@ private:
|
|
|
188523
189006
|
BindResult BindColumnRef(unique_ptr<ParsedExpression> *expr_ptr, idx_t depth, bool root_expression);
|
|
188524
189007
|
|
|
188525
189008
|
ColumnAliasBinder column_alias_binder;
|
|
189009
|
+
AggregateHandling aggregate_handling;
|
|
188526
189010
|
};
|
|
188527
189011
|
|
|
188528
189012
|
} // namespace duckdb
|
|
@@ -188653,6 +189137,9 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
|
|
|
188653
189137
|
delimiter_value = ExpressionExecutor::EvaluateScalar(context, *expr).CastAs(context, type);
|
|
188654
189138
|
return nullptr;
|
|
188655
189139
|
}
|
|
189140
|
+
if (!new_binder->correlated_columns.empty()) {
|
|
189141
|
+
throw BinderException("Correlated columns not supported in LIMIT/OFFSET");
|
|
189142
|
+
}
|
|
188656
189143
|
// move any correlated columns to this binder
|
|
188657
189144
|
MoveCorrelatedExpressions(*new_binder);
|
|
188658
189145
|
return expr;
|
|
@@ -189018,16 +189505,22 @@ unique_ptr<BoundQueryNode> Binder::BindNode(SelectNode &statement) {
|
|
|
189018
189505
|
|
|
189019
189506
|
// bind the HAVING clause, if any
|
|
189020
189507
|
if (statement.having) {
|
|
189021
|
-
HavingBinder having_binder(*this, context, *result, info, alias_map);
|
|
189508
|
+
HavingBinder having_binder(*this, context, *result, info, alias_map, statement.aggregate_handling);
|
|
189022
189509
|
ExpressionBinder::QualifyColumnNames(*this, statement.having);
|
|
189023
189510
|
result->having = having_binder.Bind(statement.having);
|
|
189024
189511
|
}
|
|
189025
189512
|
|
|
189026
189513
|
// bind the QUALIFY clause, if any
|
|
189027
189514
|
if (statement.qualify) {
|
|
189515
|
+
if (statement.aggregate_handling == AggregateHandling::FORCE_AGGREGATES) {
|
|
189516
|
+
throw BinderException("Combining QUALIFY with GROUP BY ALL is not supported yet");
|
|
189517
|
+
}
|
|
189028
189518
|
QualifyBinder qualify_binder(*this, context, *result, info, alias_map);
|
|
189029
189519
|
ExpressionBinder::QualifyColumnNames(*this, statement.qualify);
|
|
189030
189520
|
result->qualify = qualify_binder.Bind(statement.qualify);
|
|
189521
|
+
if (qualify_binder.HasBoundColumns() && qualify_binder.BoundAggregates()) {
|
|
189522
|
+
throw BinderException("Cannot mix aggregates with non-aggregated columns!");
|
|
189523
|
+
}
|
|
189031
189524
|
}
|
|
189032
189525
|
|
|
189033
189526
|
// after that, we bind to the SELECT list
|
|
@@ -190230,7 +190723,7 @@ unique_ptr<Expression> Binder::PlanSubquery(BoundSubqueryExpression &expr, uniqu
|
|
|
190230
190723
|
D_ASSERT(root);
|
|
190231
190724
|
// first we translate the QueryNode of the subquery into a logical plan
|
|
190232
190725
|
// note that we do not plan nested subqueries yet
|
|
190233
|
-
auto sub_binder = Binder::CreateBinder(context);
|
|
190726
|
+
auto sub_binder = Binder::CreateBinder(context, this);
|
|
190234
190727
|
sub_binder->plan_subquery = false;
|
|
190235
190728
|
auto subquery_root = sub_binder->CreatePlan(*expr.subquery);
|
|
190236
190729
|
D_ASSERT(subquery_root);
|
|
@@ -190446,8 +190939,8 @@ BoundStatement Binder::BindCopyFrom(CopyStatement &stmt) {
|
|
|
190446
190939
|
|
|
190447
190940
|
auto function_data =
|
|
190448
190941
|
copy_function->function.copy_from_bind(context, *stmt.info, expected_names, bound_insert.expected_types);
|
|
190449
|
-
auto get = make_unique<LogicalGet>(
|
|
190450
|
-
bound_insert.expected_types, expected_names);
|
|
190942
|
+
auto get = make_unique<LogicalGet>(GenerateTableIndex(), copy_function->function.copy_from_function,
|
|
190943
|
+
move(function_data), bound_insert.expected_types, expected_names);
|
|
190451
190944
|
for (idx_t i = 0; i < bound_insert.expected_types.size(); i++) {
|
|
190452
190945
|
get->column_ids.push_back(i);
|
|
190453
190946
|
}
|
|
@@ -191126,6 +191619,7 @@ protected:
|
|
|
191126
191619
|
|
|
191127
191620
|
|
|
191128
191621
|
|
|
191622
|
+
|
|
191129
191623
|
#include <algorithm>
|
|
191130
191624
|
|
|
191131
191625
|
namespace duckdb {
|
|
@@ -191326,6 +191820,31 @@ void Binder::BindDefaultValues(ColumnList &columns, vector<unique_ptr<Expression
|
|
|
191326
191820
|
}
|
|
191327
191821
|
}
|
|
191328
191822
|
|
|
191823
|
+
static void ExtractExpressionDependencies(Expression &expr, unordered_set<CatalogEntry *> &dependencies) {
|
|
191824
|
+
if (expr.type == ExpressionType::BOUND_FUNCTION) {
|
|
191825
|
+
auto &function = (BoundFunctionExpression &)expr;
|
|
191826
|
+
if (function.function.dependency) {
|
|
191827
|
+
function.function.dependency(function, dependencies);
|
|
191828
|
+
}
|
|
191829
|
+
}
|
|
191830
|
+
ExpressionIterator::EnumerateChildren(
|
|
191831
|
+
expr, [&](Expression &child) { ExtractExpressionDependencies(child, dependencies); });
|
|
191832
|
+
}
|
|
191833
|
+
|
|
191834
|
+
static void ExtractDependencies(BoundCreateTableInfo &info) {
|
|
191835
|
+
for (auto &default_value : info.bound_defaults) {
|
|
191836
|
+
if (default_value) {
|
|
191837
|
+
ExtractExpressionDependencies(*default_value, info.dependencies);
|
|
191838
|
+
}
|
|
191839
|
+
}
|
|
191840
|
+
for (auto &constraint : info.bound_constraints) {
|
|
191841
|
+
if (constraint->type == ConstraintType::CHECK) {
|
|
191842
|
+
auto &bound_check = (BoundCheckConstraint &)*constraint;
|
|
191843
|
+
ExtractExpressionDependencies(*bound_check.expression, info.dependencies);
|
|
191844
|
+
}
|
|
191845
|
+
}
|
|
191846
|
+
}
|
|
191847
|
+
|
|
191329
191848
|
unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateInfo> info) {
|
|
191330
191849
|
auto &base = (CreateTableInfo &)*info;
|
|
191331
191850
|
|
|
@@ -191356,6 +191875,8 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
|
|
|
191356
191875
|
// bind the default values
|
|
191357
191876
|
BindDefaultValues(base.columns, result->bound_defaults);
|
|
191358
191877
|
}
|
|
191878
|
+
// extract dependencies from any default values or CHECK constraints
|
|
191879
|
+
ExtractDependencies(*result);
|
|
191359
191880
|
|
|
191360
191881
|
if (base.columns.PhysicalColumnCount() == 0) {
|
|
191361
191882
|
throw BinderException("Creating a table without physical (non-generated) columns is not supported");
|
|
@@ -191449,7 +191970,8 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) {
|
|
|
191449
191970
|
unique_ptr<LogicalOperator> child_operator;
|
|
191450
191971
|
for (auto &using_clause : stmt.using_clauses) {
|
|
191451
191972
|
// bind the using clause
|
|
191452
|
-
auto
|
|
191973
|
+
auto using_binder = Binder::CreateBinder(context, this);
|
|
191974
|
+
auto bound_node = using_binder->Bind(*using_clause);
|
|
191453
191975
|
auto op = CreatePlan(*bound_node);
|
|
191454
191976
|
if (child_operator) {
|
|
191455
191977
|
// already bound a child: create a cross product to unify the two
|
|
@@ -191457,6 +191979,7 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) {
|
|
|
191457
191979
|
} else {
|
|
191458
191980
|
child_operator = move(op);
|
|
191459
191981
|
}
|
|
191982
|
+
bind_context.AddContext(move(using_binder->bind_context));
|
|
191460
191983
|
}
|
|
191461
191984
|
if (child_operator) {
|
|
191462
191985
|
root = LogicalCrossProduct::Create(move(root), move(child_operator));
|
|
@@ -191475,7 +191998,7 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) {
|
|
|
191475
191998
|
root = move(filter);
|
|
191476
191999
|
}
|
|
191477
192000
|
// create the delete node
|
|
191478
|
-
auto del = make_unique<LogicalDelete>(table);
|
|
192001
|
+
auto del = make_unique<LogicalDelete>(table, GenerateTableIndex());
|
|
191479
192002
|
del->AddChild(move(root));
|
|
191480
192003
|
|
|
191481
192004
|
// set up the delete expression
|
|
@@ -191603,6 +192126,7 @@ BoundStatement Binder::Bind(ExecuteStatement &stmt) {
|
|
|
191603
192126
|
prepared = prepared_planner.PrepareSQLStatement(entry->second->unbound_statement->Copy());
|
|
191604
192127
|
rebound_plan = move(prepared_planner.plan);
|
|
191605
192128
|
D_ASSERT(prepared->properties.bound_all_parameters);
|
|
192129
|
+
this->bound_tables = prepared_planner.binder->bound_tables;
|
|
191606
192130
|
}
|
|
191607
192131
|
// copy the properties of the prepared statement into the planner
|
|
191608
192132
|
this->properties = prepared->properties;
|
|
@@ -191824,7 +192348,7 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
|
|
|
191824
192348
|
CopyStatement copy_stmt;
|
|
191825
192349
|
copy_stmt.info = move(info);
|
|
191826
192350
|
|
|
191827
|
-
auto copy_binder = Binder::CreateBinder(context);
|
|
192351
|
+
auto copy_binder = Binder::CreateBinder(context, this);
|
|
191828
192352
|
auto bound_statement = copy_binder->Bind(copy_stmt);
|
|
191829
192353
|
if (child_operator) {
|
|
191830
192354
|
// use UNION ALL to combine the individual copy statements into a single node
|
|
@@ -191954,7 +192478,7 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
|
|
|
191954
192478
|
properties.read_only = false;
|
|
191955
192479
|
}
|
|
191956
192480
|
|
|
191957
|
-
auto insert = make_unique<LogicalInsert>(table);
|
|
192481
|
+
auto insert = make_unique<LogicalInsert>(table, GenerateTableIndex());
|
|
191958
192482
|
|
|
191959
192483
|
// Add CTEs as bindable
|
|
191960
192484
|
AddCTEMap(stmt.cte_map);
|
|
@@ -192158,6 +192682,7 @@ namespace duckdb {
|
|
|
192158
192682
|
BoundStatement Binder::Bind(PrepareStatement &stmt) {
|
|
192159
192683
|
Planner prepared_planner(context);
|
|
192160
192684
|
auto prepared_data = prepared_planner.PrepareSQLStatement(move(stmt.statement));
|
|
192685
|
+
this->bound_tables = prepared_planner.binder->bound_tables;
|
|
192161
192686
|
|
|
192162
192687
|
auto prepare = make_unique<LogicalPrepare>(stmt.name, move(prepared_data), move(prepared_planner.plan));
|
|
192163
192688
|
// we can prepare in read-only mode: prepared statements are not written to the catalog
|
|
@@ -192284,7 +192809,7 @@ BoundStatement Binder::Bind(AlterStatement &stmt) {
|
|
|
192284
192809
|
|
|
192285
192810
|
BoundStatement Binder::Bind(TransactionStatement &stmt) {
|
|
192286
192811
|
// transaction statements do not require a valid transaction
|
|
192287
|
-
properties.requires_valid_transaction =
|
|
192812
|
+
properties.requires_valid_transaction = stmt.info->type == TransactionType::BEGIN_TRANSACTION;
|
|
192288
192813
|
|
|
192289
192814
|
BoundStatement result;
|
|
192290
192815
|
result.names = {"Success"};
|
|
@@ -192578,6 +193103,13 @@ static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, Log
|
|
|
192578
193103
|
BindExtraColumns(table, get, proj, update, check.bound_columns);
|
|
192579
193104
|
}
|
|
192580
193105
|
}
|
|
193106
|
+
if (update.return_chunk) {
|
|
193107
|
+
physical_index_set_t all_columns;
|
|
193108
|
+
for (idx_t i = 0; i < table.storage->column_definitions.size(); i++) {
|
|
193109
|
+
all_columns.insert(PhysicalIndex(i));
|
|
193110
|
+
}
|
|
193111
|
+
BindExtraColumns(table, get, proj, update, all_columns);
|
|
193112
|
+
}
|
|
192581
193113
|
// for index updates we always turn any update into an insert and a delete
|
|
192582
193114
|
// we thus need all the columns to be available, hence we check if the update touches any index columns
|
|
192583
193115
|
// If the returning keyword is used, we need access to the whole row in case the user requests it.
|
|
@@ -192600,7 +193132,7 @@ static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, Log
|
|
|
192600
193132
|
}
|
|
192601
193133
|
}
|
|
192602
193134
|
|
|
192603
|
-
if (update.update_is_del_and_insert
|
|
193135
|
+
if (update.update_is_del_and_insert) {
|
|
192604
193136
|
// the update updates a column required by an index or requires returning the updated rows,
|
|
192605
193137
|
// push projections for all columns
|
|
192606
193138
|
physical_index_set_t all_columns;
|
|
@@ -192711,16 +193243,15 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) {
|
|
|
192711
193243
|
// set the projection as child of the update node and finalize the result
|
|
192712
193244
|
update->AddChild(move(proj));
|
|
192713
193245
|
|
|
193246
|
+
auto update_table_index = GenerateTableIndex();
|
|
193247
|
+
update->table_index = update_table_index;
|
|
192714
193248
|
if (!stmt.returning_list.empty()) {
|
|
192715
|
-
auto update_table_index = GenerateTableIndex();
|
|
192716
|
-
update->table_index = update_table_index;
|
|
192717
193249
|
unique_ptr<LogicalOperator> update_as_logicaloperator = move(update);
|
|
192718
193250
|
|
|
192719
193251
|
return BindReturning(move(stmt.returning_list), table, update_table_index, move(update_as_logicaloperator),
|
|
192720
193252
|
move(result));
|
|
192721
193253
|
}
|
|
192722
193254
|
|
|
192723
|
-
update->table_index = 0;
|
|
192724
193255
|
result.names = {"Count"};
|
|
192725
193256
|
result.types = {LogicalType::BIGINT};
|
|
192726
193257
|
result.plan = move(update);
|
|
@@ -193046,6 +193577,9 @@ unique_ptr<BoundTableRef> Binder::Bind(BaseTableRef &ref) {
|
|
|
193046
193577
|
// bind the child subquery
|
|
193047
193578
|
view_binder->AddBoundView(view_catalog_entry);
|
|
193048
193579
|
auto bound_child = view_binder->Bind(subquery);
|
|
193580
|
+
if (!view_binder->correlated_columns.empty()) {
|
|
193581
|
+
throw BinderException("Contents of view were altered - view bound correlated columns");
|
|
193582
|
+
}
|
|
193049
193583
|
|
|
193050
193584
|
D_ASSERT(bound_child->type == TableReferenceType::SUBQUERY);
|
|
193051
193585
|
// verify that the types and names match up with the expected types and names
|
|
@@ -193557,6 +194091,33 @@ unique_ptr<BoundTableRef> Binder::Bind(SubqueryRef &ref, CommonTableExpressionIn
|
|
|
193557
194091
|
|
|
193558
194092
|
|
|
193559
194093
|
|
|
194094
|
+
//===----------------------------------------------------------------------===//
|
|
194095
|
+
// DuckDB
|
|
194096
|
+
//
|
|
194097
|
+
// duckdb/planner/expression_binder/table_function_binder.hpp
|
|
194098
|
+
//
|
|
194099
|
+
//
|
|
194100
|
+
//===----------------------------------------------------------------------===//
|
|
194101
|
+
|
|
194102
|
+
|
|
194103
|
+
|
|
194104
|
+
|
|
194105
|
+
|
|
194106
|
+
namespace duckdb {
|
|
194107
|
+
|
|
194108
|
+
//! The Table function binder can bind standard table function parameters (i.e. non-table-in-out functions)
|
|
194109
|
+
class TableFunctionBinder : public ExpressionBinder {
|
|
194110
|
+
public:
|
|
194111
|
+
TableFunctionBinder(Binder &binder, ClientContext &context);
|
|
194112
|
+
|
|
194113
|
+
protected:
|
|
194114
|
+
BindResult BindColumnReference(ColumnRefExpression &expr);
|
|
194115
|
+
BindResult BindExpression(unique_ptr<ParsedExpression> *expr, idx_t depth, bool root_expression = false) override;
|
|
194116
|
+
|
|
194117
|
+
string UnsupportedAggregateMessage() override;
|
|
194118
|
+
};
|
|
194119
|
+
|
|
194120
|
+
} // namespace duckdb
|
|
193560
194121
|
|
|
193561
194122
|
|
|
193562
194123
|
|
|
@@ -193633,17 +194194,17 @@ bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_functi
|
|
|
193633
194194
|
continue;
|
|
193634
194195
|
}
|
|
193635
194196
|
|
|
193636
|
-
|
|
194197
|
+
TableFunctionBinder binder(*this, context);
|
|
193637
194198
|
LogicalType sql_type;
|
|
193638
194199
|
auto expr = binder.Bind(child, &sql_type);
|
|
193639
194200
|
if (expr->HasParameter()) {
|
|
193640
194201
|
throw ParameterNotResolvedException();
|
|
193641
194202
|
}
|
|
193642
|
-
if (!expr->
|
|
194203
|
+
if (!expr->IsScalar()) {
|
|
193643
194204
|
error = "Table function requires a constant parameter";
|
|
193644
194205
|
return false;
|
|
193645
194206
|
}
|
|
193646
|
-
auto constant = ExpressionExecutor::EvaluateScalar(context, *expr);
|
|
194207
|
+
auto constant = ExpressionExecutor::EvaluateScalar(context, *expr, true);
|
|
193647
194208
|
if (parameter_name.empty()) {
|
|
193648
194209
|
// unnamed parameter
|
|
193649
194210
|
if (!named_parameters.empty()) {
|
|
@@ -196596,8 +197157,9 @@ BindResult GroupBinder::BindColumnRef(ColumnRefExpression &colref) {
|
|
|
196596
197157
|
namespace duckdb {
|
|
196597
197158
|
|
|
196598
197159
|
HavingBinder::HavingBinder(Binder &binder, ClientContext &context, BoundSelectNode &node, BoundGroupInformation &info,
|
|
196599
|
-
case_insensitive_map_t<idx_t> &alias_map)
|
|
196600
|
-
: SelectBinder(binder, context, node, info), column_alias_binder(node, alias_map)
|
|
197160
|
+
case_insensitive_map_t<idx_t> &alias_map, AggregateHandling aggregate_handling)
|
|
197161
|
+
: SelectBinder(binder, context, node, info), column_alias_binder(node, alias_map),
|
|
197162
|
+
aggregate_handling(aggregate_handling) {
|
|
196601
197163
|
target_type = LogicalType(LogicalTypeId::BOOLEAN);
|
|
196602
197164
|
}
|
|
196603
197165
|
|
|
@@ -196607,7 +197169,16 @@ BindResult HavingBinder::BindColumnRef(unique_ptr<ParsedExpression> *expr_ptr, i
|
|
|
196607
197169
|
if (!alias_result.HasError()) {
|
|
196608
197170
|
return alias_result;
|
|
196609
197171
|
}
|
|
196610
|
-
|
|
197172
|
+
if (aggregate_handling == AggregateHandling::FORCE_AGGREGATES) {
|
|
197173
|
+
auto expr = duckdb::SelectBinder::BindExpression(expr_ptr, depth);
|
|
197174
|
+
if (expr.HasError()) {
|
|
197175
|
+
return expr;
|
|
197176
|
+
}
|
|
197177
|
+
auto group_ref = make_unique<BoundColumnRefExpression>(
|
|
197178
|
+
expr.expression->return_type, ColumnBinding(node.group_index, node.groups.group_expressions.size()));
|
|
197179
|
+
node.groups.group_expressions.push_back(move(expr.expression));
|
|
197180
|
+
return BindResult(move(group_ref));
|
|
197181
|
+
}
|
|
196611
197182
|
return BindResult(StringUtil::Format(
|
|
196612
197183
|
"column %s must appear in the GROUP BY clause or be used in an aggregate function", expr.ToString()));
|
|
196613
197184
|
}
|
|
@@ -197089,6 +197660,42 @@ BindResult SelectBinder::BindGroup(ParsedExpression &expr, idx_t depth, idx_t gr
|
|
|
197089
197660
|
} // namespace duckdb
|
|
197090
197661
|
|
|
197091
197662
|
|
|
197663
|
+
|
|
197664
|
+
|
|
197665
|
+
namespace duckdb {
|
|
197666
|
+
|
|
197667
|
+
TableFunctionBinder::TableFunctionBinder(Binder &binder, ClientContext &context) : ExpressionBinder(binder, context) {
|
|
197668
|
+
}
|
|
197669
|
+
|
|
197670
|
+
BindResult TableFunctionBinder::BindColumnReference(ColumnRefExpression &expr) {
|
|
197671
|
+
auto result_name = StringUtil::Join(expr.column_names, ".");
|
|
197672
|
+
return BindResult(make_unique<BoundConstantExpression>(Value(result_name)));
|
|
197673
|
+
}
|
|
197674
|
+
|
|
197675
|
+
BindResult TableFunctionBinder::BindExpression(unique_ptr<ParsedExpression> *expr_ptr, idx_t depth,
|
|
197676
|
+
bool root_expression) {
|
|
197677
|
+
auto &expr = **expr_ptr;
|
|
197678
|
+
switch (expr.GetExpressionClass()) {
|
|
197679
|
+
case ExpressionClass::COLUMN_REF:
|
|
197680
|
+
return BindColumnReference((ColumnRefExpression &)expr);
|
|
197681
|
+
case ExpressionClass::SUBQUERY:
|
|
197682
|
+
throw BinderException("Table function cannot contain subqueries");
|
|
197683
|
+
case ExpressionClass::DEFAULT:
|
|
197684
|
+
return BindResult("Table function cannot contain DEFAULT clause");
|
|
197685
|
+
case ExpressionClass::WINDOW:
|
|
197686
|
+
return BindResult("Table function cannot contain window functions!");
|
|
197687
|
+
default:
|
|
197688
|
+
return ExpressionBinder::BindExpression(expr_ptr, depth);
|
|
197689
|
+
}
|
|
197690
|
+
}
|
|
197691
|
+
|
|
197692
|
+
string TableFunctionBinder::UnsupportedAggregateMessage() {
|
|
197693
|
+
return "Table function cannot contain aggregates!";
|
|
197694
|
+
}
|
|
197695
|
+
|
|
197696
|
+
} // namespace duckdb
|
|
197697
|
+
|
|
197698
|
+
|
|
197092
197699
|
namespace duckdb {
|
|
197093
197700
|
|
|
197094
197701
|
UpdateBinder::UpdateBinder(Binder &binder, ClientContext &context) : ExpressionBinder(binder, context) {
|
|
@@ -198023,6 +198630,7 @@ JoinSide JoinSide::GetJoinSide(const unordered_set<idx_t> &bindings, unordered_s
|
|
|
198023
198630
|
|
|
198024
198631
|
|
|
198025
198632
|
|
|
198633
|
+
|
|
198026
198634
|
namespace duckdb {
|
|
198027
198635
|
|
|
198028
198636
|
const uint64_t PLAN_SERIALIZATION_VERSION = 1;
|
|
@@ -198354,7 +198962,8 @@ unique_ptr<LogicalOperator> LogicalOperator::Deserialize(Deserializer &deseriali
|
|
|
198354
198962
|
result = LogicalSimple::Deserialize(state, reader);
|
|
198355
198963
|
break;
|
|
198356
198964
|
case LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR:
|
|
198357
|
-
|
|
198965
|
+
result = LogicalExtensionOperator::Deserialize(state, reader);
|
|
198966
|
+
break;
|
|
198358
198967
|
case LogicalOperatorType::LOGICAL_INVALID:
|
|
198359
198968
|
/* no default here to trigger a warning if we forget to implement deserialize for a new operator */
|
|
198360
198969
|
throw SerializationException("Invalid type for operator deserialization");
|
|
@@ -198366,6 +198975,10 @@ unique_ptr<LogicalOperator> LogicalOperator::Deserialize(Deserializer &deseriali
|
|
|
198366
198975
|
return result;
|
|
198367
198976
|
}
|
|
198368
198977
|
|
|
198978
|
+
vector<idx_t> LogicalOperator::GetTableIndex() const {
|
|
198979
|
+
return vector<idx_t> {};
|
|
198980
|
+
}
|
|
198981
|
+
|
|
198369
198982
|
unique_ptr<LogicalOperator> LogicalOperator::Copy(ClientContext &context) const {
|
|
198370
198983
|
BufferedSerializer logical_op_serializer;
|
|
198371
198984
|
try {
|
|
@@ -198757,6 +199370,14 @@ idx_t LogicalAggregate::EstimateCardinality(ClientContext &context) {
|
|
|
198757
199370
|
return LogicalOperator::EstimateCardinality(context);
|
|
198758
199371
|
}
|
|
198759
199372
|
|
|
199373
|
+
vector<idx_t> LogicalAggregate::GetTableIndex() const {
|
|
199374
|
+
vector<idx_t> result {group_index, aggregate_index};
|
|
199375
|
+
if (groupings_index != DConstants::INVALID_INDEX) {
|
|
199376
|
+
result.push_back(groupings_index);
|
|
199377
|
+
}
|
|
199378
|
+
return result;
|
|
199379
|
+
}
|
|
199380
|
+
|
|
198760
199381
|
} // namespace duckdb
|
|
198761
199382
|
|
|
198762
199383
|
|
|
@@ -198823,6 +199444,10 @@ unique_ptr<LogicalOperator> LogicalColumnDataGet::Deserialize(LogicalDeserializa
|
|
|
198823
199444
|
return make_unique<LogicalColumnDataGet>(table_index, move(chunk_types), move(collection));
|
|
198824
199445
|
}
|
|
198825
199446
|
|
|
199447
|
+
vector<idx_t> LogicalColumnDataGet::GetTableIndex() const {
|
|
199448
|
+
return vector<idx_t> {table_index};
|
|
199449
|
+
}
|
|
199450
|
+
|
|
198826
199451
|
} // namespace duckdb
|
|
198827
199452
|
|
|
198828
199453
|
|
|
@@ -199087,6 +199712,10 @@ unique_ptr<LogicalOperator> LogicalCTERef::Deserialize(LogicalDeserializationSta
|
|
|
199087
199712
|
return make_unique<LogicalCTERef>(table_index, cte_index, chunk_types, bound_columns);
|
|
199088
199713
|
}
|
|
199089
199714
|
|
|
199715
|
+
vector<idx_t> LogicalCTERef::GetTableIndex() const {
|
|
199716
|
+
return vector<idx_t> {table_index};
|
|
199717
|
+
}
|
|
199718
|
+
|
|
199090
199719
|
} // namespace duckdb
|
|
199091
199720
|
|
|
199092
199721
|
|
|
@@ -199107,8 +199736,8 @@ unique_ptr<LogicalOperator> LogicalDelete::Deserialize(LogicalDeserializationSta
|
|
|
199107
199736
|
|
|
199108
199737
|
TableCatalogEntry *table_catalog_entry = catalog.GetEntry<TableCatalogEntry>(context, info->schema, info->table);
|
|
199109
199738
|
|
|
199110
|
-
auto
|
|
199111
|
-
result
|
|
199739
|
+
auto table_index = reader.ReadRequired<idx_t>();
|
|
199740
|
+
auto result = make_unique<LogicalDelete>(table_catalog_entry, table_index);
|
|
199112
199741
|
result->return_chunk = reader.ReadRequired<bool>();
|
|
199113
199742
|
return move(result);
|
|
199114
199743
|
}
|
|
@@ -199117,6 +199746,10 @@ idx_t LogicalDelete::EstimateCardinality(ClientContext &context) {
|
|
|
199117
199746
|
return return_chunk ? LogicalOperator::EstimateCardinality(context) : 1;
|
|
199118
199747
|
}
|
|
199119
199748
|
|
|
199749
|
+
vector<idx_t> LogicalDelete::GetTableIndex() const {
|
|
199750
|
+
return vector<idx_t> {table_index};
|
|
199751
|
+
}
|
|
199752
|
+
|
|
199120
199753
|
} // namespace duckdb
|
|
199121
199754
|
|
|
199122
199755
|
|
|
@@ -199134,6 +199767,10 @@ unique_ptr<LogicalOperator> LogicalDelimGet::Deserialize(LogicalDeserializationS
|
|
|
199134
199767
|
return make_unique<LogicalDelimGet>(table_index, chunk_types);
|
|
199135
199768
|
}
|
|
199136
199769
|
|
|
199770
|
+
vector<idx_t> LogicalDelimGet::GetTableIndex() const {
|
|
199771
|
+
return vector<idx_t> {table_index};
|
|
199772
|
+
}
|
|
199773
|
+
|
|
199137
199774
|
} // namespace duckdb
|
|
199138
199775
|
|
|
199139
199776
|
|
|
@@ -199201,6 +199838,10 @@ unique_ptr<LogicalOperator> LogicalDummyScan::Deserialize(LogicalDeserialization
|
|
|
199201
199838
|
return make_unique<LogicalDummyScan>(table_index);
|
|
199202
199839
|
}
|
|
199203
199840
|
|
|
199841
|
+
vector<idx_t> LogicalDummyScan::GetTableIndex() const {
|
|
199842
|
+
return vector<idx_t> {table_index};
|
|
199843
|
+
}
|
|
199844
|
+
|
|
199204
199845
|
} // namespace duckdb
|
|
199205
199846
|
|
|
199206
199847
|
|
|
@@ -199309,6 +199950,28 @@ unique_ptr<LogicalOperator> LogicalExpressionGet::Deserialize(LogicalDeserializa
|
|
|
199309
199950
|
return make_unique<LogicalExpressionGet>(table_index, expr_types, move(expressions));
|
|
199310
199951
|
}
|
|
199311
199952
|
|
|
199953
|
+
vector<idx_t> LogicalExpressionGet::GetTableIndex() const {
|
|
199954
|
+
return vector<idx_t> {table_index};
|
|
199955
|
+
}
|
|
199956
|
+
|
|
199957
|
+
} // namespace duckdb
|
|
199958
|
+
|
|
199959
|
+
|
|
199960
|
+
|
|
199961
|
+
namespace duckdb {
|
|
199962
|
+
unique_ptr<LogicalExtensionOperator> LogicalExtensionOperator::Deserialize(LogicalDeserializationState &state,
|
|
199963
|
+
FieldReader &reader) {
|
|
199964
|
+
auto &config = DBConfig::GetConfig(state.gstate.context);
|
|
199965
|
+
|
|
199966
|
+
auto extension_name = reader.ReadRequired<std::string>();
|
|
199967
|
+
for (auto &extension : config.operator_extensions) {
|
|
199968
|
+
if (extension->GetName() == extension_name) {
|
|
199969
|
+
return extension->Deserialize(state, reader);
|
|
199970
|
+
}
|
|
199971
|
+
}
|
|
199972
|
+
|
|
199973
|
+
throw SerializationException("No serialization method exists for extension: " + extension_name);
|
|
199974
|
+
}
|
|
199312
199975
|
} // namespace duckdb
|
|
199313
199976
|
|
|
199314
199977
|
|
|
@@ -199546,6 +200209,10 @@ unique_ptr<LogicalOperator> LogicalGet::Deserialize(LogicalDeserializationState
|
|
|
199546
200209
|
return move(result);
|
|
199547
200210
|
}
|
|
199548
200211
|
|
|
200212
|
+
vector<idx_t> LogicalGet::GetTableIndex() const {
|
|
200213
|
+
return vector<idx_t> {table_index};
|
|
200214
|
+
}
|
|
200215
|
+
|
|
199549
200216
|
} // namespace duckdb
|
|
199550
200217
|
|
|
199551
200218
|
|
|
@@ -199591,10 +200258,9 @@ unique_ptr<LogicalOperator> LogicalInsert::Deserialize(LogicalDeserializationSta
|
|
|
199591
200258
|
throw InternalException("Cant find catalog entry for table %s", info->table);
|
|
199592
200259
|
}
|
|
199593
200260
|
|
|
199594
|
-
auto result = make_unique<LogicalInsert>(table_catalog_entry);
|
|
200261
|
+
auto result = make_unique<LogicalInsert>(table_catalog_entry, table_index);
|
|
199595
200262
|
result->type = state.type;
|
|
199596
200263
|
result->table = table_catalog_entry;
|
|
199597
|
-
result->table_index = table_index;
|
|
199598
200264
|
result->return_chunk = return_chunk;
|
|
199599
200265
|
result->insert_values = move(insert_values);
|
|
199600
200266
|
result->column_index_map = column_index_map;
|
|
@@ -199607,6 +200273,10 @@ idx_t LogicalInsert::EstimateCardinality(ClientContext &context) {
|
|
|
199607
200273
|
return return_chunk ? LogicalOperator::EstimateCardinality(context) : 1;
|
|
199608
200274
|
}
|
|
199609
200275
|
|
|
200276
|
+
vector<idx_t> LogicalInsert::GetTableIndex() const {
|
|
200277
|
+
return vector<idx_t> {table_index};
|
|
200278
|
+
}
|
|
200279
|
+
|
|
199610
200280
|
} // namespace duckdb
|
|
199611
200281
|
|
|
199612
200282
|
|
|
@@ -199843,6 +200513,10 @@ unique_ptr<LogicalOperator> LogicalProjection::Deserialize(LogicalDeserializatio
|
|
|
199843
200513
|
return make_unique<LogicalProjection>(table_index, move(expressions));
|
|
199844
200514
|
}
|
|
199845
200515
|
|
|
200516
|
+
vector<idx_t> LogicalProjection::GetTableIndex() const {
|
|
200517
|
+
return vector<idx_t> {table_index};
|
|
200518
|
+
}
|
|
200519
|
+
|
|
199846
200520
|
} // namespace duckdb
|
|
199847
200521
|
|
|
199848
200522
|
|
|
@@ -199863,6 +200537,10 @@ unique_ptr<LogicalOperator> LogicalRecursiveCTE::Deserialize(LogicalDeserializat
|
|
|
199863
200537
|
return unique_ptr<LogicalRecursiveCTE>(new LogicalRecursiveCTE(table_index, column_count, union_all, state.type));
|
|
199864
200538
|
}
|
|
199865
200539
|
|
|
200540
|
+
vector<idx_t> LogicalRecursiveCTE::GetTableIndex() const {
|
|
200541
|
+
return vector<idx_t> {table_index};
|
|
200542
|
+
}
|
|
200543
|
+
|
|
199866
200544
|
} // namespace duckdb
|
|
199867
200545
|
|
|
199868
200546
|
|
|
@@ -199881,7 +200559,12 @@ vector<ColumnBinding> LogicalSample::GetColumnBindings() {
|
|
|
199881
200559
|
idx_t LogicalSample::EstimateCardinality(ClientContext &context) {
|
|
199882
200560
|
auto child_cardinality = children[0]->EstimateCardinality(context);
|
|
199883
200561
|
if (sample_options->is_percentage) {
|
|
199884
|
-
|
|
200562
|
+
double sample_cardinality =
|
|
200563
|
+
double(child_cardinality) * (sample_options->sample_size.GetValue<double>() / 100.0);
|
|
200564
|
+
if (sample_cardinality > double(child_cardinality)) {
|
|
200565
|
+
return child_cardinality;
|
|
200566
|
+
}
|
|
200567
|
+
return idx_t(sample_cardinality);
|
|
199885
200568
|
} else {
|
|
199886
200569
|
auto sample_size = sample_options->sample_size.GetValue<uint64_t>();
|
|
199887
200570
|
if (sample_size < child_cardinality) {
|
|
@@ -199945,6 +200628,11 @@ unique_ptr<LogicalOperator> LogicalSetOperation::Deserialize(LogicalDeserializat
|
|
|
199945
200628
|
// TODO(stephwang): review if unique_ptr<LogicalOperator> plan is needed
|
|
199946
200629
|
return unique_ptr<LogicalSetOperation>(new LogicalSetOperation(table_index, column_count, state.type));
|
|
199947
200630
|
}
|
|
200631
|
+
|
|
200632
|
+
vector<idx_t> LogicalSetOperation::GetTableIndex() const {
|
|
200633
|
+
return vector<idx_t> {table_index};
|
|
200634
|
+
}
|
|
200635
|
+
|
|
199948
200636
|
} // namespace duckdb
|
|
199949
200637
|
|
|
199950
200638
|
|
|
@@ -200043,6 +200731,11 @@ unique_ptr<LogicalOperator> LogicalUnnest::Deserialize(LogicalDeserializationSta
|
|
|
200043
200731
|
result->expressions = move(expressions);
|
|
200044
200732
|
return move(result);
|
|
200045
200733
|
}
|
|
200734
|
+
|
|
200735
|
+
vector<idx_t> LogicalUnnest::GetTableIndex() const {
|
|
200736
|
+
return vector<idx_t> {unnest_index};
|
|
200737
|
+
}
|
|
200738
|
+
|
|
200046
200739
|
} // namespace duckdb
|
|
200047
200740
|
|
|
200048
200741
|
|
|
@@ -200117,6 +200810,10 @@ unique_ptr<LogicalOperator> LogicalWindow::Deserialize(LogicalDeserializationSta
|
|
|
200117
200810
|
return move(result);
|
|
200118
200811
|
}
|
|
200119
200812
|
|
|
200813
|
+
vector<idx_t> LogicalWindow::GetTableIndex() const {
|
|
200814
|
+
return vector<idx_t> {window_index};
|
|
200815
|
+
}
|
|
200816
|
+
|
|
200120
200817
|
} // namespace duckdb
|
|
200121
200818
|
|
|
200122
200819
|
|
|
@@ -200249,7 +200946,7 @@ void Planner::CreatePlan(SQLStatement &statement) {
|
|
|
200249
200946
|
this->plan = nullptr;
|
|
200250
200947
|
for (auto &extension_op : config.operator_extensions) {
|
|
200251
200948
|
auto bound_statement =
|
|
200252
|
-
extension_op
|
|
200949
|
+
extension_op->Bind(context, *this->binder, extension_op->operator_info.get(), statement);
|
|
200253
200950
|
if (bound_statement.plan != nullptr) {
|
|
200254
200951
|
this->names = bound_statement.names;
|
|
200255
200952
|
this->types = bound_statement.types;
|
|
@@ -200648,10 +201345,13 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
|
|
|
200648
201345
|
// we reached a node without correlated expressions
|
|
200649
201346
|
// we can eliminate the dependent join now and create a simple cross product
|
|
200650
201347
|
// now create the duplicate eliminated scan for this node
|
|
201348
|
+
auto left_columns = plan->GetColumnBindings().size();
|
|
200651
201349
|
auto delim_index = binder.GenerateTableIndex();
|
|
200652
201350
|
this->base_binding = ColumnBinding(delim_index, 0);
|
|
201351
|
+
this->delim_offset = 0;
|
|
201352
|
+
this->data_offset = left_columns;
|
|
200653
201353
|
auto delim_scan = make_unique<LogicalDelimGet>(delim_index, delim_types);
|
|
200654
|
-
return LogicalCrossProduct::Create(move(
|
|
201354
|
+
return LogicalCrossProduct::Create(move(plan), move(delim_scan));
|
|
200655
201355
|
}
|
|
200656
201356
|
switch (plan->type) {
|
|
200657
201357
|
case LogicalOperatorType::LOGICAL_UNNEST:
|
|
@@ -201015,8 +201715,19 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
|
|
|
201015
201715
|
case LogicalOperatorType::LOGICAL_UNION: {
|
|
201016
201716
|
auto &setop = (LogicalSetOperation &)*plan;
|
|
201017
201717
|
// set operator, push into both children
|
|
201718
|
+
#ifdef DEBUG
|
|
201719
|
+
plan->children[0]->ResolveOperatorTypes();
|
|
201720
|
+
plan->children[1]->ResolveOperatorTypes();
|
|
201721
|
+
D_ASSERT(plan->children[0]->types == plan->children[1]->types);
|
|
201722
|
+
#endif
|
|
201018
201723
|
plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
|
|
201019
201724
|
plan->children[1] = PushDownDependentJoin(move(plan->children[1]));
|
|
201725
|
+
#ifdef DEBUG
|
|
201726
|
+
D_ASSERT(plan->children[0]->GetColumnBindings().size() == plan->children[1]->GetColumnBindings().size());
|
|
201727
|
+
plan->children[0]->ResolveOperatorTypes();
|
|
201728
|
+
plan->children[1]->ResolveOperatorTypes();
|
|
201729
|
+
D_ASSERT(plan->children[0]->types == plan->children[1]->types);
|
|
201730
|
+
#endif
|
|
201020
201731
|
// we have to refer to the setop index now
|
|
201021
201732
|
base_binding.table_index = setop.table_index;
|
|
201022
201733
|
base_binding.column_index = setop.column_count;
|
|
@@ -201924,6 +202635,7 @@ BlockHandle::~BlockHandle() {
|
|
|
201924
202635
|
} else {
|
|
201925
202636
|
D_ASSERT(memory_charge.size == 0);
|
|
201926
202637
|
}
|
|
202638
|
+
buffer_manager.PurgeQueue();
|
|
201927
202639
|
block_manager.UnregisterBlock(block_id, can_destroy);
|
|
201928
202640
|
}
|
|
201929
202641
|
|
|
@@ -201950,7 +202662,7 @@ unique_ptr<FileBuffer> BufferManager::ConstructManagedBuffer(idx_t size, unique_
|
|
|
201950
202662
|
FileBufferType type) {
|
|
201951
202663
|
if (source) {
|
|
201952
202664
|
auto tmp = move(source);
|
|
201953
|
-
D_ASSERT(tmp->
|
|
202665
|
+
D_ASSERT(tmp->AllocSize() == BufferManager::GetAllocSize(size));
|
|
201954
202666
|
return make_unique<FileBuffer>(*tmp, type);
|
|
201955
202667
|
} else {
|
|
201956
202668
|
// no re-usable buffer: allocate a new buffer
|
|
@@ -202085,7 +202797,7 @@ void BufferManager::SetTemporaryDirectory(string new_dir) {
|
|
|
202085
202797
|
|
|
202086
202798
|
BufferManager::BufferManager(DatabaseInstance &db, string tmp, idx_t maximum_memory)
|
|
202087
202799
|
: db(db), current_memory(0), maximum_memory(maximum_memory), temp_directory(move(tmp)),
|
|
202088
|
-
queue(make_unique<EvictionQueue>()), temporary_id(MAXIMUM_BLOCK),
|
|
202800
|
+
queue(make_unique<EvictionQueue>()), temporary_id(MAXIMUM_BLOCK), queue_insertions(0),
|
|
202089
202801
|
buffer_allocator(BufferAllocatorAllocate, BufferAllocatorFree, BufferAllocatorRealloc,
|
|
202090
202802
|
make_unique<BufferAllocatorData>(*this)) {
|
|
202091
202803
|
temp_block_manager = make_unique<InMemoryBlockManager>(*this);
|
|
@@ -202161,6 +202873,7 @@ TempBufferPoolReservation BufferManager::EvictBlocksOrThrow(idx_t memory_delta,
|
|
|
202161
202873
|
}
|
|
202162
202874
|
|
|
202163
202875
|
shared_ptr<BlockHandle> BufferManager::RegisterSmallMemory(idx_t block_size) {
|
|
202876
|
+
D_ASSERT(block_size < Storage::BLOCK_SIZE);
|
|
202164
202877
|
auto res = EvictBlocksOrThrow(block_size, maximum_memory, nullptr,
|
|
202165
202878
|
"could not allocate block of %lld bytes (%lld/%lld used) %s", block_size,
|
|
202166
202879
|
GetUsedMemory(), GetMaxMemory());
|
|
@@ -202173,7 +202886,7 @@ shared_ptr<BlockHandle> BufferManager::RegisterSmallMemory(idx_t block_size) {
|
|
|
202173
202886
|
|
|
202174
202887
|
shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can_destroy) {
|
|
202175
202888
|
D_ASSERT(block_size >= Storage::BLOCK_SIZE);
|
|
202176
|
-
auto alloc_size =
|
|
202889
|
+
auto alloc_size = GetAllocSize(block_size);
|
|
202177
202890
|
// first evict blocks until we have enough memory to store this buffer
|
|
202178
202891
|
unique_ptr<FileBuffer> reusable_buffer;
|
|
202179
202892
|
auto res = EvictBlocksOrThrow(alloc_size, maximum_memory, &reusable_buffer,
|
|
@@ -202187,9 +202900,11 @@ shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can
|
|
|
202187
202900
|
move(res));
|
|
202188
202901
|
}
|
|
202189
202902
|
|
|
202190
|
-
BufferHandle BufferManager::Allocate(idx_t block_size) {
|
|
202191
|
-
|
|
202192
|
-
|
|
202903
|
+
BufferHandle BufferManager::Allocate(idx_t block_size, bool can_destroy, shared_ptr<BlockHandle> *block) {
|
|
202904
|
+
shared_ptr<BlockHandle> local_block;
|
|
202905
|
+
auto block_ptr = block ? block : &local_block;
|
|
202906
|
+
*block_ptr = RegisterMemory(block_size, can_destroy);
|
|
202907
|
+
return Pin(*block_ptr);
|
|
202193
202908
|
}
|
|
202194
202909
|
|
|
202195
202910
|
void BufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size) {
|
|
@@ -202219,6 +202934,7 @@ void BufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size
|
|
|
202219
202934
|
// resize and adjust current memory
|
|
202220
202935
|
handle->buffer->Resize(block_size);
|
|
202221
202936
|
handle->memory_usage += memory_delta;
|
|
202937
|
+
D_ASSERT(handle->memory_usage == handle->buffer->AllocSize());
|
|
202222
202938
|
}
|
|
202223
202939
|
|
|
202224
202940
|
BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
|
|
@@ -202259,6 +202975,7 @@ BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
|
|
|
202259
202975
|
handle->memory_usage += delta;
|
|
202260
202976
|
handle->memory_charge.Resize(current_memory, handle->memory_usage);
|
|
202261
202977
|
}
|
|
202978
|
+
D_ASSERT(handle->memory_usage == handle->buffer->AllocSize());
|
|
202262
202979
|
return buf;
|
|
202263
202980
|
}
|
|
202264
202981
|
|
|
@@ -202551,7 +203268,9 @@ private:
|
|
|
202551
203268
|
// as a result we can truncate the file
|
|
202552
203269
|
auto max_index = index_manager.GetMaxIndex();
|
|
202553
203270
|
auto &fs = FileSystem::GetFileSystem(db);
|
|
203271
|
+
#ifndef WIN32 // this ended up causing issues when sorting
|
|
202554
203272
|
fs.Truncate(*handle, GetPositionInFile(max_index + 1));
|
|
203273
|
+
#endif
|
|
202555
203274
|
}
|
|
202556
203275
|
}
|
|
202557
203276
|
|
|
@@ -202827,6 +203546,9 @@ void BufferManager::BufferAllocatorFree(PrivateAllocatorData *private_data, data
|
|
|
202827
203546
|
|
|
202828
203547
|
data_ptr_t BufferManager::BufferAllocatorRealloc(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t old_size,
|
|
202829
203548
|
idx_t size) {
|
|
203549
|
+
if (old_size == size) {
|
|
203550
|
+
return pointer;
|
|
203551
|
+
}
|
|
202830
203552
|
auto &data = (BufferAllocatorData &)*private_data;
|
|
202831
203553
|
BufferPoolReservation r;
|
|
202832
203554
|
r.size = old_size;
|
|
@@ -202840,6 +203562,10 @@ Allocator &BufferAllocator::Get(ClientContext &context) {
|
|
|
202840
203562
|
return manager.GetBufferAllocator();
|
|
202841
203563
|
}
|
|
202842
203564
|
|
|
203565
|
+
Allocator &BufferAllocator::Get(DatabaseInstance &db) {
|
|
203566
|
+
return BufferManager::GetBufferManager(db).GetBufferAllocator();
|
|
203567
|
+
}
|
|
203568
|
+
|
|
202843
203569
|
Allocator &BufferManager::GetBufferAllocator() {
|
|
202844
203570
|
return buffer_allocator;
|
|
202845
203571
|
}
|
|
@@ -208409,11 +209135,15 @@ public:
|
|
|
208409
209135
|
new_string = !LookupString(data[idx]);
|
|
208410
209136
|
}
|
|
208411
209137
|
|
|
208412
|
-
bool fits =
|
|
209138
|
+
bool fits = CalculateSpaceRequirements(new_string, string_size);
|
|
208413
209139
|
if (!fits) {
|
|
208414
209140
|
Flush();
|
|
208415
209141
|
new_string = true;
|
|
208416
|
-
|
|
209142
|
+
|
|
209143
|
+
fits = CalculateSpaceRequirements(new_string, string_size);
|
|
209144
|
+
if (!fits) {
|
|
209145
|
+
throw InternalException("Dictionary compression could not write to new segment");
|
|
209146
|
+
}
|
|
208417
209147
|
}
|
|
208418
209148
|
|
|
208419
209149
|
if (!row_is_valid) {
|
|
@@ -208441,8 +209171,8 @@ protected:
|
|
|
208441
209171
|
virtual void AddNewString(string_t str) = 0;
|
|
208442
209172
|
// Add a null value to the compression state
|
|
208443
209173
|
virtual void AddNull() = 0;
|
|
208444
|
-
//
|
|
208445
|
-
virtual bool
|
|
209174
|
+
// Needs to be called before adding a value. Will return false if a flush is required first.
|
|
209175
|
+
virtual bool CalculateSpaceRequirements(bool new_string, size_t string_size) = 0;
|
|
208446
209176
|
// Flush the segment to disk if compressing or reset the counters if analyzing
|
|
208447
209177
|
virtual void Flush(bool final = false) = 0;
|
|
208448
209178
|
};
|
|
@@ -208499,7 +209229,8 @@ struct DictionaryCompressionStorage {
|
|
|
208499
209229
|
// scanning the whole dictionary at once and then scanning the selection buffer for each emitted vector. Secondly, it
|
|
208500
209230
|
// allows for efficient bitpacking compression as the selection values should remain relatively small.
|
|
208501
209231
|
struct DictionaryCompressionCompressState : public DictionaryCompressionState {
|
|
208502
|
-
explicit DictionaryCompressionCompressState(ColumnDataCheckpointer &checkpointer)
|
|
209232
|
+
explicit DictionaryCompressionCompressState(ColumnDataCheckpointer &checkpointer)
|
|
209233
|
+
: checkpointer(checkpointer), heap(BufferAllocator::Get(checkpointer.GetDatabase())) {
|
|
208503
209234
|
auto &db = checkpointer.GetDatabase();
|
|
208504
209235
|
auto &config = DBConfig::GetConfig(db);
|
|
208505
209236
|
function = config.GetCompressionFunction(CompressionType::COMPRESSION_DICTIONARY, PhysicalType::VARCHAR);
|
|
@@ -208605,7 +209336,7 @@ public:
|
|
|
208605
209336
|
current_segment->count++;
|
|
208606
209337
|
}
|
|
208607
209338
|
|
|
208608
|
-
bool
|
|
209339
|
+
bool CalculateSpaceRequirements(bool new_string, size_t string_size) override {
|
|
208609
209340
|
if (new_string) {
|
|
208610
209341
|
next_width = BitpackingPrimitives::MinimumBitWidth(index_buffer.size() - 1 + new_string);
|
|
208611
209342
|
return DictionaryCompressionStorage::HasEnoughSpace(current_segment->count.load() + 1,
|
|
@@ -208726,7 +209457,7 @@ struct DictionaryAnalyzeState : public DictionaryCompressionState {
|
|
|
208726
209457
|
current_tuple_count++;
|
|
208727
209458
|
}
|
|
208728
209459
|
|
|
208729
|
-
bool
|
|
209460
|
+
bool CalculateSpaceRequirements(bool new_string, size_t string_size) override {
|
|
208730
209461
|
if (new_string) {
|
|
208731
209462
|
next_width =
|
|
208732
209463
|
BitpackingPrimitives::MinimumBitWidth(current_unique_count + 2); // 1 for null, one for new string
|
|
@@ -211977,8 +212708,7 @@ void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string
|
|
|
211977
212708
|
new_block->offset = 0;
|
|
211978
212709
|
new_block->size = alloc_size;
|
|
211979
212710
|
// allocate an in-memory buffer for it
|
|
211980
|
-
|
|
211981
|
-
handle = buffer_manager.Pin(block);
|
|
212711
|
+
handle = buffer_manager.Allocate(alloc_size, false, &block);
|
|
211982
212712
|
state.overflow_blocks[block->BlockId()] = new_block.get();
|
|
211983
212713
|
new_block->block = move(block);
|
|
211984
212714
|
new_block->next = move(state.head);
|
|
@@ -213413,7 +214143,12 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t
|
|
|
213413
214143
|
bool append_failed = false;
|
|
213414
214144
|
// now append the entries to the indices
|
|
213415
214145
|
indexes.Scan([&](Index &index) {
|
|
213416
|
-
|
|
214146
|
+
try {
|
|
214147
|
+
if (!index.Append(chunk, row_identifiers)) {
|
|
214148
|
+
append_failed = true;
|
|
214149
|
+
return true;
|
|
214150
|
+
}
|
|
214151
|
+
} catch (...) {
|
|
213417
214152
|
append_failed = true;
|
|
213418
214153
|
return true;
|
|
213419
214154
|
}
|
|
@@ -213427,7 +214162,6 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t
|
|
|
213427
214162
|
for (auto *index : already_appended) {
|
|
213428
214163
|
index->Delete(chunk, row_identifiers);
|
|
213429
214164
|
}
|
|
213430
|
-
|
|
213431
214165
|
return false;
|
|
213432
214166
|
}
|
|
213433
214167
|
return true;
|
|
@@ -214070,12 +214804,21 @@ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendSta
|
|
|
214070
214804
|
append_state.current_row);
|
|
214071
214805
|
}
|
|
214072
214806
|
if (constraint_violated) {
|
|
214807
|
+
PreservedError error;
|
|
214073
214808
|
// need to revert the append
|
|
214074
214809
|
row_t current_row = append_state.row_start;
|
|
214075
214810
|
// remove the data from the indexes, if there are any indexes
|
|
214076
214811
|
row_groups->Scan(transaction, [&](DataChunk &chunk) -> bool {
|
|
214077
214812
|
// append this chunk to the indexes of the table
|
|
214078
|
-
|
|
214813
|
+
try {
|
|
214814
|
+
table->RemoveFromIndexes(append_state, chunk, current_row);
|
|
214815
|
+
} catch (Exception &ex) {
|
|
214816
|
+
error = PreservedError(ex);
|
|
214817
|
+
return false;
|
|
214818
|
+
} catch (std::exception &ex) {
|
|
214819
|
+
error = PreservedError(ex);
|
|
214820
|
+
return false;
|
|
214821
|
+
}
|
|
214079
214822
|
|
|
214080
214823
|
current_row += chunk.size();
|
|
214081
214824
|
if (current_row >= append_state.current_row) {
|
|
@@ -214087,6 +214830,9 @@ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendSta
|
|
|
214087
214830
|
if (append_to_table) {
|
|
214088
214831
|
table->RevertAppendInternal(append_state.row_start, append_count);
|
|
214089
214832
|
}
|
|
214833
|
+
if (error) {
|
|
214834
|
+
error.Throw();
|
|
214835
|
+
}
|
|
214090
214836
|
throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
|
|
214091
214837
|
}
|
|
214092
214838
|
}
|
|
@@ -214218,7 +214964,7 @@ void LocalStorage::InitializeAppend(LocalAppendState &state, DataTable *table) {
|
|
|
214218
214964
|
void LocalStorage::Append(LocalAppendState &state, DataChunk &chunk) {
|
|
214219
214965
|
// append to unique indices (if any)
|
|
214220
214966
|
auto storage = state.storage;
|
|
214221
|
-
idx_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows();
|
|
214967
|
+
idx_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows() + state.append_state.total_append_count;
|
|
214222
214968
|
if (!DataTable::AppendToIndexes(storage->indexes, chunk, base_id)) {
|
|
214223
214969
|
throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
|
|
214224
214970
|
}
|
|
@@ -215000,6 +215746,7 @@ block_id_t SingleFileBlockManager::GetFreeBlockId() {
|
|
|
215000
215746
|
void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) {
|
|
215001
215747
|
lock_guard<mutex> lock(block_lock);
|
|
215002
215748
|
D_ASSERT(block_id >= 0);
|
|
215749
|
+
D_ASSERT(block_id < max_block);
|
|
215003
215750
|
D_ASSERT(free_list.find(block_id) == free_list.end());
|
|
215004
215751
|
multi_use_blocks.erase(block_id);
|
|
215005
215752
|
free_list.insert(block_id);
|
|
@@ -215008,6 +215755,7 @@ void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) {
|
|
|
215008
215755
|
void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) {
|
|
215009
215756
|
lock_guard<mutex> lock(block_lock);
|
|
215010
215757
|
D_ASSERT(block_id >= 0);
|
|
215758
|
+
D_ASSERT(block_id < max_block);
|
|
215011
215759
|
|
|
215012
215760
|
// check if the block is a multi-use block
|
|
215013
215761
|
auto entry = multi_use_blocks.find(block_id);
|
|
@@ -215030,6 +215778,8 @@ void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) {
|
|
|
215030
215778
|
|
|
215031
215779
|
void SingleFileBlockManager::IncreaseBlockReferenceCount(block_id_t block_id) {
|
|
215032
215780
|
lock_guard<mutex> lock(block_lock);
|
|
215781
|
+
D_ASSERT(block_id >= 0);
|
|
215782
|
+
D_ASSERT(block_id < max_block);
|
|
215033
215783
|
D_ASSERT(free_list.find(block_id) == free_list.end());
|
|
215034
215784
|
auto entry = multi_use_blocks.find(block_id);
|
|
215035
215785
|
if (entry != multi_use_blocks.end()) {
|
|
@@ -218111,7 +218861,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
|
|
|
218111
218861
|
block = block_manager.RegisterBlock(block_id);
|
|
218112
218862
|
}
|
|
218113
218863
|
auto segment_size = Storage::BLOCK_SIZE;
|
|
218114
|
-
return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::PERSISTENT, start, count, function,
|
|
218864
|
+
return make_unique<ColumnSegment>(db, move(block), type, ColumnSegmentType::PERSISTENT, start, count, function,
|
|
218115
218865
|
move(statistics), block_id, offset, segment_size);
|
|
218116
218866
|
}
|
|
218117
218867
|
|
|
@@ -218125,9 +218875,9 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance
|
|
|
218125
218875
|
if (segment_size < Storage::BLOCK_SIZE) {
|
|
218126
218876
|
block = buffer_manager.RegisterSmallMemory(segment_size);
|
|
218127
218877
|
} else {
|
|
218128
|
-
|
|
218878
|
+
buffer_manager.Allocate(segment_size, false, &block);
|
|
218129
218879
|
}
|
|
218130
|
-
return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr,
|
|
218880
|
+
return make_unique<ColumnSegment>(db, move(block), type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr,
|
|
218131
218881
|
INVALID_BLOCK, 0, segment_size);
|
|
218132
218882
|
}
|
|
218133
218883
|
|
|
@@ -218208,9 +218958,9 @@ void ColumnSegment::Resize(idx_t new_size) {
|
|
|
218208
218958
|
D_ASSERT(new_size > this->segment_size);
|
|
218209
218959
|
D_ASSERT(offset == 0);
|
|
218210
218960
|
auto &buffer_manager = BufferManager::GetBufferManager(db);
|
|
218211
|
-
auto new_block = buffer_manager.RegisterMemory(Storage::BLOCK_SIZE, false);
|
|
218212
218961
|
auto old_handle = buffer_manager.Pin(block);
|
|
218213
|
-
|
|
218962
|
+
shared_ptr<BlockHandle> new_block;
|
|
218963
|
+
auto new_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block);
|
|
218214
218964
|
memcpy(new_handle.Ptr(), old_handle.Ptr(), segment_size);
|
|
218215
218965
|
this->block_id = new_block->BlockId();
|
|
218216
218966
|
this->block = move(new_block);
|
|
@@ -221658,7 +222408,8 @@ static UpdateSegment::rollback_update_function_t GetRollbackUpdateFunction(Physi
|
|
|
221658
222408
|
static UpdateSegment::statistics_update_function_t GetStatisticsUpdateFunction(PhysicalType type);
|
|
221659
222409
|
static UpdateSegment::fetch_row_function_t GetFetchRowFunction(PhysicalType type);
|
|
221660
222410
|
|
|
221661
|
-
UpdateSegment::UpdateSegment(ColumnData &column_data)
|
|
222411
|
+
UpdateSegment::UpdateSegment(ColumnData &column_data)
|
|
222412
|
+
: column_data(column_data), stats(column_data.type), heap(BufferAllocator::Get(column_data.GetDatabase())) {
|
|
221662
222413
|
auto physical_type = column_data.type.InternalType();
|
|
221663
222414
|
|
|
221664
222415
|
this->type_size = GetTypeIdSize(physical_type);
|
|
@@ -223877,7 +224628,10 @@ void CleanupState::Flush() {
|
|
|
223877
224628
|
Vector row_identifiers(LogicalType::ROW_TYPE, (data_ptr_t)row_numbers);
|
|
223878
224629
|
|
|
223879
224630
|
// delete the tuples from all the indexes
|
|
223880
|
-
|
|
224631
|
+
try {
|
|
224632
|
+
current_table->RemoveFromIndexes(row_identifiers, count);
|
|
224633
|
+
} catch (...) {
|
|
224634
|
+
}
|
|
223881
224635
|
|
|
223882
224636
|
count = 0;
|
|
223883
224637
|
}
|
|
@@ -228169,626 +228923,628 @@ size_t duckdb_fsst_compressAVX512(SymbolTable &symbolTable, u8* codeBase, u8* sy
|
|
|
228169
228923
|
|
|
228170
228924
|
|
|
228171
228925
|
Symbol concat(Symbol a, Symbol b) {
|
|
228172
|
-
|
|
228173
|
-
|
|
228174
|
-
|
|
228175
|
-
|
|
228176
|
-
|
|
228177
|
-
|
|
228926
|
+
Symbol s;
|
|
228927
|
+
u32 length = a.length()+b.length();
|
|
228928
|
+
if (length > Symbol::maxLength) length = Symbol::maxLength;
|
|
228929
|
+
s.set_code_len(FSST_CODE_MASK, length);
|
|
228930
|
+
s.val.num = (b.val.num << (8*a.length())) | a.val.num;
|
|
228931
|
+
return s;
|
|
228178
228932
|
}
|
|
228179
228933
|
|
|
228180
228934
|
namespace std {
|
|
228181
228935
|
template <>
|
|
228182
228936
|
class hash<QSymbol> {
|
|
228183
|
-
|
|
228184
|
-
|
|
228185
|
-
|
|
228186
|
-
|
|
228187
|
-
|
|
228188
|
-
|
|
228189
|
-
|
|
228190
|
-
|
|
228191
|
-
|
|
228192
|
-
|
|
228193
|
-
|
|
228194
|
-
|
|
228195
|
-
|
|
228196
|
-
|
|
228197
|
-
|
|
228198
|
-
|
|
228937
|
+
public:
|
|
228938
|
+
size_t operator()(const QSymbol& q) const {
|
|
228939
|
+
uint64_t k = q.symbol.val.num;
|
|
228940
|
+
const uint64_t m = 0xc6a4a7935bd1e995;
|
|
228941
|
+
const int r = 47;
|
|
228942
|
+
uint64_t h = 0x8445d61a4e774912 ^ (8*m);
|
|
228943
|
+
k *= m;
|
|
228944
|
+
k ^= k >> r;
|
|
228945
|
+
k *= m;
|
|
228946
|
+
h ^= k;
|
|
228947
|
+
h *= m;
|
|
228948
|
+
h ^= h >> r;
|
|
228949
|
+
h *= m;
|
|
228950
|
+
h ^= h >> r;
|
|
228951
|
+
return h;
|
|
228952
|
+
}
|
|
228199
228953
|
};
|
|
228200
228954
|
}
|
|
228201
228955
|
|
|
228202
228956
|
bool isEscapeCode(u16 pos) { return pos < FSST_CODE_BASE; }
|
|
228203
228957
|
|
|
228204
228958
|
std::ostream& operator<<(std::ostream& out, const Symbol& s) {
|
|
228205
|
-
|
|
228206
|
-
|
|
228207
|
-
|
|
228959
|
+
for (u32 i=0; i<s.length(); i++)
|
|
228960
|
+
out << s.val.str[i];
|
|
228961
|
+
return out;
|
|
228208
228962
|
}
|
|
228209
|
-
//static u64 iter = 0;
|
|
228210
228963
|
|
|
228211
228964
|
SymbolTable *buildSymbolTable(Counters& counters, vector<u8*> line, size_t len[], bool zeroTerminated=false) {
|
|
228212
|
-
|
|
228213
|
-
|
|
228214
|
-
|
|
228215
|
-
|
|
228216
|
-
|
|
228217
|
-
|
|
228218
|
-
|
|
228219
|
-
|
|
228220
|
-
|
|
228221
|
-
|
|
228222
|
-
|
|
228223
|
-
|
|
228224
|
-
|
|
228225
|
-
|
|
228226
|
-
|
|
228227
|
-
|
|
228228
|
-
|
|
228229
|
-
|
|
228230
|
-
|
|
228231
|
-
|
|
228232
|
-
|
|
228233
|
-
|
|
228234
|
-
|
|
228235
|
-
|
|
228965
|
+
SymbolTable *st = new SymbolTable(), *bestTable = new SymbolTable();
|
|
228966
|
+
int bestGain = (int) -FSST_SAMPLEMAXSZ; // worst case (everything exception)
|
|
228967
|
+
size_t sampleFrac = 128;
|
|
228968
|
+
|
|
228969
|
+
// start by determining the terminator. We use the (lowest) most infrequent byte as terminator
|
|
228970
|
+
st->zeroTerminated = zeroTerminated;
|
|
228971
|
+
if (zeroTerminated) {
|
|
228972
|
+
st->terminator = 0; // except in case of zeroTerminated mode, then byte 0 is terminator regardless frequency
|
|
228973
|
+
} else {
|
|
228974
|
+
u16 byteHisto[256];
|
|
228975
|
+
memset(byteHisto, 0, sizeof(byteHisto));
|
|
228976
|
+
for(size_t i=0; i<line.size(); i++) {
|
|
228977
|
+
u8* cur = line[i];
|
|
228978
|
+
u8* end = cur + len[i];
|
|
228979
|
+
while(cur < end) byteHisto[*cur++]++;
|
|
228980
|
+
}
|
|
228981
|
+
u32 minSize = FSST_SAMPLEMAXSZ, i = st->terminator = 256;
|
|
228982
|
+
while(i-- > 0) {
|
|
228983
|
+
if (byteHisto[i] > minSize) continue;
|
|
228984
|
+
st->terminator = i;
|
|
228985
|
+
minSize = byteHisto[i];
|
|
228986
|
+
}
|
|
228987
|
+
}
|
|
228988
|
+
assert(st->terminator != 256);
|
|
228989
|
+
|
|
228990
|
+
// a random number between 0 and 128
|
|
228991
|
+
auto rnd128 = [&](size_t i) { return 1 + (FSST_HASH((i+1UL)*sampleFrac)&127); };
|
|
228992
|
+
|
|
228993
|
+
// compress sample, and compute (pair-)frequencies
|
|
228994
|
+
auto compressCount = [&](SymbolTable *st, Counters &counters) { // returns gain
|
|
228995
|
+
int gain = 0;
|
|
228996
|
+
|
|
228997
|
+
for(size_t i=0; i<line.size(); i++) {
|
|
228998
|
+
u8* cur = line[i];
|
|
228999
|
+
u8* end = cur + len[i];
|
|
229000
|
+
|
|
229001
|
+
if (sampleFrac < 128) {
|
|
229002
|
+
// in earlier rounds (sampleFrac < 128) we skip data in the sample (reduces overall work ~2x)
|
|
229003
|
+
if (rnd128(i) > sampleFrac) continue;
|
|
229004
|
+
}
|
|
229005
|
+
if (cur < end) {
|
|
229006
|
+
u8* start = cur;
|
|
229007
|
+
u16 code2 = 255, code1 = st->findLongestSymbol(cur, end);
|
|
229008
|
+
cur += st->symbols[code1].length();
|
|
229009
|
+
gain += (int) (st->symbols[code1].length()-(1+isEscapeCode(code1)));
|
|
229010
|
+
while (true) {
|
|
229011
|
+
// count single symbol (i.e. an option is not extending it)
|
|
229012
|
+
counters.count1Inc(code1);
|
|
228236
229013
|
|
|
228237
|
-
|
|
228238
|
-
|
|
229014
|
+
// as an alternative, consider just using the next byte..
|
|
229015
|
+
if (st->symbols[code1].length() != 1) // .. but do not count single byte symbols doubly
|
|
229016
|
+
counters.count1Inc(*start);
|
|
228239
229017
|
|
|
228240
|
-
|
|
228241
|
-
|
|
228242
|
-
|
|
229018
|
+
if (cur==end) {
|
|
229019
|
+
break;
|
|
229020
|
+
}
|
|
228243
229021
|
|
|
228244
|
-
|
|
228245
|
-
|
|
228246
|
-
|
|
229022
|
+
// now match a new symbol
|
|
229023
|
+
start = cur;
|
|
229024
|
+
if (cur<end-7) {
|
|
229025
|
+
u64 word = fsst_unaligned_load(cur);
|
|
229026
|
+
size_t code = word & 0xFFFFFF;
|
|
229027
|
+
size_t idx = FSST_HASH(code)&(st->hashTabSize-1);
|
|
229028
|
+
Symbol s = st->hashTab[idx];
|
|
229029
|
+
code2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK;
|
|
229030
|
+
word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
|
|
229031
|
+
if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) {
|
|
229032
|
+
code2 = s.code();
|
|
229033
|
+
cur += s.length();
|
|
229034
|
+
} else if (code2 >= FSST_CODE_BASE) {
|
|
229035
|
+
cur += 2;
|
|
229036
|
+
} else {
|
|
229037
|
+
code2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK;
|
|
229038
|
+
cur += 1;
|
|
229039
|
+
}
|
|
229040
|
+
} else {
|
|
229041
|
+
code2 = st->findLongestSymbol(cur, end);
|
|
229042
|
+
cur += st->symbols[code2].length();
|
|
229043
|
+
}
|
|
228247
229044
|
|
|
228248
|
-
|
|
228249
|
-
|
|
228250
|
-
if (rnd128(i) > sampleFrac) continue;
|
|
228251
|
-
}
|
|
228252
|
-
if (cur < end) {
|
|
228253
|
-
u16 pos2 = 255, pos1 = st->findLongestSymbol(cur, end);
|
|
228254
|
-
cur += st->symbols[pos1].length();
|
|
228255
|
-
gain += (int) (st->symbols[pos1].length()-(1+isEscapeCode(pos1)));
|
|
228256
|
-
while (true) {
|
|
228257
|
-
u8* old = cur;
|
|
228258
|
-
counters.count1Inc(pos1);
|
|
228259
|
-
// count single symbol (i.e. an option is not extending it)
|
|
228260
|
-
if (cur>=end)
|
|
228261
|
-
break;
|
|
228262
|
-
if (st->symbols[pos1].length() != 1)
|
|
228263
|
-
counters.count1Inc(*cur);
|
|
228264
|
-
if (cur<end-7) {
|
|
228265
|
-
u64 word = fsst_unaligned_load(cur);
|
|
228266
|
-
size_t pos = word & 0xFFFFFF;
|
|
228267
|
-
size_t idx = FSST_HASH(pos)&(st->hashTabSize-1);
|
|
228268
|
-
Symbol s = st->hashTab[idx];
|
|
228269
|
-
pos2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK;
|
|
228270
|
-
word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
|
|
228271
|
-
if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) {
|
|
228272
|
-
pos2 = s.code();
|
|
228273
|
-
cur += s.length();
|
|
228274
|
-
} else if (pos2 >= FSST_CODE_BASE) {
|
|
228275
|
-
cur += 2;
|
|
228276
|
-
} else {
|
|
228277
|
-
pos2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK;
|
|
228278
|
-
cur += 1;
|
|
228279
|
-
}
|
|
228280
|
-
} else {
|
|
228281
|
-
assert(cur<end);
|
|
228282
|
-
pos2 = st->findLongestSymbol(cur, end);
|
|
228283
|
-
cur += st->symbols[pos2].length();
|
|
228284
|
-
}
|
|
228285
|
-
|
|
228286
|
-
// compute compressed output size
|
|
228287
|
-
gain += ((int) (cur-old))-(1+isEscapeCode(pos2));
|
|
228288
|
-
|
|
228289
|
-
// now count the subsequent two symbols we encode as an extension possibility
|
|
228290
|
-
if (sampleFrac < 128) { // no need to count pairs in final round
|
|
228291
|
-
counters.count2Inc(pos1, pos2);
|
|
228292
|
-
if ((cur-old) > 1) // do not count escaped bytes doubly
|
|
228293
|
-
counters.count2Inc(pos1, *old);
|
|
228294
|
-
}
|
|
228295
|
-
pos1 = pos2;
|
|
228296
|
-
}
|
|
228297
|
-
}
|
|
228298
|
-
}
|
|
228299
|
-
return gain;
|
|
228300
|
-
};
|
|
229045
|
+
// compute compressed output size
|
|
229046
|
+
gain += ((int) (cur-start))-(1+isEscapeCode(code2));
|
|
228301
229047
|
|
|
228302
|
-
|
|
228303
|
-
|
|
228304
|
-
|
|
228305
|
-
|
|
228306
|
-
// artificially make terminater the most frequent symbol so it gets included
|
|
228307
|
-
u16 terminator = st->nSymbols?FSST_CODE_BASE:st->terminator;
|
|
228308
|
-
counters.count1Set(terminator,65535);
|
|
228309
|
-
|
|
228310
|
-
auto addOrInc = [&](unordered_set<QSymbol> &cands, Symbol s, u64 count) {
|
|
228311
|
-
if (count < (5*sampleFrac)/128) return; // improves both compression speed (less candidates), but also quality!!
|
|
228312
|
-
QSymbol q;
|
|
228313
|
-
q.symbol = s;
|
|
228314
|
-
q.gain = count * s.length();
|
|
228315
|
-
auto it = cands.find(q);
|
|
228316
|
-
if (it != cands.end()) {
|
|
228317
|
-
q.gain += (*it).gain;
|
|
228318
|
-
cands.erase(*it);
|
|
228319
|
-
}
|
|
228320
|
-
cands.insert(q);
|
|
228321
|
-
};
|
|
229048
|
+
// now count the subsequent two symbols we encode as an extension possibility
|
|
229049
|
+
if (sampleFrac < 128) { // no need to count pairs in final round
|
|
229050
|
+
// consider the symbol that is the concatenation of the two last symbols
|
|
229051
|
+
counters.count2Inc(code1, code2);
|
|
228322
229052
|
|
|
228323
|
-
|
|
228324
|
-
|
|
228325
|
-
|
|
228326
|
-
|
|
229053
|
+
// as an alternative, consider just extending with the next byte..
|
|
229054
|
+
if ((cur-start) > 1) // ..but do not count single byte extensions doubly
|
|
229055
|
+
counters.count2Inc(code1, *start);
|
|
229056
|
+
}
|
|
229057
|
+
code1 = code2;
|
|
229058
|
+
}
|
|
229059
|
+
}
|
|
229060
|
+
}
|
|
229061
|
+
return gain;
|
|
229062
|
+
};
|
|
228327
229063
|
|
|
228328
|
-
|
|
228329
|
-
|
|
228330
|
-
|
|
229064
|
+
auto makeTable = [&](SymbolTable *st, Counters &counters) {
|
|
229065
|
+
// hash set of candidate symbols (needed because we can generate duplicate candidates)
|
|
229066
|
+
unordered_set<QSymbol> cands;
|
|
229067
|
+
|
|
229068
|
+
// artificially make terminator the most frequent symbol so it gets included
|
|
229069
|
+
u16 terminator = st->nSymbols?FSST_CODE_BASE:st->terminator;
|
|
229070
|
+
counters.count1Set(terminator,65535);
|
|
229071
|
+
|
|
229072
|
+
auto addOrInc = [&](unordered_set<QSymbol> &cands, Symbol s, u64 count) {
|
|
229073
|
+
if (count < (5*sampleFrac)/128) return; // improves both compression speed (less candidates), but also quality!!
|
|
229074
|
+
QSymbol q;
|
|
229075
|
+
q.symbol = s;
|
|
229076
|
+
q.gain = count * s.length();
|
|
229077
|
+
auto it = cands.find(q);
|
|
229078
|
+
if (it != cands.end()) {
|
|
229079
|
+
q.gain += (*it).gain;
|
|
229080
|
+
cands.erase(*it);
|
|
229081
|
+
}
|
|
229082
|
+
cands.insert(q);
|
|
229083
|
+
};
|
|
228331
229084
|
|
|
228332
|
-
|
|
228333
|
-
|
|
228334
|
-
|
|
228335
|
-
|
|
228336
|
-
}
|
|
228337
|
-
for (u32 pos2=0; pos2<FSST_CODE_BASE+(size_t)st->nSymbols; pos2++) {
|
|
228338
|
-
u32 cnt2 = counters.count2GetNext(pos1, pos2); // may advance pos2!!
|
|
228339
|
-
if (!cnt2) continue;
|
|
228340
|
-
|
|
228341
|
-
// create a new symbol
|
|
228342
|
-
Symbol s2 = st->symbols[pos2];
|
|
228343
|
-
Symbol s3 = concat(s1, s2);
|
|
228344
|
-
if (s2.val.str[0] != st->terminator) // multi-byte symbols cannot contain the terminator byte
|
|
228345
|
-
addOrInc(cands, s3, cnt2);
|
|
228346
|
-
}
|
|
228347
|
-
}
|
|
229085
|
+
// add candidate symbols based on counted frequency
|
|
229086
|
+
for (u32 pos1=0; pos1<FSST_CODE_BASE+(size_t) st->nSymbols; pos1++) {
|
|
229087
|
+
u32 cnt1 = counters.count1GetNext(pos1); // may advance pos1!!
|
|
229088
|
+
if (!cnt1) continue;
|
|
228348
229089
|
|
|
228349
|
-
|
|
228350
|
-
|
|
228351
|
-
|
|
228352
|
-
|
|
228353
|
-
|
|
228354
|
-
|
|
228355
|
-
|
|
228356
|
-
|
|
228357
|
-
|
|
228358
|
-
|
|
228359
|
-
|
|
228360
|
-
|
|
228361
|
-
|
|
228362
|
-
|
|
229090
|
+
// heuristic: promoting single-byte symbols (*8) helps reduce exception rates and increases [de]compression speed
|
|
229091
|
+
Symbol s1 = st->symbols[pos1];
|
|
229092
|
+
addOrInc(cands, s1, ((s1.length()==1)?8LL:1LL)*cnt1);
|
|
229093
|
+
|
|
229094
|
+
if (sampleFrac >= 128 || // last round we do not create new (combined) symbols
|
|
229095
|
+
s1.length() == Symbol::maxLength || // symbol cannot be extended
|
|
229096
|
+
s1.val.str[0] == st->terminator) { // multi-byte symbols cannot contain the terminator byte
|
|
229097
|
+
continue;
|
|
229098
|
+
}
|
|
229099
|
+
for (u32 pos2=0; pos2<FSST_CODE_BASE+(size_t)st->nSymbols; pos2++) {
|
|
229100
|
+
u32 cnt2 = counters.count2GetNext(pos1, pos2); // may advance pos2!!
|
|
229101
|
+
if (!cnt2) continue;
|
|
229102
|
+
|
|
229103
|
+
// create a new symbol
|
|
229104
|
+
Symbol s2 = st->symbols[pos2];
|
|
229105
|
+
Symbol s3 = concat(s1, s2);
|
|
229106
|
+
if (s2.val.str[0] != st->terminator) // multi-byte symbols cannot contain the terminator byte
|
|
229107
|
+
addOrInc(cands, s3, cnt2);
|
|
229108
|
+
}
|
|
229109
|
+
}
|
|
229110
|
+
|
|
229111
|
+
// insert candidates into priority queue (by gain)
|
|
229112
|
+
auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.val.num > q2.symbol.val.num); };
|
|
229113
|
+
priority_queue<QSymbol,vector<QSymbol>,decltype(cmpGn)> pq(cmpGn);
|
|
229114
|
+
for (auto& q : cands)
|
|
229115
|
+
pq.push(q);
|
|
228363
229116
|
|
|
228364
|
-
|
|
229117
|
+
// Create new symbol map using best candidates
|
|
229118
|
+
st->clear();
|
|
229119
|
+
while (st->nSymbols < 255 && !pq.empty()) {
|
|
229120
|
+
QSymbol q = pq.top();
|
|
229121
|
+
pq.pop();
|
|
229122
|
+
st->add(q.symbol);
|
|
229123
|
+
}
|
|
229124
|
+
};
|
|
229125
|
+
|
|
229126
|
+
u8 bestCounters[512*sizeof(u16)];
|
|
228365
229127
|
#ifdef NONOPT_FSST
|
|
228366
|
-
|
|
228367
|
-
|
|
229128
|
+
for(size_t frac : {127, 127, 127, 127, 127, 127, 127, 127, 127, 128}) {
|
|
229129
|
+
sampleFrac = frac;
|
|
228368
229130
|
#else
|
|
228369
|
-
|
|
229131
|
+
for(sampleFrac=8; true; sampleFrac += 30) {
|
|
228370
229132
|
#endif
|
|
228371
|
-
|
|
228372
|
-
|
|
228373
|
-
|
|
228374
|
-
|
|
228375
|
-
|
|
228376
|
-
|
|
228377
|
-
|
|
228378
|
-
|
|
228379
|
-
|
|
228380
|
-
|
|
228381
|
-
|
|
228382
|
-
|
|
228383
|
-
|
|
228384
|
-
|
|
229133
|
+
memset(&counters, 0, sizeof(Counters));
|
|
229134
|
+
long gain = compressCount(st, counters);
|
|
229135
|
+
if (gain >= bestGain) { // a new best solution!
|
|
229136
|
+
counters.backup1(bestCounters);
|
|
229137
|
+
*bestTable = *st; bestGain = gain;
|
|
229138
|
+
}
|
|
229139
|
+
if (sampleFrac >= 128) break; // we do 5 rounds (sampleFrac=8,38,68,98,128)
|
|
229140
|
+
makeTable(st, counters);
|
|
229141
|
+
}
|
|
229142
|
+
delete st;
|
|
229143
|
+
counters.restore1(bestCounters);
|
|
229144
|
+
makeTable(bestTable, counters);
|
|
229145
|
+
bestTable->finalize(zeroTerminated); // renumber codes for more efficient compression
|
|
229146
|
+
return bestTable;
|
|
228385
229147
|
}
|
|
228386
229148
|
|
|
228387
229149
|
static inline size_t compressSIMD(SymbolTable &symbolTable, u8* symbolBase, size_t nlines, size_t len[], u8* line[], size_t size, u8* dst, size_t lenOut[], u8* strOut[], int unroll) {
|
|
228388
|
-
|
|
228389
|
-
|
|
228390
|
-
|
|
228391
|
-
|
|
228392
|
-
|
|
228393
|
-
|
|
228394
|
-
while (curLine < nlines && outOff <= (1<<19)) {
|
|
228395
|
-
size_t prevLine = curLine, chunk, curOff = 0;
|
|
228396
|
-
|
|
228397
|
-
// bail out if the output buffer cannot hold the compressed next string fully
|
|
228398
|
-
if (((len[curLine]-curOff)*2 + 7) > budget) break; // see below for the +7
|
|
228399
|
-
else budget -= (len[curLine]-curOff)*2;
|
|
228400
|
-
|
|
228401
|
-
strOut[curLine] = (u8*) 0;
|
|
228402
|
-
lenOut[curLine] = 0;
|
|
229150
|
+
size_t curLine = 0, inOff = 0, outOff = 0, batchPos = 0, empty = 0, budget = size;
|
|
229151
|
+
u8 *lim = dst + size, *codeBase = symbolBase + (1<<18); // 512KB temp space for compressing 512 strings
|
|
229152
|
+
SIMDjob input[512]; // combined offsets of input strings (cur,end), and string #id (pos) and output (dst) pointer
|
|
229153
|
+
SIMDjob output[512]; // output are (pos:9,dst:19) end pointers (compute compressed length from this)
|
|
229154
|
+
size_t jobLine[512]; // for which line in the input sequence was this job (needed because we may split a line into multiple jobs)
|
|
228403
229155
|
|
|
228404
|
-
|
|
228405
|
-
|
|
228406
|
-
|
|
228407
|
-
|
|
228408
|
-
|
|
228409
|
-
|
|
228410
|
-
|
|
228411
|
-
|
|
228412
|
-
|
|
228413
|
-
|
|
228414
|
-
|
|
228415
|
-
|
|
228416
|
-
|
|
228417
|
-
|
|
228418
|
-
|
|
228419
|
-
|
|
228420
|
-
|
|
228421
|
-
|
|
228422
|
-
|
|
228423
|
-
|
|
228424
|
-
|
|
228425
|
-
|
|
228426
|
-
|
|
228427
|
-
|
|
228428
|
-
|
|
228429
|
-
|
|
228430
|
-
|
|
228431
|
-
|
|
228432
|
-
|
|
228433
|
-
|
|
228434
|
-
|
|
228435
|
-
|
|
228436
|
-
|
|
228437
|
-
|
|
228438
|
-
|
|
228439
|
-
|
|
228440
|
-
|
|
228441
|
-
|
|
228442
|
-
|
|
228443
|
-
|
|
228444
|
-
|
|
228445
|
-
|
|
228446
|
-
|
|
228447
|
-
|
|
228448
|
-
|
|
228449
|
-
|
|
228450
|
-
|
|
228451
|
-
|
|
228452
|
-
|
|
228453
|
-
|
|
228454
|
-
|
|
228455
|
-
|
|
228456
|
-
|
|
228457
|
-
|
|
228458
|
-
|
|
228459
|
-
|
|
228460
|
-
|
|
228461
|
-
|
|
228462
|
-
|
|
228463
|
-
|
|
228464
|
-
|
|
228465
|
-
|
|
228466
|
-
|
|
228467
|
-
|
|
228468
|
-
|
|
228469
|
-
|
|
228470
|
-
|
|
228471
|
-
|
|
228472
|
-
|
|
228473
|
-
|
|
228474
|
-
|
|
228475
|
-
|
|
228476
|
-
|
|
228477
|
-
|
|
228478
|
-
|
|
228479
|
-
|
|
228480
|
-
|
|
228481
|
-
|
|
228482
|
-
|
|
228483
|
-
|
|
228484
|
-
|
|
228485
|
-
|
|
228486
|
-
|
|
228487
|
-
|
|
228488
|
-
|
|
228489
|
-
|
|
228490
|
-
|
|
228491
|
-
|
|
228492
|
-
|
|
228493
|
-
|
|
228494
|
-
|
|
228495
|
-
|
|
228496
|
-
|
|
228497
|
-
|
|
228498
|
-
|
|
228499
|
-
|
|
228500
|
-
|
|
228501
|
-
|
|
228502
|
-
|
|
228503
|
-
|
|
228504
|
-
|
|
228505
|
-
|
|
228506
|
-
|
|
228507
|
-
|
|
228508
|
-
|
|
228509
|
-
|
|
228510
|
-
|
|
228511
|
-
|
|
228512
|
-
|
|
228513
|
-
|
|
228514
|
-
|
|
228515
|
-
|
|
228516
|
-
|
|
228517
|
-
|
|
229156
|
+
while (curLine < nlines && outOff <= (1<<19)) {
|
|
229157
|
+
size_t prevLine = curLine, chunk, curOff = 0;
|
|
229158
|
+
|
|
229159
|
+
// bail out if the output buffer cannot hold the compressed next string fully
|
|
229160
|
+
if (((len[curLine]-curOff)*2 + 7) > budget) break; // see below for the +7
|
|
229161
|
+
else budget -= (len[curLine]-curOff)*2;
|
|
229162
|
+
|
|
229163
|
+
strOut[curLine] = (u8*) 0;
|
|
229164
|
+
lenOut[curLine] = 0;
|
|
229165
|
+
|
|
229166
|
+
do {
|
|
229167
|
+
do {
|
|
229168
|
+
chunk = len[curLine] - curOff;
|
|
229169
|
+
if (chunk > 511) {
|
|
229170
|
+
chunk = 511; // large strings need to be chopped up into segments of 511 bytes
|
|
229171
|
+
}
|
|
229172
|
+
// create a job in this batch
|
|
229173
|
+
SIMDjob job;
|
|
229174
|
+
job.cur = inOff;
|
|
229175
|
+
job.end = job.cur + chunk;
|
|
229176
|
+
job.pos = batchPos;
|
|
229177
|
+
job.out = outOff;
|
|
229178
|
+
|
|
229179
|
+
// worst case estimate for compressed size (+7 is for the scatter that writes extra 7 zeros)
|
|
229180
|
+
outOff += 7 + 2*(size_t)(job.end - job.cur); // note, total size needed is 512*(511*2+7) bytes.
|
|
229181
|
+
if (outOff > (1<<19)) break; // simdbuf may get full, stop before this chunk
|
|
229182
|
+
|
|
229183
|
+
// register job in this batch
|
|
229184
|
+
input[batchPos] = job;
|
|
229185
|
+
jobLine[batchPos] = curLine;
|
|
229186
|
+
|
|
229187
|
+
if (chunk == 0) {
|
|
229188
|
+
empty++; // detect empty chunks -- SIMD code cannot handle empty strings, so they need to be filtered out
|
|
229189
|
+
} else {
|
|
229190
|
+
// copy string chunk into temp buffer
|
|
229191
|
+
memcpy(symbolBase + inOff, line[curLine] + curOff, chunk);
|
|
229192
|
+
inOff += chunk;
|
|
229193
|
+
curOff += chunk;
|
|
229194
|
+
symbolBase[inOff++] = (u8) symbolTable.terminator; // write an extra char at the end that will not be encoded
|
|
229195
|
+
}
|
|
229196
|
+
if (++batchPos == 512) break;
|
|
229197
|
+
} while(curOff < len[curLine]);
|
|
229198
|
+
|
|
229199
|
+
if ((batchPos == 512) || (outOff > (1<<19)) || (++curLine >= nlines)) { // cannot accumulate more?
|
|
229200
|
+
if (batchPos-empty >= 32) { // if we have enough work, fire off fsst_compressAVX512 (32 is due to max 4x8 unrolling)
|
|
229201
|
+
// radix-sort jobs on length (longest string first)
|
|
229202
|
+
// -- this provides best load balancing and allows to skip empty jobs at the end
|
|
229203
|
+
u16 sortpos[513];
|
|
229204
|
+
memset(sortpos, 0, sizeof(sortpos));
|
|
229205
|
+
|
|
229206
|
+
// calculate length histo
|
|
229207
|
+
for(size_t i=0; i<batchPos; i++) {
|
|
229208
|
+
size_t len = input[i].end - input[i].cur;
|
|
229209
|
+
sortpos[512UL - len]++;
|
|
229210
|
+
}
|
|
229211
|
+
// calculate running sum
|
|
229212
|
+
for(size_t i=1; i<=512; i++)
|
|
229213
|
+
sortpos[i] += sortpos[i-1];
|
|
229214
|
+
|
|
229215
|
+
// move jobs to their final destination
|
|
229216
|
+
SIMDjob inputOrdered[512];
|
|
229217
|
+
for(size_t i=0; i<batchPos; i++) {
|
|
229218
|
+
size_t len = input[i].end - input[i].cur;
|
|
229219
|
+
size_t pos = sortpos[511UL - len]++;
|
|
229220
|
+
inputOrdered[pos] = input[i];
|
|
229221
|
+
}
|
|
229222
|
+
// finally.. SIMD compress max 256KB of simdbuf into (max) 512KB of simdbuf (but presumably much less..)
|
|
229223
|
+
for(size_t done = duckdb_fsst_compressAVX512(symbolTable, codeBase, symbolBase, inputOrdered, output, batchPos-empty, unroll);
|
|
229224
|
+
done < batchPos; done++) output[done] = inputOrdered[done];
|
|
229225
|
+
} else {
|
|
229226
|
+
memcpy(output, input, batchPos*sizeof(SIMDjob));
|
|
229227
|
+
}
|
|
229228
|
+
|
|
229229
|
+
// finish encoding (unfinished strings in process, plus the few last strings not yet processed)
|
|
229230
|
+
for(size_t i=0; i<batchPos; i++) {
|
|
229231
|
+
SIMDjob job = output[i];
|
|
229232
|
+
if (job.cur < job.end) { // finish encoding this string with scalar code
|
|
229233
|
+
u8* cur = symbolBase + job.cur;
|
|
229234
|
+
u8* end = symbolBase + job.end;
|
|
229235
|
+
u8* out = codeBase + job.out;
|
|
229236
|
+
while (cur < end) {
|
|
229237
|
+
u64 word = fsst_unaligned_load(cur);
|
|
229238
|
+
size_t code = symbolTable.shortCodes[word & 0xFFFF];
|
|
229239
|
+
size_t pos = word & 0xFFFFFF;
|
|
229240
|
+
size_t idx = FSST_HASH(pos)&(symbolTable.hashTabSize-1);
|
|
229241
|
+
Symbol s = symbolTable.hashTab[idx];
|
|
229242
|
+
out[1] = (u8) word; // speculatively write out escaped byte
|
|
229243
|
+
word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
|
|
229244
|
+
if ((s.icl < FSST_ICL_FREE) && s.val.num == word) {
|
|
229245
|
+
*out++ = (u8) s.code(); cur += s.length();
|
|
229246
|
+
} else {
|
|
229247
|
+
// could be a 2-byte or 1-byte code, or miss
|
|
229248
|
+
// handle everything with predication
|
|
229249
|
+
*out = (u8) code;
|
|
229250
|
+
out += 1+((code&FSST_CODE_BASE)>>8);
|
|
229251
|
+
cur += (code>>FSST_LEN_BITS);
|
|
229252
|
+
}
|
|
229253
|
+
}
|
|
229254
|
+
job.out = out - codeBase;
|
|
229255
|
+
}
|
|
229256
|
+
// postprocess job info
|
|
229257
|
+
job.cur = 0;
|
|
229258
|
+
job.end = job.out - input[job.pos].out; // misuse .end field as compressed size
|
|
229259
|
+
job.out = input[job.pos].out; // reset offset to start of encoded string
|
|
229260
|
+
input[job.pos] = job;
|
|
229261
|
+
}
|
|
229262
|
+
|
|
229263
|
+
// copy out the result data
|
|
229264
|
+
for(size_t i=0; i<batchPos; i++) {
|
|
229265
|
+
size_t lineNr = jobLine[i]; // the sort must be order-preserving, as we concatenate results string in order
|
|
229266
|
+
size_t sz = input[i].end; // had stored compressed lengths here
|
|
229267
|
+
if (!strOut[lineNr]) strOut[lineNr] = dst; // first segment will be the strOut pointer
|
|
229268
|
+
lenOut[lineNr] += sz; // add segment (lenOut starts at 0 for this reason)
|
|
229269
|
+
memcpy(dst, codeBase+input[i].out, sz);
|
|
229270
|
+
dst += sz;
|
|
229271
|
+
}
|
|
229272
|
+
|
|
229273
|
+
// go for the next batch of 512 chunks
|
|
229274
|
+
inOff = outOff = batchPos = empty = 0;
|
|
229275
|
+
budget = (size_t) (lim - dst);
|
|
229276
|
+
}
|
|
229277
|
+
} while (curLine == prevLine && outOff <= (1<<19));
|
|
229278
|
+
}
|
|
229279
|
+
return curLine;
|
|
228518
229280
|
}
|
|
228519
229281
|
|
|
228520
229282
|
|
|
228521
229283
|
// optimized adaptive *scalar* compression method
|
|
228522
229284
|
static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_t lenIn[], u8* strIn[], size_t size, u8* out, size_t lenOut[], u8* strOut[], bool noSuffixOpt, bool avoidBranch) {
|
|
228523
|
-
|
|
228524
|
-
|
|
228525
|
-
|
|
228526
|
-
|
|
228527
|
-
|
|
228528
|
-
|
|
228529
|
-
|
|
228530
|
-
|
|
228531
|
-
|
|
228532
|
-
|
|
228533
|
-
|
|
228534
|
-
|
|
228535
|
-
|
|
228536
|
-
|
|
228537
|
-
|
|
228538
|
-
|
|
228539
|
-
|
|
228540
|
-
|
|
228541
|
-
|
|
228542
|
-
|
|
228543
|
-
|
|
228544
|
-
|
|
228545
|
-
|
|
228546
|
-
|
|
228547
|
-
|
|
228548
|
-
|
|
228549
|
-
|
|
228550
|
-
|
|
228551
|
-
|
|
228552
|
-
|
|
228553
|
-
|
|
228554
|
-
|
|
228555
|
-
|
|
228556
|
-
|
|
228557
|
-
|
|
228558
|
-
|
|
228559
|
-
|
|
228560
|
-
|
|
228561
|
-
|
|
228562
|
-
|
|
228563
|
-
|
|
228564
|
-
}
|
|
228565
|
-
}
|
|
228566
|
-
};
|
|
229285
|
+
u8 *cur = NULL, *end = NULL, *lim = out + size;
|
|
229286
|
+
size_t curLine, suffixLim = symbolTable.suffixLim;
|
|
229287
|
+
u8 byteLim = symbolTable.nSymbols + symbolTable.zeroTerminated - symbolTable.lenHisto[0];
|
|
229288
|
+
|
|
229289
|
+
u8 buf[512+7]; /* +7 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */
|
|
229290
|
+
memset(buf+511, 0, 8); /* and initialize the sentinal bytes */
|
|
229291
|
+
|
|
229292
|
+
// three variants are possible. dead code falls away since the bool arguments are constants
|
|
229293
|
+
auto compressVariant = [&](bool noSuffixOpt, bool avoidBranch) {
|
|
229294
|
+
while (cur < end) {
|
|
229295
|
+
u64 word = fsst_unaligned_load(cur);
|
|
229296
|
+
size_t code = symbolTable.shortCodes[word & 0xFFFF];
|
|
229297
|
+
if (noSuffixOpt && ((u8) code) < suffixLim) {
|
|
229298
|
+
// 2 byte code without having to worry about longer matches
|
|
229299
|
+
*out++ = (u8) code; cur += 2;
|
|
229300
|
+
} else {
|
|
229301
|
+
size_t pos = word & 0xFFFFFF;
|
|
229302
|
+
size_t idx = FSST_HASH(pos)&(symbolTable.hashTabSize-1);
|
|
229303
|
+
Symbol s = symbolTable.hashTab[idx];
|
|
229304
|
+
out[1] = (u8) word; // speculatively write out escaped byte
|
|
229305
|
+
word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
|
|
229306
|
+
if ((s.icl < FSST_ICL_FREE) && s.val.num == word) {
|
|
229307
|
+
*out++ = (u8) s.code(); cur += s.length();
|
|
229308
|
+
} else if (avoidBranch) {
|
|
229309
|
+
// could be a 2-byte or 1-byte code, or miss
|
|
229310
|
+
// handle everything with predication
|
|
229311
|
+
*out = (u8) code;
|
|
229312
|
+
out += 1+((code&FSST_CODE_BASE)>>8);
|
|
229313
|
+
cur += (code>>FSST_LEN_BITS);
|
|
229314
|
+
} else if ((u8) code < byteLim) {
|
|
229315
|
+
// 2 byte code after checking there is no longer pattern
|
|
229316
|
+
*out++ = (u8) code; cur += 2;
|
|
229317
|
+
} else {
|
|
229318
|
+
// 1 byte code or miss.
|
|
229319
|
+
*out = (u8) code;
|
|
229320
|
+
out += 1+((code&FSST_CODE_BASE)>>8); // predicated - tested with a branch, that was always worse
|
|
229321
|
+
cur++;
|
|
229322
|
+
}
|
|
229323
|
+
}
|
|
229324
|
+
}
|
|
229325
|
+
};
|
|
228567
229326
|
|
|
228568
|
-
|
|
228569
|
-
|
|
228570
|
-
|
|
228571
|
-
|
|
228572
|
-
|
|
228573
|
-
|
|
228574
|
-
|
|
228575
|
-
|
|
228576
|
-
|
|
228577
|
-
|
|
228578
|
-
|
|
228579
|
-
|
|
228580
|
-
|
|
228581
|
-
|
|
228582
|
-
|
|
228583
|
-
|
|
228584
|
-
|
|
228585
|
-
|
|
228586
|
-
|
|
228587
|
-
|
|
228588
|
-
|
|
228589
|
-
|
|
228590
|
-
|
|
228591
|
-
|
|
228592
|
-
|
|
228593
|
-
|
|
228594
|
-
|
|
228595
|
-
|
|
228596
|
-
|
|
228597
|
-
|
|
228598
|
-
}
|
|
228599
|
-
return curLine;
|
|
229327
|
+
for(curLine=0; curLine<nlines; curLine++) {
|
|
229328
|
+
size_t chunk, curOff = 0;
|
|
229329
|
+
strOut[curLine] = out;
|
|
229330
|
+
do {
|
|
229331
|
+
cur = strIn[curLine] + curOff;
|
|
229332
|
+
chunk = lenIn[curLine] - curOff;
|
|
229333
|
+
if (chunk > 511) {
|
|
229334
|
+
chunk = 511; // we need to compress in chunks of 511 in order to be byte-compatible with simd-compressed FSST
|
|
229335
|
+
}
|
|
229336
|
+
if ((2*chunk+7) > (size_t) (lim-out)) {
|
|
229337
|
+
return curLine; // out of memory
|
|
229338
|
+
}
|
|
229339
|
+
// copy the string to the 511-byte buffer
|
|
229340
|
+
memcpy(buf, cur, chunk);
|
|
229341
|
+
buf[chunk] = (u8) symbolTable.terminator;
|
|
229342
|
+
cur = buf;
|
|
229343
|
+
end = cur + chunk;
|
|
229344
|
+
|
|
229345
|
+
// based on symboltable stats, choose a variant that is nice to the branch predictor
|
|
229346
|
+
if (noSuffixOpt) {
|
|
229347
|
+
compressVariant(true,false);
|
|
229348
|
+
} else if (avoidBranch) {
|
|
229349
|
+
compressVariant(false,true);
|
|
229350
|
+
} else {
|
|
229351
|
+
compressVariant(false, false);
|
|
229352
|
+
}
|
|
229353
|
+
} while((curOff += chunk) < lenIn[curLine]);
|
|
229354
|
+
lenOut[curLine] = (size_t) (out - strOut[curLine]);
|
|
229355
|
+
}
|
|
229356
|
+
return curLine;
|
|
228600
229357
|
}
|
|
228601
229358
|
|
|
228602
229359
|
#define FSST_SAMPLELINE ((size_t) 512)
|
|
228603
229360
|
|
|
228604
229361
|
// quickly select a uniformly random set of lines such that we have between [FSST_SAMPLETARGET,FSST_SAMPLEMAXSZ) string bytes
|
|
228605
229362
|
vector<u8*> makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nlines) {
|
|
228606
|
-
|
|
228607
|
-
|
|
229363
|
+
size_t totSize = 0, *lenIn = *lenRef;
|
|
229364
|
+
vector<u8*> sample;
|
|
228608
229365
|
|
|
228609
|
-
|
|
228610
|
-
|
|
229366
|
+
for(size_t i=0; i<nlines; i++)
|
|
229367
|
+
totSize += lenIn[i];
|
|
228611
229368
|
|
|
228612
|
-
|
|
228613
|
-
|
|
228614
|
-
|
|
228615
|
-
|
|
228616
|
-
|
|
228617
|
-
|
|
228618
|
-
|
|
228619
|
-
|
|
228620
|
-
|
|
228621
|
-
|
|
228622
|
-
|
|
228623
|
-
|
|
228624
|
-
|
|
228625
|
-
|
|
228626
|
-
|
|
228627
|
-
|
|
228628
|
-
|
|
228629
|
-
|
|
228630
|
-
|
|
228631
|
-
|
|
228632
|
-
|
|
228633
|
-
|
|
228634
|
-
|
|
228635
|
-
|
|
228636
|
-
|
|
228637
|
-
|
|
228638
|
-
|
|
228639
|
-
|
|
229369
|
+
if (totSize < FSST_SAMPLETARGET) {
|
|
229370
|
+
for(size_t i=0; i<nlines; i++)
|
|
229371
|
+
sample.push_back(strIn[i]);
|
|
229372
|
+
} else {
|
|
229373
|
+
size_t sampleRnd = FSST_HASH(4637947);
|
|
229374
|
+
u8* sampleLim = sampleBuf + FSST_SAMPLETARGET;
|
|
229375
|
+
size_t *sampleLen = *lenRef = new size_t[nlines + FSST_SAMPLEMAXSZ/FSST_SAMPLELINE];
|
|
229376
|
+
|
|
229377
|
+
while(sampleBuf < sampleLim) {
|
|
229378
|
+
// choose a non-empty line
|
|
229379
|
+
sampleRnd = FSST_HASH(sampleRnd);
|
|
229380
|
+
size_t linenr = sampleRnd % nlines;
|
|
229381
|
+
while (lenIn[linenr] == 0)
|
|
229382
|
+
if (++linenr == nlines) linenr = 0;
|
|
229383
|
+
|
|
229384
|
+
// choose a chunk
|
|
229385
|
+
size_t chunks = 1 + ((lenIn[linenr]-1) / FSST_SAMPLELINE);
|
|
229386
|
+
sampleRnd = FSST_HASH(sampleRnd);
|
|
229387
|
+
size_t chunk = FSST_SAMPLELINE*(sampleRnd % chunks);
|
|
229388
|
+
|
|
229389
|
+
// add the chunk to the sample
|
|
229390
|
+
size_t len = min(lenIn[linenr]-chunk,FSST_SAMPLELINE);
|
|
229391
|
+
memcpy(sampleBuf, strIn[linenr]+chunk, len);
|
|
229392
|
+
sample.push_back(sampleBuf);
|
|
229393
|
+
sampleBuf += *sampleLen++ = len;
|
|
229394
|
+
}
|
|
229395
|
+
}
|
|
229396
|
+
return sample;
|
|
228640
229397
|
}
|
|
228641
229398
|
|
|
228642
229399
|
extern "C" duckdb_fsst_encoder_t* duckdb_fsst_create(size_t n, size_t lenIn[], u8 *strIn[], int zeroTerminated) {
|
|
228643
|
-
|
|
228644
|
-
|
|
228645
|
-
|
|
228646
|
-
|
|
228647
|
-
|
|
228648
|
-
|
|
228649
|
-
|
|
228650
|
-
|
|
229400
|
+
u8* sampleBuf = new u8[FSST_SAMPLEMAXSZ];
|
|
229401
|
+
size_t *sampleLen = lenIn;
|
|
229402
|
+
vector<u8*> sample = makeSample(sampleBuf, strIn, &sampleLen, n?n:1); // careful handling of input to get a right-size and representative sample
|
|
229403
|
+
Encoder *encoder = new Encoder();
|
|
229404
|
+
encoder->symbolTable = shared_ptr<SymbolTable>(buildSymbolTable(encoder->counters, sample, sampleLen, zeroTerminated));
|
|
229405
|
+
if (sampleLen != lenIn) delete[] sampleLen;
|
|
229406
|
+
delete[] sampleBuf;
|
|
229407
|
+
return (duckdb_fsst_encoder_t*) encoder;
|
|
228651
229408
|
}
|
|
228652
229409
|
|
|
228653
229410
|
/* create another encoder instance, necessary to do multi-threaded encoding using the same symbol table */
|
|
228654
229411
|
extern "C" duckdb_fsst_encoder_t* duckdb_fsst_duplicate(duckdb_fsst_encoder_t *encoder) {
|
|
228655
|
-
|
|
228656
|
-
|
|
228657
|
-
|
|
229412
|
+
Encoder *e = new Encoder();
|
|
229413
|
+
e->symbolTable = ((Encoder*)encoder)->symbolTable; // it is a shared_ptr
|
|
229414
|
+
return (duckdb_fsst_encoder_t*) e;
|
|
228658
229415
|
}
|
|
228659
229416
|
|
|
228660
|
-
// export a symbol table in compact format.
|
|
229417
|
+
// export a symbol table in compact format.
|
|
228661
229418
|
extern "C" u32 duckdb_fsst_export(duckdb_fsst_encoder_t *encoder, u8 *buf) {
|
|
228662
|
-
|
|
228663
|
-
|
|
228664
|
-
|
|
228665
|
-
|
|
228666
|
-
|
|
228667
|
-
|
|
228668
|
-
|
|
228669
|
-
|
|
228670
|
-
|
|
228671
|
-
|
|
228672
|
-
|
|
228673
|
-
|
|
228674
|
-
|
|
228675
|
-
|
|
228676
|
-
|
|
228677
|
-
|
|
228678
|
-
|
|
228679
|
-
|
|
228680
|
-
|
|
228681
|
-
|
|
229419
|
+
Encoder *e = (Encoder*) encoder;
|
|
229420
|
+
// In ->version there is a version number, but we also hide suffixLim/terminator/nSymbols there.
|
|
229421
|
+
// This is sufficient in principle to *reconstruct* a duckdb_fsst_encoder_t from a duckdb_fsst_decoder_t
|
|
229422
|
+
// (such functionality could be useful to append compressed data to an existing block).
|
|
229423
|
+
//
|
|
229424
|
+
// However, the hash function in the encoder hash table is endian-sensitive, and given its
|
|
229425
|
+
// 'lossy perfect' hashing scheme is *unable* to contain other-endian-produced symbol tables.
|
|
229426
|
+
// Doing an endian-conversion during hashing will be slow and self-defeating.
|
|
229427
|
+
//
|
|
229428
|
+
// Overall, we could support reconstructing an encoder for incremental compression, but
|
|
229429
|
+
// should enforce equal-endianness. Bit of a bummer. Not going there now.
|
|
229430
|
+
//
|
|
229431
|
+
// The version field is now there just for future-proofness, but not used yet
|
|
229432
|
+
|
|
229433
|
+
// version allows keeping track of fsst versions, track endianness, and encoder reconstruction
|
|
229434
|
+
u64 version = (FSST_VERSION << 32) | // version is 24 bits, most significant byte is 0
|
|
229435
|
+
(((u64) e->symbolTable->suffixLim) << 24) |
|
|
229436
|
+
(((u64) e->symbolTable->terminator) << 16) |
|
|
229437
|
+
(((u64) e->symbolTable->nSymbols) << 8) |
|
|
229438
|
+
FSST_ENDIAN_MARKER; // least significant byte is nonzero
|
|
228682
229439
|
|
|
228683
|
-
|
|
228684
|
-
|
|
228685
|
-
|
|
228686
|
-
|
|
228687
|
-
|
|
228688
|
-
|
|
229440
|
+
/* do not assume unaligned reads here */
|
|
229441
|
+
memcpy(buf, &version, 8);
|
|
229442
|
+
buf[8] = e->symbolTable->zeroTerminated;
|
|
229443
|
+
for(u32 i=0; i<8; i++)
|
|
229444
|
+
buf[9+i] = (u8) e->symbolTable->lenHisto[i];
|
|
229445
|
+
u32 pos = 17;
|
|
228689
229446
|
|
|
228690
|
-
|
|
228691
|
-
|
|
228692
|
-
|
|
228693
|
-
|
|
229447
|
+
// emit only the used bytes of the symbols
|
|
229448
|
+
for(u32 i = e->symbolTable->zeroTerminated; i < e->symbolTable->nSymbols; i++)
|
|
229449
|
+
for(u32 j = 0; j < e->symbolTable->symbols[i].length(); j++)
|
|
229450
|
+
buf[pos++] = e->symbolTable->symbols[i].val.str[j]; // serialize used symbol bytes
|
|
228694
229451
|
|
|
228695
|
-
|
|
229452
|
+
return pos; // length of what was serialized
|
|
228696
229453
|
}
|
|
228697
229454
|
|
|
228698
229455
|
#define FSST_CORRUPT 32774747032022883 /* 7-byte number in little endian containing "corrupt" */
|
|
228699
229456
|
|
|
228700
229457
|
extern "C" u32 duckdb_fsst_import(duckdb_fsst_decoder_t *decoder, u8 *buf) {
|
|
228701
|
-
|
|
228702
|
-
|
|
228703
|
-
|
|
228704
|
-
|
|
228705
|
-
// version field (first 8 bytes) is now there just for future-proofness, unused still (skipped)
|
|
228706
|
-
memcpy(&version, buf, 8);
|
|
228707
|
-
if ((version>>32) != FSST_VERSION) return 0;
|
|
228708
|
-
decoder->zeroTerminated = buf[8]&1;
|
|
228709
|
-
memcpy(lenHisto, buf+9, 8);
|
|
228710
|
-
|
|
228711
|
-
// in case of zero-terminated, first symbol is "" (zero always, may be overwritten)
|
|
228712
|
-
decoder->len[0] = 1;
|
|
228713
|
-
decoder->symbol[0] = 0;
|
|
228714
|
-
|
|
228715
|
-
// we use lenHisto[0] as 1-byte symbol run length (at the end)
|
|
228716
|
-
code = decoder->zeroTerminated;
|
|
228717
|
-
if (decoder->zeroTerminated) lenHisto[0]--; // if zeroTerminated, then symbol "" aka 1-byte code=0, is not stored at the end
|
|
228718
|
-
|
|
228719
|
-
// now get all symbols from the buffer
|
|
228720
|
-
for(u32 l=1; l<=8; l++) { /* l = 1,2,3,4,5,6,7,8 */
|
|
228721
|
-
for(u32 i=0; i < lenHisto[(l&7) /* 1,2,3,4,5,6,7,0 */]; i++, code++) {
|
|
228722
|
-
decoder->len[code] = (l&7)+1; /* len = 2,3,4,5,6,7,8,1 */
|
|
228723
|
-
decoder->symbol[code] = 0;
|
|
228724
|
-
for(u32 j=0; j<decoder->len[code]; j++)
|
|
228725
|
-
((u8*) &decoder->symbol[code])[j] = buf[pos++]; // note this enforces 'little endian' symbols
|
|
228726
|
-
}
|
|
228727
|
-
}
|
|
228728
|
-
if (decoder->zeroTerminated) lenHisto[0]++;
|
|
229458
|
+
u64 version = 0;
|
|
229459
|
+
u32 code, pos = 17;
|
|
229460
|
+
u8 lenHisto[8];
|
|
228729
229461
|
|
|
228730
|
-
|
|
228731
|
-
|
|
228732
|
-
|
|
228733
|
-
|
|
228734
|
-
|
|
228735
|
-
|
|
229462
|
+
// version field (first 8 bytes) is now there just for future-proofness, unused still (skipped)
|
|
229463
|
+
memcpy(&version, buf, 8);
|
|
229464
|
+
if ((version>>32) != FSST_VERSION) return 0;
|
|
229465
|
+
decoder->zeroTerminated = buf[8]&1;
|
|
229466
|
+
memcpy(lenHisto, buf+9, 8);
|
|
229467
|
+
|
|
229468
|
+
// in case of zero-terminated, first symbol is "" (zero always, may be overwritten)
|
|
229469
|
+
decoder->len[0] = 1;
|
|
229470
|
+
decoder->symbol[0] = 0;
|
|
229471
|
+
|
|
229472
|
+
// we use lenHisto[0] as 1-byte symbol run length (at the end)
|
|
229473
|
+
code = decoder->zeroTerminated;
|
|
229474
|
+
if (decoder->zeroTerminated) lenHisto[0]--; // if zeroTerminated, then symbol "" aka 1-byte code=0, is not stored at the end
|
|
229475
|
+
|
|
229476
|
+
// now get all symbols from the buffer
|
|
229477
|
+
for(u32 l=1; l<=8; l++) { /* l = 1,2,3,4,5,6,7,8 */
|
|
229478
|
+
for(u32 i=0; i < lenHisto[(l&7) /* 1,2,3,4,5,6,7,0 */]; i++, code++) {
|
|
229479
|
+
decoder->len[code] = (l&7)+1; /* len = 2,3,4,5,6,7,8,1 */
|
|
229480
|
+
decoder->symbol[code] = 0;
|
|
229481
|
+
for(u32 j=0; j<decoder->len[code]; j++)
|
|
229482
|
+
((u8*) &decoder->symbol[code])[j] = buf[pos++]; // note this enforces 'little endian' symbols
|
|
229483
|
+
}
|
|
229484
|
+
}
|
|
229485
|
+
if (decoder->zeroTerminated) lenHisto[0]++;
|
|
229486
|
+
|
|
229487
|
+
// fill unused symbols with text "corrupt". Gives a chance to detect corrupted code sequences (if there are unused symbols).
|
|
229488
|
+
while(code<255) {
|
|
229489
|
+
decoder->symbol[code] = FSST_CORRUPT;
|
|
229490
|
+
decoder->len[code++] = 8;
|
|
229491
|
+
}
|
|
229492
|
+
return pos;
|
|
228736
229493
|
}
|
|
228737
229494
|
|
|
228738
229495
|
// runtime check for simd
|
|
228739
229496
|
inline size_t _compressImpl(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) {
|
|
228740
229497
|
#ifndef NONOPT_FSST
|
|
228741
|
-
|
|
228742
|
-
|
|
229498
|
+
if (simd && duckdb_fsst_hasAVX512())
|
|
229499
|
+
return compressSIMD(*e->symbolTable, e->simdbuf, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
|
|
228743
229500
|
#endif
|
|
228744
|
-
|
|
228745
|
-
|
|
229501
|
+
(void) simd;
|
|
229502
|
+
return compressBulk(*e->symbolTable, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch);
|
|
228746
229503
|
}
|
|
228747
229504
|
size_t compressImpl(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) {
|
|
228748
|
-
|
|
229505
|
+
return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
|
|
228749
229506
|
}
|
|
228750
229507
|
|
|
228751
|
-
// adaptive choosing of scalar compression method based on symbol length histogram
|
|
229508
|
+
// adaptive choosing of scalar compression method based on symbol length histogram
|
|
228752
229509
|
inline size_t _compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) {
|
|
228753
|
-
|
|
228754
|
-
|
|
228755
|
-
|
|
228756
|
-
|
|
228757
|
-
|
|
228758
|
-
|
|
228759
|
-
|
|
228760
|
-
|
|
228761
|
-
|
|
229510
|
+
bool avoidBranch = false, noSuffixOpt = false;
|
|
229511
|
+
if (100*e->symbolTable->lenHisto[1] > 65*e->symbolTable->nSymbols && 100*e->symbolTable->suffixLim > 95*e->symbolTable->lenHisto[1]) {
|
|
229512
|
+
noSuffixOpt = true;
|
|
229513
|
+
} else if ((e->symbolTable->lenHisto[0] > 24 && e->symbolTable->lenHisto[0] < 92) &&
|
|
229514
|
+
(e->symbolTable->lenHisto[0] < 43 || e->symbolTable->lenHisto[6] + e->symbolTable->lenHisto[7] < 29) &&
|
|
229515
|
+
(e->symbolTable->lenHisto[0] < 72 || e->symbolTable->lenHisto[2] < 72)) {
|
|
229516
|
+
avoidBranch = true;
|
|
229517
|
+
}
|
|
229518
|
+
return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
|
|
228762
229519
|
}
|
|
228763
229520
|
size_t compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) {
|
|
228764
|
-
|
|
229521
|
+
return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
|
|
228765
229522
|
}
|
|
228766
229523
|
|
|
228767
229524
|
// the main compression function (everything automatic)
|
|
228768
229525
|
extern "C" size_t duckdb_fsst_compress(duckdb_fsst_encoder_t *encoder, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[]) {
|
|
228769
|
-
|
|
228770
|
-
|
|
228771
|
-
|
|
228772
|
-
|
|
229526
|
+
// to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB)
|
|
229527
|
+
size_t totLen = accumulate(lenIn, lenIn+nlines, 0);
|
|
229528
|
+
int simd = totLen > nlines*12 && (nlines > 64 || totLen > (size_t) 1<<15);
|
|
229529
|
+
return _compressAuto((Encoder*) encoder, nlines, lenIn, strIn, size, output, lenOut, strOut, 3*simd);
|
|
228773
229530
|
}
|
|
228774
229531
|
|
|
228775
229532
|
/* deallocate encoder */
|
|
228776
229533
|
extern "C" void duckdb_fsst_destroy(duckdb_fsst_encoder_t* encoder) {
|
|
228777
|
-
|
|
228778
|
-
|
|
229534
|
+
Encoder *e = (Encoder*) encoder;
|
|
229535
|
+
delete e;
|
|
228779
229536
|
}
|
|
228780
229537
|
|
|
228781
229538
|
/* very lazy implementation relying on export and import */
|
|
228782
229539
|
extern "C" duckdb_fsst_decoder_t duckdb_fsst_decoder(duckdb_fsst_encoder_t *encoder) {
|
|
228783
|
-
|
|
228784
|
-
|
|
228785
|
-
|
|
228786
|
-
|
|
228787
|
-
|
|
228788
|
-
|
|
229540
|
+
u8 buf[sizeof(duckdb_fsst_decoder_t)];
|
|
229541
|
+
u32 cnt1 = duckdb_fsst_export(encoder, buf);
|
|
229542
|
+
duckdb_fsst_decoder_t decoder;
|
|
229543
|
+
u32 cnt2 = duckdb_fsst_import(&decoder, buf);
|
|
229544
|
+
assert(cnt1 == cnt2); (void) cnt1; (void) cnt2;
|
|
229545
|
+
return decoder;
|
|
228789
229546
|
}
|
|
228790
229547
|
|
|
228791
|
-
|
|
228792
229548
|
// LICENSE_CHANGE_END
|
|
228793
229549
|
|
|
228794
229550
|
|