duckdb 0.5.2-dev809.0 → 0.5.2-dev815.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +118 -58
- package/src/duckdb.hpp +673 -665
- package/src/parquet-amalgamation.cpp +31208 -31208
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -201589,6 +201589,7 @@ void DataTable::AppendLock(TableAppendState &state) {
|
|
|
201589
201589
|
throw TransactionException("Transaction conflict: adding entries to a table that has been altered!");
|
|
201590
201590
|
}
|
|
201591
201591
|
state.row_start = row_groups->GetTotalRows();
|
|
201592
|
+
state.current_row = state.row_start;
|
|
201592
201593
|
}
|
|
201593
201594
|
|
|
201594
201595
|
void DataTable::InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count) {
|
|
@@ -201652,9 +201653,6 @@ void DataTable::MergeStorage(RowGroupCollection &data, TableIndexList &indexes,
|
|
|
201652
201653
|
row_groups->MergeStorage(data);
|
|
201653
201654
|
stats.MergeStats(other_stats);
|
|
201654
201655
|
row_groups->Verify();
|
|
201655
|
-
if (!indexes.Empty()) {
|
|
201656
|
-
throw InternalException("FIXME: merge indexes");
|
|
201657
|
-
}
|
|
201658
201656
|
}
|
|
201659
201657
|
|
|
201660
201658
|
void DataTable::WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count) {
|
|
@@ -202149,7 +202147,7 @@ namespace duckdb {
|
|
|
202149
202147
|
// Local Table Storage
|
|
202150
202148
|
//===--------------------------------------------------------------------===//
|
|
202151
202149
|
LocalTableStorage::LocalTableStorage(DataTable &table)
|
|
202152
|
-
: table(table), allocator(Allocator::Get(table.db)), deleted_rows(0) {
|
|
202150
|
+
: table(&table), allocator(Allocator::Get(table.db)), deleted_rows(0) {
|
|
202153
202151
|
auto types = table.GetTypes();
|
|
202154
202152
|
row_groups = make_shared<RowGroupCollection>(table.info, TableIOManager::Get(table).GetBlockManagerForRowData(),
|
|
202155
202153
|
types, MAX_ROW_ID, 0);
|
|
@@ -202174,7 +202172,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
|
|
|
202174
202172
|
LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, idx_t changed_idx,
|
|
202175
202173
|
const LogicalType &target_type, const vector<column_t> &bound_columns,
|
|
202176
202174
|
Expression &cast_expr)
|
|
202177
|
-
: table(new_dt), allocator(Allocator::Get(table
|
|
202175
|
+
: table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
|
|
202178
202176
|
partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
|
|
202179
202177
|
if (partial_manager) {
|
|
202180
202178
|
partial_manager->FlushPartialBlocks();
|
|
@@ -202187,7 +202185,7 @@ LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &paren
|
|
|
202187
202185
|
}
|
|
202188
202186
|
|
|
202189
202187
|
LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, idx_t drop_idx)
|
|
202190
|
-
: table(new_dt), allocator(Allocator::Get(table
|
|
202188
|
+
: table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
|
|
202191
202189
|
partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
|
|
202192
202190
|
if (partial_manager) {
|
|
202193
202191
|
partial_manager->FlushPartialBlocks();
|
|
@@ -202200,9 +202198,9 @@ LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &paren
|
|
|
202200
202198
|
|
|
202201
202199
|
LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, ColumnDefinition &new_column,
|
|
202202
202200
|
Expression *default_value)
|
|
202203
|
-
: table(new_dt), allocator(Allocator::Get(table
|
|
202201
|
+
: table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
|
|
202204
202202
|
partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
|
|
202205
|
-
idx_t new_column_idx = parent.table
|
|
202203
|
+
idx_t new_column_idx = parent.table->column_definitions.size();
|
|
202206
202204
|
stats.InitializeAddColumn(parent.stats, new_column.GetType());
|
|
202207
202205
|
row_groups = parent.row_groups->AddColumn(new_column, default_value, stats.GetStats(new_column_idx));
|
|
202208
202206
|
parent.row_groups.reset();
|
|
@@ -202312,11 +202310,7 @@ void LocalStorage::Append(LocalAppendState &state, DataChunk &chunk) {
|
|
|
202312
202310
|
void LocalTableStorage::CheckFlushToDisk() {
|
|
202313
202311
|
// we finished writing a complete row group
|
|
202314
202312
|
// check if we should pre-emptively write it to disk
|
|
202315
|
-
if (table
|
|
202316
|
-
return;
|
|
202317
|
-
}
|
|
202318
|
-
if (!table.info->indexes.Empty()) {
|
|
202319
|
-
// we have indexes - we cannot merge
|
|
202313
|
+
if (table->info->IsTemporary() || StorageManager::GetStorageManager(table->db).InMemory()) {
|
|
202320
202314
|
return;
|
|
202321
202315
|
}
|
|
202322
202316
|
if (deleted_rows != 0) {
|
|
@@ -202326,7 +202320,7 @@ void LocalTableStorage::CheckFlushToDisk() {
|
|
|
202326
202320
|
// we should! write the second-to-last row group to disk
|
|
202327
202321
|
// allocate the partial block-manager if none is allocated yet
|
|
202328
202322
|
if (!partial_manager) {
|
|
202329
|
-
auto &block_manager = table
|
|
202323
|
+
auto &block_manager = table->info->table_io_manager->GetBlockManagerForRowData();
|
|
202330
202324
|
partial_manager = make_unique<PartialBlockManager>(block_manager);
|
|
202331
202325
|
}
|
|
202332
202326
|
// flush second-to-last row group
|
|
@@ -202338,12 +202332,11 @@ void LocalTableStorage::FlushToDisk(RowGroup *row_group) {
|
|
|
202338
202332
|
// flush the specified row group
|
|
202339
202333
|
D_ASSERT(row_group);
|
|
202340
202334
|
D_ASSERT(deleted_rows == 0);
|
|
202341
|
-
D_ASSERT(table.info->indexes.Empty());
|
|
202342
202335
|
D_ASSERT(partial_manager);
|
|
202343
202336
|
//! The set of column compression types (if any)
|
|
202344
202337
|
vector<CompressionType> compression_types;
|
|
202345
202338
|
D_ASSERT(compression_types.empty());
|
|
202346
|
-
for (auto &column : table
|
|
202339
|
+
for (auto &column : table->column_definitions) {
|
|
202347
202340
|
compression_types.push_back(column.CompressionType());
|
|
202348
202341
|
}
|
|
202349
202342
|
auto row_group_pointer = row_group->WriteToDisk(*partial_manager, compression_types);
|
|
@@ -202410,24 +202403,23 @@ void LocalStorage::Update(DataTable *table, Vector &row_ids, const vector<column
|
|
|
202410
202403
|
}
|
|
202411
202404
|
|
|
202412
202405
|
template <class T>
|
|
202413
|
-
bool
|
|
202414
|
-
|
|
202415
|
-
|
|
202416
|
-
for (idx_t i = 0; i <
|
|
202417
|
-
|
|
202406
|
+
bool LocalTableStorage::ScanTableStorage(Transaction &transaction, const vector<column_t> &column_ids, T &&fun) {
|
|
202407
|
+
auto all_types = table->GetTypes();
|
|
202408
|
+
vector<LogicalType> scan_types;
|
|
202409
|
+
for (idx_t i = 0; i < column_ids.size(); i++) {
|
|
202410
|
+
scan_types.push_back(all_types[column_ids[i]]);
|
|
202418
202411
|
}
|
|
202419
|
-
|
|
202420
202412
|
DataChunk chunk;
|
|
202421
|
-
chunk.Initialize(
|
|
202413
|
+
chunk.Initialize(allocator, scan_types);
|
|
202422
202414
|
|
|
202423
202415
|
// initialize the scan
|
|
202424
202416
|
TableScanState state;
|
|
202425
202417
|
state.Initialize(column_ids, nullptr);
|
|
202426
|
-
|
|
202418
|
+
InitializeScan(state.local_state, nullptr);
|
|
202427
202419
|
|
|
202428
202420
|
while (true) {
|
|
202429
202421
|
chunk.Reset();
|
|
202430
|
-
|
|
202422
|
+
state.local_state.Scan(transaction, chunk);
|
|
202431
202423
|
if (chunk.size() == 0) {
|
|
202432
202424
|
return true;
|
|
202433
202425
|
}
|
|
@@ -202437,6 +202429,78 @@ bool LocalStorage::ScanTableStorage(DataTable &table, LocalTableStorage &storage
|
|
|
202437
202429
|
}
|
|
202438
202430
|
}
|
|
202439
202431
|
|
|
202432
|
+
template <class T>
|
|
202433
|
+
bool LocalTableStorage::ScanTableStorage(Transaction &transaction, T &&fun) {
|
|
202434
|
+
vector<column_t> column_ids;
|
|
202435
|
+
column_ids.reserve(table->column_definitions.size());
|
|
202436
|
+
for (idx_t i = 0; i < table->column_definitions.size(); i++) {
|
|
202437
|
+
column_ids.push_back(i);
|
|
202438
|
+
}
|
|
202439
|
+
return ScanTableStorage(transaction, column_ids, fun);
|
|
202440
|
+
}
|
|
202441
|
+
|
|
202442
|
+
void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendState &append_state, idx_t append_count,
|
|
202443
|
+
bool append_to_table) {
|
|
202444
|
+
bool constraint_violated = false;
|
|
202445
|
+
if (append_to_table) {
|
|
202446
|
+
table->InitializeAppend(transaction, append_state, append_count);
|
|
202447
|
+
}
|
|
202448
|
+
if (append_to_table) {
|
|
202449
|
+
// appending: need to scan entire
|
|
202450
|
+
ScanTableStorage(transaction, [&](DataChunk &chunk) -> bool {
|
|
202451
|
+
// append this chunk to the indexes of the table
|
|
202452
|
+
if (!table->AppendToIndexes(chunk, append_state.current_row)) {
|
|
202453
|
+
constraint_violated = true;
|
|
202454
|
+
return false;
|
|
202455
|
+
}
|
|
202456
|
+
// append to base table
|
|
202457
|
+
table->Append(chunk, append_state);
|
|
202458
|
+
return true;
|
|
202459
|
+
});
|
|
202460
|
+
} else {
|
|
202461
|
+
// only need to scan for index append
|
|
202462
|
+
// figure out which columns we need to scan for the set of indexes
|
|
202463
|
+
auto columns = table->info->indexes.GetRequiredColumns();
|
|
202464
|
+
// create an empty mock chunk that contains all the correct types for the table
|
|
202465
|
+
DataChunk mock_chunk;
|
|
202466
|
+
mock_chunk.InitializeEmpty(table->GetTypes());
|
|
202467
|
+
ScanTableStorage(transaction, columns, [&](DataChunk &chunk) -> bool {
|
|
202468
|
+
// construct the mock chunk by referencing the required columns
|
|
202469
|
+
for (idx_t i = 0; i < columns.size(); i++) {
|
|
202470
|
+
mock_chunk.data[columns[i]].Reference(chunk.data[i]);
|
|
202471
|
+
}
|
|
202472
|
+
mock_chunk.SetCardinality(chunk);
|
|
202473
|
+
// append this chunk to the indexes of the table
|
|
202474
|
+
if (!table->AppendToIndexes(mock_chunk, append_state.current_row)) {
|
|
202475
|
+
constraint_violated = true;
|
|
202476
|
+
return false;
|
|
202477
|
+
}
|
|
202478
|
+
append_state.current_row += chunk.size();
|
|
202479
|
+
return true;
|
|
202480
|
+
});
|
|
202481
|
+
}
|
|
202482
|
+
if (constraint_violated) {
|
|
202483
|
+
// need to revert the append
|
|
202484
|
+
row_t current_row = append_state.row_start;
|
|
202485
|
+
// remove the data from the indexes, if there are any indexes
|
|
202486
|
+
ScanTableStorage(transaction, [&](DataChunk &chunk) -> bool {
|
|
202487
|
+
// append this chunk to the indexes of the table
|
|
202488
|
+
table->RemoveFromIndexes(append_state, chunk, current_row);
|
|
202489
|
+
|
|
202490
|
+
current_row += chunk.size();
|
|
202491
|
+
if (current_row >= append_state.current_row) {
|
|
202492
|
+
// finished deleting all rows from the index: abort now
|
|
202493
|
+
return false;
|
|
202494
|
+
}
|
|
202495
|
+
return true;
|
|
202496
|
+
});
|
|
202497
|
+
if (append_to_table) {
|
|
202498
|
+
table->RevertAppendInternal(append_state.row_start, append_count);
|
|
202499
|
+
}
|
|
202500
|
+
throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
|
|
202501
|
+
}
|
|
202502
|
+
}
|
|
202503
|
+
|
|
202440
202504
|
void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
|
|
202441
202505
|
// bulk append threshold: a full row group
|
|
202442
202506
|
static constexpr const idx_t MERGE_THRESHOLD = RowGroup::ROW_GROUP_SIZE;
|
|
@@ -202452,10 +202516,17 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
|
|
|
202452
202516
|
TableAppendState append_state;
|
|
202453
202517
|
table.AppendLock(append_state);
|
|
202454
202518
|
if ((append_state.row_start == 0 || storage.row_groups->GetTotalRows() >= MERGE_THRESHOLD) &&
|
|
202455
|
-
storage.
|
|
202519
|
+
storage.deleted_rows == 0) {
|
|
202456
202520
|
// table is currently empty OR we are bulk appending: move over the storage directly
|
|
202457
202521
|
// first flush any out-standing storage nodes
|
|
202458
202522
|
storage.FlushToDisk();
|
|
202523
|
+
// now append to the indexes (if there are any)
|
|
202524
|
+
// FIXME: we should be able to merge the transaction-local index directly into the main table index
|
|
202525
|
+
// as long we just rewrite some row-ids
|
|
202526
|
+
if (!table.info->indexes.Empty()) {
|
|
202527
|
+
storage.AppendToIndexes(transaction, append_state, append_count, false);
|
|
202528
|
+
}
|
|
202529
|
+
// finally move over the row groups
|
|
202459
202530
|
table.MergeStorage(*storage.row_groups, storage.indexes, storage.stats);
|
|
202460
202531
|
} else {
|
|
202461
202532
|
if (storage.partial_manager || !storage.written_blocks.empty()) {
|
|
@@ -202463,36 +202534,8 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
|
|
|
202463
202534
|
// revert the data we have already written
|
|
202464
202535
|
storage.Rollback();
|
|
202465
202536
|
}
|
|
202466
|
-
|
|
202467
|
-
|
|
202468
|
-
ScanTableStorage(table, storage, [&](DataChunk &chunk) -> bool {
|
|
202469
|
-
// append this chunk to the indexes of the table
|
|
202470
|
-
if (!table.AppendToIndexes(chunk, append_state.current_row)) {
|
|
202471
|
-
constraint_violated = true;
|
|
202472
|
-
return false;
|
|
202473
|
-
}
|
|
202474
|
-
// append to base table
|
|
202475
|
-
table.Append(chunk, append_state);
|
|
202476
|
-
return true;
|
|
202477
|
-
});
|
|
202478
|
-
if (constraint_violated) {
|
|
202479
|
-
// need to revert the append
|
|
202480
|
-
row_t current_row = append_state.row_start;
|
|
202481
|
-
// remove the data from the indexes, if there are any indexes
|
|
202482
|
-
ScanTableStorage(table, storage, [&](DataChunk &chunk) -> bool {
|
|
202483
|
-
// append this chunk to the indexes of the table
|
|
202484
|
-
table.RemoveFromIndexes(append_state, chunk, current_row);
|
|
202485
|
-
|
|
202486
|
-
current_row += chunk.size();
|
|
202487
|
-
if (current_row >= append_state.current_row) {
|
|
202488
|
-
// finished deleting all rows from the index: abort now
|
|
202489
|
-
return false;
|
|
202490
|
-
}
|
|
202491
|
-
return true;
|
|
202492
|
-
});
|
|
202493
|
-
table.RevertAppendInternal(append_state.row_start, append_count);
|
|
202494
|
-
throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
|
|
202495
|
-
}
|
|
202537
|
+
// append to the indexes and append to the base table
|
|
202538
|
+
storage.AppendToIndexes(transaction, append_state, append_count, true);
|
|
202496
202539
|
}
|
|
202497
202540
|
transaction.PushAppend(&table, append_state.row_start, append_count);
|
|
202498
202541
|
}
|
|
@@ -202523,7 +202566,7 @@ void LocalTableStorage::Rollback() {
|
|
|
202523
202566
|
partial_manager->Clear();
|
|
202524
202567
|
partial_manager.reset();
|
|
202525
202568
|
}
|
|
202526
|
-
auto &block_manager = table
|
|
202569
|
+
auto &block_manager = table->info->table_io_manager->GetBlockManagerForRowData();
|
|
202527
202570
|
for (auto block_id : written_blocks) {
|
|
202528
202571
|
block_manager.MarkBlockAsModified(block_id);
|
|
202529
202572
|
}
|
|
@@ -202545,6 +202588,7 @@ void LocalStorage::MoveStorage(DataTable *old_dt, DataTable *new_dt) {
|
|
|
202545
202588
|
}
|
|
202546
202589
|
// take over the storage from the old entry
|
|
202547
202590
|
auto new_storage = move(entry->second);
|
|
202591
|
+
new_storage->table = new_dt;
|
|
202548
202592
|
table_storage.erase(entry);
|
|
202549
202593
|
table_storage[new_dt] = move(new_storage);
|
|
202550
202594
|
}
|
|
@@ -202596,7 +202640,7 @@ void LocalStorage::FetchChunk(DataTable *table, Vector &row_ids, idx_t count, Da
|
|
|
202596
202640
|
|
|
202597
202641
|
ColumnFetchState fetch_state;
|
|
202598
202642
|
vector<column_t> col_ids;
|
|
202599
|
-
vector<LogicalType> types = storage->table
|
|
202643
|
+
vector<LogicalType> types = storage->table->GetTypes();
|
|
202600
202644
|
for (idx_t i = 0; i < types.size(); i++) {
|
|
202601
202645
|
col_ids.push_back(i);
|
|
202602
202646
|
}
|
|
@@ -210911,6 +210955,22 @@ void TableIndexList::VerifyForeignKey(const vector<idx_t> &fk_keys, bool is_appe
|
|
|
210911
210955
|
}
|
|
210912
210956
|
}
|
|
210913
210957
|
|
|
210958
|
+
vector<column_t> TableIndexList::GetRequiredColumns() {
|
|
210959
|
+
lock_guard<mutex> lock(indexes_lock);
|
|
210960
|
+
set<column_t> unique_indexes;
|
|
210961
|
+
for (auto &index : indexes) {
|
|
210962
|
+
for (auto col_index : index->column_ids) {
|
|
210963
|
+
unique_indexes.insert(col_index);
|
|
210964
|
+
}
|
|
210965
|
+
}
|
|
210966
|
+
vector<column_t> result;
|
|
210967
|
+
result.reserve(unique_indexes.size());
|
|
210968
|
+
for (auto column_index : unique_indexes) {
|
|
210969
|
+
result.emplace_back(column_index);
|
|
210970
|
+
}
|
|
210971
|
+
return result;
|
|
210972
|
+
}
|
|
210973
|
+
|
|
210914
210974
|
vector<BlockPointer> TableIndexList::SerializeIndexes(duckdb::MetaBlockWriter &writer) {
|
|
210915
210975
|
vector<BlockPointer> blocks_info;
|
|
210916
210976
|
for (auto &index : indexes) {
|