duckdb 0.5.2-dev809.0 → 0.5.2-dev815.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.2-dev809.0",
4
+ "version": "0.5.2-dev815.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -201589,6 +201589,7 @@ void DataTable::AppendLock(TableAppendState &state) {
201589
201589
  throw TransactionException("Transaction conflict: adding entries to a table that has been altered!");
201590
201590
  }
201591
201591
  state.row_start = row_groups->GetTotalRows();
201592
+ state.current_row = state.row_start;
201592
201593
  }
201593
201594
 
201594
201595
  void DataTable::InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count) {
@@ -201652,9 +201653,6 @@ void DataTable::MergeStorage(RowGroupCollection &data, TableIndexList &indexes,
201652
201653
  row_groups->MergeStorage(data);
201653
201654
  stats.MergeStats(other_stats);
201654
201655
  row_groups->Verify();
201655
- if (!indexes.Empty()) {
201656
- throw InternalException("FIXME: merge indexes");
201657
- }
201658
201656
  }
201659
201657
 
201660
201658
  void DataTable::WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count) {
@@ -202149,7 +202147,7 @@ namespace duckdb {
202149
202147
  // Local Table Storage
202150
202148
  //===--------------------------------------------------------------------===//
202151
202149
  LocalTableStorage::LocalTableStorage(DataTable &table)
202152
- : table(table), allocator(Allocator::Get(table.db)), deleted_rows(0) {
202150
+ : table(&table), allocator(Allocator::Get(table.db)), deleted_rows(0) {
202153
202151
  auto types = table.GetTypes();
202154
202152
  row_groups = make_shared<RowGroupCollection>(table.info, TableIOManager::Get(table).GetBlockManagerForRowData(),
202155
202153
  types, MAX_ROW_ID, 0);
@@ -202174,7 +202172,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
202174
202172
  LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, idx_t changed_idx,
202175
202173
  const LogicalType &target_type, const vector<column_t> &bound_columns,
202176
202174
  Expression &cast_expr)
202177
- : table(new_dt), allocator(Allocator::Get(table.db)), deleted_rows(parent.deleted_rows),
202175
+ : table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
202178
202176
  partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
202179
202177
  if (partial_manager) {
202180
202178
  partial_manager->FlushPartialBlocks();
@@ -202187,7 +202185,7 @@ LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &paren
202187
202185
  }
202188
202186
 
202189
202187
  LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, idx_t drop_idx)
202190
- : table(new_dt), allocator(Allocator::Get(table.db)), deleted_rows(parent.deleted_rows),
202188
+ : table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
202191
202189
  partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
202192
202190
  if (partial_manager) {
202193
202191
  partial_manager->FlushPartialBlocks();
@@ -202200,9 +202198,9 @@ LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &paren
202200
202198
 
202201
202199
  LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, ColumnDefinition &new_column,
202202
202200
  Expression *default_value)
202203
- : table(new_dt), allocator(Allocator::Get(table.db)), deleted_rows(parent.deleted_rows),
202201
+ : table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
202204
202202
  partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
202205
- idx_t new_column_idx = parent.table.column_definitions.size();
202203
+ idx_t new_column_idx = parent.table->column_definitions.size();
202206
202204
  stats.InitializeAddColumn(parent.stats, new_column.GetType());
202207
202205
  row_groups = parent.row_groups->AddColumn(new_column, default_value, stats.GetStats(new_column_idx));
202208
202206
  parent.row_groups.reset();
@@ -202312,11 +202310,7 @@ void LocalStorage::Append(LocalAppendState &state, DataChunk &chunk) {
202312
202310
  void LocalTableStorage::CheckFlushToDisk() {
202313
202311
  // we finished writing a complete row group
202314
202312
  // check if we should pre-emptively write it to disk
202315
- if (table.info->IsTemporary() || StorageManager::GetStorageManager(table.db).InMemory()) {
202316
- return;
202317
- }
202318
- if (!table.info->indexes.Empty()) {
202319
- // we have indexes - we cannot merge
202313
+ if (table->info->IsTemporary() || StorageManager::GetStorageManager(table->db).InMemory()) {
202320
202314
  return;
202321
202315
  }
202322
202316
  if (deleted_rows != 0) {
@@ -202326,7 +202320,7 @@ void LocalTableStorage::CheckFlushToDisk() {
202326
202320
  // we should! write the second-to-last row group to disk
202327
202321
  // allocate the partial block-manager if none is allocated yet
202328
202322
  if (!partial_manager) {
202329
- auto &block_manager = table.info->table_io_manager->GetBlockManagerForRowData();
202323
+ auto &block_manager = table->info->table_io_manager->GetBlockManagerForRowData();
202330
202324
  partial_manager = make_unique<PartialBlockManager>(block_manager);
202331
202325
  }
202332
202326
  // flush second-to-last row group
@@ -202338,12 +202332,11 @@ void LocalTableStorage::FlushToDisk(RowGroup *row_group) {
202338
202332
  // flush the specified row group
202339
202333
  D_ASSERT(row_group);
202340
202334
  D_ASSERT(deleted_rows == 0);
202341
- D_ASSERT(table.info->indexes.Empty());
202342
202335
  D_ASSERT(partial_manager);
202343
202336
  //! The set of column compression types (if any)
202344
202337
  vector<CompressionType> compression_types;
202345
202338
  D_ASSERT(compression_types.empty());
202346
- for (auto &column : table.column_definitions) {
202339
+ for (auto &column : table->column_definitions) {
202347
202340
  compression_types.push_back(column.CompressionType());
202348
202341
  }
202349
202342
  auto row_group_pointer = row_group->WriteToDisk(*partial_manager, compression_types);
@@ -202410,24 +202403,23 @@ void LocalStorage::Update(DataTable *table, Vector &row_ids, const vector<column
202410
202403
  }
202411
202404
 
202412
202405
  template <class T>
202413
- bool LocalStorage::ScanTableStorage(DataTable &table, LocalTableStorage &storage, T &&fun) {
202414
- vector<column_t> column_ids;
202415
- column_ids.reserve(table.column_definitions.size());
202416
- for (idx_t i = 0; i < table.column_definitions.size(); i++) {
202417
- column_ids.push_back(i);
202406
+ bool LocalTableStorage::ScanTableStorage(Transaction &transaction, const vector<column_t> &column_ids, T &&fun) {
202407
+ auto all_types = table->GetTypes();
202408
+ vector<LogicalType> scan_types;
202409
+ for (idx_t i = 0; i < column_ids.size(); i++) {
202410
+ scan_types.push_back(all_types[column_ids[i]]);
202418
202411
  }
202419
-
202420
202412
  DataChunk chunk;
202421
- chunk.Initialize(storage.allocator, table.GetTypes());
202413
+ chunk.Initialize(allocator, scan_types);
202422
202414
 
202423
202415
  // initialize the scan
202424
202416
  TableScanState state;
202425
202417
  state.Initialize(column_ids, nullptr);
202426
- storage.InitializeScan(state.local_state, nullptr);
202418
+ InitializeScan(state.local_state, nullptr);
202427
202419
 
202428
202420
  while (true) {
202429
202421
  chunk.Reset();
202430
- Scan(state.local_state, column_ids, chunk);
202422
+ state.local_state.Scan(transaction, chunk);
202431
202423
  if (chunk.size() == 0) {
202432
202424
  return true;
202433
202425
  }
@@ -202437,6 +202429,78 @@ bool LocalStorage::ScanTableStorage(DataTable &table, LocalTableStorage &storage
202437
202429
  }
202438
202430
  }
202439
202431
 
202432
+ template <class T>
202433
+ bool LocalTableStorage::ScanTableStorage(Transaction &transaction, T &&fun) {
202434
+ vector<column_t> column_ids;
202435
+ column_ids.reserve(table->column_definitions.size());
202436
+ for (idx_t i = 0; i < table->column_definitions.size(); i++) {
202437
+ column_ids.push_back(i);
202438
+ }
202439
+ return ScanTableStorage(transaction, column_ids, fun);
202440
+ }
202441
+
202442
+ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendState &append_state, idx_t append_count,
202443
+ bool append_to_table) {
202444
+ bool constraint_violated = false;
202445
+ if (append_to_table) {
202446
+ table->InitializeAppend(transaction, append_state, append_count);
202447
+ }
202448
+ if (append_to_table) {
202449
+ // appending: need to scan entire
202450
+ ScanTableStorage(transaction, [&](DataChunk &chunk) -> bool {
202451
+ // append this chunk to the indexes of the table
202452
+ if (!table->AppendToIndexes(chunk, append_state.current_row)) {
202453
+ constraint_violated = true;
202454
+ return false;
202455
+ }
202456
+ // append to base table
202457
+ table->Append(chunk, append_state);
202458
+ return true;
202459
+ });
202460
+ } else {
202461
+ // only need to scan for index append
202462
+ // figure out which columns we need to scan for the set of indexes
202463
+ auto columns = table->info->indexes.GetRequiredColumns();
202464
+ // create an empty mock chunk that contains all the correct types for the table
202465
+ DataChunk mock_chunk;
202466
+ mock_chunk.InitializeEmpty(table->GetTypes());
202467
+ ScanTableStorage(transaction, columns, [&](DataChunk &chunk) -> bool {
202468
+ // construct the mock chunk by referencing the required columns
202469
+ for (idx_t i = 0; i < columns.size(); i++) {
202470
+ mock_chunk.data[columns[i]].Reference(chunk.data[i]);
202471
+ }
202472
+ mock_chunk.SetCardinality(chunk);
202473
+ // append this chunk to the indexes of the table
202474
+ if (!table->AppendToIndexes(mock_chunk, append_state.current_row)) {
202475
+ constraint_violated = true;
202476
+ return false;
202477
+ }
202478
+ append_state.current_row += chunk.size();
202479
+ return true;
202480
+ });
202481
+ }
202482
+ if (constraint_violated) {
202483
+ // need to revert the append
202484
+ row_t current_row = append_state.row_start;
202485
+ // remove the data from the indexes, if there are any indexes
202486
+ ScanTableStorage(transaction, [&](DataChunk &chunk) -> bool {
202487
+ // append this chunk to the indexes of the table
202488
+ table->RemoveFromIndexes(append_state, chunk, current_row);
202489
+
202490
+ current_row += chunk.size();
202491
+ if (current_row >= append_state.current_row) {
202492
+ // finished deleting all rows from the index: abort now
202493
+ return false;
202494
+ }
202495
+ return true;
202496
+ });
202497
+ if (append_to_table) {
202498
+ table->RevertAppendInternal(append_state.row_start, append_count);
202499
+ }
202500
+ throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
202501
+ }
202502
+ }
202503
+
202440
202504
  void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
202441
202505
  // bulk append threshold: a full row group
202442
202506
  static constexpr const idx_t MERGE_THRESHOLD = RowGroup::ROW_GROUP_SIZE;
@@ -202452,10 +202516,17 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
202452
202516
  TableAppendState append_state;
202453
202517
  table.AppendLock(append_state);
202454
202518
  if ((append_state.row_start == 0 || storage.row_groups->GetTotalRows() >= MERGE_THRESHOLD) &&
202455
- storage.table.info->indexes.Empty() && storage.deleted_rows == 0) {
202519
+ storage.deleted_rows == 0) {
202456
202520
  // table is currently empty OR we are bulk appending: move over the storage directly
202457
202521
  // first flush any out-standing storage nodes
202458
202522
  storage.FlushToDisk();
202523
+ // now append to the indexes (if there are any)
202524
+ // FIXME: we should be able to merge the transaction-local index directly into the main table index
202525
+ // as long we just rewrite some row-ids
202526
+ if (!table.info->indexes.Empty()) {
202527
+ storage.AppendToIndexes(transaction, append_state, append_count, false);
202528
+ }
202529
+ // finally move over the row groups
202459
202530
  table.MergeStorage(*storage.row_groups, storage.indexes, storage.stats);
202460
202531
  } else {
202461
202532
  if (storage.partial_manager || !storage.written_blocks.empty()) {
@@ -202463,36 +202534,8 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
202463
202534
  // revert the data we have already written
202464
202535
  storage.Rollback();
202465
202536
  }
202466
- bool constraint_violated = false;
202467
- table.InitializeAppend(transaction, append_state, append_count);
202468
- ScanTableStorage(table, storage, [&](DataChunk &chunk) -> bool {
202469
- // append this chunk to the indexes of the table
202470
- if (!table.AppendToIndexes(chunk, append_state.current_row)) {
202471
- constraint_violated = true;
202472
- return false;
202473
- }
202474
- // append to base table
202475
- table.Append(chunk, append_state);
202476
- return true;
202477
- });
202478
- if (constraint_violated) {
202479
- // need to revert the append
202480
- row_t current_row = append_state.row_start;
202481
- // remove the data from the indexes, if there are any indexes
202482
- ScanTableStorage(table, storage, [&](DataChunk &chunk) -> bool {
202483
- // append this chunk to the indexes of the table
202484
- table.RemoveFromIndexes(append_state, chunk, current_row);
202485
-
202486
- current_row += chunk.size();
202487
- if (current_row >= append_state.current_row) {
202488
- // finished deleting all rows from the index: abort now
202489
- return false;
202490
- }
202491
- return true;
202492
- });
202493
- table.RevertAppendInternal(append_state.row_start, append_count);
202494
- throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
202495
- }
202537
+ // append to the indexes and append to the base table
202538
+ storage.AppendToIndexes(transaction, append_state, append_count, true);
202496
202539
  }
202497
202540
  transaction.PushAppend(&table, append_state.row_start, append_count);
202498
202541
  }
@@ -202523,7 +202566,7 @@ void LocalTableStorage::Rollback() {
202523
202566
  partial_manager->Clear();
202524
202567
  partial_manager.reset();
202525
202568
  }
202526
- auto &block_manager = table.info->table_io_manager->GetBlockManagerForRowData();
202569
+ auto &block_manager = table->info->table_io_manager->GetBlockManagerForRowData();
202527
202570
  for (auto block_id : written_blocks) {
202528
202571
  block_manager.MarkBlockAsModified(block_id);
202529
202572
  }
@@ -202545,6 +202588,7 @@ void LocalStorage::MoveStorage(DataTable *old_dt, DataTable *new_dt) {
202545
202588
  }
202546
202589
  // take over the storage from the old entry
202547
202590
  auto new_storage = move(entry->second);
202591
+ new_storage->table = new_dt;
202548
202592
  table_storage.erase(entry);
202549
202593
  table_storage[new_dt] = move(new_storage);
202550
202594
  }
@@ -202596,7 +202640,7 @@ void LocalStorage::FetchChunk(DataTable *table, Vector &row_ids, idx_t count, Da
202596
202640
 
202597
202641
  ColumnFetchState fetch_state;
202598
202642
  vector<column_t> col_ids;
202599
- vector<LogicalType> types = storage->table.GetTypes();
202643
+ vector<LogicalType> types = storage->table->GetTypes();
202600
202644
  for (idx_t i = 0; i < types.size(); i++) {
202601
202645
  col_ids.push_back(i);
202602
202646
  }
@@ -210911,6 +210955,22 @@ void TableIndexList::VerifyForeignKey(const vector<idx_t> &fk_keys, bool is_appe
210911
210955
  }
210912
210956
  }
210913
210957
 
210958
+ vector<column_t> TableIndexList::GetRequiredColumns() {
210959
+ lock_guard<mutex> lock(indexes_lock);
210960
+ set<column_t> unique_indexes;
210961
+ for (auto &index : indexes) {
210962
+ for (auto col_index : index->column_ids) {
210963
+ unique_indexes.insert(col_index);
210964
+ }
210965
+ }
210966
+ vector<column_t> result;
210967
+ result.reserve(unique_indexes.size());
210968
+ for (auto column_index : unique_indexes) {
210969
+ result.emplace_back(column_index);
210970
+ }
210971
+ return result;
210972
+ }
210973
+
210914
210974
  vector<BlockPointer> TableIndexList::SerializeIndexes(duckdb::MetaBlockWriter &writer) {
210915
210975
  vector<BlockPointer> blocks_info;
210916
210976
  for (auto &index : indexes) {