duckdb 0.5.2-dev809.0 → 0.5.2-dev833.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.2-dev809.0",
4
+ "version": "0.5.2-dev833.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -29106,7 +29106,7 @@ struct RowOperations {
29106
29106
  namespace duckdb {
29107
29107
 
29108
29108
  template <class OP, class RETURN_TYPE, typename... ARGS>
29109
- RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
29109
+ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
29110
29110
  D_ASSERT(radix_bits <= sizeof(hash_t) * 8);
29111
29111
  switch (radix_bits) {
29112
29112
  case 1:
@@ -29135,7 +29135,7 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
29135
29135
  }
29136
29136
 
29137
29137
  template <class OP, class RETURN_TYPE, idx_t radix_bits_1, typename... ARGS>
29138
- RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
29138
+ RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&...args) {
29139
29139
  D_ASSERT(radix_bits_2 <= sizeof(hash_t) * 8);
29140
29140
  switch (radix_bits_2) {
29141
29141
  case 1:
@@ -29164,7 +29164,7 @@ RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
29164
29164
  }
29165
29165
 
29166
29166
  template <class OP, class RETURN_TYPE, typename... ARGS>
29167
- RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&... args) {
29167
+ RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&...args) {
29168
29168
  D_ASSERT(radix_bits_1 <= sizeof(hash_t) * 8);
29169
29169
  switch (radix_bits_1) {
29170
29170
  case 1:
@@ -49455,6 +49455,9 @@ void Vector::Initialize(bool zero_data, idx_t capacity) {
49455
49455
  memset(data, 0, capacity * type_size);
49456
49456
  }
49457
49457
  }
49458
+ if (capacity > STANDARD_VECTOR_SIZE) {
49459
+ validity.Resize(STANDARD_VECTOR_SIZE, capacity);
49460
+ }
49458
49461
  }
49459
49462
 
49460
49463
  struct DataArrays {
@@ -93661,6 +93664,13 @@ unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, Aggregate
93661
93664
  vector<unique_ptr<Expression>> &arguments) {
93662
93665
 
93663
93666
  D_ASSERT(arguments.size() == 1);
93667
+
93668
+ if (arguments[0]->return_type.id() == LogicalTypeId::LIST ||
93669
+ arguments[0]->return_type.id() == LogicalTypeId::STRUCT ||
93670
+ arguments[0]->return_type.id() == LogicalTypeId::MAP) {
93671
+ throw NotImplementedException("Unimplemented type for histogram %s", arguments[0]->return_type.ToString());
93672
+ }
93673
+
93664
93674
  child_list_t<LogicalType> struct_children;
93665
93675
  struct_children.push_back({"key", LogicalType::LIST(arguments[0]->return_type)});
93666
93676
  struct_children.push_back({"value", LogicalType::LIST(LogicalType::UBIGINT)});
@@ -95714,6 +95724,11 @@ struct VectorCastHelpers {
95714
95724
  }
95715
95725
  };
95716
95726
 
95727
+ struct VectorStringifiedListParser {
95728
+ static idx_t CountParts(const string_t &input);
95729
+ static bool SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start, Vector &child);
95730
+ };
95731
+
95717
95732
  } // namespace duckdb
95718
95733
 
95719
95734
 
@@ -96484,20 +96499,8 @@ BoundCastInfo DefaultCasts::EnumCastSwitch(BindCastInput &input, const LogicalTy
96484
96499
 
96485
96500
  namespace duckdb {
96486
96501
 
96487
- struct ListBoundCastData : public BoundCastData {
96488
- explicit ListBoundCastData(BoundCastInfo child_cast) : child_cast_info(move(child_cast)) {
96489
- }
96490
-
96491
- BoundCastInfo child_cast_info;
96492
-
96493
- public:
96494
- unique_ptr<BoundCastData> Copy() const override {
96495
- return make_unique<ListBoundCastData>(child_cast_info.Copy());
96496
- }
96497
- };
96498
-
96499
- unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
96500
- const LogicalType &target) {
96502
+ unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &input, const LogicalType &source,
96503
+ const LogicalType &target) {
96501
96504
  vector<BoundCastInfo> child_cast_info;
96502
96505
  auto &source_child_type = ListType::GetChildType(source);
96503
96506
  auto &result_child_type = ListType::GetChildType(target);
@@ -96608,11 +96611,11 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
96608
96611
  BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
96609
96612
  switch (target.id()) {
96610
96613
  case LogicalTypeId::LIST:
96611
- return BoundCastInfo(ListToListCast, BindListToListCast(input, source, target));
96614
+ return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
96612
96615
  case LogicalTypeId::VARCHAR:
96613
96616
  case LogicalTypeId::JSON:
96614
- return BoundCastInfo(ListToVarcharCast,
96615
- BindListToListCast(input, source, LogicalType::LIST(LogicalType::VARCHAR)));
96617
+ return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
96618
+ input, source, LogicalType::LIST(LogicalType::VARCHAR)));
96616
96619
  default:
96617
96620
  return DefaultCasts::TryVectorNullCast;
96618
96621
  }
@@ -96950,9 +96953,97 @@ static BoundCastInfo VectorStringCastNumericSwitch(BindCastInput &input, const L
96950
96953
  }
96951
96954
  }
96952
96955
 
96956
+ bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result, ValidityMask &result_mask,
96957
+ idx_t count, CastParameters &parameters, const SelectionVector *sel) {
96958
+
96959
+ idx_t total_list_size = 0;
96960
+ for (idx_t i = 0; i < count; i++) {
96961
+ idx_t idx = i;
96962
+ if (sel) {
96963
+ idx = sel->get_index(i);
96964
+ }
96965
+ if (!source_mask.RowIsValid(idx)) {
96966
+ continue;
96967
+ }
96968
+ total_list_size += VectorStringifiedListParser::CountParts(source_data[idx]);
96969
+ }
96970
+
96971
+ Vector varchar_vector(LogicalType::VARCHAR, total_list_size);
96972
+
96973
+ ListVector::Reserve(result, total_list_size);
96974
+ ListVector::SetListSize(result, total_list_size);
96975
+
96976
+ auto list_data = ListVector::GetData(result);
96977
+ auto child_data = FlatVector::GetData<string_t>(varchar_vector);
96978
+
96979
+ bool all_converted = true;
96980
+ idx_t total = 0;
96981
+ for (idx_t i = 0; i < count; i++) {
96982
+ idx_t idx = i;
96983
+ if (sel) {
96984
+ idx = sel->get_index(i);
96985
+ }
96986
+ if (!source_mask.RowIsValid(idx)) {
96987
+ result_mask.SetInvalid(i);
96988
+ continue;
96989
+ }
96990
+
96991
+ list_data[i].offset = total;
96992
+ auto valid =
96993
+ VectorStringifiedListParser::SplitStringifiedList(source_data[idx], child_data, total, varchar_vector);
96994
+ if (!valid) {
96995
+ string text = "Type VARCHAR with value '" + source_data[idx].GetString() +
96996
+ "' can't be cast to the destination type LIST";
96997
+ HandleVectorCastError::Operation<string_t>(text, result_mask, idx, parameters.error_message, all_converted);
96998
+ }
96999
+ list_data[i].length = total - list_data[i].offset; // length is the amount of parts coming from this string
97000
+ }
97001
+ D_ASSERT(total_list_size == total);
97002
+
97003
+ auto &result_child = ListVector::GetEntry(result);
97004
+ auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
97005
+ CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data.get());
97006
+ return cast_data.child_cast_info.function(varchar_vector, result_child, total_list_size, child_parameters) &&
97007
+ all_converted;
97008
+ }
97009
+
97010
+ bool StringListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
97011
+ D_ASSERT(source.GetType().id() == LogicalTypeId::VARCHAR);
97012
+ D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
97013
+
97014
+ switch (source.GetVectorType()) {
97015
+ case VectorType::CONSTANT_VECTOR: {
97016
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
97017
+
97018
+ auto source_data = ConstantVector::GetData<string_t>(source);
97019
+ auto &source_mask = ConstantVector::Validity(source);
97020
+ auto &result_mask = ConstantVector::Validity(result);
97021
+
97022
+ return StringListCastLoop(source_data, source_mask, result, result_mask, 1, parameters, nullptr);
97023
+ }
97024
+ default: {
97025
+ UnifiedVectorFormat unified_source;
97026
+ result.SetVectorType(VectorType::FLAT_VECTOR);
97027
+
97028
+ source.ToUnifiedFormat(count, unified_source);
97029
+ auto source_sel = unified_source.sel;
97030
+ auto source_data = (string_t *)unified_source.data;
97031
+ auto &source_mask = unified_source.validity;
97032
+ auto &result_mask = FlatVector::Validity(result);
97033
+
97034
+ return StringListCastLoop(source_data, source_mask, result, result_mask, count, parameters, source_sel);
97035
+ }
97036
+ }
97037
+ }
97038
+
97039
+ BoundCastInfo StringToListCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
97040
+ // second argument allows for a secondary casting function to be passed in the CastParameters
97041
+ return BoundCastInfo(&StringListCast,
97042
+ ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target));
97043
+ }
97044
+
96953
97045
  BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const LogicalType &source,
96954
97046
  const LogicalType &target) {
96955
- // now switch on the target type
96956
97047
  switch (target.id()) {
96957
97048
  case LogicalTypeId::DATE:
96958
97049
  return BoundCastInfo(&VectorCastHelpers::TryCastErrorLoop<string_t, date_t, duckdb::TryCastErrorMessage>);
@@ -96980,6 +97071,8 @@ BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const Logical
96980
97071
  case LogicalTypeId::VARCHAR:
96981
97072
  case LogicalTypeId::JSON:
96982
97073
  return &DefaultCasts::ReinterpretCast;
97074
+ case LogicalTypeId::LIST:
97075
+ return StringToListCast(input, source, target);
96983
97076
  default:
96984
97077
  return VectorStringCastNumericSwitch(input, source, target);
96985
97078
  }
@@ -97341,6 +97434,144 @@ BoundCastInfo DefaultCasts::UUIDCastSwitch(BindCastInput &input, const LogicalTy
97341
97434
  } // namespace duckdb
97342
97435
 
97343
97436
 
97437
+ namespace duckdb {
97438
+
97439
+ struct CountPartOperation {
97440
+ idx_t count = 0;
97441
+
97442
+ void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
97443
+ count++;
97444
+ }
97445
+ };
97446
+
97447
+ struct SplitStringOperation {
97448
+ SplitStringOperation(string_t *child_data, idx_t &child_start, Vector &child)
97449
+ : child_data(child_data), child_start(child_start), child(child) {
97450
+ }
97451
+
97452
+ string_t *child_data;
97453
+ idx_t &child_start;
97454
+ Vector &child;
97455
+
97456
+ void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
97457
+
97458
+ if ((pos - start_pos) >= 4 && buf[start_pos] == 'N' && buf[start_pos + 1] == 'U' && buf[start_pos + 2] == 'L' &&
97459
+ buf[start_pos + 3] == 'L') {
97460
+ FlatVector::SetNull(child, child_start, true);
97461
+ child_start++;
97462
+ return;
97463
+ }
97464
+ child_data[child_start] = StringVector::AddString(child, buf + start_pos, pos - start_pos);
97465
+ child_start++;
97466
+ }
97467
+ };
97468
+
97469
+ static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) {
97470
+ char quote = buf[pos];
97471
+ pos++;
97472
+
97473
+ while (pos < len) {
97474
+ if (buf[pos] == quote) {
97475
+ return true;
97476
+ }
97477
+ pos++;
97478
+ }
97479
+ return false;
97480
+ }
97481
+
97482
+ static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl) {
97483
+ while (idx < len) {
97484
+ if (buf[idx] == '[') {
97485
+ if (!SkipToClose(++idx, buf, len, lvl)) {
97486
+ return false;
97487
+ }
97488
+ lvl++;
97489
+ idx++;
97490
+ }
97491
+ if (buf[idx] == '"' || buf[idx] == '\'') {
97492
+ SkipToCloseQuotes(idx, buf, len);
97493
+ }
97494
+ if (buf[idx] == ']') {
97495
+ lvl--;
97496
+ return true;
97497
+ }
97498
+ idx++;
97499
+ }
97500
+ return false;
97501
+ }
97502
+
97503
+ template <class OP>
97504
+ static bool SplitStringifiedListInternal(const string_t &input, OP &state) {
97505
+ const char *buf = input.GetDataUnsafe();
97506
+ idx_t len = input.GetSize();
97507
+ idx_t lvl = 1;
97508
+ idx_t pos = 0;
97509
+
97510
+ while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
97511
+ pos++;
97512
+ }
97513
+ if (pos == len || buf[pos] != '[') {
97514
+ return false;
97515
+ }
97516
+ pos++;
97517
+ while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
97518
+ pos++;
97519
+ }
97520
+
97521
+ idx_t start_pos = pos;
97522
+ while (pos < len) {
97523
+ if (buf[pos] == '[') {
97524
+ if (!SkipToClose(++pos, buf, len, ++lvl)) {
97525
+ return false;
97526
+ }
97527
+ } else if (buf[pos] == '"' || buf[pos] == '\'') {
97528
+ SkipToCloseQuotes(pos, buf, len);
97529
+ } else if (buf[pos] == ',' || buf[pos] == ']') {
97530
+ idx_t trailing_whitespace = 0;
97531
+ while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
97532
+ trailing_whitespace++;
97533
+ }
97534
+ if (!(buf[pos] == ']' && start_pos == (pos))) {
97535
+ state.HandleValue(buf, start_pos, pos - trailing_whitespace);
97536
+ } // else the list is empty
97537
+ if (buf[pos] == ']') {
97538
+ lvl--;
97539
+ break;
97540
+ }
97541
+ while (pos + 1 < len && StringUtil::CharacterIsSpace(buf[pos + 1])) {
97542
+ pos++;
97543
+ }
97544
+ start_pos = pos + 1;
97545
+ }
97546
+ pos++;
97547
+ }
97548
+ pos++;
97549
+ while (pos < len) {
97550
+ if (!StringUtil::CharacterIsSpace(buf[pos])) {
97551
+ return false;
97552
+ }
97553
+ pos++;
97554
+ }
97555
+ if (lvl != 0) {
97556
+ return false;
97557
+ }
97558
+ return true;
97559
+ }
97560
+
97561
+ bool VectorStringifiedListParser::SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start,
97562
+ Vector &child) {
97563
+ SplitStringOperation state(child_data, child_start, child);
97564
+ return SplitStringifiedListInternal<SplitStringOperation>(input, state);
97565
+ }
97566
+
97567
+ idx_t VectorStringifiedListParser::CountParts(const string_t &input) {
97568
+ CountPartOperation state;
97569
+ SplitStringifiedListInternal<CountPartOperation>(input, state);
97570
+ return state.count;
97571
+ }
97572
+ } // namespace duckdb
97573
+
97574
+
97344
97575
  namespace duckdb {
97345
97576
 
97346
97577
  //! The target type determines the preferred implicit casts
@@ -201589,6 +201820,7 @@ void DataTable::AppendLock(TableAppendState &state) {
201589
201820
  throw TransactionException("Transaction conflict: adding entries to a table that has been altered!");
201590
201821
  }
201591
201822
  state.row_start = row_groups->GetTotalRows();
201823
+ state.current_row = state.row_start;
201592
201824
  }
201593
201825
 
201594
201826
  void DataTable::InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count) {
@@ -201652,9 +201884,6 @@ void DataTable::MergeStorage(RowGroupCollection &data, TableIndexList &indexes,
201652
201884
  row_groups->MergeStorage(data);
201653
201885
  stats.MergeStats(other_stats);
201654
201886
  row_groups->Verify();
201655
- if (!indexes.Empty()) {
201656
- throw InternalException("FIXME: merge indexes");
201657
- }
201658
201887
  }
201659
201888
 
201660
201889
  void DataTable::WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count) {
@@ -202149,7 +202378,7 @@ namespace duckdb {
202149
202378
  // Local Table Storage
202150
202379
  //===--------------------------------------------------------------------===//
202151
202380
  LocalTableStorage::LocalTableStorage(DataTable &table)
202152
- : table(table), allocator(Allocator::Get(table.db)), deleted_rows(0) {
202381
+ : table(&table), allocator(Allocator::Get(table.db)), deleted_rows(0) {
202153
202382
  auto types = table.GetTypes();
202154
202383
  row_groups = make_shared<RowGroupCollection>(table.info, TableIOManager::Get(table).GetBlockManagerForRowData(),
202155
202384
  types, MAX_ROW_ID, 0);
@@ -202174,7 +202403,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
202174
202403
  LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, idx_t changed_idx,
202175
202404
  const LogicalType &target_type, const vector<column_t> &bound_columns,
202176
202405
  Expression &cast_expr)
202177
- : table(new_dt), allocator(Allocator::Get(table.db)), deleted_rows(parent.deleted_rows),
202406
+ : table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
202178
202407
  partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
202179
202408
  if (partial_manager) {
202180
202409
  partial_manager->FlushPartialBlocks();
@@ -202187,7 +202416,7 @@ LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &paren
202187
202416
  }
202188
202417
 
202189
202418
  LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, idx_t drop_idx)
202190
- : table(new_dt), allocator(Allocator::Get(table.db)), deleted_rows(parent.deleted_rows),
202419
+ : table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
202191
202420
  partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
202192
202421
  if (partial_manager) {
202193
202422
  partial_manager->FlushPartialBlocks();
@@ -202200,9 +202429,9 @@ LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &paren
202200
202429
 
202201
202430
  LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, ColumnDefinition &new_column,
202202
202431
  Expression *default_value)
202203
- : table(new_dt), allocator(Allocator::Get(table.db)), deleted_rows(parent.deleted_rows),
202432
+ : table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
202204
202433
  partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
202205
- idx_t new_column_idx = parent.table.column_definitions.size();
202434
+ idx_t new_column_idx = parent.table->column_definitions.size();
202206
202435
  stats.InitializeAddColumn(parent.stats, new_column.GetType());
202207
202436
  row_groups = parent.row_groups->AddColumn(new_column, default_value, stats.GetStats(new_column_idx));
202208
202437
  parent.row_groups.reset();
@@ -202312,11 +202541,7 @@ void LocalStorage::Append(LocalAppendState &state, DataChunk &chunk) {
202312
202541
  void LocalTableStorage::CheckFlushToDisk() {
202313
202542
  // we finished writing a complete row group
202314
202543
  // check if we should pre-emptively write it to disk
202315
- if (table.info->IsTemporary() || StorageManager::GetStorageManager(table.db).InMemory()) {
202316
- return;
202317
- }
202318
- if (!table.info->indexes.Empty()) {
202319
- // we have indexes - we cannot merge
202544
+ if (table->info->IsTemporary() || StorageManager::GetStorageManager(table->db).InMemory()) {
202320
202545
  return;
202321
202546
  }
202322
202547
  if (deleted_rows != 0) {
@@ -202326,7 +202551,7 @@ void LocalTableStorage::CheckFlushToDisk() {
202326
202551
  // we should! write the second-to-last row group to disk
202327
202552
  // allocate the partial block-manager if none is allocated yet
202328
202553
  if (!partial_manager) {
202329
- auto &block_manager = table.info->table_io_manager->GetBlockManagerForRowData();
202554
+ auto &block_manager = table->info->table_io_manager->GetBlockManagerForRowData();
202330
202555
  partial_manager = make_unique<PartialBlockManager>(block_manager);
202331
202556
  }
202332
202557
  // flush second-to-last row group
@@ -202338,12 +202563,11 @@ void LocalTableStorage::FlushToDisk(RowGroup *row_group) {
202338
202563
  // flush the specified row group
202339
202564
  D_ASSERT(row_group);
202340
202565
  D_ASSERT(deleted_rows == 0);
202341
- D_ASSERT(table.info->indexes.Empty());
202342
202566
  D_ASSERT(partial_manager);
202343
202567
  //! The set of column compression types (if any)
202344
202568
  vector<CompressionType> compression_types;
202345
202569
  D_ASSERT(compression_types.empty());
202346
- for (auto &column : table.column_definitions) {
202570
+ for (auto &column : table->column_definitions) {
202347
202571
  compression_types.push_back(column.CompressionType());
202348
202572
  }
202349
202573
  auto row_group_pointer = row_group->WriteToDisk(*partial_manager, compression_types);
@@ -202410,24 +202634,23 @@ void LocalStorage::Update(DataTable *table, Vector &row_ids, const vector<column
202410
202634
  }
202411
202635
 
202412
202636
  template <class T>
202413
- bool LocalStorage::ScanTableStorage(DataTable &table, LocalTableStorage &storage, T &&fun) {
202414
- vector<column_t> column_ids;
202415
- column_ids.reserve(table.column_definitions.size());
202416
- for (idx_t i = 0; i < table.column_definitions.size(); i++) {
202417
- column_ids.push_back(i);
202637
+ bool LocalTableStorage::ScanTableStorage(Transaction &transaction, const vector<column_t> &column_ids, T &&fun) {
202638
+ auto all_types = table->GetTypes();
202639
+ vector<LogicalType> scan_types;
202640
+ for (idx_t i = 0; i < column_ids.size(); i++) {
202641
+ scan_types.push_back(all_types[column_ids[i]]);
202418
202642
  }
202419
-
202420
202643
  DataChunk chunk;
202421
- chunk.Initialize(storage.allocator, table.GetTypes());
202644
+ chunk.Initialize(allocator, scan_types);
202422
202645
 
202423
202646
  // initialize the scan
202424
202647
  TableScanState state;
202425
202648
  state.Initialize(column_ids, nullptr);
202426
- storage.InitializeScan(state.local_state, nullptr);
202649
+ InitializeScan(state.local_state, nullptr);
202427
202650
 
202428
202651
  while (true) {
202429
202652
  chunk.Reset();
202430
- Scan(state.local_state, column_ids, chunk);
202653
+ state.local_state.Scan(transaction, chunk);
202431
202654
  if (chunk.size() == 0) {
202432
202655
  return true;
202433
202656
  }
@@ -202437,6 +202660,78 @@ bool LocalStorage::ScanTableStorage(DataTable &table, LocalTableStorage &storage
202437
202660
  }
202438
202661
  }
202439
202662
 
202663
+ template <class T>
202664
+ bool LocalTableStorage::ScanTableStorage(Transaction &transaction, T &&fun) {
202665
+ vector<column_t> column_ids;
202666
+ column_ids.reserve(table->column_definitions.size());
202667
+ for (idx_t i = 0; i < table->column_definitions.size(); i++) {
202668
+ column_ids.push_back(i);
202669
+ }
202670
+ return ScanTableStorage(transaction, column_ids, fun);
202671
+ }
202672
+
202673
+ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendState &append_state, idx_t append_count,
202674
+ bool append_to_table) {
202675
+ bool constraint_violated = false;
202676
+ if (append_to_table) {
202677
+ table->InitializeAppend(transaction, append_state, append_count);
202678
+ }
202679
+ if (append_to_table) {
202680
+ // appending: need to scan entire
202681
+ ScanTableStorage(transaction, [&](DataChunk &chunk) -> bool {
202682
+ // append this chunk to the indexes of the table
202683
+ if (!table->AppendToIndexes(chunk, append_state.current_row)) {
202684
+ constraint_violated = true;
202685
+ return false;
202686
+ }
202687
+ // append to base table
202688
+ table->Append(chunk, append_state);
202689
+ return true;
202690
+ });
202691
+ } else {
202692
+ // only need to scan for index append
202693
+ // figure out which columns we need to scan for the set of indexes
202694
+ auto columns = table->info->indexes.GetRequiredColumns();
202695
+ // create an empty mock chunk that contains all the correct types for the table
202696
+ DataChunk mock_chunk;
202697
+ mock_chunk.InitializeEmpty(table->GetTypes());
202698
+ ScanTableStorage(transaction, columns, [&](DataChunk &chunk) -> bool {
202699
+ // construct the mock chunk by referencing the required columns
202700
+ for (idx_t i = 0; i < columns.size(); i++) {
202701
+ mock_chunk.data[columns[i]].Reference(chunk.data[i]);
202702
+ }
202703
+ mock_chunk.SetCardinality(chunk);
202704
+ // append this chunk to the indexes of the table
202705
+ if (!table->AppendToIndexes(mock_chunk, append_state.current_row)) {
202706
+ constraint_violated = true;
202707
+ return false;
202708
+ }
202709
+ append_state.current_row += chunk.size();
202710
+ return true;
202711
+ });
202712
+ }
202713
+ if (constraint_violated) {
202714
+ // need to revert the append
202715
+ row_t current_row = append_state.row_start;
202716
+ // remove the data from the indexes, if there are any indexes
202717
+ ScanTableStorage(transaction, [&](DataChunk &chunk) -> bool {
202718
+ // append this chunk to the indexes of the table
202719
+ table->RemoveFromIndexes(append_state, chunk, current_row);
202720
+
202721
+ current_row += chunk.size();
202722
+ if (current_row >= append_state.current_row) {
202723
+ // finished deleting all rows from the index: abort now
202724
+ return false;
202725
+ }
202726
+ return true;
202727
+ });
202728
+ if (append_to_table) {
202729
+ table->RevertAppendInternal(append_state.row_start, append_count);
202730
+ }
202731
+ throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
202732
+ }
202733
+ }
202734
+
202440
202735
  void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
202441
202736
  // bulk append threshold: a full row group
202442
202737
  static constexpr const idx_t MERGE_THRESHOLD = RowGroup::ROW_GROUP_SIZE;
@@ -202452,10 +202747,17 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
202452
202747
  TableAppendState append_state;
202453
202748
  table.AppendLock(append_state);
202454
202749
  if ((append_state.row_start == 0 || storage.row_groups->GetTotalRows() >= MERGE_THRESHOLD) &&
202455
- storage.table.info->indexes.Empty() && storage.deleted_rows == 0) {
202750
+ storage.deleted_rows == 0) {
202456
202751
  // table is currently empty OR we are bulk appending: move over the storage directly
202457
202752
  // first flush any out-standing storage nodes
202458
202753
  storage.FlushToDisk();
202754
+ // now append to the indexes (if there are any)
202755
+ // FIXME: we should be able to merge the transaction-local index directly into the main table index
202756
+ // as long we just rewrite some row-ids
202757
+ if (!table.info->indexes.Empty()) {
202758
+ storage.AppendToIndexes(transaction, append_state, append_count, false);
202759
+ }
202760
+ // finally move over the row groups
202459
202761
  table.MergeStorage(*storage.row_groups, storage.indexes, storage.stats);
202460
202762
  } else {
202461
202763
  if (storage.partial_manager || !storage.written_blocks.empty()) {
@@ -202463,36 +202765,8 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
202463
202765
  // revert the data we have already written
202464
202766
  storage.Rollback();
202465
202767
  }
202466
- bool constraint_violated = false;
202467
- table.InitializeAppend(transaction, append_state, append_count);
202468
- ScanTableStorage(table, storage, [&](DataChunk &chunk) -> bool {
202469
- // append this chunk to the indexes of the table
202470
- if (!table.AppendToIndexes(chunk, append_state.current_row)) {
202471
- constraint_violated = true;
202472
- return false;
202473
- }
202474
- // append to base table
202475
- table.Append(chunk, append_state);
202476
- return true;
202477
- });
202478
- if (constraint_violated) {
202479
- // need to revert the append
202480
- row_t current_row = append_state.row_start;
202481
- // remove the data from the indexes, if there are any indexes
202482
- ScanTableStorage(table, storage, [&](DataChunk &chunk) -> bool {
202483
- // append this chunk to the indexes of the table
202484
- table.RemoveFromIndexes(append_state, chunk, current_row);
202485
-
202486
- current_row += chunk.size();
202487
- if (current_row >= append_state.current_row) {
202488
- // finished deleting all rows from the index: abort now
202489
- return false;
202490
- }
202491
- return true;
202492
- });
202493
- table.RevertAppendInternal(append_state.row_start, append_count);
202494
- throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
202495
- }
202768
+ // append to the indexes and append to the base table
202769
+ storage.AppendToIndexes(transaction, append_state, append_count, true);
202496
202770
  }
202497
202771
  transaction.PushAppend(&table, append_state.row_start, append_count);
202498
202772
  }
@@ -202523,7 +202797,7 @@ void LocalTableStorage::Rollback() {
202523
202797
  partial_manager->Clear();
202524
202798
  partial_manager.reset();
202525
202799
  }
202526
- auto &block_manager = table.info->table_io_manager->GetBlockManagerForRowData();
202800
+ auto &block_manager = table->info->table_io_manager->GetBlockManagerForRowData();
202527
202801
  for (auto block_id : written_blocks) {
202528
202802
  block_manager.MarkBlockAsModified(block_id);
202529
202803
  }
@@ -202545,6 +202819,7 @@ void LocalStorage::MoveStorage(DataTable *old_dt, DataTable *new_dt) {
202545
202819
  }
202546
202820
  // take over the storage from the old entry
202547
202821
  auto new_storage = move(entry->second);
202822
+ new_storage->table = new_dt;
202548
202823
  table_storage.erase(entry);
202549
202824
  table_storage[new_dt] = move(new_storage);
202550
202825
  }
@@ -202596,7 +202871,7 @@ void LocalStorage::FetchChunk(DataTable *table, Vector &row_ids, idx_t count, Da
202596
202871
 
202597
202872
  ColumnFetchState fetch_state;
202598
202873
  vector<column_t> col_ids;
202599
- vector<LogicalType> types = storage->table.GetTypes();
202874
+ vector<LogicalType> types = storage->table->GetTypes();
202600
202875
  for (idx_t i = 0; i < types.size(); i++) {
202601
202876
  col_ids.push_back(i);
202602
202877
  }
@@ -210911,6 +211186,22 @@ void TableIndexList::VerifyForeignKey(const vector<idx_t> &fk_keys, bool is_appe
210911
211186
  }
210912
211187
  }
210913
211188
 
211189
+ vector<column_t> TableIndexList::GetRequiredColumns() {
211190
+ lock_guard<mutex> lock(indexes_lock);
211191
+ set<column_t> unique_indexes;
211192
+ for (auto &index : indexes) {
211193
+ for (auto col_index : index->column_ids) {
211194
+ unique_indexes.insert(col_index);
211195
+ }
211196
+ }
211197
+ vector<column_t> result;
211198
+ result.reserve(unique_indexes.size());
211199
+ for (auto column_index : unique_indexes) {
211200
+ result.emplace_back(column_index);
211201
+ }
211202
+ return result;
211203
+ }
211204
+
210914
211205
  vector<BlockPointer> TableIndexList::SerializeIndexes(duckdb::MetaBlockWriter &writer) {
210915
211206
  vector<BlockPointer> blocks_info;
210916
211207
  for (auto &index : indexes) {