duckdb 0.8.2-dev4314.0 → 0.8.2-dev4376.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/file_buffer.cpp +1 -1
- package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
- package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
- package/src/duckdb/src/main/settings/settings.cpp +5 -10
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
- package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +1 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
- package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
- package/src/duckdb/src/storage/table/column_data.cpp +14 -9
- package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +102 -192
- package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
- package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
- package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
- package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -4
- package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
- package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
- package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
- package/src/duckdb/ub_src_storage_table.cpp +2 -0
@@ -16,15 +16,13 @@
|
|
16
16
|
#include "duckdb/transaction/duck_transaction.hpp"
|
17
17
|
#include "duckdb/storage/table/append_state.hpp"
|
18
18
|
#include "duckdb/storage/table/scan_state.hpp"
|
19
|
+
#include "duckdb/storage/table/row_version_manager.hpp"
|
19
20
|
#include "duckdb/common/serializer/serializer.hpp"
|
20
21
|
#include "duckdb/common/serializer/deserializer.hpp"
|
21
22
|
#include "duckdb/common/serializer/binary_serializer.hpp"
|
22
23
|
|
23
24
|
namespace duckdb {
|
24
25
|
|
25
|
-
constexpr const idx_t RowGroup::ROW_GROUP_VECTOR_COUNT;
|
26
|
-
constexpr const idx_t RowGroup::ROW_GROUP_SIZE;
|
27
|
-
|
28
26
|
RowGroup::RowGroup(RowGroupCollection &collection, idx_t start, idx_t count)
|
29
27
|
: SegmentBase<RowGroup>(start, count), collection(collection) {
|
30
28
|
Verify();
|
@@ -42,7 +40,8 @@ RowGroup::RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer)
|
|
42
40
|
for (idx_t c = 0; c < columns.size(); c++) {
|
43
41
|
this->is_loaded[c] = false;
|
44
42
|
}
|
45
|
-
this->
|
43
|
+
this->deletes_pointers = std::move(pointer.deletes_pointers);
|
44
|
+
this->deletes_is_loaded = false;
|
46
45
|
|
47
46
|
Verify();
|
48
47
|
}
|
@@ -53,34 +52,12 @@ void RowGroup::MoveToCollection(RowGroupCollection &collection, idx_t new_start)
|
|
53
52
|
for (auto &column : GetColumns()) {
|
54
53
|
column->SetStart(new_start);
|
55
54
|
}
|
56
|
-
if (
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
void VersionNode::SetStart(idx_t start) {
|
62
|
-
idx_t current_start = start;
|
63
|
-
for (idx_t i = 0; i < RowGroup::ROW_GROUP_VECTOR_COUNT; i++) {
|
64
|
-
if (info[i]) {
|
65
|
-
info[i]->start = current_start;
|
66
|
-
}
|
67
|
-
current_start += STANDARD_VECTOR_SIZE;
|
68
|
-
}
|
69
|
-
}
|
70
|
-
|
71
|
-
idx_t VersionNode::GetCommittedDeletedCount(idx_t count) {
|
72
|
-
idx_t deleted_count = 0;
|
73
|
-
for (idx_t r = 0, i = 0; r < count; r += STANDARD_VECTOR_SIZE, i++) {
|
74
|
-
if (!info[i]) {
|
75
|
-
continue;
|
76
|
-
}
|
77
|
-
idx_t max_count = MinValue<idx_t>(STANDARD_VECTOR_SIZE, count - r);
|
78
|
-
if (max_count == 0) {
|
79
|
-
break;
|
55
|
+
if (!HasUnloadedDeletes()) {
|
56
|
+
auto &vinfo = GetVersionInfo();
|
57
|
+
if (vinfo) {
|
58
|
+
vinfo->SetStart(new_start);
|
80
59
|
}
|
81
|
-
deleted_count += info[i]->GetCommittedDeletedCount(max_count);
|
82
60
|
}
|
83
|
-
return deleted_count;
|
84
61
|
}
|
85
62
|
|
86
63
|
RowGroup::~RowGroup() {
|
@@ -124,6 +101,11 @@ ColumnData &RowGroup::GetColumn(storage_t c) {
|
|
124
101
|
this->columns[c] =
|
125
102
|
ColumnData::Deserialize(GetBlockManager(), GetTableInfo(), c, start, column_data_reader, types[c], nullptr);
|
126
103
|
is_loaded[c] = true;
|
104
|
+
if (this->columns[c]->count != this->count) {
|
105
|
+
throw InternalException("Corrupted database - loaded column with index %llu at row start %llu, count %llu did "
|
106
|
+
"not match count of row group %llu",
|
107
|
+
c, start, this->columns[c]->count, this->count.load());
|
108
|
+
}
|
127
109
|
return *columns[c];
|
128
110
|
}
|
129
111
|
|
@@ -265,7 +247,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(RowGroupCollection &new_collection, con
|
|
265
247
|
|
266
248
|
// set up the row_group based on this row_group
|
267
249
|
auto row_group = make_uniq<RowGroup>(new_collection, this->start, this->count);
|
268
|
-
row_group->version_info =
|
250
|
+
row_group->version_info = GetOrCreateVersionInfoPtr();
|
269
251
|
auto &cols = GetColumns();
|
270
252
|
for (idx_t i = 0; i < cols.size(); i++) {
|
271
253
|
if (i == changed_idx) {
|
@@ -304,7 +286,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(RowGroupCollection &new_collection, Col
|
|
304
286
|
|
305
287
|
// set up the row_group based on this row_group
|
306
288
|
auto row_group = make_uniq<RowGroup>(new_collection, this->start, this->count);
|
307
|
-
row_group->version_info =
|
289
|
+
row_group->version_info = GetOrCreateVersionInfoPtr();
|
308
290
|
row_group->columns = GetColumns();
|
309
291
|
// now add the new column
|
310
292
|
row_group->columns.push_back(std::move(added_column));
|
@@ -319,7 +301,7 @@ unique_ptr<RowGroup> RowGroup::RemoveColumn(RowGroupCollection &new_collection,
|
|
319
301
|
D_ASSERT(removed_column < columns.size());
|
320
302
|
|
321
303
|
auto row_group = make_uniq<RowGroup>(new_collection, this->start, this->count);
|
322
|
-
row_group->version_info =
|
304
|
+
row_group->version_info = GetOrCreateVersionInfoPtr();
|
323
305
|
// copy over all columns except for the removed one
|
324
306
|
auto &cols = GetColumns();
|
325
307
|
for (idx_t i = 0; i < cols.size(); i++) {
|
@@ -566,45 +548,62 @@ void RowGroup::ScanCommitted(CollectionScanState &state, DataChunk &result, Tabl
|
|
566
548
|
}
|
567
549
|
}
|
568
550
|
|
569
|
-
|
570
|
-
if (!
|
571
|
-
return
|
551
|
+
shared_ptr<RowVersionManager> &RowGroup::GetVersionInfo() {
|
552
|
+
if (!HasUnloadedDeletes()) {
|
553
|
+
// deletes are loaded - return the version info
|
554
|
+
return version_info;
|
555
|
+
}
|
556
|
+
lock_guard<mutex> lock(row_group_lock);
|
557
|
+
// double-check after obtaining the lock whether or not deletes are still not loaded to avoid double load
|
558
|
+
if (HasUnloadedDeletes()) {
|
559
|
+
// deletes are not loaded - reload
|
560
|
+
auto root_delete = deletes_pointers[0];
|
561
|
+
version_info = RowVersionManager::Deserialize(root_delete, GetBlockManager().GetMetadataManager(), start);
|
562
|
+
deletes_is_loaded = true;
|
563
|
+
}
|
564
|
+
return version_info;
|
565
|
+
}
|
566
|
+
|
567
|
+
shared_ptr<RowVersionManager> &RowGroup::GetOrCreateVersionInfoPtr() {
|
568
|
+
auto vinfo = GetVersionInfo();
|
569
|
+
if (!vinfo) {
|
570
|
+
lock_guard<mutex> lock(row_group_lock);
|
571
|
+
if (!version_info) {
|
572
|
+
version_info = make_shared<RowVersionManager>(start);
|
573
|
+
}
|
572
574
|
}
|
573
|
-
return version_info
|
575
|
+
return version_info;
|
576
|
+
}
|
577
|
+
|
578
|
+
RowVersionManager &RowGroup::GetOrCreateVersionInfo() {
|
579
|
+
return *GetOrCreateVersionInfoPtr();
|
574
580
|
}
|
575
581
|
|
576
582
|
idx_t RowGroup::GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector,
|
577
583
|
idx_t max_count) {
|
578
|
-
|
579
|
-
|
580
|
-
auto info = GetChunkInfo(vector_idx);
|
581
|
-
if (!info) {
|
584
|
+
auto &vinfo = GetVersionInfo();
|
585
|
+
if (!vinfo) {
|
582
586
|
return max_count;
|
583
587
|
}
|
584
|
-
return
|
588
|
+
return vinfo->GetSelVector(transaction, vector_idx, sel_vector, max_count);
|
585
589
|
}
|
586
590
|
|
587
591
|
idx_t RowGroup::GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx,
|
588
592
|
SelectionVector &sel_vector, idx_t max_count) {
|
589
|
-
|
590
|
-
|
591
|
-
auto info = GetChunkInfo(vector_idx);
|
592
|
-
if (!info) {
|
593
|
+
auto &vinfo = GetVersionInfo();
|
594
|
+
if (!vinfo) {
|
593
595
|
return max_count;
|
594
596
|
}
|
595
|
-
return
|
597
|
+
return vinfo->GetCommittedSelVector(start_time, transaction_id, vector_idx, sel_vector, max_count);
|
596
598
|
}
|
597
599
|
|
598
600
|
bool RowGroup::Fetch(TransactionData transaction, idx_t row) {
|
599
601
|
D_ASSERT(row < this->count);
|
600
|
-
|
601
|
-
|
602
|
-
idx_t vector_index = row / STANDARD_VECTOR_SIZE;
|
603
|
-
auto info = GetChunkInfo(vector_index);
|
604
|
-
if (!info) {
|
602
|
+
auto &vinfo = GetVersionInfo();
|
603
|
+
if (!vinfo) {
|
605
604
|
return true;
|
606
605
|
}
|
607
|
-
return
|
606
|
+
return vinfo->Fetch(transaction, row);
|
608
607
|
}
|
609
608
|
|
610
609
|
void RowGroup::FetchRow(TransactionData transaction, ColumnFetchState &state, const vector<column_t> &column_ids,
|
@@ -628,72 +627,23 @@ void RowGroup::FetchRow(TransactionData transaction, ColumnFetchState &state, co
|
|
628
627
|
void RowGroup::AppendVersionInfo(TransactionData transaction, idx_t count) {
|
629
628
|
idx_t row_group_start = this->count.load();
|
630
629
|
idx_t row_group_end = row_group_start + count;
|
631
|
-
if (row_group_end >
|
632
|
-
row_group_end =
|
630
|
+
if (row_group_end > Storage::ROW_GROUP_SIZE) {
|
631
|
+
row_group_end = Storage::ROW_GROUP_SIZE;
|
633
632
|
}
|
634
|
-
lock_guard<mutex> lock(row_group_lock);
|
635
|
-
|
636
633
|
// create the version_info if it doesn't exist yet
|
637
|
-
|
638
|
-
|
639
|
-
}
|
640
|
-
idx_t start_vector_idx = row_group_start / STANDARD_VECTOR_SIZE;
|
641
|
-
idx_t end_vector_idx = (row_group_end - 1) / STANDARD_VECTOR_SIZE;
|
642
|
-
for (idx_t vector_idx = start_vector_idx; vector_idx <= end_vector_idx; vector_idx++) {
|
643
|
-
idx_t start = vector_idx == start_vector_idx ? row_group_start - start_vector_idx * STANDARD_VECTOR_SIZE : 0;
|
644
|
-
idx_t end =
|
645
|
-
vector_idx == end_vector_idx ? row_group_end - end_vector_idx * STANDARD_VECTOR_SIZE : STANDARD_VECTOR_SIZE;
|
646
|
-
if (start == 0 && end == STANDARD_VECTOR_SIZE) {
|
647
|
-
// entire vector is encapsulated by append: append a single constant
|
648
|
-
auto constant_info = make_uniq<ChunkConstantInfo>(this->start + vector_idx * STANDARD_VECTOR_SIZE);
|
649
|
-
constant_info->insert_id = transaction.transaction_id;
|
650
|
-
constant_info->delete_id = NOT_DELETED_ID;
|
651
|
-
version_info->info[vector_idx] = std::move(constant_info);
|
652
|
-
} else {
|
653
|
-
// part of a vector is encapsulated: append to that part
|
654
|
-
ChunkVectorInfo *info;
|
655
|
-
if (!version_info->info[vector_idx]) {
|
656
|
-
// first time appending to this vector: create new info
|
657
|
-
auto insert_info = make_uniq<ChunkVectorInfo>(this->start + vector_idx * STANDARD_VECTOR_SIZE);
|
658
|
-
info = insert_info.get();
|
659
|
-
version_info->info[vector_idx] = std::move(insert_info);
|
660
|
-
} else {
|
661
|
-
D_ASSERT(version_info->info[vector_idx]->type == ChunkInfoType::VECTOR_INFO);
|
662
|
-
// use existing vector
|
663
|
-
info = &version_info->info[vector_idx]->Cast<ChunkVectorInfo>();
|
664
|
-
}
|
665
|
-
info->Append(start, end, transaction.transaction_id);
|
666
|
-
}
|
667
|
-
}
|
634
|
+
auto &vinfo = GetOrCreateVersionInfo();
|
635
|
+
vinfo.AppendVersionInfo(transaction, count, row_group_start, row_group_end);
|
668
636
|
this->count = row_group_end;
|
669
637
|
}
|
670
638
|
|
671
639
|
void RowGroup::CommitAppend(transaction_t commit_id, idx_t row_group_start, idx_t count) {
|
672
|
-
|
673
|
-
|
674
|
-
lock_guard<mutex> lock(row_group_lock);
|
675
|
-
|
676
|
-
idx_t start_vector_idx = row_group_start / STANDARD_VECTOR_SIZE;
|
677
|
-
idx_t end_vector_idx = (row_group_end - 1) / STANDARD_VECTOR_SIZE;
|
678
|
-
for (idx_t vector_idx = start_vector_idx; vector_idx <= end_vector_idx; vector_idx++) {
|
679
|
-
idx_t start = vector_idx == start_vector_idx ? row_group_start - start_vector_idx * STANDARD_VECTOR_SIZE : 0;
|
680
|
-
idx_t end =
|
681
|
-
vector_idx == end_vector_idx ? row_group_end - end_vector_idx * STANDARD_VECTOR_SIZE : STANDARD_VECTOR_SIZE;
|
682
|
-
|
683
|
-
auto info = version_info->info[vector_idx].get();
|
684
|
-
info->CommitAppend(commit_id, start, end);
|
685
|
-
}
|
640
|
+
auto &vinfo = GetOrCreateVersionInfo();
|
641
|
+
vinfo.CommitAppend(commit_id, row_group_start, count);
|
686
642
|
}
|
687
643
|
|
688
644
|
void RowGroup::RevertAppend(idx_t row_group_start) {
|
689
|
-
|
690
|
-
|
691
|
-
}
|
692
|
-
idx_t start_row = row_group_start - this->start;
|
693
|
-
idx_t start_vector_idx = (start_row + (STANDARD_VECTOR_SIZE - 1)) / STANDARD_VECTOR_SIZE;
|
694
|
-
for (idx_t vector_idx = start_vector_idx; vector_idx < RowGroup::ROW_GROUP_VECTOR_COUNT; vector_idx++) {
|
695
|
-
version_info->info[vector_idx].reset();
|
696
|
-
}
|
645
|
+
auto &vinfo = GetOrCreateVersionInfo();
|
646
|
+
vinfo.RevertAppend(row_group_start - this->start);
|
697
647
|
for (auto &column : columns) {
|
698
648
|
column->RevertAppend(row_group_start);
|
699
649
|
}
|
@@ -806,10 +756,24 @@ RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
|
|
806
756
|
}
|
807
757
|
|
808
758
|
bool RowGroup::AllDeleted() {
|
809
|
-
if (
|
759
|
+
if (HasUnloadedDeletes()) {
|
760
|
+
// deletes aren't loaded yet - we know not everything is deleted
|
761
|
+
return false;
|
762
|
+
}
|
763
|
+
auto &vinfo = GetVersionInfo();
|
764
|
+
if (!vinfo) {
|
765
|
+
return false;
|
766
|
+
}
|
767
|
+
return vinfo->GetCommittedDeletedCount(count) == count;
|
768
|
+
}
|
769
|
+
|
770
|
+
bool RowGroup::HasUnloadedDeletes() const {
|
771
|
+
if (deletes_pointers.empty()) {
|
772
|
+
// no stored deletes at all
|
810
773
|
return false;
|
811
774
|
}
|
812
|
-
return
|
775
|
+
// return whether or not the deletes have been loaded
|
776
|
+
return !deletes_is_loaded;
|
813
777
|
}
|
814
778
|
|
815
779
|
RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats) {
|
@@ -846,48 +810,30 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, TableStatistics &gl
|
|
846
810
|
state->WriteDataPointers(writer, serializer);
|
847
811
|
serializer.End();
|
848
812
|
}
|
849
|
-
row_group_pointer.
|
813
|
+
row_group_pointer.deletes_pointers = CheckpointDeletes(writer.GetPayloadWriter().GetManager());
|
850
814
|
Verify();
|
851
815
|
return row_group_pointer;
|
852
816
|
}
|
853
817
|
|
818
|
+
vector<MetaBlockPointer> RowGroup::CheckpointDeletes(MetadataManager &manager) {
|
819
|
+
if (HasUnloadedDeletes()) {
|
820
|
+
// deletes were not loaded so they cannot be changed
|
821
|
+
// re-use them as-is
|
822
|
+
manager.ClearModifiedBlocks(deletes_pointers);
|
823
|
+
return deletes_pointers;
|
824
|
+
}
|
825
|
+
if (!version_info) {
|
826
|
+
// no version information: write nothing
|
827
|
+
return vector<MetaBlockPointer>();
|
828
|
+
}
|
829
|
+
return version_info->Checkpoint(manager);
|
830
|
+
}
|
831
|
+
|
854
832
|
void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &serializer) {
|
855
833
|
serializer.WriteProperty(100, "row_start", pointer.row_start);
|
856
834
|
serializer.WriteProperty(101, "tuple_count", pointer.tuple_count);
|
857
835
|
serializer.WriteProperty(102, "data_pointers", pointer.data_pointers);
|
858
|
-
|
859
|
-
// Checkpoint deletes
|
860
|
-
auto versions = pointer.versions.get();
|
861
|
-
|
862
|
-
if (!versions) {
|
863
|
-
// no version information: write nothing
|
864
|
-
serializer.WriteProperty(103, "versions_count", 0);
|
865
|
-
return;
|
866
|
-
}
|
867
|
-
// first count how many ChunkInfo's we need to deserialize
|
868
|
-
idx_t chunk_info_count = 0;
|
869
|
-
idx_t idx_map[ROW_GROUP_VECTOR_COUNT];
|
870
|
-
for (idx_t vector_idx = 0; vector_idx < RowGroup::ROW_GROUP_VECTOR_COUNT; vector_idx++) {
|
871
|
-
auto chunk_info = versions->info[vector_idx].get();
|
872
|
-
if (!chunk_info) {
|
873
|
-
continue;
|
874
|
-
}
|
875
|
-
idx_map[chunk_info_count++] = vector_idx;
|
876
|
-
}
|
877
|
-
|
878
|
-
// now serialize the actual version information
|
879
|
-
serializer.WriteProperty(103, "versions_count", chunk_info_count);
|
880
|
-
if (chunk_info_count == 0) {
|
881
|
-
return;
|
882
|
-
}
|
883
|
-
serializer.WriteList(104, "versions", chunk_info_count, [&](Serializer::List &list, idx_t i) {
|
884
|
-
auto vector_idx = idx_map[i];
|
885
|
-
auto chunk_info = versions->info[vector_idx].get();
|
886
|
-
list.WriteObject([&](Serializer &obj) {
|
887
|
-
obj.WriteProperty(100, "vector_index", vector_idx);
|
888
|
-
obj.WriteProperty(101, "chunk_info", const_cast<const ChunkInfo *>(chunk_info));
|
889
|
-
});
|
890
|
-
});
|
836
|
+
serializer.WriteProperty(103, "delete_pointers", pointer.deletes_pointers);
|
891
837
|
}
|
892
838
|
|
893
839
|
RowGroupPointer RowGroup::Deserialize(Deserializer &deserializer) {
|
@@ -895,26 +841,7 @@ RowGroupPointer RowGroup::Deserialize(Deserializer &deserializer) {
|
|
895
841
|
result.row_start = deserializer.ReadProperty<uint64_t>(100, "row_start");
|
896
842
|
result.tuple_count = deserializer.ReadProperty<uint64_t>(101, "tuple_count");
|
897
843
|
result.data_pointers = deserializer.ReadProperty<vector<MetaBlockPointer>>(102, "data_pointers");
|
898
|
-
result.
|
899
|
-
// Deserialize Deletes
|
900
|
-
auto chunk_count = deserializer.ReadProperty<idx_t>(103, "versions_count");
|
901
|
-
if (chunk_count == 0) {
|
902
|
-
return result;
|
903
|
-
}
|
904
|
-
auto version_info = make_shared<VersionNode>();
|
905
|
-
deserializer.ReadList(104, "versions", [&](Deserializer::List &list, idx_t i) {
|
906
|
-
list.ReadObject([&](Deserializer &obj) {
|
907
|
-
auto vector_index = obj.ReadProperty<idx_t>(100, "vector_index");
|
908
|
-
if (vector_index >= RowGroup::ROW_GROUP_VECTOR_COUNT) {
|
909
|
-
throw Exception("In DeserializeDeletes, vector_index is out of range for the row group. Corrupted "
|
910
|
-
"file?");
|
911
|
-
}
|
912
|
-
version_info->info[vector_index] = obj.ReadProperty<unique_ptr<ChunkInfo>>(101, "chunk_info");
|
913
|
-
});
|
914
|
-
});
|
915
|
-
|
916
|
-
result.versions = version_info;
|
917
|
-
|
844
|
+
result.deletes_pointers = deserializer.ReadProperty<vector<MetaBlockPointer>>(103, "delete_pointers");
|
918
845
|
return result;
|
919
846
|
}
|
920
847
|
|
@@ -934,14 +861,13 @@ void RowGroup::GetColumnSegmentInfo(idx_t row_group_index, vector<ColumnSegmentI
|
|
934
861
|
class VersionDeleteState {
|
935
862
|
public:
|
936
863
|
VersionDeleteState(RowGroup &info, TransactionData transaction, DataTable &table, idx_t base_row)
|
937
|
-
: info(info), transaction(transaction), table(table),
|
938
|
-
|
864
|
+
: info(info), transaction(transaction), table(table), current_chunk(DConstants::INVALID_INDEX), count(0),
|
865
|
+
base_row(base_row), delete_count(0) {
|
939
866
|
}
|
940
867
|
|
941
868
|
RowGroup &info;
|
942
869
|
TransactionData transaction;
|
943
870
|
DataTable &table;
|
944
|
-
ChunkVectorInfo *current_info;
|
945
871
|
idx_t current_chunk;
|
946
872
|
row_t rows[STANDARD_VECTOR_SIZE];
|
947
873
|
idx_t count;
|
@@ -955,7 +881,6 @@ public:
|
|
955
881
|
};
|
956
882
|
|
957
883
|
idx_t RowGroup::Delete(TransactionData transaction, DataTable &table, row_t *ids, idx_t count) {
|
958
|
-
lock_guard<mutex> lock(row_group_lock);
|
959
884
|
VersionDeleteState del_state(*this, transaction, table, this->start);
|
960
885
|
|
961
886
|
// obtain a write lock
|
@@ -976,6 +901,10 @@ void RowGroup::Verify() {
|
|
976
901
|
#endif
|
977
902
|
}
|
978
903
|
|
904
|
+
idx_t RowGroup::DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count) {
|
905
|
+
return GetOrCreateVersionInfo().DeleteRows(vector_idx, transaction_id, rows, count);
|
906
|
+
}
|
907
|
+
|
979
908
|
void VersionDeleteState::Delete(row_t row_id) {
|
980
909
|
D_ASSERT(row_id >= 0);
|
981
910
|
idx_t vector_idx = row_id / STANDARD_VECTOR_SIZE;
|
@@ -983,26 +912,6 @@ void VersionDeleteState::Delete(row_t row_id) {
|
|
983
912
|
if (current_chunk != vector_idx) {
|
984
913
|
Flush();
|
985
914
|
|
986
|
-
if (!info.version_info) {
|
987
|
-
info.version_info = make_shared<VersionNode>();
|
988
|
-
}
|
989
|
-
|
990
|
-
if (!info.version_info->info[vector_idx]) {
|
991
|
-
// no info yet: create it
|
992
|
-
info.version_info->info[vector_idx] =
|
993
|
-
make_uniq<ChunkVectorInfo>(info.start + vector_idx * STANDARD_VECTOR_SIZE);
|
994
|
-
} else if (info.version_info->info[vector_idx]->type == ChunkInfoType::CONSTANT_INFO) {
|
995
|
-
auto &constant = info.version_info->info[vector_idx]->Cast<ChunkConstantInfo>();
|
996
|
-
// info exists but it's a constant info: convert to a vector info
|
997
|
-
auto new_info = make_uniq<ChunkVectorInfo>(info.start + vector_idx * STANDARD_VECTOR_SIZE);
|
998
|
-
new_info->insert_id = constant.insert_id.load();
|
999
|
-
for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) {
|
1000
|
-
new_info->inserted[i] = constant.insert_id.load();
|
1001
|
-
}
|
1002
|
-
info.version_info->info[vector_idx] = std::move(new_info);
|
1003
|
-
}
|
1004
|
-
D_ASSERT(info.version_info->info[vector_idx]->type == ChunkInfoType::VECTOR_INFO);
|
1005
|
-
current_info = &info.version_info->info[vector_idx]->Cast<ChunkVectorInfo>();
|
1006
915
|
current_chunk = vector_idx;
|
1007
916
|
chunk_row = vector_idx * STANDARD_VECTOR_SIZE;
|
1008
917
|
}
|
@@ -1016,11 +925,12 @@ void VersionDeleteState::Flush() {
|
|
1016
925
|
// it is possible for delete statements to delete the same tuple multiple times when combined with a USING clause
|
1017
926
|
// in the current_info->Delete, we check which tuples are actually deleted (excluding duplicate deletions)
|
1018
927
|
// this is returned in the actual_delete_count
|
1019
|
-
auto actual_delete_count =
|
928
|
+
auto actual_delete_count = info.DeleteRows(current_chunk, transaction.transaction_id, rows, count);
|
1020
929
|
delete_count += actual_delete_count;
|
1021
930
|
if (transaction.transaction && actual_delete_count > 0) {
|
1022
931
|
// now push the delete into the undo buffer, but only if any deletes were actually performed
|
1023
|
-
transaction.transaction->PushDelete(table,
|
932
|
+
transaction.transaction->PushDelete(table, info.GetOrCreateVersionInfo(), current_chunk, rows,
|
933
|
+
actual_delete_count, base_row + chunk_row);
|
1024
934
|
}
|
1025
935
|
count = 0;
|
1026
936
|
}
|
@@ -339,7 +339,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
|
|
339
339
|
auto current_row_group = state.row_group_append_state.row_group;
|
340
340
|
// check how much we can fit into the current row_group
|
341
341
|
idx_t append_count =
|
342
|
-
MinValue<idx_t>(remaining,
|
342
|
+
MinValue<idx_t>(remaining, Storage::ROW_GROUP_SIZE - state.row_group_append_state.offset_in_row_group);
|
343
343
|
if (append_count > 0) {
|
344
344
|
current_row_group->Append(state.row_group_append_state, chunk, append_count);
|
345
345
|
// merge the stats
|
@@ -393,7 +393,7 @@ void RowGroupCollection::FinalizeAppend(TransactionData transaction, TableAppend
|
|
393
393
|
auto remaining = state.total_append_count;
|
394
394
|
auto row_group = state.start_row_group;
|
395
395
|
while (remaining > 0) {
|
396
|
-
auto append_count = MinValue<idx_t>(remaining,
|
396
|
+
auto append_count = MinValue<idx_t>(remaining, Storage::ROW_GROUP_SIZE - row_group->count);
|
397
397
|
row_group->AppendVersionInfo(transaction, append_count);
|
398
398
|
remaining -= append_count;
|
399
399
|
row_group = row_groups->GetNextSegment(row_group);
|