duckdb 0.8.2-dev4314.0 → 0.8.2-dev4376.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/file_buffer.cpp +1 -1
- package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
- package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
- package/src/duckdb/src/main/settings/settings.cpp +5 -10
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
- package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +1 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
- package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
- package/src/duckdb/src/storage/table/column_data.cpp +14 -9
- package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +102 -192
- package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
- package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
- package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
- package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -4
- package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
- package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
- package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
- package/src/duckdb/ub_src_storage_table.cpp +2 -0
package/package.json
CHANGED
@@ -118,7 +118,7 @@ struct ParquetWriteBindData : public TableFunctionData {
|
|
118
118
|
vector<LogicalType> sql_types;
|
119
119
|
vector<string> column_names;
|
120
120
|
duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
|
121
|
-
idx_t row_group_size =
|
121
|
+
idx_t row_group_size = Storage::ROW_GROUP_SIZE;
|
122
122
|
|
123
123
|
//! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
|
124
124
|
static constexpr const idx_t BYTES_PER_ROW = 1024;
|
@@ -551,6 +551,8 @@ BindingMode EnumUtil::FromString<BindingMode>(const char *value) {
|
|
551
551
|
template<>
|
552
552
|
const char* EnumUtil::ToChars<BitpackingMode>(BitpackingMode value) {
|
553
553
|
switch(value) {
|
554
|
+
case BitpackingMode::INVALID:
|
555
|
+
return "INVALID";
|
554
556
|
case BitpackingMode::AUTO:
|
555
557
|
return "AUTO";
|
556
558
|
case BitpackingMode::CONSTANT:
|
@@ -568,6 +570,9 @@ const char* EnumUtil::ToChars<BitpackingMode>(BitpackingMode value) {
|
|
568
570
|
|
569
571
|
template<>
|
570
572
|
BitpackingMode EnumUtil::FromString<BitpackingMode>(const char *value) {
|
573
|
+
if (StringUtil::Equals(value, "INVALID")) {
|
574
|
+
return BitpackingMode::INVALID;
|
575
|
+
}
|
571
576
|
if (StringUtil::Equals(value, "AUTO")) {
|
572
577
|
return BitpackingMode::AUTO;
|
573
578
|
}
|
@@ -1,4 +1,7 @@
|
|
1
1
|
#include "duckdb/common/types/validity_mask.hpp"
|
2
|
+
#include "duckdb/common/limits.hpp"
|
3
|
+
#include "duckdb/common/serializer/write_stream.hpp"
|
4
|
+
#include "duckdb/common/serializer/read_stream.hpp"
|
2
5
|
|
3
6
|
namespace duckdb {
|
4
7
|
|
@@ -173,4 +176,57 @@ void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset,
|
|
173
176
|
#endif
|
174
177
|
}
|
175
178
|
|
179
|
+
enum class ValiditySerialization : uint8_t { BITMASK = 0, VALID_VALUES = 1, INVALID_VALUES = 2 };
|
180
|
+
|
181
|
+
void ValidityMask::Write(WriteStream &writer, idx_t count) {
|
182
|
+
auto valid_values = CountValid(count);
|
183
|
+
auto invalid_values = count - valid_values;
|
184
|
+
auto bitmask_bytes = ValidityMask::ValidityMaskSize(count);
|
185
|
+
auto need_u32 = count >= NumericLimits<uint16_t>::Maximum();
|
186
|
+
auto bytes_per_value = need_u32 ? sizeof(uint32_t) : sizeof(uint16_t);
|
187
|
+
auto valid_value_size = bytes_per_value * valid_values + sizeof(uint32_t);
|
188
|
+
auto invalid_value_size = bytes_per_value * invalid_values + sizeof(uint32_t);
|
189
|
+
if (valid_value_size < bitmask_bytes || invalid_value_size < bitmask_bytes) {
|
190
|
+
auto serialize_valid = valid_value_size < invalid_value_size;
|
191
|
+
// serialize (in)valid value indexes as [COUNT][V0][V1][...][VN]
|
192
|
+
auto flag = serialize_valid ? ValiditySerialization::VALID_VALUES : ValiditySerialization::INVALID_VALUES;
|
193
|
+
writer.Write(flag);
|
194
|
+
writer.Write<uint32_t>(MinValue<uint32_t>(valid_values, invalid_values));
|
195
|
+
for (idx_t i = 0; i < count; i++) {
|
196
|
+
if (RowIsValid(i) == serialize_valid) {
|
197
|
+
if (need_u32) {
|
198
|
+
writer.Write<uint32_t>(i);
|
199
|
+
} else {
|
200
|
+
writer.Write<uint16_t>(i);
|
201
|
+
}
|
202
|
+
}
|
203
|
+
}
|
204
|
+
} else {
|
205
|
+
// serialize the entire bitmask
|
206
|
+
writer.Write(ValiditySerialization::BITMASK);
|
207
|
+
writer.WriteData(const_data_ptr_cast(GetData()), bitmask_bytes);
|
208
|
+
}
|
209
|
+
}
|
210
|
+
|
211
|
+
void ValidityMask::Read(ReadStream &reader, idx_t count) {
|
212
|
+
Initialize(count);
|
213
|
+
// deserialize the storage type
|
214
|
+
auto flag = reader.Read<ValiditySerialization>();
|
215
|
+
if (flag == ValiditySerialization::BITMASK) {
|
216
|
+
// deserialize the bitmask
|
217
|
+
reader.ReadData(data_ptr_cast(GetData()), ValidityMask::ValidityMaskSize(count));
|
218
|
+
return;
|
219
|
+
}
|
220
|
+
auto is_u32 = count >= NumericLimits<uint16_t>::Maximum();
|
221
|
+
auto is_valid = flag == ValiditySerialization::VALID_VALUES;
|
222
|
+
auto serialize_count = reader.Read<uint32_t>();
|
223
|
+
if (is_valid) {
|
224
|
+
SetAllInvalid(count);
|
225
|
+
}
|
226
|
+
for (idx_t i = 0; i < serialize_count; i++) {
|
227
|
+
idx_t index = is_u32 ? reader.Read<uint32_t>() : reader.Read<uint16_t>();
|
228
|
+
Set(index, is_valid);
|
229
|
+
}
|
230
|
+
}
|
231
|
+
|
176
232
|
} // namespace duckdb
|
@@ -194,7 +194,7 @@ public:
|
|
194
194
|
}
|
195
195
|
auto new_count = current_collection->GetTotalRows();
|
196
196
|
auto batch_type =
|
197
|
-
new_count <
|
197
|
+
new_count < Storage::ROW_GROUP_SIZE ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED;
|
198
198
|
if (batch_type == RowGroupBatchType::FLUSHED && writer) {
|
199
199
|
writer->WriteLastRowGroup(*current_collection);
|
200
200
|
}
|
@@ -482,7 +482,7 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato
|
|
482
482
|
|
483
483
|
lock_guard<mutex> lock(gstate.lock);
|
484
484
|
gstate.insert_count += append_count;
|
485
|
-
if (append_count <
|
485
|
+
if (append_count < Storage::ROW_GROUP_SIZE) {
|
486
486
|
// we have few rows - append to the local storage directly
|
487
487
|
auto &table = gstate.table;
|
488
488
|
auto &storage = table.GetStorage();
|
@@ -837,7 +837,15 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const ar
|
|
837
837
|
throw InvalidInputException("arrow_scan: array length mismatch");
|
838
838
|
}
|
839
839
|
// Make sure this Vector keeps the Arrow chunk alive in case we can zero-copy the data
|
840
|
-
|
840
|
+
if (scan_state.arrow_owned_data.find(idx) == scan_state.arrow_owned_data.end()) {
|
841
|
+
auto arrow_data = make_shared<ArrowArrayWrapper>();
|
842
|
+
arrow_data->arrow_array = scan_state.chunk->arrow_array;
|
843
|
+
scan_state.chunk->arrow_array.release = nullptr;
|
844
|
+
scan_state.arrow_owned_data[idx] = arrow_data;
|
845
|
+
}
|
846
|
+
|
847
|
+
output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.arrow_owned_data[idx]));
|
848
|
+
|
841
849
|
D_ASSERT(arrow_convert_data.find(col_idx) != arrow_convert_data.end());
|
842
850
|
auto &arrow_type = *arrow_convert_data.at(col_idx);
|
843
851
|
if (array.dictionary) {
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev4376"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "312b995450"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -58,21 +58,6 @@ struct DConstants {
|
|
58
58
|
static constexpr const idx_t INVALID_INDEX = idx_t(-1);
|
59
59
|
};
|
60
60
|
|
61
|
-
struct Storage {
|
62
|
-
//! The size of a hard disk sector, only really needed for Direct IO
|
63
|
-
constexpr static int SECTOR_SIZE = 4096;
|
64
|
-
//! Block header size for blocks written to the storage
|
65
|
-
constexpr static int BLOCK_HEADER_SIZE = sizeof(uint64_t);
|
66
|
-
// Size of a memory slot managed by the StorageManager. This is the quantum of allocation for Blocks on DuckDB. We
|
67
|
-
// default to 256KB. (1 << 18)
|
68
|
-
constexpr static int BLOCK_ALLOC_SIZE = 262144;
|
69
|
-
//! The actual memory space that is available within the blocks
|
70
|
-
constexpr static int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;
|
71
|
-
//! The size of the headers. This should be small and written more or less atomically by the hard disk. We default
|
72
|
-
//! to the page size, which is 4KB. (1 << 12)
|
73
|
-
constexpr static int FILE_HEADER_SIZE = 4096;
|
74
|
-
};
|
75
|
-
|
76
61
|
struct LogicalIndex {
|
77
62
|
explicit LogicalIndex(idx_t index) : index(index) {
|
78
63
|
}
|
@@ -1,7 +1,7 @@
|
|
1
1
|
//===----------------------------------------------------------------------===//
|
2
2
|
// DuckDB
|
3
3
|
//
|
4
|
-
// duckdb/common/serializer/
|
4
|
+
// duckdb/common/serializer/memory_stream.hpp
|
5
5
|
//
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
@@ -332,6 +332,9 @@ public:
|
|
332
332
|
DUCKDB_API string ToString(idx_t count) const;
|
333
333
|
|
334
334
|
DUCKDB_API static bool IsAligned(idx_t count);
|
335
|
+
|
336
|
+
void Write(WriteStream &writer, idx_t count);
|
337
|
+
void Read(ReadStream &reader, idx_t count);
|
335
338
|
};
|
336
339
|
|
337
340
|
} // namespace duckdb
|
@@ -67,6 +67,9 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
|
|
67
67
|
|
68
68
|
unique_ptr<ArrowArrayStreamWrapper> stream;
|
69
69
|
shared_ptr<ArrowArrayWrapper> chunk;
|
70
|
+
// This vector hold the Arrow Vectors owned by DuckDB to allow for zero-copy
|
71
|
+
// Note that only DuckDB can release these vectors
|
72
|
+
unordered_map<idx_t, shared_ptr<ArrowArrayWrapper>> arrow_owned_data;
|
70
73
|
idx_t chunk_offset = 0;
|
71
74
|
idx_t batch_index = 0;
|
72
75
|
vector<column_t> column_ids;
|
@@ -52,11 +52,11 @@ struct MetaBlockPointer {
|
|
52
52
|
idx_t block_pointer;
|
53
53
|
uint32_t offset;
|
54
54
|
|
55
|
-
bool IsValid() {
|
55
|
+
bool IsValid() const {
|
56
56
|
return block_pointer != DConstants::INVALID_INDEX;
|
57
57
|
}
|
58
|
-
block_id_t GetBlockId();
|
59
|
-
uint32_t GetBlockIndex();
|
58
|
+
block_id_t GetBlockId() const;
|
59
|
+
uint32_t GetBlockIndex() const;
|
60
60
|
|
61
61
|
void Serialize(Serializer &serializer) const;
|
62
62
|
static MetaBlockPointer Deserialize(Deserializer &source);
|
@@ -12,14 +12,7 @@
|
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
14
|
|
15
|
-
enum class BitpackingMode : uint8_t {
|
16
|
-
AUTO,
|
17
|
-
|
18
|
-
CONSTANT,
|
19
|
-
CONSTANT_DELTA,
|
20
|
-
DELTA_FOR,
|
21
|
-
FOR
|
22
|
-
};
|
15
|
+
enum class BitpackingMode : uint8_t { INVALID, AUTO, CONSTANT, CONSTANT_DELTA, DELTA_FOR, FOR };
|
23
16
|
|
24
17
|
BitpackingMode BitpackingModeFromString(const string &str);
|
25
18
|
string BitpackingModeToString(const BitpackingMode &mode);
|
@@ -40,8 +40,8 @@ struct RowGroupPointer {
|
|
40
40
|
uint64_t tuple_count;
|
41
41
|
//! The data pointers of the column segments stored in the row group
|
42
42
|
vector<MetaBlockPointer> data_pointers;
|
43
|
-
//!
|
44
|
-
|
43
|
+
//! Data pointers to the delete information of the row group (if any)
|
44
|
+
vector<MetaBlockPointer> deletes_pointers;
|
45
45
|
};
|
46
46
|
|
47
47
|
} // namespace duckdb
|
@@ -64,6 +64,7 @@ public:
|
|
64
64
|
void Flush();
|
65
65
|
|
66
66
|
void MarkBlocksAsModified();
|
67
|
+
void ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers);
|
67
68
|
|
68
69
|
idx_t BlockCount();
|
69
70
|
|
@@ -82,6 +83,7 @@ protected:
|
|
82
83
|
|
83
84
|
void AddBlock(MetadataBlock new_block, bool if_exists = false);
|
84
85
|
void AddAndRegisterBlock(MetadataBlock block);
|
86
|
+
void ConvertToTransient(MetadataBlock &block);
|
85
87
|
};
|
86
88
|
|
87
89
|
} // namespace duckdb
|
@@ -18,6 +18,7 @@ enum class BlockReaderType { EXISTING_BLOCKS, REGISTER_BLOCKS };
|
|
18
18
|
class MetadataReader : public ReadStream {
|
19
19
|
public:
|
20
20
|
MetadataReader(MetadataManager &manager, MetaBlockPointer pointer,
|
21
|
+
optional_ptr<vector<MetaBlockPointer>> read_pointers = nullptr,
|
21
22
|
BlockReaderType type = BlockReaderType::EXISTING_BLOCKS);
|
22
23
|
MetadataReader(MetadataManager &manager, BlockPointer pointer);
|
23
24
|
~MetadataReader() override;
|
@@ -46,6 +47,7 @@ private:
|
|
46
47
|
MetadataHandle block;
|
47
48
|
MetadataPointer next_pointer;
|
48
49
|
bool has_next_block;
|
50
|
+
optional_ptr<vector<MetaBlockPointer>> read_pointers;
|
49
51
|
idx_t index;
|
50
52
|
idx_t offset;
|
51
53
|
idx_t next_offset;
|
@@ -15,10 +15,10 @@ namespace duckdb {
|
|
15
15
|
|
16
16
|
class MetadataWriter : public WriteStream {
|
17
17
|
public:
|
18
|
+
explicit MetadataWriter(MetadataManager &manager,
|
19
|
+
optional_ptr<vector<MetaBlockPointer>> written_pointers = nullptr);
|
18
20
|
MetadataWriter(const MetadataWriter &) = delete;
|
19
21
|
MetadataWriter &operator=(const MetadataWriter &) = delete;
|
20
|
-
|
21
|
-
explicit MetadataWriter(MetadataManager &manager);
|
22
22
|
~MetadataWriter() override;
|
23
23
|
|
24
24
|
public:
|
@@ -27,6 +27,9 @@ public:
|
|
27
27
|
|
28
28
|
BlockPointer GetBlockPointer();
|
29
29
|
MetaBlockPointer GetMetaBlockPointer();
|
30
|
+
MetadataManager &GetManager() {
|
31
|
+
return manager;
|
32
|
+
}
|
30
33
|
|
31
34
|
protected:
|
32
35
|
virtual MetadataHandle NextHandle();
|
@@ -41,6 +44,7 @@ private:
|
|
41
44
|
MetadataManager &manager;
|
42
45
|
MetadataHandle block;
|
43
46
|
MetadataPointer current_pointer;
|
47
|
+
optional_ptr<vector<MetaBlockPointer>> written_pointers;
|
44
48
|
idx_t capacity;
|
45
49
|
idx_t offset;
|
46
50
|
};
|
@@ -23,6 +23,25 @@ struct FileHandle;
|
|
23
23
|
#error Row group size should be cleanly divisible by vector size
|
24
24
|
#endif
|
25
25
|
|
26
|
+
struct Storage {
|
27
|
+
//! The size of a hard disk sector, only really needed for Direct IO
|
28
|
+
constexpr static int SECTOR_SIZE = 4096;
|
29
|
+
//! Block header size for blocks written to the storage
|
30
|
+
constexpr static int BLOCK_HEADER_SIZE = sizeof(uint64_t);
|
31
|
+
// Size of a memory slot managed by the StorageManager. This is the quantum of allocation for Blocks on DuckDB. We
|
32
|
+
// default to 256KB. (1 << 18)
|
33
|
+
constexpr static int BLOCK_ALLOC_SIZE = 262144;
|
34
|
+
//! The actual memory space that is available within the blocks
|
35
|
+
constexpr static int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;
|
36
|
+
//! The size of the headers. This should be small and written more or less atomically by the hard disk. We default
|
37
|
+
//! to the page size, which is 4KB. (1 << 12)
|
38
|
+
constexpr static int FILE_HEADER_SIZE = 4096;
|
39
|
+
//! The number of rows per row group (must be a multiple of the vector size)
|
40
|
+
constexpr static const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE;
|
41
|
+
//! The number of vectors per row group
|
42
|
+
constexpr static const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE;
|
43
|
+
};
|
44
|
+
|
26
45
|
//! The version number of the database storage format
|
27
46
|
extern const uint64_t VERSION_NUMBER;
|
28
47
|
|
@@ -46,8 +46,10 @@ public:
|
|
46
46
|
virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0;
|
47
47
|
virtual idx_t GetCommittedDeletedCount(idx_t max_count) = 0;
|
48
48
|
|
49
|
-
virtual
|
50
|
-
|
49
|
+
virtual bool HasDeletes() const = 0;
|
50
|
+
|
51
|
+
virtual void Write(WriteStream &writer) const;
|
52
|
+
static unique_ptr<ChunkInfo> Read(ReadStream &reader);
|
51
53
|
|
52
54
|
public:
|
53
55
|
template <class TARGET>
|
@@ -74,8 +76,8 @@ public:
|
|
74
76
|
public:
|
75
77
|
explicit ChunkConstantInfo(idx_t start);
|
76
78
|
|
77
|
-
|
78
|
-
|
79
|
+
transaction_t insert_id;
|
80
|
+
transaction_t delete_id;
|
79
81
|
|
80
82
|
public:
|
81
83
|
idx_t GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) override;
|
@@ -85,8 +87,10 @@ public:
|
|
85
87
|
void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override;
|
86
88
|
idx_t GetCommittedDeletedCount(idx_t max_count) override;
|
87
89
|
|
88
|
-
|
89
|
-
|
90
|
+
bool HasDeletes() const override;
|
91
|
+
|
92
|
+
void Write(WriteStream &writer) const override;
|
93
|
+
static unique_ptr<ChunkInfo> Read(ReadStream &reader);
|
90
94
|
|
91
95
|
private:
|
92
96
|
template <class OP>
|
@@ -102,13 +106,13 @@ public:
|
|
102
106
|
explicit ChunkVectorInfo(idx_t start);
|
103
107
|
|
104
108
|
//! The transaction ids of the transactions that inserted the tuples (if any)
|
105
|
-
|
106
|
-
|
107
|
-
|
109
|
+
transaction_t inserted[STANDARD_VECTOR_SIZE];
|
110
|
+
transaction_t insert_id;
|
111
|
+
bool same_inserted_id;
|
108
112
|
|
109
113
|
//! The transaction ids of the transactions that deleted the tuples (if any)
|
110
|
-
|
111
|
-
|
114
|
+
transaction_t deleted[STANDARD_VECTOR_SIZE];
|
115
|
+
bool any_deleted;
|
112
116
|
|
113
117
|
public:
|
114
118
|
idx_t GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
|
@@ -130,8 +134,10 @@ public:
|
|
130
134
|
idx_t Delete(transaction_t transaction_id, row_t rows[], idx_t count);
|
131
135
|
void CommitDelete(transaction_t commit_id, row_t rows[], idx_t count);
|
132
136
|
|
133
|
-
|
134
|
-
|
137
|
+
bool HasDeletes() const override;
|
138
|
+
|
139
|
+
void Write(WriteStream &writer) const override;
|
140
|
+
static unique_ptr<ChunkInfo> Read(ReadStream &reader);
|
135
141
|
|
136
142
|
private:
|
137
143
|
template <class OP>
|
@@ -151,7 +151,7 @@ protected:
|
|
151
151
|
void AppendTransientSegment(SegmentLock &l, idx_t start_row);
|
152
152
|
|
153
153
|
//! Scans a base vector from the column
|
154
|
-
idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining);
|
154
|
+
idx_t ScanVector(ColumnScanState &state, Vector &result, idx_t remaining, bool has_updates);
|
155
155
|
//! Scans a vector from the column merged with any potential updates
|
156
156
|
//! If ALLOW_UPDATES is set to false, the function will instead throw an exception if any updates are found
|
157
157
|
template <bool SCAN_COMMITTED, bool ALLOW_UPDATES>
|
@@ -36,11 +36,12 @@ class Vector;
|
|
36
36
|
struct ColumnCheckpointState;
|
37
37
|
struct RowGroupPointer;
|
38
38
|
struct TransactionData;
|
39
|
-
struct VersionNode;
|
40
39
|
class CollectionScanState;
|
41
40
|
class TableFilterSet;
|
42
41
|
struct ColumnFetchState;
|
43
42
|
struct RowGroupAppendState;
|
43
|
+
class MetadataManager;
|
44
|
+
class RowVersionManager;
|
44
45
|
|
45
46
|
struct RowGroupWriteData {
|
46
47
|
vector<unique_ptr<ColumnCheckpointState>> states;
|
@@ -50,11 +51,6 @@ struct RowGroupWriteData {
|
|
50
51
|
class RowGroup : public SegmentBase<RowGroup> {
|
51
52
|
public:
|
52
53
|
friend class ColumnData;
|
53
|
-
friend class VersionDeleteState;
|
54
|
-
|
55
|
-
public:
|
56
|
-
static constexpr const idx_t ROW_GROUP_SIZE = STANDARD_ROW_GROUPS_SIZE;
|
57
|
-
static constexpr const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE;
|
58
54
|
|
59
55
|
public:
|
60
56
|
RowGroup(RowGroupCollection &collection, idx_t start, idx_t count);
|
@@ -65,7 +61,7 @@ private:
|
|
65
61
|
//! The RowGroupCollection this row-group is a part of
|
66
62
|
reference<RowGroupCollection> collection;
|
67
63
|
//! The version info of the row_group (inserted and deleted tuple info)
|
68
|
-
shared_ptr<
|
64
|
+
shared_ptr<RowVersionManager> version_info;
|
69
65
|
//! The column data of the row_group
|
70
66
|
vector<shared_ptr<ColumnData>> columns;
|
71
67
|
|
@@ -145,12 +141,17 @@ public:
|
|
145
141
|
|
146
142
|
void NextVector(CollectionScanState &state);
|
147
143
|
|
144
|
+
idx_t DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count);
|
145
|
+
RowVersionManager &GetOrCreateVersionInfo();
|
146
|
+
|
148
147
|
// Serialization
|
149
148
|
static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
|
150
149
|
static RowGroupPointer Deserialize(Deserializer &deserializer);
|
151
150
|
|
152
151
|
private:
|
153
|
-
|
152
|
+
shared_ptr<RowVersionManager> &GetVersionInfo();
|
153
|
+
shared_ptr<RowVersionManager> &GetOrCreateVersionInfoPtr();
|
154
|
+
|
154
155
|
ColumnData &GetColumn(storage_t c);
|
155
156
|
idx_t GetColumnCount() const;
|
156
157
|
vector<shared_ptr<ColumnData>> &GetColumns();
|
@@ -158,18 +159,17 @@ private:
|
|
158
159
|
template <TableScanType TYPE>
|
159
160
|
void TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result);
|
160
161
|
|
162
|
+
vector<MetaBlockPointer> CheckpointDeletes(MetadataManager &manager);
|
163
|
+
|
164
|
+
bool HasUnloadedDeletes() const;
|
165
|
+
|
161
166
|
private:
|
162
167
|
mutex row_group_lock;
|
163
168
|
mutex stats_lock;
|
164
169
|
vector<MetaBlockPointer> column_pointers;
|
165
170
|
unique_ptr<atomic<bool>[]> is_loaded;
|
166
|
-
|
167
|
-
|
168
|
-
struct VersionNode {
|
169
|
-
unique_ptr<ChunkInfo> info[RowGroup::ROW_GROUP_VECTOR_COUNT];
|
170
|
-
|
171
|
-
void SetStart(idx_t start);
|
172
|
-
idx_t GetCommittedDeletedCount(idx_t count);
|
171
|
+
vector<MetaBlockPointer> deletes_pointers;
|
172
|
+
atomic<bool> deletes_is_loaded;
|
173
173
|
};
|
174
174
|
|
175
175
|
} // namespace duckdb
|
@@ -0,0 +1,59 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/storage/table/row_version_manager.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/vector_size.hpp"
|
12
|
+
#include "duckdb/storage/table/chunk_info.hpp"
|
13
|
+
#include "duckdb/storage/storage_info.hpp"
|
14
|
+
#include "duckdb/common/mutex.hpp"
|
15
|
+
|
16
|
+
namespace duckdb {
|
17
|
+
|
18
|
+
class MetadataManager;
|
19
|
+
struct MetaBlockPointer;
|
20
|
+
|
21
|
+
class RowVersionManager {
|
22
|
+
public:
|
23
|
+
explicit RowVersionManager(idx_t start);
|
24
|
+
|
25
|
+
idx_t GetStart() {
|
26
|
+
return start;
|
27
|
+
}
|
28
|
+
void SetStart(idx_t start);
|
29
|
+
idx_t GetCommittedDeletedCount(idx_t count);
|
30
|
+
|
31
|
+
idx_t GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count);
|
32
|
+
idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx,
|
33
|
+
SelectionVector &sel_vector, idx_t max_count);
|
34
|
+
bool Fetch(TransactionData transaction, idx_t row);
|
35
|
+
|
36
|
+
void AppendVersionInfo(TransactionData transaction, idx_t count, idx_t row_group_start, idx_t row_group_end);
|
37
|
+
void CommitAppend(transaction_t commit_id, idx_t row_group_start, idx_t count);
|
38
|
+
void RevertAppend(idx_t start_row);
|
39
|
+
|
40
|
+
idx_t DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count);
|
41
|
+
void CommitDelete(idx_t vector_idx, transaction_t commit_id, row_t rows[], idx_t count);
|
42
|
+
|
43
|
+
vector<MetaBlockPointer> Checkpoint(MetadataManager &manager);
|
44
|
+
static shared_ptr<RowVersionManager> Deserialize(MetaBlockPointer delete_pointer, MetadataManager &manager,
|
45
|
+
idx_t start);
|
46
|
+
|
47
|
+
private:
|
48
|
+
mutex version_lock;
|
49
|
+
idx_t start;
|
50
|
+
unique_ptr<ChunkInfo> vector_info[Storage::ROW_GROUP_VECTOR_COUNT];
|
51
|
+
bool has_changes;
|
52
|
+
vector<MetaBlockPointer> storage_pointers;
|
53
|
+
|
54
|
+
private:
|
55
|
+
optional_ptr<ChunkInfo> GetChunkInfo(idx_t vector_idx);
|
56
|
+
ChunkVectorInfo &GetVectorInfo(idx_t vector_idx);
|
57
|
+
};
|
58
|
+
|
59
|
+
} // namespace duckdb
|
@@ -23,7 +23,7 @@ struct UpdateInfo;
|
|
23
23
|
|
24
24
|
class CommitState {
|
25
25
|
public:
|
26
|
-
explicit CommitState(
|
26
|
+
explicit CommitState(transaction_t commit_id, optional_ptr<WriteAheadLog> log = nullptr);
|
27
27
|
|
28
28
|
optional_ptr<WriteAheadLog> log;
|
29
29
|
transaction_t commit_id;
|
@@ -35,9 +35,6 @@ public:
|
|
35
35
|
unique_ptr<DataChunk> delete_chunk;
|
36
36
|
unique_ptr<DataChunk> update_chunk;
|
37
37
|
|
38
|
-
private:
|
39
|
-
ClientContext &context;
|
40
|
-
|
41
38
|
public:
|
42
39
|
template <bool HAS_LOG>
|
43
40
|
void CommitEntry(UndoFlags type, data_ptr_t data);
|
@@ -49,8 +46,6 @@ private:
|
|
49
46
|
void WriteCatalogEntry(CatalogEntry &entry, data_ptr_t extra_data);
|
50
47
|
void WriteDelete(DeleteInfo &info);
|
51
48
|
void WriteUpdate(UpdateInfo &info);
|
52
|
-
|
53
|
-
void AppendRowId(row_t rowid);
|
54
49
|
};
|
55
50
|
|
56
51
|
} // namespace duckdb
|
@@ -11,12 +11,13 @@
|
|
11
11
|
#include "duckdb/common/constants.hpp"
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
|
-
class ChunkVectorInfo;
|
15
14
|
class DataTable;
|
15
|
+
class RowVersionManager;
|
16
16
|
|
17
17
|
struct DeleteInfo {
|
18
18
|
DataTable *table;
|
19
|
-
|
19
|
+
RowVersionManager *version_info;
|
20
|
+
idx_t vector_idx;
|
20
21
|
idx_t count;
|
21
22
|
idx_t base_row;
|
22
23
|
row_t rows[1];
|
@@ -11,12 +11,13 @@
|
|
11
11
|
#include "duckdb/transaction/transaction.hpp"
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
|
+
class RowVersionManager;
|
14
15
|
|
15
16
|
class DuckTransaction : public Transaction {
|
16
17
|
public:
|
17
18
|
DuckTransaction(TransactionManager &manager, ClientContext &context, transaction_t start_time,
|
18
19
|
transaction_t transaction_id);
|
19
|
-
~DuckTransaction();
|
20
|
+
~DuckTransaction() override;
|
20
21
|
|
21
22
|
//! The start timestamp of this transaction
|
22
23
|
transaction_t start_time;
|
@@ -49,7 +50,8 @@ public:
|
|
49
50
|
|
50
51
|
bool ChangesMade();
|
51
52
|
|
52
|
-
void PushDelete(DataTable &table,
|
53
|
+
void PushDelete(DataTable &table, RowVersionManager &info, idx_t vector_idx, row_t rows[], idx_t count,
|
54
|
+
idx_t base_row);
|
53
55
|
void PushAppend(DataTable &table, idx_t row_start, idx_t row_count);
|
54
56
|
UpdateInfo *CreateUpdateInfo(idx_t type_size, idx_t entries);
|
55
57
|
|
@@ -88,7 +88,7 @@ private:
|
|
88
88
|
class LocalStorage {
|
89
89
|
public:
|
90
90
|
// Threshold to merge row groups instead of appending
|
91
|
-
static constexpr const idx_t MERGE_THRESHOLD =
|
91
|
+
static constexpr const idx_t MERGE_THRESHOLD = Storage::ROW_GROUP_SIZE;
|
92
92
|
|
93
93
|
public:
|
94
94
|
struct CommitState {
|