duckdb 0.8.2-dev4314.0 → 0.8.2-dev4424.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/file_buffer.cpp +1 -1
- package/src/duckdb/src/common/types/date.cpp +1 -1
- package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +3 -10
- package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +6 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
- package/src/duckdb/src/function/table/read_csv.cpp +5 -22
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
- package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
- package/src/duckdb/src/main/settings/settings.cpp +5 -10
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +37 -36
- package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +9 -2
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
- package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
- package/src/duckdb/src/storage/table/column_data.cpp +14 -9
- package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +102 -192
- package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
- package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
- package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
- package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -4
- package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
- package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
- package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
- package/src/duckdb/ub_src_storage_table.cpp +2 -0
- package/test/prepare.test.ts +10 -1
- package/test/test_all_types.test.ts +4 -4
@@ -2,9 +2,14 @@
|
|
2
2
|
|
3
3
|
namespace duckdb {
|
4
4
|
|
5
|
-
MetadataReader::MetadataReader(MetadataManager &manager, MetaBlockPointer pointer,
|
6
|
-
|
7
|
-
|
5
|
+
MetadataReader::MetadataReader(MetadataManager &manager, MetaBlockPointer pointer,
|
6
|
+
optional_ptr<vector<MetaBlockPointer>> read_pointers_p, BlockReaderType type)
|
7
|
+
: manager(manager), type(type), next_pointer(FromDiskPointer(pointer)), has_next_block(true),
|
8
|
+
read_pointers(read_pointers_p), index(0), offset(0), next_offset(pointer.offset), capacity(0) {
|
9
|
+
if (read_pointers) {
|
10
|
+
D_ASSERT(read_pointers->empty());
|
11
|
+
read_pointers->push_back(pointer);
|
12
|
+
}
|
8
13
|
}
|
9
14
|
|
10
15
|
MetadataReader::MetadataReader(MetadataManager &manager, BlockPointer pointer)
|
@@ -57,6 +62,10 @@ void MetadataReader::ReadNextBlock() {
|
|
57
62
|
has_next_block = false;
|
58
63
|
} else {
|
59
64
|
next_pointer = FromDiskPointer(MetaBlockPointer(next_block, 0));
|
65
|
+
MetaBlockPointer next_block_pointer(next_block, 0);
|
66
|
+
if (read_pointers) {
|
67
|
+
read_pointers->push_back(next_block_pointer);
|
68
|
+
}
|
60
69
|
}
|
61
70
|
if (next_offset < sizeof(block_id_t)) {
|
62
71
|
next_offset = sizeof(block_id_t);
|
@@ -3,7 +3,9 @@
|
|
3
3
|
|
4
4
|
namespace duckdb {
|
5
5
|
|
6
|
-
MetadataWriter::MetadataWriter(MetadataManager &manager
|
6
|
+
MetadataWriter::MetadataWriter(MetadataManager &manager, optional_ptr<vector<MetaBlockPointer>> written_pointers_p)
|
7
|
+
: manager(manager), written_pointers(written_pointers_p), capacity(0), offset(0) {
|
8
|
+
D_ASSERT(!written_pointers || written_pointers->empty());
|
7
9
|
}
|
8
10
|
|
9
11
|
MetadataWriter::~MetadataWriter() {
|
@@ -38,7 +40,8 @@ void MetadataWriter::NextBlock() {
|
|
38
40
|
|
39
41
|
// write the block id of the new block to the start of the current block
|
40
42
|
if (capacity > 0) {
|
41
|
-
|
43
|
+
auto disk_block = manager.GetDiskPointer(new_handle.pointer);
|
44
|
+
Store<idx_t>(disk_block.block_pointer, BasePtr());
|
42
45
|
}
|
43
46
|
// now update the block id of the block
|
44
47
|
block = std::move(new_handle);
|
@@ -46,6 +49,9 @@ void MetadataWriter::NextBlock() {
|
|
46
49
|
offset = sizeof(idx_t);
|
47
50
|
capacity = MetadataManager::METADATA_BLOCK_SIZE;
|
48
51
|
Store<idx_t>(-1, BasePtr());
|
52
|
+
if (written_pointers) {
|
53
|
+
written_pointers->push_back(manager.GetDiskPointer(current_pointer));
|
54
|
+
}
|
49
55
|
}
|
50
56
|
|
51
57
|
void MetadataWriter::WriteData(const_data_ptr_t buffer, idx_t write_size) {
|
@@ -240,8 +240,7 @@ void SingleFileBlockManager::LoadFreeList() {
|
|
240
240
|
// no free list
|
241
241
|
return;
|
242
242
|
}
|
243
|
-
|
244
|
-
MetadataReader reader(GetMetadataManager(), free_pointer, BlockReaderType::REGISTER_BLOCKS);
|
243
|
+
MetadataReader reader(GetMetadataManager(), free_pointer, nullptr, BlockReaderType::REGISTER_BLOCKS);
|
245
244
|
auto free_list_count = reader.Read<uint64_t>();
|
246
245
|
free_list.clear();
|
247
246
|
for (idx_t i = 0; i < free_list_count; i++) {
|
@@ -2,6 +2,7 @@
|
|
2
2
|
#include "duckdb/transaction/transaction.hpp"
|
3
3
|
#include "duckdb/common/serializer/serializer.hpp"
|
4
4
|
#include "duckdb/common/serializer/deserializer.hpp"
|
5
|
+
#include "duckdb/common/serializer/memory_stream.hpp"
|
5
6
|
|
6
7
|
namespace duckdb {
|
7
8
|
|
@@ -29,15 +30,19 @@ static bool UseVersion(TransactionData transaction, transaction_t id) {
|
|
29
30
|
return TransactionVersionOperator::UseInsertedVersion(transaction.start_time, transaction.transaction_id, id);
|
30
31
|
}
|
31
32
|
|
32
|
-
|
33
|
-
|
33
|
+
void ChunkInfo::Write(WriteStream &writer) const {
|
34
|
+
writer.Write<ChunkInfoType>(type);
|
35
|
+
}
|
36
|
+
|
37
|
+
unique_ptr<ChunkInfo> ChunkInfo::Read(ReadStream &reader) {
|
38
|
+
auto type = reader.Read<ChunkInfoType>();
|
34
39
|
switch (type) {
|
35
40
|
case ChunkInfoType::EMPTY_INFO:
|
36
41
|
return nullptr;
|
37
42
|
case ChunkInfoType::CONSTANT_INFO:
|
38
|
-
return ChunkConstantInfo::
|
43
|
+
return ChunkConstantInfo::Read(reader);
|
39
44
|
case ChunkInfoType::VECTOR_INFO:
|
40
|
-
return ChunkVectorInfo::
|
45
|
+
return ChunkVectorInfo::Read(reader);
|
41
46
|
default:
|
42
47
|
throw SerializationException("Could not deserialize Chunk Info Type: unrecognized type");
|
43
48
|
}
|
@@ -79,22 +84,23 @@ void ChunkConstantInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t
|
|
79
84
|
insert_id = commit_id;
|
80
85
|
}
|
81
86
|
|
87
|
+
bool ChunkConstantInfo::HasDeletes() const {
|
88
|
+
bool is_deleted = insert_id >= TRANSACTION_ID_START || delete_id < TRANSACTION_ID_START;
|
89
|
+
return is_deleted;
|
90
|
+
}
|
91
|
+
|
82
92
|
idx_t ChunkConstantInfo::GetCommittedDeletedCount(idx_t max_count) {
|
83
93
|
return delete_id < TRANSACTION_ID_START ? max_count : 0;
|
84
94
|
}
|
85
95
|
|
86
|
-
void ChunkConstantInfo::
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
return;
|
91
|
-
}
|
92
|
-
serializer.WriteProperty(100, "type", type);
|
93
|
-
serializer.WriteProperty(200, "start", start);
|
96
|
+
void ChunkConstantInfo::Write(WriteStream &writer) const {
|
97
|
+
D_ASSERT(HasDeletes());
|
98
|
+
ChunkInfo::Write(writer);
|
99
|
+
writer.Write<idx_t>(start);
|
94
100
|
}
|
95
101
|
|
96
|
-
unique_ptr<ChunkInfo> ChunkConstantInfo::
|
97
|
-
auto start =
|
102
|
+
unique_ptr<ChunkInfo> ChunkConstantInfo::Read(ReadStream &reader) {
|
103
|
+
auto start = reader.Read<idx_t>();
|
98
104
|
auto info = make_uniq<ChunkConstantInfo>(start);
|
99
105
|
info->insert_id = 0;
|
100
106
|
info->delete_id = 0;
|
@@ -218,6 +224,10 @@ void ChunkVectorInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t e
|
|
218
224
|
}
|
219
225
|
}
|
220
226
|
|
227
|
+
bool ChunkVectorInfo::HasDeletes() const {
|
228
|
+
return any_deleted;
|
229
|
+
}
|
230
|
+
|
221
231
|
idx_t ChunkVectorInfo::GetCommittedDeletedCount(idx_t max_count) {
|
222
232
|
if (!any_deleted) {
|
223
233
|
return 0;
|
@@ -231,45 +241,41 @@ idx_t ChunkVectorInfo::GetCommittedDeletedCount(idx_t max_count) {
|
|
231
241
|
return delete_count;
|
232
242
|
}
|
233
243
|
|
234
|
-
void ChunkVectorInfo::
|
244
|
+
void ChunkVectorInfo::Write(WriteStream &writer) const {
|
235
245
|
SelectionVector sel(STANDARD_VECTOR_SIZE);
|
236
246
|
transaction_t start_time = TRANSACTION_ID_START - 1;
|
237
247
|
transaction_t transaction_id = DConstants::INVALID_INDEX;
|
238
248
|
idx_t count = GetSelVector(start_time, transaction_id, sel, STANDARD_VECTOR_SIZE);
|
239
249
|
if (count == STANDARD_VECTOR_SIZE) {
|
240
250
|
// nothing is deleted: skip writing anything
|
241
|
-
|
251
|
+
writer.Write<ChunkInfoType>(ChunkInfoType::EMPTY_INFO);
|
242
252
|
return;
|
243
253
|
}
|
244
254
|
if (count == 0) {
|
245
255
|
// everything is deleted: write a constant vector
|
246
|
-
|
247
|
-
|
256
|
+
writer.Write<ChunkInfoType>(ChunkInfoType::CONSTANT_INFO);
|
257
|
+
writer.Write<idx_t>(start);
|
248
258
|
return;
|
249
259
|
}
|
250
260
|
// write a boolean vector
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
deleted_tuples[i] = true;
|
256
|
-
}
|
261
|
+
ChunkInfo::Write(writer);
|
262
|
+
writer.Write<idx_t>(start);
|
263
|
+
ValidityMask mask(STANDARD_VECTOR_SIZE);
|
264
|
+
mask.Initialize(STANDARD_VECTOR_SIZE);
|
257
265
|
for (idx_t i = 0; i < count; i++) {
|
258
|
-
|
266
|
+
mask.SetInvalid(sel.get_index(i));
|
259
267
|
}
|
260
|
-
|
268
|
+
mask.Write(writer, STANDARD_VECTOR_SIZE);
|
261
269
|
}
|
262
270
|
|
263
|
-
unique_ptr<ChunkInfo> ChunkVectorInfo::
|
264
|
-
auto start =
|
265
|
-
|
271
|
+
unique_ptr<ChunkInfo> ChunkVectorInfo::Read(ReadStream &reader) {
|
272
|
+
auto start = reader.Read<idx_t>();
|
266
273
|
auto result = make_uniq<ChunkVectorInfo>(start);
|
267
274
|
result->any_deleted = true;
|
268
|
-
|
269
|
-
|
270
|
-
sizeof(bool) * STANDARD_VECTOR_SIZE);
|
275
|
+
ValidityMask mask;
|
276
|
+
mask.Read(reader, STANDARD_VECTOR_SIZE);
|
271
277
|
for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) {
|
272
|
-
if (
|
278
|
+
if (mask.RowIsValid(i)) {
|
273
279
|
result->deleted[i] = 0;
|
274
280
|
}
|
275
281
|
}
|
@@ -87,7 +87,7 @@ void ColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx)
|
|
87
87
|
state.last_offset = 0;
|
88
88
|
}
|
89
89
|
|
90
|
-
idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remaining) {
|
90
|
+
idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remaining, bool has_updates) {
|
91
91
|
state.previous_states.clear();
|
92
92
|
if (state.version != version) {
|
93
93
|
InitializeScanWithOffset(state, state.row_index);
|
@@ -113,7 +113,8 @@ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remai
|
|
113
113
|
idx_t scan_count = MinValue<idx_t>(remaining, state.current->start + state.current->count - state.row_index);
|
114
114
|
idx_t result_offset = initial_remaining - remaining;
|
115
115
|
if (scan_count > 0) {
|
116
|
-
state.current->Scan(state, scan_count, result, result_offset,
|
116
|
+
state.current->Scan(state, scan_count, result, result_offset,
|
117
|
+
!has_updates && scan_count == initial_remaining);
|
117
118
|
|
118
119
|
state.row_index += scan_count;
|
119
120
|
remaining -= scan_count;
|
@@ -138,10 +139,14 @@ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remai
|
|
138
139
|
|
139
140
|
template <bool SCAN_COMMITTED, bool ALLOW_UPDATES>
|
140
141
|
idx_t ColumnData::ScanVector(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result) {
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
142
|
+
bool has_updates;
|
143
|
+
{
|
144
|
+
lock_guard<mutex> update_guard(update_lock);
|
145
|
+
has_updates = updates ? true : false;
|
146
|
+
}
|
147
|
+
auto scan_count = ScanVector(state, result, STANDARD_VECTOR_SIZE, has_updates);
|
148
|
+
if (has_updates) {
|
149
|
+
lock_guard<mutex> update_guard(update_lock);
|
145
150
|
if (!ALLOW_UPDATES && updates->HasUncommittedUpdates(vector_index)) {
|
146
151
|
throw TransactionException("Cannot create index with outstanding updates");
|
147
152
|
}
|
@@ -179,7 +184,7 @@ idx_t ColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state, Vect
|
|
179
184
|
void ColumnData::ScanCommittedRange(idx_t row_group_start, idx_t offset_in_row_group, idx_t count, Vector &result) {
|
180
185
|
ColumnScanState child_state;
|
181
186
|
InitializeScanWithOffset(child_state, row_group_start + offset_in_row_group);
|
182
|
-
auto scan_count = ScanVector(child_state, result, count);
|
187
|
+
auto scan_count = ScanVector(child_state, result, count, updates ? true : false);
|
183
188
|
if (updates) {
|
184
189
|
result.Flatten(scan_count);
|
185
190
|
updates->FetchCommittedRange(offset_in_row_group, count, result);
|
@@ -192,7 +197,7 @@ idx_t ColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count)
|
|
192
197
|
}
|
193
198
|
// ScanCount can only be used if there are no updates
|
194
199
|
D_ASSERT(!updates);
|
195
|
-
return ScanVector(state, result, count);
|
200
|
+
return ScanVector(state, result, count, false);
|
196
201
|
}
|
197
202
|
|
198
203
|
void ColumnData::Select(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result,
|
@@ -339,7 +344,7 @@ idx_t ColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) {
|
|
339
344
|
state.row_index = start + ((row_id - start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE);
|
340
345
|
state.current = data.GetSegment(state.row_index);
|
341
346
|
state.internal_index = state.current->start;
|
342
|
-
return ScanVector(state, result, STANDARD_VECTOR_SIZE);
|
347
|
+
return ScanVector(state, result, STANDARD_VECTOR_SIZE, false);
|
343
348
|
}
|
344
349
|
|
345
350
|
void ColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
|
@@ -86,7 +86,7 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co
|
|
86
86
|
D_ASSERT(!updates);
|
87
87
|
|
88
88
|
Vector offset_vector(LogicalType::UBIGINT, count);
|
89
|
-
idx_t scan_count = ScanVector(state, offset_vector, count);
|
89
|
+
idx_t scan_count = ScanVector(state, offset_vector, count, false);
|
90
90
|
D_ASSERT(scan_count > 0);
|
91
91
|
validity.ScanCount(state.child_states[0], result, count);
|
92
92
|
|
@@ -132,7 +132,7 @@ void ListColumnData::Skip(ColumnScanState &state, idx_t count) {
|
|
132
132
|
// note that we only need to read the first and last entry
|
133
133
|
// however, let's just read all "count" entries for now
|
134
134
|
Vector result(LogicalType::UBIGINT, count);
|
135
|
-
idx_t scan_count = ScanVector(state, result, count);
|
135
|
+
idx_t scan_count = ScanVector(state, result, count, false);
|
136
136
|
if (scan_count == 0) {
|
137
137
|
return;
|
138
138
|
}
|