duckdb 0.5.2-dev1229.0 → 0.5.2-dev1241.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +107 -41
- package/src/duckdb.hpp +20 -7
- package/src/parquet-amalgamation.cpp +37149 -37149
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -6538,6 +6538,7 @@ void AllocatedData::Reset() {
|
|
|
6538
6538
|
}
|
|
6539
6539
|
D_ASSERT(allocator);
|
|
6540
6540
|
allocator->FreeData(pointer, allocated_size);
|
|
6541
|
+
allocated_size = 0;
|
|
6541
6542
|
pointer = nullptr;
|
|
6542
6543
|
}
|
|
6543
6544
|
|
|
@@ -15982,23 +15983,27 @@ void FileBuffer::ReallocBuffer(size_t new_size) {
|
|
|
15982
15983
|
}
|
|
15983
15984
|
|
|
15984
15985
|
void FileBuffer::Resize(uint64_t new_size) {
|
|
15986
|
+
idx_t header_size = Storage::BLOCK_HEADER_SIZE;
|
|
15985
15987
|
{
|
|
15986
15988
|
// TODO: All the logic here is specific to SingleFileBlockManager.
|
|
15987
15989
|
// and should be moved there, via a specific implementation of FileBuffer.
|
|
15988
15990
|
//
|
|
15989
15991
|
// make room for the block header (if this is not the db file header)
|
|
15990
|
-
if (type == FileBufferType::
|
|
15992
|
+
if (type == FileBufferType::TINY_BUFFER) {
|
|
15993
|
+
header_size = 0;
|
|
15994
|
+
}
|
|
15995
|
+
if (type == FileBufferType::MANAGED_BUFFER) {
|
|
15991
15996
|
new_size += Storage::BLOCK_HEADER_SIZE;
|
|
15992
|
-
// If we don't write/read an entire block, our checksum won't match.
|
|
15993
|
-
new_size = AlignValue<uint32_t, Storage::BLOCK_ALLOC_SIZE>(new_size);
|
|
15994
15997
|
}
|
|
15995
|
-
|
|
15998
|
+
if (type != FileBufferType::TINY_BUFFER) {
|
|
15999
|
+
new_size = AlignValue<uint32_t, Storage::SECTOR_SIZE>(new_size);
|
|
16000
|
+
}
|
|
15996
16001
|
ReallocBuffer(new_size);
|
|
15997
16002
|
}
|
|
15998
16003
|
|
|
15999
16004
|
if (new_size > 0) {
|
|
16000
|
-
buffer = internal_buffer +
|
|
16001
|
-
size = internal_size -
|
|
16005
|
+
buffer = internal_buffer + header_size;
|
|
16006
|
+
size = internal_size - header_size;
|
|
16002
16007
|
}
|
|
16003
16008
|
}
|
|
16004
16009
|
|
|
@@ -78786,7 +78791,7 @@ public:
|
|
|
78786
78791
|
void CreateNewCollection(TableCatalogEntry *table, const vector<LogicalType> &insert_types) {
|
|
78787
78792
|
auto &table_info = table->storage->info;
|
|
78788
78793
|
auto &block_manager = TableIOManager::Get(*table->storage).GetBlockManagerForRowData();
|
|
78789
|
-
current_collection = make_unique<RowGroupCollection>(table_info, block_manager, insert_types,
|
|
78794
|
+
current_collection = make_unique<RowGroupCollection>(table_info, block_manager, insert_types, MAX_ROW_ID);
|
|
78790
78795
|
current_collection->InitializeEmpty();
|
|
78791
78796
|
current_collection->InitializeAppend(current_append_state);
|
|
78792
78797
|
written_to_disk = false;
|
|
@@ -78888,7 +78893,7 @@ struct CollectionMerger {
|
|
|
78888
78893
|
auto &table_info = storage.info;
|
|
78889
78894
|
auto &block_manager = TableIOManager::Get(storage).GetBlockManagerForRowData();
|
|
78890
78895
|
auto types = storage.GetTypes();
|
|
78891
|
-
new_collection = make_unique<RowGroupCollection>(table_info, block_manager, types,
|
|
78896
|
+
new_collection = make_unique<RowGroupCollection>(table_info, block_manager, types, MAX_ROW_ID);
|
|
78892
78897
|
TableAppendState append_state;
|
|
78893
78898
|
new_collection->InitializeEmpty();
|
|
78894
78899
|
new_collection->InitializeAppend(append_state);
|
|
@@ -79709,7 +79714,8 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, GlobalSinkState &
|
|
|
79709
79714
|
if (!lstate.local_collection) {
|
|
79710
79715
|
auto &table_info = table->storage->info;
|
|
79711
79716
|
auto &block_manager = TableIOManager::Get(*table->storage).GetBlockManagerForRowData();
|
|
79712
|
-
lstate.local_collection =
|
|
79717
|
+
lstate.local_collection =
|
|
79718
|
+
make_unique<RowGroupCollection>(table_info, block_manager, insert_types, MAX_ROW_ID);
|
|
79713
79719
|
lstate.local_collection->InitializeEmpty();
|
|
79714
79720
|
lstate.local_collection->InitializeAppend(lstate.local_append_state);
|
|
79715
79721
|
lstate.writer = make_unique<OptimisticDataWriter>(gstate.table->storage.get());
|
|
@@ -196037,10 +196043,9 @@ BlockHandle::BlockHandle(BlockManager &block_manager, block_id_t block_id_p, uni
|
|
|
196037
196043
|
bool can_destroy_p, idx_t block_size)
|
|
196038
196044
|
: block_manager(block_manager), readers(0), block_id(block_id_p), eviction_timestamp(0), can_destroy(can_destroy_p),
|
|
196039
196045
|
unswizzled(nullptr) {
|
|
196040
|
-
D_ASSERT(block_size >= Storage::BLOCK_SIZE);
|
|
196041
196046
|
buffer = move(buffer_p);
|
|
196042
196047
|
state = BlockState::BLOCK_LOADED;
|
|
196043
|
-
memory_usage = block_size + Storage::BLOCK_HEADER_SIZE;
|
|
196048
|
+
memory_usage = buffer->type == FileBufferType::TINY_BUFFER ? block_size : block_size + Storage::BLOCK_HEADER_SIZE;
|
|
196044
196049
|
}
|
|
196045
196050
|
|
|
196046
196051
|
BlockHandle::~BlockHandle() {
|
|
@@ -196048,9 +196053,10 @@ BlockHandle::~BlockHandle() {
|
|
|
196048
196053
|
unswizzled = nullptr;
|
|
196049
196054
|
auto &buffer_manager = block_manager.buffer_manager;
|
|
196050
196055
|
// no references remain to this block: erase
|
|
196051
|
-
if (state == BlockState::BLOCK_LOADED) {
|
|
196056
|
+
if (buffer && state == BlockState::BLOCK_LOADED) {
|
|
196052
196057
|
// the block is still loaded in memory: erase it
|
|
196053
196058
|
buffer.reset();
|
|
196059
|
+
D_ASSERT(buffer_manager.current_memory >= memory_usage);
|
|
196054
196060
|
buffer_manager.current_memory -= memory_usage;
|
|
196055
196061
|
}
|
|
196056
196062
|
block_manager.UnregisterBlock(block_id, can_destroy);
|
|
@@ -196075,14 +196081,15 @@ unique_ptr<Block> AllocateBlock(BlockManager &block_manager, unique_ptr<FileBuff
|
|
|
196075
196081
|
}
|
|
196076
196082
|
}
|
|
196077
196083
|
|
|
196078
|
-
unique_ptr<FileBuffer> BufferManager::ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source
|
|
196084
|
+
unique_ptr<FileBuffer> BufferManager::ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source,
|
|
196085
|
+
FileBufferType type) {
|
|
196079
196086
|
if (source) {
|
|
196080
196087
|
auto tmp = move(source);
|
|
196081
196088
|
D_ASSERT(tmp->size == size);
|
|
196082
|
-
return make_unique<FileBuffer>(*tmp,
|
|
196089
|
+
return make_unique<FileBuffer>(*tmp, type);
|
|
196083
196090
|
} else {
|
|
196084
196091
|
// no re-usable buffer: allocate a new buffer
|
|
196085
|
-
return make_unique<FileBuffer>(Allocator::Get(db),
|
|
196092
|
+
return make_unique<FileBuffer>(Allocator::Get(db), type, size);
|
|
196086
196093
|
}
|
|
196087
196094
|
}
|
|
196088
196095
|
|
|
@@ -196116,10 +196123,10 @@ unique_ptr<FileBuffer> BlockHandle::UnloadAndTakeBlock() {
|
|
|
196116
196123
|
}
|
|
196117
196124
|
D_ASSERT(!unswizzled);
|
|
196118
196125
|
D_ASSERT(CanUnload());
|
|
196119
|
-
D_ASSERT(memory_usage >= Storage::BLOCK_ALLOC_SIZE);
|
|
196120
196126
|
|
|
196121
196127
|
if (block_id >= MAXIMUM_BLOCK && !can_destroy) {
|
|
196122
196128
|
// temporary block that cannot be destroyed: write to temporary file
|
|
196129
|
+
D_ASSERT(memory_usage >= Storage::BLOCK_ALLOC_SIZE);
|
|
196123
196130
|
block_manager.buffer_manager.WriteTemporaryBuffer(block_id, *buffer);
|
|
196124
196131
|
}
|
|
196125
196132
|
block_manager.buffer_manager.current_memory -= memory_usage;
|
|
@@ -196277,8 +196284,20 @@ shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(block_id_t block_id, s
|
|
|
196277
196284
|
return new_block;
|
|
196278
196285
|
}
|
|
196279
196286
|
|
|
196287
|
+
shared_ptr<BlockHandle> BufferManager::RegisterSmallMemory(idx_t block_size) {
|
|
196288
|
+
if (!EvictBlocks(block_size, maximum_memory, nullptr)) {
|
|
196289
|
+
throw OutOfMemoryException("could not allocate block of %lld bytes (%lld/%lld used) %s", block_size,
|
|
196290
|
+
GetUsedMemory(), GetMaxMemory(), InMemoryWarning());
|
|
196291
|
+
}
|
|
196292
|
+
auto buffer = ConstructManagedBuffer(block_size, nullptr, FileBufferType::TINY_BUFFER);
|
|
196293
|
+
|
|
196294
|
+
// create a new block pointer for this block
|
|
196295
|
+
return make_shared<BlockHandle>(*temp_block_manager, ++temporary_id, move(buffer), false, block_size);
|
|
196296
|
+
}
|
|
196297
|
+
|
|
196280
196298
|
shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can_destroy) {
|
|
196281
|
-
|
|
196299
|
+
D_ASSERT(block_size >= Storage::BLOCK_SIZE);
|
|
196300
|
+
auto alloc_size = AlignValue<idx_t, Storage::SECTOR_SIZE>(block_size + Storage::BLOCK_HEADER_SIZE);
|
|
196282
196301
|
// first evict blocks until we have enough memory to store this buffer
|
|
196283
196302
|
unique_ptr<FileBuffer> reusable_buffer;
|
|
196284
196303
|
if (!EvictBlocks(alloc_size, maximum_memory, &reusable_buffer)) {
|
|
@@ -196313,6 +196332,7 @@ void BufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size
|
|
|
196313
196332
|
}
|
|
196314
196333
|
} else {
|
|
196315
196334
|
// no need to evict blocks
|
|
196335
|
+
D_ASSERT(current_memory >= idx_t(-required_memory));
|
|
196316
196336
|
current_memory -= idx_t(-required_memory);
|
|
196317
196337
|
}
|
|
196318
196338
|
|
|
@@ -196334,6 +196354,7 @@ BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
|
|
|
196334
196354
|
}
|
|
196335
196355
|
required_memory = handle->memory_usage;
|
|
196336
196356
|
}
|
|
196357
|
+
D_ASSERT(required_memory >= Storage::BLOCK_SIZE);
|
|
196337
196358
|
// evict blocks until we have space for the current block
|
|
196338
196359
|
unique_ptr<FileBuffer> reusable_buffer;
|
|
196339
196360
|
if (!EvictBlocks(required_memory, maximum_memory, &reusable_buffer)) {
|
|
@@ -196345,6 +196366,7 @@ BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
|
|
|
196345
196366
|
if (handle->state == BlockState::BLOCK_LOADED) {
|
|
196346
196367
|
// the block is loaded, increment the reader count and return a pointer to the handle
|
|
196347
196368
|
handle->readers++;
|
|
196369
|
+
D_ASSERT(current_memory >= required_memory);
|
|
196348
196370
|
current_memory -= required_memory;
|
|
196349
196371
|
return handle->Load(handle);
|
|
196350
196372
|
}
|
|
@@ -196363,6 +196385,9 @@ void BufferManager::AddToEvictionQueue(shared_ptr<BlockHandle> &handle) {
|
|
|
196363
196385
|
|
|
196364
196386
|
void BufferManager::Unpin(shared_ptr<BlockHandle> &handle) {
|
|
196365
196387
|
lock_guard<mutex> lock(handle->lock);
|
|
196388
|
+
if (!handle->buffer || handle->buffer->type == FileBufferType::TINY_BUFFER) {
|
|
196389
|
+
return;
|
|
196390
|
+
}
|
|
196366
196391
|
D_ASSERT(handle->readers > 0);
|
|
196367
196392
|
handle->readers--;
|
|
196368
196393
|
if (handle->readers == 0) {
|
|
@@ -196378,6 +196403,7 @@ bool BufferManager::EvictBlocks(idx_t extra_memory, idx_t memory_limit, unique_p
|
|
|
196378
196403
|
while (current_memory > memory_limit) {
|
|
196379
196404
|
// get a block to unpin from the queue
|
|
196380
196405
|
if (!queue->q.try_dequeue(node)) {
|
|
196406
|
+
D_ASSERT(current_memory >= extra_memory);
|
|
196381
196407
|
current_memory -= extra_memory;
|
|
196382
196408
|
return false;
|
|
196383
196409
|
}
|
|
@@ -196797,10 +196823,9 @@ void BufferManager::WriteTemporaryBuffer(block_id_t block_id, FileBuffer &buffer
|
|
|
196797
196823
|
temp_directory_handle->GetTempFile().WriteTemporaryBuffer(block_id, buffer);
|
|
196798
196824
|
return;
|
|
196799
196825
|
}
|
|
196800
|
-
|
|
196801
|
-
D_ASSERT(buffer.size > Storage::BLOCK_SIZE);
|
|
196802
196826
|
// get the path to write to
|
|
196803
196827
|
auto path = GetTemporaryPath(block_id);
|
|
196828
|
+
D_ASSERT(buffer.size > Storage::BLOCK_SIZE);
|
|
196804
196829
|
// create the file and write the size followed by the buffer contents
|
|
196805
196830
|
auto &fs = FileSystem::GetFileSystem(db);
|
|
196806
196831
|
auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE);
|
|
@@ -196876,6 +196901,7 @@ data_ptr_t BufferManager::BufferAllocatorAllocate(PrivateAllocatorData *private_
|
|
|
196876
196901
|
|
|
196877
196902
|
void BufferManager::BufferAllocatorFree(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size) {
|
|
196878
196903
|
auto &data = (BufferAllocatorData &)*private_data;
|
|
196904
|
+
D_ASSERT(data.manager.current_memory >= size);
|
|
196879
196905
|
data.manager.current_memory -= size;
|
|
196880
196906
|
return Allocator::Get(data.manager.db).FreeData(pointer, size);
|
|
196881
196907
|
}
|
|
@@ -197534,7 +197560,6 @@ WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(BlockManager &block_manag
|
|
|
197534
197560
|
|
|
197535
197561
|
WriteOverflowStringsToDisk::~WriteOverflowStringsToDisk() {
|
|
197536
197562
|
if (offset > 0) {
|
|
197537
|
-
handle.GetFileBuffer().Resize(offset);
|
|
197538
197563
|
block_manager.Write(handle.GetFileBuffer(), block_id);
|
|
197539
197564
|
}
|
|
197540
197565
|
}
|
|
@@ -203289,7 +203314,7 @@ idx_t FixedSizeAppend(CompressionAppendState &append_state, ColumnSegment &segme
|
|
|
203289
203314
|
D_ASSERT(segment.GetBlockOffset() == 0);
|
|
203290
203315
|
|
|
203291
203316
|
auto target_ptr = append_state.handle.Ptr();
|
|
203292
|
-
idx_t max_tuple_count =
|
|
203317
|
+
idx_t max_tuple_count = segment.SegmentSize() / sizeof(T);
|
|
203293
203318
|
idx_t copy_count = MinValue<idx_t>(count, max_tuple_count - segment.count);
|
|
203294
203319
|
|
|
203295
203320
|
AppendLoop<T>(stats, target_ptr, segment.count, data, offset, copy_count);
|
|
@@ -204802,7 +204827,7 @@ unique_ptr<CompressedSegmentState> UncompressedStringStorage::StringInitSegment(
|
|
|
204802
204827
|
auto handle = buffer_manager.Pin(segment.block);
|
|
204803
204828
|
StringDictionaryContainer dictionary;
|
|
204804
204829
|
dictionary.size = 0;
|
|
204805
|
-
dictionary.end =
|
|
204830
|
+
dictionary.end = segment.SegmentSize();
|
|
204806
204831
|
SetDictionary(segment, handle, dictionary);
|
|
204807
204832
|
}
|
|
204808
204833
|
return make_unique<UncompressedStringSegmentState>();
|
|
@@ -204812,16 +204837,16 @@ idx_t UncompressedStringStorage::FinalizeAppend(ColumnSegment &segment, SegmentS
|
|
|
204812
204837
|
auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
|
|
204813
204838
|
auto handle = buffer_manager.Pin(segment.block);
|
|
204814
204839
|
auto dict = GetDictionary(segment, handle);
|
|
204815
|
-
D_ASSERT(dict.end ==
|
|
204840
|
+
D_ASSERT(dict.end == segment.SegmentSize());
|
|
204816
204841
|
// compute the total size required to store this segment
|
|
204817
204842
|
auto offset_size = DICTIONARY_HEADER_SIZE + segment.count * sizeof(int32_t);
|
|
204818
204843
|
auto total_size = offset_size + dict.size;
|
|
204819
204844
|
if (total_size >= COMPACTION_FLUSH_LIMIT) {
|
|
204820
204845
|
// the block is full enough, don't bother moving around the dictionary
|
|
204821
|
-
return
|
|
204846
|
+
return segment.SegmentSize();
|
|
204822
204847
|
}
|
|
204823
204848
|
// the block has space left: figure out how much space we can save
|
|
204824
|
-
auto move_amount =
|
|
204849
|
+
auto move_amount = segment.SegmentSize() - total_size;
|
|
204825
204850
|
// move the dictionary so it lines up exactly with the offsets
|
|
204826
204851
|
auto dataptr = handle.Ptr();
|
|
204827
204852
|
memmove(dataptr + offset_size, dataptr + dict.end - dict.size, dict.size);
|
|
@@ -204868,10 +204893,10 @@ StringDictionaryContainer UncompressedStringStorage::GetDictionary(ColumnSegment
|
|
|
204868
204893
|
|
|
204869
204894
|
idx_t UncompressedStringStorage::RemainingSpace(ColumnSegment &segment, BufferHandle &handle) {
|
|
204870
204895
|
auto dictionary = GetDictionary(segment, handle);
|
|
204871
|
-
D_ASSERT(dictionary.end ==
|
|
204896
|
+
D_ASSERT(dictionary.end == segment.SegmentSize());
|
|
204872
204897
|
idx_t used_space = dictionary.size + segment.count * sizeof(int32_t) + DICTIONARY_HEADER_SIZE;
|
|
204873
|
-
D_ASSERT(
|
|
204874
|
-
return
|
|
204898
|
+
D_ASSERT(segment.SegmentSize() >= used_space);
|
|
204899
|
+
return segment.SegmentSize() - used_space;
|
|
204875
204900
|
}
|
|
204876
204901
|
|
|
204877
204902
|
void UncompressedStringStorage::WriteString(ColumnSegment &segment, string_t string, block_id_t &result_block,
|
|
@@ -205503,7 +205528,7 @@ unique_ptr<CompressedSegmentState> ValidityInitSegment(ColumnSegment &segment, b
|
|
|
205503
205528
|
auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
|
|
205504
205529
|
if (block_id == INVALID_BLOCK) {
|
|
205505
205530
|
auto handle = buffer_manager.Pin(segment.block);
|
|
205506
|
-
memset(handle.Ptr(), 0xFF,
|
|
205531
|
+
memset(handle.Ptr(), 0xFF, segment.SegmentSize());
|
|
205507
205532
|
}
|
|
205508
205533
|
return nullptr;
|
|
205509
205534
|
}
|
|
@@ -205513,7 +205538,7 @@ idx_t ValidityAppend(CompressionAppendState &append_state, ColumnSegment &segmen
|
|
|
205513
205538
|
D_ASSERT(segment.GetBlockOffset() == 0);
|
|
205514
205539
|
auto &validity_stats = (ValidityStatistics &)*stats.statistics;
|
|
205515
205540
|
|
|
205516
|
-
auto max_tuples =
|
|
205541
|
+
auto max_tuples = segment.SegmentSize() / ValidityMask::STANDARD_MASK_SIZE * STANDARD_VECTOR_SIZE;
|
|
205517
205542
|
idx_t append_count = MinValue<idx_t>(vcount, max_tuples - segment.count);
|
|
205518
205543
|
if (data.validity.AllValid()) {
|
|
205519
205544
|
// no null values: skip append
|
|
@@ -205560,7 +205585,7 @@ void ValidityRevertAppend(ColumnSegment &segment, idx_t start_row) {
|
|
|
205560
205585
|
revert_start = start_bit / 8;
|
|
205561
205586
|
}
|
|
205562
205587
|
// for the rest, we just memset
|
|
205563
|
-
memset(handle.Ptr() + revert_start, 0xFF,
|
|
205588
|
+
memset(handle.Ptr() + revert_start, 0xFF, segment.SegmentSize() - revert_start);
|
|
205564
205589
|
}
|
|
205565
205590
|
|
|
205566
205591
|
//===--------------------------------------------------------------------===//
|
|
@@ -207713,8 +207738,9 @@ T DeserializeHeaderStructure(data_ptr_t ptr) {
|
|
|
207713
207738
|
SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path_p, bool read_only, bool create_new,
|
|
207714
207739
|
bool use_direct_io)
|
|
207715
207740
|
: BlockManager(BufferManager::GetBufferManager(db)), db(db), path(move(path_p)),
|
|
207716
|
-
header_buffer(Allocator::Get(db), FileBufferType::MANAGED_BUFFER,
|
|
207717
|
-
|
|
207741
|
+
header_buffer(Allocator::Get(db), FileBufferType::MANAGED_BUFFER,
|
|
207742
|
+
Storage::FILE_HEADER_SIZE - Storage::BLOCK_HEADER_SIZE),
|
|
207743
|
+
iteration_count(0), read_only(read_only), use_direct_io(use_direct_io) {
|
|
207718
207744
|
uint8_t flags;
|
|
207719
207745
|
FileLockType lock;
|
|
207720
207746
|
if (read_only) {
|
|
@@ -209893,6 +209919,12 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
|
|
|
209893
209919
|
pstate->AddSegmentToTail(&column_data, segment.get(), offset_in_block);
|
|
209894
209920
|
} else {
|
|
209895
209921
|
// Create a new block for future reuse.
|
|
209922
|
+
if (segment->SegmentSize() != Storage::BLOCK_SIZE) {
|
|
209923
|
+
// the segment is smaller than the block size
|
|
209924
|
+
// allocate a new block and copy the data over
|
|
209925
|
+
D_ASSERT(segment->SegmentSize() < Storage::BLOCK_SIZE);
|
|
209926
|
+
segment->Resize(Storage::BLOCK_SIZE);
|
|
209927
|
+
}
|
|
209896
209928
|
D_ASSERT(offset_in_block == 0);
|
|
209897
209929
|
allocation.partial_block = make_unique<PartialBlockForCheckpoint>(
|
|
209898
209930
|
&column_data, segment.get(), *allocation.block_manager, allocation.state);
|
|
@@ -210454,7 +210486,15 @@ unique_ptr<BaseStatistics> ColumnData::GetUpdateStatistics() {
|
|
|
210454
210486
|
}
|
|
210455
210487
|
|
|
210456
210488
|
void ColumnData::AppendTransientSegment(SegmentLock &l, idx_t start_row) {
|
|
210457
|
-
|
|
210489
|
+
idx_t segment_size = Storage::BLOCK_SIZE;
|
|
210490
|
+
if (start_row == idx_t(MAX_ROW_ID)) {
|
|
210491
|
+
#if STANDARD_VECTOR_SIZE < 1024
|
|
210492
|
+
segment_size = 1024 * GetTypeIdSize(type.InternalType());
|
|
210493
|
+
#else
|
|
210494
|
+
segment_size = STANDARD_VECTOR_SIZE * GetTypeIdSize(type.InternalType());
|
|
210495
|
+
#endif
|
|
210496
|
+
}
|
|
210497
|
+
auto new_segment = ColumnSegment::CreateTransientSegment(GetDatabase(), type, start_row, segment_size);
|
|
210458
210498
|
data.AppendSegment(l, move(new_segment));
|
|
210459
210499
|
}
|
|
210460
210500
|
|
|
@@ -210952,18 +210992,25 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
|
|
|
210952
210992
|
function = config.GetCompressionFunction(compression_type, type.InternalType());
|
|
210953
210993
|
block = block_manager.RegisterBlock(block_id);
|
|
210954
210994
|
}
|
|
210995
|
+
auto segment_size = Storage::BLOCK_SIZE;
|
|
210955
210996
|
return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::PERSISTENT, start, count, function,
|
|
210956
|
-
move(statistics), block_id, offset);
|
|
210997
|
+
move(statistics), block_id, offset, segment_size);
|
|
210957
210998
|
}
|
|
210958
210999
|
|
|
210959
211000
|
unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance &db, const LogicalType &type,
|
|
210960
|
-
idx_t start) {
|
|
211001
|
+
idx_t start, idx_t segment_size) {
|
|
210961
211002
|
auto &config = DBConfig::GetConfig(db);
|
|
210962
211003
|
auto function = config.GetCompressionFunction(CompressionType::COMPRESSION_UNCOMPRESSED, type.InternalType());
|
|
211004
|
+
auto &buffer_manager = BufferManager::GetBufferManager(db);
|
|
211005
|
+
shared_ptr<BlockHandle> block;
|
|
210963
211006
|
// transient: allocate a buffer for the uncompressed segment
|
|
210964
|
-
|
|
211007
|
+
if (segment_size < Storage::BLOCK_SIZE) {
|
|
211008
|
+
block = buffer_manager.RegisterSmallMemory(segment_size);
|
|
211009
|
+
} else {
|
|
211010
|
+
block = buffer_manager.RegisterMemory(segment_size, false);
|
|
211011
|
+
}
|
|
210965
211012
|
return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr,
|
|
210966
|
-
INVALID_BLOCK, 0);
|
|
211013
|
+
INVALID_BLOCK, 0, segment_size);
|
|
210967
211014
|
}
|
|
210968
211015
|
|
|
210969
211016
|
unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx_t start) {
|
|
@@ -210972,10 +211019,11 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx
|
|
|
210972
211019
|
|
|
210973
211020
|
ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
|
|
210974
211021
|
ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction *function_p,
|
|
210975
|
-
unique_ptr<BaseStatistics> statistics, block_id_t block_id_p, idx_t offset_p
|
|
211022
|
+
unique_ptr<BaseStatistics> statistics, block_id_t block_id_p, idx_t offset_p,
|
|
211023
|
+
idx_t segment_size_p)
|
|
210976
211024
|
: SegmentBase(start, count), db(db), type(move(type_p)), type_size(GetTypeIdSize(type.InternalType())),
|
|
210977
211025
|
segment_type(segment_type), function(function_p), stats(type, move(statistics)), block(move(block)),
|
|
210978
|
-
block_id(block_id_p), offset(offset_p) {
|
|
211026
|
+
block_id(block_id_p), offset(offset_p), segment_size(segment_size_p) {
|
|
210979
211027
|
D_ASSERT(function);
|
|
210980
211028
|
if (function->init_segment) {
|
|
210981
211029
|
segment_state = function->init_segment(*this, block_id);
|
|
@@ -210985,7 +211033,8 @@ ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block
|
|
|
210985
211033
|
ColumnSegment::ColumnSegment(ColumnSegment &other, idx_t start)
|
|
210986
211034
|
: SegmentBase(start, other.count), db(other.db), type(move(other.type)), type_size(other.type_size),
|
|
210987
211035
|
segment_type(other.segment_type), function(other.function), stats(move(other.stats)), block(move(other.block)),
|
|
210988
|
-
block_id(other.block_id), offset(other.offset),
|
|
211036
|
+
block_id(other.block_id), offset(other.offset), segment_size(other.segment_size),
|
|
211037
|
+
segment_state(move(other.segment_state)) {
|
|
210989
211038
|
}
|
|
210990
211039
|
|
|
210991
211040
|
ColumnSegment::~ColumnSegment() {
|
|
@@ -211033,6 +211082,23 @@ void ColumnSegment::FetchRow(ColumnFetchState &state, row_t row_id, Vector &resu
|
|
|
211033
211082
|
//===--------------------------------------------------------------------===//
|
|
211034
211083
|
// Append
|
|
211035
211084
|
//===--------------------------------------------------------------------===//
|
|
211085
|
+
idx_t ColumnSegment::SegmentSize() const {
|
|
211086
|
+
return segment_size;
|
|
211087
|
+
}
|
|
211088
|
+
|
|
211089
|
+
void ColumnSegment::Resize(idx_t new_size) {
|
|
211090
|
+
D_ASSERT(new_size > this->segment_size);
|
|
211091
|
+
D_ASSERT(offset == 0);
|
|
211092
|
+
auto &buffer_manager = BufferManager::GetBufferManager(db);
|
|
211093
|
+
auto new_block = buffer_manager.RegisterMemory(Storage::BLOCK_SIZE, false);
|
|
211094
|
+
auto old_handle = buffer_manager.Pin(block);
|
|
211095
|
+
auto new_handle = buffer_manager.Pin(new_block);
|
|
211096
|
+
memcpy(new_handle.Ptr(), old_handle.Ptr(), segment_size);
|
|
211097
|
+
this->block_id = new_block->BlockId();
|
|
211098
|
+
this->block = move(new_block);
|
|
211099
|
+
this->segment_size = new_size;
|
|
211100
|
+
}
|
|
211101
|
+
|
|
211036
211102
|
void ColumnSegment::InitializeAppend(ColumnAppendState &state) {
|
|
211037
211103
|
D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT);
|
|
211038
211104
|
if (!function->init_append) {
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "ed8eafda9"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev1241"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -1942,7 +1942,7 @@ namespace duckdb {
|
|
|
1942
1942
|
class Allocator;
|
|
1943
1943
|
struct FileHandle;
|
|
1944
1944
|
|
|
1945
|
-
enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2 };
|
|
1945
|
+
enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2, TINY_BUFFER = 3 };
|
|
1946
1946
|
|
|
1947
1947
|
//! The FileBuffer represents a buffer that can be read or written to a Direct IO FileHandle.
|
|
1948
1948
|
class FileBuffer {
|
|
@@ -25307,12 +25307,16 @@ class BufferManager {
|
|
|
25307
25307
|
|
|
25308
25308
|
public:
|
|
25309
25309
|
BufferManager(DatabaseInstance &db, string temp_directory, idx_t maximum_memory);
|
|
25310
|
-
~BufferManager();
|
|
25310
|
+
virtual ~BufferManager();
|
|
25311
25311
|
|
|
25312
25312
|
//! Register an in-memory buffer of arbitrary size, as long as it is >= BLOCK_SIZE. can_destroy signifies whether or
|
|
25313
25313
|
//! not the buffer can be destroyed when unpinned, or whether or not it needs to be written to a temporary file so
|
|
25314
25314
|
//! it can be reloaded. The resulting buffer will already be allocated, but needs to be pinned in order to be used.
|
|
25315
25315
|
shared_ptr<BlockHandle> RegisterMemory(idx_t block_size, bool can_destroy);
|
|
25316
|
+
//! Registers an in-memory buffer that cannot be unloaded until it is destroyed
|
|
25317
|
+
//! This buffer can be small (smaller than BLOCK_SIZE)
|
|
25318
|
+
//! Unpin and pin are nops on this block of memory
|
|
25319
|
+
shared_ptr<BlockHandle> RegisterSmallMemory(idx_t block_size);
|
|
25316
25320
|
|
|
25317
25321
|
//! Allocate an in-memory buffer with a single pin.
|
|
25318
25322
|
//! The allocated memory is released when the buffer handle is destroyed.
|
|
@@ -25353,7 +25357,8 @@ public:
|
|
|
25353
25357
|
//! Construct a managed buffer.
|
|
25354
25358
|
//! The block_id is just used for internal tracking. It doesn't map to any actual
|
|
25355
25359
|
//! BlockManager.
|
|
25356
|
-
virtual unique_ptr<FileBuffer> ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source
|
|
25360
|
+
virtual unique_ptr<FileBuffer> ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source,
|
|
25361
|
+
FileBufferType type = FileBufferType::MANAGED_BUFFER);
|
|
25357
25362
|
|
|
25358
25363
|
private:
|
|
25359
25364
|
//! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible
|
|
@@ -26166,7 +26171,8 @@ public:
|
|
|
26166
26171
|
block_id_t id, idx_t offset, const LogicalType &type_p,
|
|
26167
26172
|
idx_t start, idx_t count, CompressionType compression_type,
|
|
26168
26173
|
unique_ptr<BaseStatistics> statistics);
|
|
26169
|
-
static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start
|
|
26174
|
+
static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start,
|
|
26175
|
+
idx_t segment_size = Storage::BLOCK_SIZE);
|
|
26170
26176
|
static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
|
|
26171
26177
|
|
|
26172
26178
|
public:
|
|
@@ -26182,6 +26188,11 @@ public:
|
|
|
26182
26188
|
//! Skip a scan forward to the row_index specified in the scan state
|
|
26183
26189
|
void Skip(ColumnScanState &state);
|
|
26184
26190
|
|
|
26191
|
+
// The maximum size of the buffer (in bytes)
|
|
26192
|
+
idx_t SegmentSize() const;
|
|
26193
|
+
//! Resize the block
|
|
26194
|
+
void Resize(idx_t segment_size);
|
|
26195
|
+
|
|
26185
26196
|
//! Initialize an append of this segment. Appends are only supported on transient segments.
|
|
26186
26197
|
void InitializeAppend(ColumnAppendState &state);
|
|
26187
26198
|
//! Appends a (part of) vector to the segment, returns the amount of entries successfully appended
|
|
@@ -26227,7 +26238,7 @@ public:
|
|
|
26227
26238
|
public:
|
|
26228
26239
|
ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
|
|
26229
26240
|
idx_t start, idx_t count, CompressionFunction *function, unique_ptr<BaseStatistics> statistics,
|
|
26230
|
-
block_id_t block_id, idx_t offset);
|
|
26241
|
+
block_id_t block_id, idx_t offset, idx_t segment_size);
|
|
26231
26242
|
ColumnSegment(ColumnSegment &other, idx_t start);
|
|
26232
26243
|
|
|
26233
26244
|
private:
|
|
@@ -26239,6 +26250,8 @@ private:
|
|
|
26239
26250
|
block_id_t block_id;
|
|
26240
26251
|
//! The offset into the block (persistent segment only)
|
|
26241
26252
|
idx_t offset;
|
|
26253
|
+
//! The allocated segment size
|
|
26254
|
+
idx_t segment_size;
|
|
26242
26255
|
//! Storage associated with the compressed segment
|
|
26243
26256
|
unique_ptr<CompressedSegmentState> segment_state;
|
|
26244
26257
|
};
|