duckdb 0.5.2-dev1229.0 → 0.5.2-dev1241.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1229.0",
5
+ "version": "0.5.2-dev1241.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -6538,6 +6538,7 @@ void AllocatedData::Reset() {
6538
6538
  }
6539
6539
  D_ASSERT(allocator);
6540
6540
  allocator->FreeData(pointer, allocated_size);
6541
+ allocated_size = 0;
6541
6542
  pointer = nullptr;
6542
6543
  }
6543
6544
 
@@ -15982,23 +15983,27 @@ void FileBuffer::ReallocBuffer(size_t new_size) {
15982
15983
  }
15983
15984
 
15984
15985
  void FileBuffer::Resize(uint64_t new_size) {
15986
+ idx_t header_size = Storage::BLOCK_HEADER_SIZE;
15985
15987
  {
15986
15988
  // TODO: All the logic here is specific to SingleFileBlockManager.
15987
15989
  // and should be moved there, via a specific implementation of FileBuffer.
15988
15990
  //
15989
15991
  // make room for the block header (if this is not the db file header)
15990
- if (type == FileBufferType::MANAGED_BUFFER && new_size != Storage::FILE_HEADER_SIZE) {
15992
+ if (type == FileBufferType::TINY_BUFFER) {
15993
+ header_size = 0;
15994
+ }
15995
+ if (type == FileBufferType::MANAGED_BUFFER) {
15991
15996
  new_size += Storage::BLOCK_HEADER_SIZE;
15992
- // If we don't write/read an entire block, our checksum won't match.
15993
- new_size = AlignValue<uint32_t, Storage::BLOCK_ALLOC_SIZE>(new_size);
15994
15997
  }
15995
- new_size = AlignValue<uint32_t, Storage::SECTOR_SIZE>(new_size);
15998
+ if (type != FileBufferType::TINY_BUFFER) {
15999
+ new_size = AlignValue<uint32_t, Storage::SECTOR_SIZE>(new_size);
16000
+ }
15996
16001
  ReallocBuffer(new_size);
15997
16002
  }
15998
16003
 
15999
16004
  if (new_size > 0) {
16000
- buffer = internal_buffer + Storage::BLOCK_HEADER_SIZE;
16001
- size = internal_size - Storage::BLOCK_HEADER_SIZE;
16005
+ buffer = internal_buffer + header_size;
16006
+ size = internal_size - header_size;
16002
16007
  }
16003
16008
  }
16004
16009
 
@@ -78786,7 +78791,7 @@ public:
78786
78791
  void CreateNewCollection(TableCatalogEntry *table, const vector<LogicalType> &insert_types) {
78787
78792
  auto &table_info = table->storage->info;
78788
78793
  auto &block_manager = TableIOManager::Get(*table->storage).GetBlockManagerForRowData();
78789
- current_collection = make_unique<RowGroupCollection>(table_info, block_manager, insert_types, 0);
78794
+ current_collection = make_unique<RowGroupCollection>(table_info, block_manager, insert_types, MAX_ROW_ID);
78790
78795
  current_collection->InitializeEmpty();
78791
78796
  current_collection->InitializeAppend(current_append_state);
78792
78797
  written_to_disk = false;
@@ -78888,7 +78893,7 @@ struct CollectionMerger {
78888
78893
  auto &table_info = storage.info;
78889
78894
  auto &block_manager = TableIOManager::Get(storage).GetBlockManagerForRowData();
78890
78895
  auto types = storage.GetTypes();
78891
- new_collection = make_unique<RowGroupCollection>(table_info, block_manager, types, 0);
78896
+ new_collection = make_unique<RowGroupCollection>(table_info, block_manager, types, MAX_ROW_ID);
78892
78897
  TableAppendState append_state;
78893
78898
  new_collection->InitializeEmpty();
78894
78899
  new_collection->InitializeAppend(append_state);
@@ -79709,7 +79714,8 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, GlobalSinkState &
79709
79714
  if (!lstate.local_collection) {
79710
79715
  auto &table_info = table->storage->info;
79711
79716
  auto &block_manager = TableIOManager::Get(*table->storage).GetBlockManagerForRowData();
79712
- lstate.local_collection = make_unique<RowGroupCollection>(table_info, block_manager, insert_types, 0);
79717
+ lstate.local_collection =
79718
+ make_unique<RowGroupCollection>(table_info, block_manager, insert_types, MAX_ROW_ID);
79713
79719
  lstate.local_collection->InitializeEmpty();
79714
79720
  lstate.local_collection->InitializeAppend(lstate.local_append_state);
79715
79721
  lstate.writer = make_unique<OptimisticDataWriter>(gstate.table->storage.get());
@@ -196037,10 +196043,9 @@ BlockHandle::BlockHandle(BlockManager &block_manager, block_id_t block_id_p, uni
196037
196043
  bool can_destroy_p, idx_t block_size)
196038
196044
  : block_manager(block_manager), readers(0), block_id(block_id_p), eviction_timestamp(0), can_destroy(can_destroy_p),
196039
196045
  unswizzled(nullptr) {
196040
- D_ASSERT(block_size >= Storage::BLOCK_SIZE);
196041
196046
  buffer = move(buffer_p);
196042
196047
  state = BlockState::BLOCK_LOADED;
196043
- memory_usage = block_size + Storage::BLOCK_HEADER_SIZE;
196048
+ memory_usage = buffer->type == FileBufferType::TINY_BUFFER ? block_size : block_size + Storage::BLOCK_HEADER_SIZE;
196044
196049
  }
196045
196050
 
196046
196051
  BlockHandle::~BlockHandle() {
@@ -196048,9 +196053,10 @@ BlockHandle::~BlockHandle() {
196048
196053
  unswizzled = nullptr;
196049
196054
  auto &buffer_manager = block_manager.buffer_manager;
196050
196055
  // no references remain to this block: erase
196051
- if (state == BlockState::BLOCK_LOADED) {
196056
+ if (buffer && state == BlockState::BLOCK_LOADED) {
196052
196057
  // the block is still loaded in memory: erase it
196053
196058
  buffer.reset();
196059
+ D_ASSERT(buffer_manager.current_memory >= memory_usage);
196054
196060
  buffer_manager.current_memory -= memory_usage;
196055
196061
  }
196056
196062
  block_manager.UnregisterBlock(block_id, can_destroy);
@@ -196075,14 +196081,15 @@ unique_ptr<Block> AllocateBlock(BlockManager &block_manager, unique_ptr<FileBuff
196075
196081
  }
196076
196082
  }
196077
196083
 
196078
- unique_ptr<FileBuffer> BufferManager::ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source) {
196084
+ unique_ptr<FileBuffer> BufferManager::ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source,
196085
+ FileBufferType type) {
196079
196086
  if (source) {
196080
196087
  auto tmp = move(source);
196081
196088
  D_ASSERT(tmp->size == size);
196082
- return make_unique<FileBuffer>(*tmp, FileBufferType::MANAGED_BUFFER);
196089
+ return make_unique<FileBuffer>(*tmp, type);
196083
196090
  } else {
196084
196091
  // no re-usable buffer: allocate a new buffer
196085
- return make_unique<FileBuffer>(Allocator::Get(db), FileBufferType::MANAGED_BUFFER, size);
196092
+ return make_unique<FileBuffer>(Allocator::Get(db), type, size);
196086
196093
  }
196087
196094
  }
196088
196095
 
@@ -196116,10 +196123,10 @@ unique_ptr<FileBuffer> BlockHandle::UnloadAndTakeBlock() {
196116
196123
  }
196117
196124
  D_ASSERT(!unswizzled);
196118
196125
  D_ASSERT(CanUnload());
196119
- D_ASSERT(memory_usage >= Storage::BLOCK_ALLOC_SIZE);
196120
196126
 
196121
196127
  if (block_id >= MAXIMUM_BLOCK && !can_destroy) {
196122
196128
  // temporary block that cannot be destroyed: write to temporary file
196129
+ D_ASSERT(memory_usage >= Storage::BLOCK_ALLOC_SIZE);
196123
196130
  block_manager.buffer_manager.WriteTemporaryBuffer(block_id, *buffer);
196124
196131
  }
196125
196132
  block_manager.buffer_manager.current_memory -= memory_usage;
@@ -196277,8 +196284,20 @@ shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(block_id_t block_id, s
196277
196284
  return new_block;
196278
196285
  }
196279
196286
 
196287
+ shared_ptr<BlockHandle> BufferManager::RegisterSmallMemory(idx_t block_size) {
196288
+ if (!EvictBlocks(block_size, maximum_memory, nullptr)) {
196289
+ throw OutOfMemoryException("could not allocate block of %lld bytes (%lld/%lld used) %s", block_size,
196290
+ GetUsedMemory(), GetMaxMemory(), InMemoryWarning());
196291
+ }
196292
+ auto buffer = ConstructManagedBuffer(block_size, nullptr, FileBufferType::TINY_BUFFER);
196293
+
196294
+ // create a new block pointer for this block
196295
+ return make_shared<BlockHandle>(*temp_block_manager, ++temporary_id, move(buffer), false, block_size);
196296
+ }
196297
+
196280
196298
  shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can_destroy) {
196281
- auto alloc_size = AlignValue<idx_t, 1 << 12>(block_size + Storage::BLOCK_HEADER_SIZE);
196299
+ D_ASSERT(block_size >= Storage::BLOCK_SIZE);
196300
+ auto alloc_size = AlignValue<idx_t, Storage::SECTOR_SIZE>(block_size + Storage::BLOCK_HEADER_SIZE);
196282
196301
  // first evict blocks until we have enough memory to store this buffer
196283
196302
  unique_ptr<FileBuffer> reusable_buffer;
196284
196303
  if (!EvictBlocks(alloc_size, maximum_memory, &reusable_buffer)) {
@@ -196313,6 +196332,7 @@ void BufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size
196313
196332
  }
196314
196333
  } else {
196315
196334
  // no need to evict blocks
196335
+ D_ASSERT(current_memory >= idx_t(-required_memory));
196316
196336
  current_memory -= idx_t(-required_memory);
196317
196337
  }
196318
196338
 
@@ -196334,6 +196354,7 @@ BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
196334
196354
  }
196335
196355
  required_memory = handle->memory_usage;
196336
196356
  }
196357
+ D_ASSERT(required_memory >= Storage::BLOCK_SIZE);
196337
196358
  // evict blocks until we have space for the current block
196338
196359
  unique_ptr<FileBuffer> reusable_buffer;
196339
196360
  if (!EvictBlocks(required_memory, maximum_memory, &reusable_buffer)) {
@@ -196345,6 +196366,7 @@ BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
196345
196366
  if (handle->state == BlockState::BLOCK_LOADED) {
196346
196367
  // the block is loaded, increment the reader count and return a pointer to the handle
196347
196368
  handle->readers++;
196369
+ D_ASSERT(current_memory >= required_memory);
196348
196370
  current_memory -= required_memory;
196349
196371
  return handle->Load(handle);
196350
196372
  }
@@ -196363,6 +196385,9 @@ void BufferManager::AddToEvictionQueue(shared_ptr<BlockHandle> &handle) {
196363
196385
 
196364
196386
  void BufferManager::Unpin(shared_ptr<BlockHandle> &handle) {
196365
196387
  lock_guard<mutex> lock(handle->lock);
196388
+ if (!handle->buffer || handle->buffer->type == FileBufferType::TINY_BUFFER) {
196389
+ return;
196390
+ }
196366
196391
  D_ASSERT(handle->readers > 0);
196367
196392
  handle->readers--;
196368
196393
  if (handle->readers == 0) {
@@ -196378,6 +196403,7 @@ bool BufferManager::EvictBlocks(idx_t extra_memory, idx_t memory_limit, unique_p
196378
196403
  while (current_memory > memory_limit) {
196379
196404
  // get a block to unpin from the queue
196380
196405
  if (!queue->q.try_dequeue(node)) {
196406
+ D_ASSERT(current_memory >= extra_memory);
196381
196407
  current_memory -= extra_memory;
196382
196408
  return false;
196383
196409
  }
@@ -196797,10 +196823,9 @@ void BufferManager::WriteTemporaryBuffer(block_id_t block_id, FileBuffer &buffer
196797
196823
  temp_directory_handle->GetTempFile().WriteTemporaryBuffer(block_id, buffer);
196798
196824
  return;
196799
196825
  }
196800
-
196801
- D_ASSERT(buffer.size > Storage::BLOCK_SIZE);
196802
196826
  // get the path to write to
196803
196827
  auto path = GetTemporaryPath(block_id);
196828
+ D_ASSERT(buffer.size > Storage::BLOCK_SIZE);
196804
196829
  // create the file and write the size followed by the buffer contents
196805
196830
  auto &fs = FileSystem::GetFileSystem(db);
196806
196831
  auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE);
@@ -196876,6 +196901,7 @@ data_ptr_t BufferManager::BufferAllocatorAllocate(PrivateAllocatorData *private_
196876
196901
 
196877
196902
  void BufferManager::BufferAllocatorFree(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size) {
196878
196903
  auto &data = (BufferAllocatorData &)*private_data;
196904
+ D_ASSERT(data.manager.current_memory >= size);
196879
196905
  data.manager.current_memory -= size;
196880
196906
  return Allocator::Get(data.manager.db).FreeData(pointer, size);
196881
196907
  }
@@ -197534,7 +197560,6 @@ WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(BlockManager &block_manag
197534
197560
 
197535
197561
  WriteOverflowStringsToDisk::~WriteOverflowStringsToDisk() {
197536
197562
  if (offset > 0) {
197537
- handle.GetFileBuffer().Resize(offset);
197538
197563
  block_manager.Write(handle.GetFileBuffer(), block_id);
197539
197564
  }
197540
197565
  }
@@ -203289,7 +203314,7 @@ idx_t FixedSizeAppend(CompressionAppendState &append_state, ColumnSegment &segme
203289
203314
  D_ASSERT(segment.GetBlockOffset() == 0);
203290
203315
 
203291
203316
  auto target_ptr = append_state.handle.Ptr();
203292
- idx_t max_tuple_count = Storage::BLOCK_SIZE / sizeof(T);
203317
+ idx_t max_tuple_count = segment.SegmentSize() / sizeof(T);
203293
203318
  idx_t copy_count = MinValue<idx_t>(count, max_tuple_count - segment.count);
203294
203319
 
203295
203320
  AppendLoop<T>(stats, target_ptr, segment.count, data, offset, copy_count);
@@ -204802,7 +204827,7 @@ unique_ptr<CompressedSegmentState> UncompressedStringStorage::StringInitSegment(
204802
204827
  auto handle = buffer_manager.Pin(segment.block);
204803
204828
  StringDictionaryContainer dictionary;
204804
204829
  dictionary.size = 0;
204805
- dictionary.end = Storage::BLOCK_SIZE;
204830
+ dictionary.end = segment.SegmentSize();
204806
204831
  SetDictionary(segment, handle, dictionary);
204807
204832
  }
204808
204833
  return make_unique<UncompressedStringSegmentState>();
@@ -204812,16 +204837,16 @@ idx_t UncompressedStringStorage::FinalizeAppend(ColumnSegment &segment, SegmentS
204812
204837
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
204813
204838
  auto handle = buffer_manager.Pin(segment.block);
204814
204839
  auto dict = GetDictionary(segment, handle);
204815
- D_ASSERT(dict.end == Storage::BLOCK_SIZE);
204840
+ D_ASSERT(dict.end == segment.SegmentSize());
204816
204841
  // compute the total size required to store this segment
204817
204842
  auto offset_size = DICTIONARY_HEADER_SIZE + segment.count * sizeof(int32_t);
204818
204843
  auto total_size = offset_size + dict.size;
204819
204844
  if (total_size >= COMPACTION_FLUSH_LIMIT) {
204820
204845
  // the block is full enough, don't bother moving around the dictionary
204821
- return Storage::BLOCK_SIZE;
204846
+ return segment.SegmentSize();
204822
204847
  }
204823
204848
  // the block has space left: figure out how much space we can save
204824
- auto move_amount = Storage::BLOCK_SIZE - total_size;
204849
+ auto move_amount = segment.SegmentSize() - total_size;
204825
204850
  // move the dictionary so it lines up exactly with the offsets
204826
204851
  auto dataptr = handle.Ptr();
204827
204852
  memmove(dataptr + offset_size, dataptr + dict.end - dict.size, dict.size);
@@ -204868,10 +204893,10 @@ StringDictionaryContainer UncompressedStringStorage::GetDictionary(ColumnSegment
204868
204893
 
204869
204894
  idx_t UncompressedStringStorage::RemainingSpace(ColumnSegment &segment, BufferHandle &handle) {
204870
204895
  auto dictionary = GetDictionary(segment, handle);
204871
- D_ASSERT(dictionary.end == Storage::BLOCK_SIZE);
204896
+ D_ASSERT(dictionary.end == segment.SegmentSize());
204872
204897
  idx_t used_space = dictionary.size + segment.count * sizeof(int32_t) + DICTIONARY_HEADER_SIZE;
204873
- D_ASSERT(Storage::BLOCK_SIZE >= used_space);
204874
- return Storage::BLOCK_SIZE - used_space;
204898
+ D_ASSERT(segment.SegmentSize() >= used_space);
204899
+ return segment.SegmentSize() - used_space;
204875
204900
  }
204876
204901
 
204877
204902
  void UncompressedStringStorage::WriteString(ColumnSegment &segment, string_t string, block_id_t &result_block,
@@ -205503,7 +205528,7 @@ unique_ptr<CompressedSegmentState> ValidityInitSegment(ColumnSegment &segment, b
205503
205528
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
205504
205529
  if (block_id == INVALID_BLOCK) {
205505
205530
  auto handle = buffer_manager.Pin(segment.block);
205506
- memset(handle.Ptr(), 0xFF, Storage::BLOCK_SIZE);
205531
+ memset(handle.Ptr(), 0xFF, segment.SegmentSize());
205507
205532
  }
205508
205533
  return nullptr;
205509
205534
  }
@@ -205513,7 +205538,7 @@ idx_t ValidityAppend(CompressionAppendState &append_state, ColumnSegment &segmen
205513
205538
  D_ASSERT(segment.GetBlockOffset() == 0);
205514
205539
  auto &validity_stats = (ValidityStatistics &)*stats.statistics;
205515
205540
 
205516
- auto max_tuples = Storage::BLOCK_SIZE / ValidityMask::STANDARD_MASK_SIZE * STANDARD_VECTOR_SIZE;
205541
+ auto max_tuples = segment.SegmentSize() / ValidityMask::STANDARD_MASK_SIZE * STANDARD_VECTOR_SIZE;
205517
205542
  idx_t append_count = MinValue<idx_t>(vcount, max_tuples - segment.count);
205518
205543
  if (data.validity.AllValid()) {
205519
205544
  // no null values: skip append
@@ -205560,7 +205585,7 @@ void ValidityRevertAppend(ColumnSegment &segment, idx_t start_row) {
205560
205585
  revert_start = start_bit / 8;
205561
205586
  }
205562
205587
  // for the rest, we just memset
205563
- memset(handle.Ptr() + revert_start, 0xFF, Storage::BLOCK_SIZE - revert_start);
205588
+ memset(handle.Ptr() + revert_start, 0xFF, segment.SegmentSize() - revert_start);
205564
205589
  }
205565
205590
 
205566
205591
  //===--------------------------------------------------------------------===//
@@ -207713,8 +207738,9 @@ T DeserializeHeaderStructure(data_ptr_t ptr) {
207713
207738
  SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path_p, bool read_only, bool create_new,
207714
207739
  bool use_direct_io)
207715
207740
  : BlockManager(BufferManager::GetBufferManager(db)), db(db), path(move(path_p)),
207716
- header_buffer(Allocator::Get(db), FileBufferType::MANAGED_BUFFER, Storage::FILE_HEADER_SIZE), iteration_count(0),
207717
- read_only(read_only), use_direct_io(use_direct_io) {
207741
+ header_buffer(Allocator::Get(db), FileBufferType::MANAGED_BUFFER,
207742
+ Storage::FILE_HEADER_SIZE - Storage::BLOCK_HEADER_SIZE),
207743
+ iteration_count(0), read_only(read_only), use_direct_io(use_direct_io) {
207718
207744
  uint8_t flags;
207719
207745
  FileLockType lock;
207720
207746
  if (read_only) {
@@ -209893,6 +209919,12 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
209893
209919
  pstate->AddSegmentToTail(&column_data, segment.get(), offset_in_block);
209894
209920
  } else {
209895
209921
  // Create a new block for future reuse.
209922
+ if (segment->SegmentSize() != Storage::BLOCK_SIZE) {
209923
+ // the segment is smaller than the block size
209924
+ // allocate a new block and copy the data over
209925
+ D_ASSERT(segment->SegmentSize() < Storage::BLOCK_SIZE);
209926
+ segment->Resize(Storage::BLOCK_SIZE);
209927
+ }
209896
209928
  D_ASSERT(offset_in_block == 0);
209897
209929
  allocation.partial_block = make_unique<PartialBlockForCheckpoint>(
209898
209930
  &column_data, segment.get(), *allocation.block_manager, allocation.state);
@@ -210454,7 +210486,15 @@ unique_ptr<BaseStatistics> ColumnData::GetUpdateStatistics() {
210454
210486
  }
210455
210487
 
210456
210488
  void ColumnData::AppendTransientSegment(SegmentLock &l, idx_t start_row) {
210457
- auto new_segment = ColumnSegment::CreateTransientSegment(GetDatabase(), type, start_row);
210489
+ idx_t segment_size = Storage::BLOCK_SIZE;
210490
+ if (start_row == idx_t(MAX_ROW_ID)) {
210491
+ #if STANDARD_VECTOR_SIZE < 1024
210492
+ segment_size = 1024 * GetTypeIdSize(type.InternalType());
210493
+ #else
210494
+ segment_size = STANDARD_VECTOR_SIZE * GetTypeIdSize(type.InternalType());
210495
+ #endif
210496
+ }
210497
+ auto new_segment = ColumnSegment::CreateTransientSegment(GetDatabase(), type, start_row, segment_size);
210458
210498
  data.AppendSegment(l, move(new_segment));
210459
210499
  }
210460
210500
 
@@ -210952,18 +210992,25 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
210952
210992
  function = config.GetCompressionFunction(compression_type, type.InternalType());
210953
210993
  block = block_manager.RegisterBlock(block_id);
210954
210994
  }
210995
+ auto segment_size = Storage::BLOCK_SIZE;
210955
210996
  return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::PERSISTENT, start, count, function,
210956
- move(statistics), block_id, offset);
210997
+ move(statistics), block_id, offset, segment_size);
210957
210998
  }
210958
210999
 
210959
211000
  unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance &db, const LogicalType &type,
210960
- idx_t start) {
211001
+ idx_t start, idx_t segment_size) {
210961
211002
  auto &config = DBConfig::GetConfig(db);
210962
211003
  auto function = config.GetCompressionFunction(CompressionType::COMPRESSION_UNCOMPRESSED, type.InternalType());
211004
+ auto &buffer_manager = BufferManager::GetBufferManager(db);
211005
+ shared_ptr<BlockHandle> block;
210963
211006
  // transient: allocate a buffer for the uncompressed segment
210964
- auto block = BufferManager::GetBufferManager(db).RegisterMemory(Storage::BLOCK_SIZE, false);
211007
+ if (segment_size < Storage::BLOCK_SIZE) {
211008
+ block = buffer_manager.RegisterSmallMemory(segment_size);
211009
+ } else {
211010
+ block = buffer_manager.RegisterMemory(segment_size, false);
211011
+ }
210965
211012
  return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr,
210966
- INVALID_BLOCK, 0);
211013
+ INVALID_BLOCK, 0, segment_size);
210967
211014
  }
210968
211015
 
210969
211016
  unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx_t start) {
@@ -210972,10 +211019,11 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx
210972
211019
 
210973
211020
  ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
210974
211021
  ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction *function_p,
210975
- unique_ptr<BaseStatistics> statistics, block_id_t block_id_p, idx_t offset_p)
211022
+ unique_ptr<BaseStatistics> statistics, block_id_t block_id_p, idx_t offset_p,
211023
+ idx_t segment_size_p)
210976
211024
  : SegmentBase(start, count), db(db), type(move(type_p)), type_size(GetTypeIdSize(type.InternalType())),
210977
211025
  segment_type(segment_type), function(function_p), stats(type, move(statistics)), block(move(block)),
210978
- block_id(block_id_p), offset(offset_p) {
211026
+ block_id(block_id_p), offset(offset_p), segment_size(segment_size_p) {
210979
211027
  D_ASSERT(function);
210980
211028
  if (function->init_segment) {
210981
211029
  segment_state = function->init_segment(*this, block_id);
@@ -210985,7 +211033,8 @@ ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block
210985
211033
  ColumnSegment::ColumnSegment(ColumnSegment &other, idx_t start)
210986
211034
  : SegmentBase(start, other.count), db(other.db), type(move(other.type)), type_size(other.type_size),
210987
211035
  segment_type(other.segment_type), function(other.function), stats(move(other.stats)), block(move(other.block)),
210988
- block_id(other.block_id), offset(other.offset), segment_state(move(other.segment_state)) {
211036
+ block_id(other.block_id), offset(other.offset), segment_size(other.segment_size),
211037
+ segment_state(move(other.segment_state)) {
210989
211038
  }
210990
211039
 
210991
211040
  ColumnSegment::~ColumnSegment() {
@@ -211033,6 +211082,23 @@ void ColumnSegment::FetchRow(ColumnFetchState &state, row_t row_id, Vector &resu
211033
211082
  //===--------------------------------------------------------------------===//
211034
211083
  // Append
211035
211084
  //===--------------------------------------------------------------------===//
211085
+ idx_t ColumnSegment::SegmentSize() const {
211086
+ return segment_size;
211087
+ }
211088
+
211089
+ void ColumnSegment::Resize(idx_t new_size) {
211090
+ D_ASSERT(new_size > this->segment_size);
211091
+ D_ASSERT(offset == 0);
211092
+ auto &buffer_manager = BufferManager::GetBufferManager(db);
211093
+ auto new_block = buffer_manager.RegisterMemory(Storage::BLOCK_SIZE, false);
211094
+ auto old_handle = buffer_manager.Pin(block);
211095
+ auto new_handle = buffer_manager.Pin(new_block);
211096
+ memcpy(new_handle.Ptr(), old_handle.Ptr(), segment_size);
211097
+ this->block_id = new_block->BlockId();
211098
+ this->block = move(new_block);
211099
+ this->segment_size = new_size;
211100
+ }
211101
+
211036
211102
  void ColumnSegment::InitializeAppend(ColumnAppendState &state) {
211037
211103
  D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT);
211038
211104
  if (!function->init_append) {
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "e4ee601c8"
15
- #define DUCKDB_VERSION "v0.5.2-dev1229"
14
+ #define DUCKDB_SOURCE_ID "ed8eafda9"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1241"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -1942,7 +1942,7 @@ namespace duckdb {
1942
1942
  class Allocator;
1943
1943
  struct FileHandle;
1944
1944
 
1945
- enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2 };
1945
+ enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2, TINY_BUFFER = 3 };
1946
1946
 
1947
1947
  //! The FileBuffer represents a buffer that can be read or written to a Direct IO FileHandle.
1948
1948
  class FileBuffer {
@@ -25307,12 +25307,16 @@ class BufferManager {
25307
25307
 
25308
25308
  public:
25309
25309
  BufferManager(DatabaseInstance &db, string temp_directory, idx_t maximum_memory);
25310
- ~BufferManager();
25310
+ virtual ~BufferManager();
25311
25311
 
25312
25312
  //! Register an in-memory buffer of arbitrary size, as long as it is >= BLOCK_SIZE. can_destroy signifies whether or
25313
25313
  //! not the buffer can be destroyed when unpinned, or whether or not it needs to be written to a temporary file so
25314
25314
  //! it can be reloaded. The resulting buffer will already be allocated, but needs to be pinned in order to be used.
25315
25315
  shared_ptr<BlockHandle> RegisterMemory(idx_t block_size, bool can_destroy);
25316
+ //! Registers an in-memory buffer that cannot be unloaded until it is destroyed
25317
+ //! This buffer can be small (smaller than BLOCK_SIZE)
25318
+ //! Unpin and pin are nops on this block of memory
25319
+ shared_ptr<BlockHandle> RegisterSmallMemory(idx_t block_size);
25316
25320
 
25317
25321
  //! Allocate an in-memory buffer with a single pin.
25318
25322
  //! The allocated memory is released when the buffer handle is destroyed.
@@ -25353,7 +25357,8 @@ public:
25353
25357
  //! Construct a managed buffer.
25354
25358
  //! The block_id is just used for internal tracking. It doesn't map to any actual
25355
25359
  //! BlockManager.
25356
- virtual unique_ptr<FileBuffer> ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source);
25360
+ virtual unique_ptr<FileBuffer> ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source,
25361
+ FileBufferType type = FileBufferType::MANAGED_BUFFER);
25357
25362
 
25358
25363
  private:
25359
25364
  //! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible
@@ -26166,7 +26171,8 @@ public:
26166
26171
  block_id_t id, idx_t offset, const LogicalType &type_p,
26167
26172
  idx_t start, idx_t count, CompressionType compression_type,
26168
26173
  unique_ptr<BaseStatistics> statistics);
26169
- static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start);
26174
+ static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start,
26175
+ idx_t segment_size = Storage::BLOCK_SIZE);
26170
26176
  static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
26171
26177
 
26172
26178
  public:
@@ -26182,6 +26188,11 @@ public:
26182
26188
  //! Skip a scan forward to the row_index specified in the scan state
26183
26189
  void Skip(ColumnScanState &state);
26184
26190
 
26191
+ // The maximum size of the buffer (in bytes)
26192
+ idx_t SegmentSize() const;
26193
+ //! Resize the block
26194
+ void Resize(idx_t segment_size);
26195
+
26185
26196
  //! Initialize an append of this segment. Appends are only supported on transient segments.
26186
26197
  void InitializeAppend(ColumnAppendState &state);
26187
26198
  //! Appends a (part of) vector to the segment, returns the amount of entries successfully appended
@@ -26227,7 +26238,7 @@ public:
26227
26238
  public:
26228
26239
  ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
26229
26240
  idx_t start, idx_t count, CompressionFunction *function, unique_ptr<BaseStatistics> statistics,
26230
- block_id_t block_id, idx_t offset);
26241
+ block_id_t block_id, idx_t offset, idx_t segment_size);
26231
26242
  ColumnSegment(ColumnSegment &other, idx_t start);
26232
26243
 
26233
26244
  private:
@@ -26239,6 +26250,8 @@ private:
26239
26250
  block_id_t block_id;
26240
26251
  //! The offset into the block (persistent segment only)
26241
26252
  idx_t offset;
26253
+ //! The allocated segment size
26254
+ idx_t segment_size;
26242
26255
  //! Storage associated with the compressed segment
26243
26256
  unique_ptr<CompressedSegmentState> segment_state;
26244
26257
  };