duckdb 0.5.2-dev1144.0 → 0.5.2-dev1149.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1144.0",
5
+ "version": "0.5.2-dev1149.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -15914,11 +15914,21 @@ void FieldReader::Finalize() {
15914
15914
 
15915
15915
  namespace duckdb {
15916
15916
 
15917
- FileBuffer::FileBuffer(Allocator &allocator, FileBufferType type, uint64_t bufsiz)
15918
- : allocator(allocator), type(type), malloced_buffer(nullptr) {
15919
- SetMallocedSize(bufsiz);
15920
- malloced_buffer = allocator.AllocateData(malloced_size);
15921
- Construct(bufsiz);
15917
+ FileBuffer::FileBuffer(Allocator &allocator, FileBufferType type, uint64_t user_size)
15918
+ : allocator(allocator), type(type) {
15919
+ Init();
15920
+ if (user_size) {
15921
+ Resize(user_size);
15922
+ }
15923
+ }
15924
+
15925
+ void FileBuffer::Init() {
15926
+ buffer = nullptr;
15927
+ size = 0;
15928
+ internal_buffer = nullptr;
15929
+ internal_size = 0;
15930
+ malloced_buffer = nullptr;
15931
+ malloced_size = 0;
15922
15932
  }
15923
15933
 
15924
15934
  FileBuffer::FileBuffer(FileBuffer &source, FileBufferType type_p) : allocator(source.allocator), type(type_p) {
@@ -15930,12 +15940,7 @@ FileBuffer::FileBuffer(FileBuffer &source, FileBufferType type_p) : allocator(so
15930
15940
  malloced_buffer = source.malloced_buffer;
15931
15941
  malloced_size = source.malloced_size;
15932
15942
 
15933
- source.buffer = nullptr;
15934
- source.size = 0;
15935
- source.internal_buffer = nullptr;
15936
- source.internal_size = 0;
15937
- source.malloced_buffer = nullptr;
15938
- source.malloced_size = 0;
15943
+ source.Init();
15939
15944
  }
15940
15945
 
15941
15946
  FileBuffer::~FileBuffer() {
@@ -15945,30 +15950,42 @@ FileBuffer::~FileBuffer() {
15945
15950
  allocator.FreeData(malloced_buffer, malloced_size);
15946
15951
  }
15947
15952
 
15948
- void FileBuffer::SetMallocedSize(uint64_t &bufsiz) {
15949
- // make room for the block header (if this is not the db file header)
15950
- if (type == FileBufferType::MANAGED_BUFFER && bufsiz != Storage::FILE_HEADER_SIZE) {
15951
- bufsiz += Storage::BLOCK_HEADER_SIZE;
15953
+ void FileBuffer::ReallocBuffer(size_t new_size) {
15954
+ if (malloced_buffer) {
15955
+ malloced_buffer = allocator.ReallocateData(malloced_buffer, malloced_size, new_size);
15956
+ } else {
15957
+ malloced_buffer = allocator.AllocateData(new_size);
15952
15958
  }
15953
- malloced_size = bufsiz;
15954
- }
15955
-
15956
- void FileBuffer::Construct(uint64_t bufsiz) {
15957
15959
  if (!malloced_buffer) {
15958
15960
  throw std::bad_alloc();
15959
15961
  }
15962
+ malloced_size = new_size;
15960
15963
  internal_buffer = malloced_buffer;
15961
15964
  internal_size = malloced_size;
15962
- buffer = internal_buffer + Storage::BLOCK_HEADER_SIZE;
15963
- size = internal_size - Storage::BLOCK_HEADER_SIZE;
15965
+ // Caller must update these.
15966
+ buffer = nullptr;
15967
+ size = 0;
15964
15968
  }
15965
15969
 
15966
- void FileBuffer::Resize(uint64_t bufsiz) {
15967
- D_ASSERT(type == FileBufferType::MANAGED_BUFFER);
15968
- auto old_size = malloced_size;
15969
- SetMallocedSize(bufsiz);
15970
- malloced_buffer = allocator.ReallocateData(malloced_buffer, old_size, malloced_size);
15971
- Construct(bufsiz);
15970
+ void FileBuffer::Resize(uint64_t new_size) {
15971
+ {
15972
+ // TODO: All the logic here is specific to SingleFileBlockManager.
15973
+ // and should be moved there, via a specific implementation of FileBuffer.
15974
+ //
15975
+ // make room for the block header (if this is not the db file header)
15976
+ if (type == FileBufferType::MANAGED_BUFFER && new_size != Storage::FILE_HEADER_SIZE) {
15977
+ new_size += Storage::BLOCK_HEADER_SIZE;
15978
+ // If we don't write/read an entire block, our checksum won't match.
15979
+ new_size = AlignValue<uint32_t, Storage::BLOCK_ALLOC_SIZE>(new_size);
15980
+ }
15981
+ new_size = AlignValue<uint32_t, Storage::SECTOR_SIZE>(new_size);
15982
+ ReallocBuffer(new_size);
15983
+ }
15984
+
15985
+ if (new_size > 0) {
15986
+ buffer = internal_buffer + Storage::BLOCK_HEADER_SIZE;
15987
+ size = internal_size - Storage::BLOCK_HEADER_SIZE;
15988
+ }
15972
15989
  }
15973
15990
 
15974
15991
  void FileBuffer::Read(FileHandle &handle, uint64_t location) {
@@ -41725,8 +41742,8 @@ void ColumnDataAllocator::AllocateBlock() {
41725
41742
  D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR);
41726
41743
  BlockMetaData data;
41727
41744
  data.size = 0;
41728
- data.capacity = Storage::BLOCK_ALLOC_SIZE;
41729
- data.handle = alloc.buffer_manager->RegisterMemory(Storage::BLOCK_ALLOC_SIZE, false);
41745
+ data.capacity = Storage::BLOCK_SIZE;
41746
+ data.handle = alloc.buffer_manager->RegisterMemory(Storage::BLOCK_SIZE, false);
41730
41747
  blocks.push_back(move(data));
41731
41748
  }
41732
41749
 
@@ -61613,6 +61630,7 @@ void JoinHashTable::InitializePointerTable() {
61613
61630
  // allocate the HT if not yet done
61614
61631
  hash_map = buffer_manager.Allocate(capacity * sizeof(data_ptr_t));
61615
61632
  }
61633
+ D_ASSERT(hash_map.GetFileBuffer().size >= capacity * sizeof(data_ptr_t));
61616
61634
 
61617
61635
  // initialize HT with all-zero entries
61618
61636
  memset(hash_map.Ptr(), 0, capacity * sizeof(data_ptr_t));
@@ -194234,12 +194252,16 @@ bool ArenaAllocator::IsEmpty() {
194234
194252
  namespace duckdb {
194235
194253
 
194236
194254
  Block::Block(Allocator &allocator, block_id_t id)
194237
- : FileBuffer(allocator, FileBufferType::BLOCK, Storage::BLOCK_ALLOC_SIZE), id(id) {
194255
+ : FileBuffer(allocator, FileBufferType::BLOCK, Storage::BLOCK_SIZE), id(id) {
194256
+ }
194257
+
194258
+ Block::Block(Allocator &allocator, block_id_t id, uint32_t internal_size)
194259
+ : FileBuffer(allocator, FileBufferType::BLOCK, internal_size), id(id) {
194260
+ D_ASSERT((GetMallocedSize() & (Storage::SECTOR_SIZE - 1)) == 0);
194238
194261
  }
194239
194262
 
194240
194263
  Block::Block(FileBuffer &source, block_id_t id) : FileBuffer(source, FileBufferType::BLOCK), id(id) {
194241
- D_ASSERT(GetMallocedSize() == Storage::BLOCK_ALLOC_SIZE);
194242
- D_ASSERT(size == Storage::BLOCK_SIZE);
194264
+ D_ASSERT((GetMallocedSize() & (Storage::SECTOR_SIZE - 1)) == 0);
194243
194265
  }
194244
194266
 
194245
194267
  } // namespace duckdb
@@ -194478,7 +194500,8 @@ BlockHandle::~BlockHandle() {
194478
194500
  block_manager.UnregisterBlock(block_id, can_destroy);
194479
194501
  }
194480
194502
 
194481
- unique_ptr<Block> AllocateBlock(Allocator &allocator, unique_ptr<FileBuffer> reusable_buffer, block_id_t block_id) {
194503
+ unique_ptr<Block> AllocateBlock(BlockManager &block_manager, unique_ptr<FileBuffer> reusable_buffer,
194504
+ block_id_t block_id) {
194482
194505
  if (reusable_buffer) {
194483
194506
  // re-usable buffer: re-use it
194484
194507
  if (reusable_buffer->type == FileBufferType::BLOCK) {
@@ -194487,18 +194510,19 @@ unique_ptr<Block> AllocateBlock(Allocator &allocator, unique_ptr<FileBuffer> reu
194487
194510
  block.id = block_id;
194488
194511
  return unique_ptr_cast<FileBuffer, Block>(move(reusable_buffer));
194489
194512
  }
194490
- auto block = make_unique<Block>(*reusable_buffer, block_id);
194513
+ auto block = block_manager.CreateBlock(block_id, reusable_buffer.get());
194491
194514
  reusable_buffer.reset();
194492
194515
  return block;
194493
194516
  } else {
194494
194517
  // no re-usable buffer: allocate a new block
194495
- return make_unique<Block>(allocator, block_id);
194518
+ return block_manager.CreateBlock(block_id, nullptr);
194496
194519
  }
194497
194520
  }
194498
194521
 
194499
- unique_ptr<FileBuffer> AllocateManagedBuffer(DatabaseInstance &db, unique_ptr<FileBuffer> reusable_buffer, idx_t size) {
194500
- if (reusable_buffer) {
194501
- auto tmp = move(reusable_buffer);
194522
+ unique_ptr<FileBuffer> BufferManager::ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source) {
194523
+ if (source) {
194524
+ auto tmp = move(source);
194525
+ D_ASSERT(tmp->size == size);
194502
194526
  return make_unique<FileBuffer>(*tmp, FileBufferType::MANAGED_BUFFER);
194503
194527
  } else {
194504
194528
  // no re-usable buffer: allocate a new buffer
@@ -194515,8 +194539,7 @@ BufferHandle BlockHandle::Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileB
194515
194539
 
194516
194540
  auto &block_manager = handle->block_manager;
194517
194541
  if (handle->block_id < MAXIMUM_BLOCK) {
194518
- auto block = AllocateBlock(Allocator::Get(block_manager.buffer_manager.GetDatabase()), move(reusable_buffer),
194519
- handle->block_id);
194542
+ auto block = AllocateBlock(block_manager, move(reusable_buffer), handle->block_id);
194520
194543
  block_manager.Read(*block);
194521
194544
  handle->buffer = move(block);
194522
194545
  } else {
@@ -194670,6 +194693,10 @@ shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(block_id_t block_id, s
194670
194693
  D_ASSERT(old_block->state == BlockState::BLOCK_LOADED);
194671
194694
  D_ASSERT(old_block->buffer);
194672
194695
 
194696
+ // Temp buffers can be larger than the storage block size. But persistent buffers
194697
+ // cannot.
194698
+ D_ASSERT(old_block->buffer->AllocSize() <= Storage::BLOCK_ALLOC_SIZE);
194699
+
194673
194700
  // register a block with the new block id
194674
194701
  auto new_block = RegisterBlock(block_id);
194675
194702
  D_ASSERT(new_block->state == BlockState::BLOCK_UNLOADED);
@@ -194677,7 +194704,7 @@ shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(block_id_t block_id, s
194677
194704
 
194678
194705
  // move the data from the old block into data for the new block
194679
194706
  new_block->state = BlockState::BLOCK_LOADED;
194680
- new_block->buffer = make_unique<Block>(*old_block->buffer, block_id);
194707
+ new_block->buffer = CreateBlock(block_id, old_block->buffer.get());
194681
194708
 
194682
194709
  // clear the old buffer and unload it
194683
194710
  old_block->buffer.reset();
@@ -194695,7 +194722,7 @@ shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(block_id_t block_id, s
194695
194722
  }
194696
194723
 
194697
194724
  shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can_destroy) {
194698
- auto alloc_size = block_size + Storage::BLOCK_HEADER_SIZE;
194725
+ auto alloc_size = AlignValue<idx_t, 1 << 12>(block_size + Storage::BLOCK_HEADER_SIZE);
194699
194726
  // first evict blocks until we have enough memory to store this buffer
194700
194727
  unique_ptr<FileBuffer> reusable_buffer;
194701
194728
  if (!EvictBlocks(alloc_size, maximum_memory, &reusable_buffer)) {
@@ -194703,7 +194730,7 @@ shared_ptr<BlockHandle> BufferManager::RegisterMemory(idx_t block_size, bool can
194703
194730
  GetUsedMemory(), GetMaxMemory(), InMemoryWarning());
194704
194731
  }
194705
194732
 
194706
- auto buffer = AllocateManagedBuffer(db, move(reusable_buffer), block_size);
194733
+ auto buffer = ConstructManagedBuffer(block_size, move(reusable_buffer));
194707
194734
 
194708
194735
  // create a new block pointer for this block
194709
194736
  return make_shared<BlockHandle>(*temp_block_manager, ++temporary_id, move(buffer), can_destroy, block_size);
@@ -194876,9 +194903,9 @@ void BufferManager::SetLimit(idx_t limit) {
194876
194903
  //===--------------------------------------------------------------------===//
194877
194904
  // Temporary File Management
194878
194905
  //===--------------------------------------------------------------------===//
194879
- unique_ptr<FileBuffer> ReadTemporaryBufferInternal(DatabaseInstance &db, FileHandle &handle, idx_t position, idx_t size,
194880
- unique_ptr<FileBuffer> reusable_buffer) {
194881
- auto buffer = AllocateManagedBuffer(db, move(reusable_buffer), size);
194906
+ unique_ptr<FileBuffer> ReadTemporaryBufferInternal(BufferManager &buffer_manager, FileHandle &handle, idx_t position,
194907
+ idx_t size, block_id_t id, unique_ptr<FileBuffer> reusable_buffer) {
194908
+ auto buffer = buffer_manager.ConstructManagedBuffer(size, move(reusable_buffer));
194882
194909
  buffer->Read(handle, position);
194883
194910
  return buffer;
194884
194911
  }
@@ -194999,8 +195026,9 @@ public:
194999
195026
 
195000
195027
  unique_ptr<FileBuffer> ReadTemporaryBuffer(block_id_t id, idx_t block_index,
195001
195028
  unique_ptr<FileBuffer> reusable_buffer) {
195002
- auto buffer = ReadTemporaryBufferInternal(db, *handle, GetPositionInFile(block_index), Storage::BLOCK_SIZE,
195003
- move(reusable_buffer));
195029
+ auto buffer =
195030
+ ReadTemporaryBufferInternal(BufferManager::GetBufferManager(db), *handle, GetPositionInFile(block_index),
195031
+ Storage::BLOCK_SIZE, id, move(reusable_buffer));
195004
195032
  {
195005
195033
  // remove the block (and potentially truncate the temp file)
195006
195034
  TemporaryFileLock lock(file_lock);
@@ -195238,7 +195266,7 @@ unique_ptr<FileBuffer> BufferManager::ReadTemporaryBuffer(block_id_t id, unique_
195238
195266
  handle->Read(&block_size, sizeof(idx_t), 0);
195239
195267
 
195240
195268
  // now allocate a buffer of this size and read the data into that buffer
195241
- auto buffer = ReadTemporaryBufferInternal(db, *handle, sizeof(idx_t), block_size, move(reusable_buffer));
195269
+ auto buffer = ReadTemporaryBufferInternal(*this, *handle, sizeof(idx_t), block_size, id, move(reusable_buffer));
195242
195270
 
195243
195271
  handle.reset();
195244
195272
  DeleteTemporaryFile(id);
@@ -195950,6 +195978,7 @@ WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(BlockManager &block_manag
195950
195978
 
195951
195979
  WriteOverflowStringsToDisk::~WriteOverflowStringsToDisk() {
195952
195980
  if (offset > 0) {
195981
+ handle.GetFileBuffer().Resize(offset);
195953
195982
  block_manager.Write(handle.GetFileBuffer(), block_id);
195954
195983
  }
195955
195984
  }
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "0e999f15d"
15
- #define DUCKDB_VERSION "v0.5.2-dev1144"
14
+ #define DUCKDB_SOURCE_ID "596c5fe1e"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1149"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -1935,11 +1935,11 @@ enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2 };
1935
1935
  //! The FileBuffer represents a buffer that can be read or written to a Direct IO FileHandle.
1936
1936
  class FileBuffer {
1937
1937
  public:
1938
- //! Allocates a buffer of the specified size that is sector-aligned. bufsiz must be a multiple of
1939
- //! FileSystemConstants::FILE_BUFFER_BLOCK_SIZE. The content in this buffer can be written to FileHandles that have
1940
- //! been opened with DIRECT_IO on all operating systems, however, the entire buffer must be written to the file.
1941
- //! Note that the returned size is 8 bytes less than the allocation size to account for the checksum.
1942
- FileBuffer(Allocator &allocator, FileBufferType type, uint64_t bufsiz);
1938
+ //! Allocates a buffer of the specified size, with room for additional header bytes
1939
+ //! (typically 8 bytes). On return, this->AllocSize() >= this->size >= user_size.
1940
+ //! Our allocation size will always be page-aligned, which is necessary to support
1941
+ //! DIRECT_IO
1942
+ FileBuffer(Allocator &allocator, FileBufferType type, uint64_t user_size);
1943
1943
  FileBuffer(FileBuffer &source, FileBufferType type);
1944
1944
 
1945
1945
  virtual ~FileBuffer();
@@ -1957,18 +1957,20 @@ public:
1957
1957
  void Read(FileHandle &handle, uint64_t location);
1958
1958
  //! Read into the FileBuffer from the specified location. Automatically verifies the checksum, and throws an
1959
1959
  //! exception if the checksum does not match correctly.
1960
- void ReadAndChecksum(FileHandle &handle, uint64_t location);
1960
+ virtual void ReadAndChecksum(FileHandle &handle, uint64_t location);
1961
1961
  //! Write the contents of the FileBuffer to the specified location.
1962
1962
  void Write(FileHandle &handle, uint64_t location);
1963
1963
  //! Write the contents of the FileBuffer to the specified location. Automatically adds a checksum of the contents of
1964
1964
  //! the filebuffer in front of the written data.
1965
- void ChecksumAndWrite(FileHandle &handle, uint64_t location);
1965
+ virtual void ChecksumAndWrite(FileHandle &handle, uint64_t location);
1966
1966
 
1967
1967
  void Clear();
1968
1968
 
1969
- void Resize(uint64_t bufsiz);
1969
+ // Same rules as the constructor. We will add room for a header, in addition to
1970
+ // the requested user bytes. We will then sector-align the result.
1971
+ virtual void Resize(uint64_t user_size);
1970
1972
 
1971
- uint64_t AllocSize() {
1973
+ uint64_t AllocSize() const {
1972
1974
  return internal_size;
1973
1975
  }
1974
1976
 
@@ -1978,6 +1980,8 @@ protected:
1978
1980
  //! The aligned size as passed to the constructor. This is the size that is read or written to disk.
1979
1981
  uint64_t internal_size;
1980
1982
 
1983
+ void ReallocBuffer(size_t malloc_size);
1984
+
1981
1985
  private:
1982
1986
  //! The buffer that was actually malloc'd, i.e. the pointer that must be freed when the FileBuffer is destroyed
1983
1987
  data_ptr_t malloced_buffer;
@@ -1987,10 +1991,7 @@ protected:
1987
1991
  uint64_t GetMallocedSize() {
1988
1992
  return malloced_size;
1989
1993
  }
1990
- //! Sets malloced_size given the requested buffer size
1991
- void SetMallocedSize(uint64_t &bufsiz);
1992
- //! Constructs the Filebuffer object
1993
- void Construct(uint64_t bufsiz);
1994
+ void Init();
1994
1995
  };
1995
1996
 
1996
1997
  } // namespace duckdb
@@ -3849,7 +3850,7 @@ using block_id_t = int64_t;
3849
3850
  //! file.
3850
3851
  struct MainHeader {
3851
3852
  static constexpr idx_t MAGIC_BYTE_SIZE = 4;
3852
- static constexpr idx_t MAGIC_BYTE_OFFSET = sizeof(uint64_t);
3853
+ static constexpr idx_t MAGIC_BYTE_OFFSET = Storage::BLOCK_HEADER_SIZE;
3853
3854
  static constexpr idx_t FLAG_COUNT = 4;
3854
3855
  // the magic bytes in front of the file
3855
3856
  // should be "DUCK"
@@ -20585,6 +20586,7 @@ namespace duckdb {
20585
20586
  class Block : public FileBuffer {
20586
20587
  public:
20587
20588
  Block(Allocator &allocator, block_id_t id);
20589
+ Block(Allocator &allocator, block_id_t id, uint32_t internal_size);
20588
20590
  Block(FileBuffer &source, block_id_t id);
20589
20591
 
20590
20592
  block_id_t id;
@@ -20593,8 +20595,8 @@ public:
20593
20595
  struct BlockPointer {
20594
20596
  BlockPointer(block_id_t block_id_p, uint32_t offset_p) : block_id(block_id_p), offset(offset_p) {};
20595
20597
  BlockPointer() {};
20596
- block_id_t block_id;
20597
- uint32_t offset;
20598
+ block_id_t block_id {0};
20599
+ uint32_t offset {0};
20598
20600
  };
20599
20601
 
20600
20602
  } // namespace duckdb
@@ -25219,6 +25221,11 @@ public:
25219
25221
  return db;
25220
25222
  }
25221
25223
 
25224
+ //! Construct a managed buffer.
25225
+ //! The block_id is just used for internal tracking. It doesn't map to any actual
25226
+ //! BlockManager.
25227
+ virtual unique_ptr<FileBuffer> ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source);
25228
+
25222
25229
  private:
25223
25230
  //! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible
25224
25231
  //! (i.e. not enough blocks could be evicted)