duckdb 0.7.2-dev1671.0 → 0.7.2-dev1734.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-datefunc.cpp +20 -8
  3. package/src/duckdb/extension/icu/icu-strptime.cpp +117 -29
  4. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -0
  5. package/src/duckdb/src/common/local_file_system.cpp +13 -2
  6. package/src/duckdb/src/common/sort/partition_state.cpp +644 -0
  7. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  8. package/src/duckdb/src/execution/expression_executor_state.cpp +2 -3
  9. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +77 -849
  10. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +2 -2
  11. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  12. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +247 -0
  13. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -3
  14. package/src/duckdb/src/include/duckdb/planner/pragma_handler.hpp +3 -2
  15. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +1 -2
  16. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +77 -0
  17. package/src/duckdb/src/include/duckdb/storage/buffer/temporary_file_information.hpp +12 -0
  18. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +3 -59
  19. package/src/duckdb/src/main/extension/extension_install.cpp +11 -0
  20. package/src/duckdb/src/main/extension/extension_load.cpp +29 -3
  21. package/src/duckdb/src/main/query_profiler.cpp +1 -1
  22. package/src/duckdb/src/planner/pragma_handler.cpp +7 -5
  23. package/src/duckdb/src/storage/buffer/block_handle.cpp +128 -0
  24. package/src/duckdb/src/storage/buffer/block_manager.cpp +81 -0
  25. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +132 -0
  26. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +32 -0
  27. package/src/duckdb/src/storage/buffer_manager.cpp +0 -351
  28. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +3 -5
  29. package/src/duckdb/ub_src_common_sort.cpp +2 -0
  30. package/src/duckdb/ub_src_storage_buffer.cpp +8 -0
@@ -0,0 +1,128 @@
1
+ #include "duckdb/storage/buffer/block_handle.hpp"
2
+ #include "duckdb/storage/block.hpp"
3
+ #include "duckdb/storage/block_manager.hpp"
4
+ #include "duckdb/storage/buffer/buffer_handle.hpp"
5
+ #include "duckdb/storage/buffer_manager.hpp"
6
+ #include "duckdb/common/file_buffer.hpp"
7
+
8
+ namespace duckdb {
9
+
10
+ BlockHandle::BlockHandle(BlockManager &block_manager, block_id_t block_id_p)
11
+ : block_manager(block_manager), readers(0), block_id(block_id_p), buffer(nullptr), eviction_timestamp(0),
12
+ can_destroy(false), unswizzled(nullptr) {
13
+ eviction_timestamp = 0;
14
+ state = BlockState::BLOCK_UNLOADED;
15
+ memory_usage = Storage::BLOCK_ALLOC_SIZE;
16
+ }
17
+
18
+ BlockHandle::BlockHandle(BlockManager &block_manager, block_id_t block_id_p, unique_ptr<FileBuffer> buffer_p,
19
+ bool can_destroy_p, idx_t block_size, BufferPoolReservation &&reservation)
20
+ : block_manager(block_manager), readers(0), block_id(block_id_p), eviction_timestamp(0), can_destroy(can_destroy_p),
21
+ unswizzled(nullptr) {
22
+ buffer = std::move(buffer_p);
23
+ state = BlockState::BLOCK_LOADED;
24
+ memory_usage = block_size;
25
+ memory_charge = std::move(reservation);
26
+ }
27
+
28
+ BlockHandle::~BlockHandle() { // NOLINT: allow internal exceptions
29
+ // being destroyed, so any unswizzled pointers are just binary junk now.
30
+ unswizzled = nullptr;
31
+ auto &buffer_manager = block_manager.buffer_manager;
32
+ // no references remain to this block: erase
33
+ if (buffer && state == BlockState::BLOCK_LOADED) {
34
+ D_ASSERT(memory_charge.size > 0);
35
+ // the block is still loaded in memory: erase it
36
+ buffer.reset();
37
+ memory_charge.Resize(buffer_manager.buffer_pool.current_memory, 0);
38
+ } else {
39
+ D_ASSERT(memory_charge.size == 0);
40
+ }
41
+ buffer_manager.buffer_pool.PurgeQueue();
42
+ block_manager.UnregisterBlock(block_id, can_destroy);
43
+ }
44
+
45
+ unique_ptr<Block> AllocateBlock(BlockManager &block_manager, unique_ptr<FileBuffer> reusable_buffer,
46
+ block_id_t block_id) {
47
+ if (reusable_buffer) {
48
+ // re-usable buffer: re-use it
49
+ if (reusable_buffer->type == FileBufferType::BLOCK) {
50
+ // we can reuse the buffer entirely
51
+ auto &block = (Block &)*reusable_buffer;
52
+ block.id = block_id;
53
+ return unique_ptr_cast<FileBuffer, Block>(std::move(reusable_buffer));
54
+ }
55
+ auto block = block_manager.CreateBlock(block_id, reusable_buffer.get());
56
+ reusable_buffer.reset();
57
+ return block;
58
+ } else {
59
+ // no re-usable buffer: allocate a new block
60
+ return block_manager.CreateBlock(block_id, nullptr);
61
+ }
62
+ }
63
+
64
+ BufferHandle BlockHandle::Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileBuffer> reusable_buffer) {
65
+ if (handle->state == BlockState::BLOCK_LOADED) {
66
+ // already loaded
67
+ D_ASSERT(handle->buffer);
68
+ return BufferHandle(handle, handle->buffer.get());
69
+ }
70
+
71
+ auto &block_manager = handle->block_manager;
72
+ if (handle->block_id < MAXIMUM_BLOCK) {
73
+ auto block = AllocateBlock(block_manager, std::move(reusable_buffer), handle->block_id);
74
+ block_manager.Read(*block);
75
+ handle->buffer = std::move(block);
76
+ } else {
77
+ if (handle->can_destroy) {
78
+ return BufferHandle();
79
+ } else {
80
+ handle->buffer =
81
+ block_manager.buffer_manager.ReadTemporaryBuffer(handle->block_id, std::move(reusable_buffer));
82
+ }
83
+ }
84
+ handle->state = BlockState::BLOCK_LOADED;
85
+ return BufferHandle(handle, handle->buffer.get());
86
+ }
87
+
88
+ unique_ptr<FileBuffer> BlockHandle::UnloadAndTakeBlock() {
89
+ if (state == BlockState::BLOCK_UNLOADED) {
90
+ // already unloaded: nothing to do
91
+ return nullptr;
92
+ }
93
+ D_ASSERT(!unswizzled);
94
+ D_ASSERT(CanUnload());
95
+
96
+ if (block_id >= MAXIMUM_BLOCK && !can_destroy) {
97
+ // temporary block that cannot be destroyed: write to temporary file
98
+ block_manager.buffer_manager.WriteTemporaryBuffer(block_id, *buffer);
99
+ }
100
+ memory_charge.Resize(block_manager.buffer_manager.buffer_pool.current_memory, 0);
101
+ state = BlockState::BLOCK_UNLOADED;
102
+ return std::move(buffer);
103
+ }
104
+
105
+ void BlockHandle::Unload() {
106
+ auto block = UnloadAndTakeBlock();
107
+ block.reset();
108
+ }
109
+
110
+ bool BlockHandle::CanUnload() {
111
+ if (state == BlockState::BLOCK_UNLOADED) {
112
+ // already unloaded
113
+ return false;
114
+ }
115
+ if (readers > 0) {
116
+ // there are active readers
117
+ return false;
118
+ }
119
+ if (block_id >= MAXIMUM_BLOCK && !can_destroy && block_manager.buffer_manager.temp_directory.empty()) {
120
+ // in order to unload this block we need to write it to a temporary buffer
121
+ // however, no temporary directory is specified!
122
+ // hence we cannot unload the block
123
+ return false;
124
+ }
125
+ return true;
126
+ }
127
+
128
+ } // namespace duckdb
@@ -0,0 +1,81 @@
1
+ #include "duckdb/storage/block_manager.hpp"
2
+ #include "duckdb/storage/buffer_manager.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ shared_ptr<BlockHandle> BlockManager::RegisterBlock(block_id_t block_id, bool is_meta_block) {
7
+ lock_guard<mutex> lock(blocks_lock);
8
+ // check if the block already exists
9
+ auto entry = blocks.find(block_id);
10
+ if (entry != blocks.end()) {
11
+ // already exists: check if it hasn't expired yet
12
+ auto existing_ptr = entry->second.lock();
13
+ if (existing_ptr) {
14
+ //! it hasn't! return it
15
+ return existing_ptr;
16
+ }
17
+ }
18
+ // create a new block pointer for this block
19
+ auto result = make_shared<BlockHandle>(*this, block_id);
20
+ // for meta block, cache the handle in meta_blocks
21
+ if (is_meta_block) {
22
+ meta_blocks[block_id] = result;
23
+ }
24
+ // register the block pointer in the set of blocks as a weak pointer
25
+ blocks[block_id] = weak_ptr<BlockHandle>(result);
26
+ return result;
27
+ }
28
+
29
+ void BlockManager::ClearMetaBlockHandles() {
30
+ meta_blocks.clear();
31
+ }
32
+
33
+ shared_ptr<BlockHandle> BlockManager::ConvertToPersistent(block_id_t block_id, shared_ptr<BlockHandle> old_block) {
34
+
35
+ // pin the old block to ensure we have it loaded in memory
36
+ auto old_handle = buffer_manager.Pin(old_block);
37
+ D_ASSERT(old_block->state == BlockState::BLOCK_LOADED);
38
+ D_ASSERT(old_block->buffer);
39
+
40
+ // Temp buffers can be larger than the storage block size. But persistent buffers
41
+ // cannot.
42
+ D_ASSERT(old_block->buffer->AllocSize() <= Storage::BLOCK_ALLOC_SIZE);
43
+
44
+ // register a block with the new block id
45
+ auto new_block = RegisterBlock(block_id);
46
+ D_ASSERT(new_block->state == BlockState::BLOCK_UNLOADED);
47
+ D_ASSERT(new_block->readers == 0);
48
+
49
+ // move the data from the old block into data for the new block
50
+ new_block->state = BlockState::BLOCK_LOADED;
51
+ new_block->buffer = CreateBlock(block_id, old_block->buffer.get());
52
+ new_block->memory_usage = old_block->memory_usage;
53
+ new_block->memory_charge = std::move(old_block->memory_charge);
54
+
55
+ // clear the old buffer and unload it
56
+ old_block->buffer.reset();
57
+ old_block->state = BlockState::BLOCK_UNLOADED;
58
+ old_block->memory_usage = 0;
59
+ old_handle.Destroy();
60
+ old_block.reset();
61
+
62
+ // persist the new block to disk
63
+ Write(*new_block->buffer, block_id);
64
+
65
+ buffer_manager.buffer_pool.AddToEvictionQueue(new_block);
66
+
67
+ return new_block;
68
+ }
69
+
70
+ void BlockManager::UnregisterBlock(block_id_t block_id, bool can_destroy) {
71
+ if (block_id >= MAXIMUM_BLOCK) {
72
+ // in-memory buffer: buffer could have been offloaded to disk: remove the file
73
+ buffer_manager.DeleteTemporaryFile(block_id);
74
+ } else {
75
+ lock_guard<mutex> lock(blocks_lock);
76
+ // on-disk block: erase from list of blocks in manager
77
+ blocks.erase(block_id);
78
+ }
79
+ }
80
+
81
+ } // namespace duckdb
@@ -0,0 +1,132 @@
1
+ #include "duckdb/storage/buffer/buffer_pool.hpp"
2
+ #include "duckdb/parallel/concurrentqueue.hpp"
3
+ #include "duckdb/common/exception.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ typedef duckdb_moodycamel::ConcurrentQueue<BufferEvictionNode> eviction_queue_t;
8
+
9
+ struct EvictionQueue {
10
+ eviction_queue_t q;
11
+ };
12
+
13
+ bool BufferEvictionNode::CanUnload(BlockHandle &handle_p) {
14
+ if (timestamp != handle_p.eviction_timestamp) {
15
+ // handle was used in between
16
+ return false;
17
+ }
18
+ return handle_p.CanUnload();
19
+ }
20
+
21
+ shared_ptr<BlockHandle> BufferEvictionNode::TryGetBlockHandle() {
22
+ auto handle_p = handle.lock();
23
+ if (!handle_p) {
24
+ // BlockHandle has been destroyed
25
+ return nullptr;
26
+ }
27
+ if (!CanUnload(*handle_p)) {
28
+ // handle was used in between
29
+ return nullptr;
30
+ }
31
+ // this is the latest node in the queue with this handle
32
+ return handle_p;
33
+ }
34
+
35
+ BufferPool::BufferPool(idx_t maximum_memory)
36
+ : current_memory(0), maximum_memory(maximum_memory), queue(make_uniq<EvictionQueue>()), queue_insertions(0) {
37
+ }
38
+ BufferPool::~BufferPool() {
39
+ }
40
+
41
+ void BufferPool::AddToEvictionQueue(shared_ptr<BlockHandle> &handle) {
42
+ constexpr int INSERT_INTERVAL = 1024;
43
+
44
+ D_ASSERT(handle->readers == 0);
45
+ handle->eviction_timestamp++;
46
+ // After each 1024 insertions, run through the queue and purge.
47
+ if ((++queue_insertions % INSERT_INTERVAL) == 0) {
48
+ PurgeQueue();
49
+ }
50
+ queue->q.enqueue(BufferEvictionNode(weak_ptr<BlockHandle>(handle), handle->eviction_timestamp));
51
+ }
52
+
53
+ idx_t BufferPool::GetUsedMemory() {
54
+ return current_memory;
55
+ }
56
+ idx_t BufferPool::GetMaxMemory() {
57
+ return maximum_memory;
58
+ }
59
+
60
+ BufferPool::EvictionResult BufferPool::EvictBlocks(idx_t extra_memory, idx_t memory_limit,
61
+ unique_ptr<FileBuffer> *buffer) {
62
+ BufferEvictionNode node;
63
+ TempBufferPoolReservation r(current_memory, extra_memory);
64
+ while (current_memory > memory_limit) {
65
+ // get a block to unpin from the queue
66
+ if (!queue->q.try_dequeue(node)) {
67
+ // Failed to reserve. Adjust size of temp reservation to 0.
68
+ r.Resize(current_memory, 0);
69
+ return {false, std::move(r)};
70
+ }
71
+ // get a reference to the underlying block pointer
72
+ auto handle = node.TryGetBlockHandle();
73
+ if (!handle) {
74
+ continue;
75
+ }
76
+ // we might be able to free this block: grab the mutex and check if we can free it
77
+ lock_guard<mutex> lock(handle->lock);
78
+ if (!node.CanUnload(*handle)) {
79
+ // something changed in the mean-time, bail out
80
+ continue;
81
+ }
82
+ // hooray, we can unload the block
83
+ if (buffer && handle->buffer->AllocSize() == extra_memory) {
84
+ // we can actually re-use the memory directly!
85
+ *buffer = handle->UnloadAndTakeBlock();
86
+ return {true, std::move(r)};
87
+ } else {
88
+ // release the memory and mark the block as unloaded
89
+ handle->Unload();
90
+ }
91
+ }
92
+ return {true, std::move(r)};
93
+ }
94
+
95
+ void BufferPool::PurgeQueue() {
96
+ BufferEvictionNode node;
97
+ while (true) {
98
+ if (!queue->q.try_dequeue(node)) {
99
+ break;
100
+ }
101
+ auto handle = node.TryGetBlockHandle();
102
+ if (!handle) {
103
+ continue;
104
+ } else {
105
+ queue->q.enqueue(std::move(node));
106
+ break;
107
+ }
108
+ }
109
+ }
110
+
111
+ void BufferPool::SetLimit(idx_t limit, const char *exception_postscript) {
112
+ lock_guard<mutex> l_lock(limit_lock);
113
+ // try to evict until the limit is reached
114
+ if (!EvictBlocks(0, limit).success) {
115
+ throw OutOfMemoryException(
116
+ "Failed to change memory limit to %lld: could not free up enough memory for the new limit%s", limit,
117
+ exception_postscript);
118
+ }
119
+ idx_t old_limit = maximum_memory;
120
+ // set the global maximum memory to the new limit if successful
121
+ maximum_memory = limit;
122
+ // evict again
123
+ if (!EvictBlocks(0, limit).success) {
124
+ // failed: go back to old limit
125
+ maximum_memory = old_limit;
126
+ throw OutOfMemoryException(
127
+ "Failed to change memory limit to %lld: could not free up enough memory for the new limit%s", limit,
128
+ exception_postscript);
129
+ }
130
+ }
131
+
132
+ } // namespace duckdb
@@ -0,0 +1,32 @@
1
+ #include "duckdb/storage/buffer/block_handle.hpp"
2
+
3
+ namespace duckdb {
4
+
5
+ BufferPoolReservation::BufferPoolReservation(BufferPoolReservation &&src) noexcept {
6
+ size = src.size;
7
+ src.size = 0;
8
+ }
9
+
10
+ BufferPoolReservation &BufferPoolReservation::operator=(BufferPoolReservation &&src) noexcept {
11
+ size = src.size;
12
+ src.size = 0;
13
+ return *this;
14
+ }
15
+
16
+ BufferPoolReservation::~BufferPoolReservation() {
17
+ D_ASSERT(size == 0);
18
+ }
19
+
20
+ void BufferPoolReservation::Resize(atomic<idx_t> &counter, idx_t new_size) {
21
+ int64_t delta = (int64_t)new_size - size;
22
+ D_ASSERT(delta > 0 || (int64_t)counter >= -delta);
23
+ counter += delta;
24
+ size = new_size;
25
+ }
26
+
27
+ void BufferPoolReservation::Merge(BufferPoolReservation &&src) {
28
+ size += src.size;
29
+ src.size = 0;
30
+ }
31
+
32
+ } // namespace duckdb