duckdb 0.8.2-dev4514.0 → 0.8.2-dev4623.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/duckdb.js +11 -1
- package/package.json +3 -1
- package/src/connection.cpp +48 -7
- package/src/duckdb/src/catalog/catalog.cpp +5 -0
- package/src/duckdb/src/catalog/duck_catalog.cpp +4 -0
- package/src/duckdb/src/common/enum_util.cpp +24 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +213 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +59 -38
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/table/arrow.cpp +18 -13
- package/src/duckdb/src/function/table/read_csv.cpp +3 -130
- package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +83 -0
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +5 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +24 -0
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +36 -4
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +10 -4
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +27 -4
- package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +22 -1
- package/src/duckdb/src/include/duckdb/storage/database_size.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +6 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
- package/src/duckdb/src/main/connection.cpp +4 -6
- package/src/duckdb/src/main/extension/extension_install.cpp +2 -1
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +28 -9
- package/src/duckdb/src/main/relation/table_function_relation.cpp +8 -2
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +47 -10
- package/src/duckdb/src/storage/checkpoint_manager.cpp +0 -2
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +6 -1
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +62 -12
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/data_pointer.cpp +20 -0
- package/src/duckdb/src/storage/local_storage.cpp +3 -7
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -15
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +4 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +15 -9
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +5 -0
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -0
- package/src/duckdb/src/storage/table/column_data.cpp +17 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -8
- package/src/duckdb/src/storage/table/column_segment.cpp +21 -12
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/duckdb/ub_src_storage.cpp +2 -0
- package/src/duckdb_node.hpp +1 -0
- package/test/close_hang.test.ts +39 -0
package/src/duckdb/src/storage/compression/string_uncompressed.cpp
CHANGED
@@ -3,6 +3,9 @@
 #include "duckdb/common/pair.hpp"
 #include "duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp"
 #include "miniz_wrapper.hpp"
+#include "duckdb/common/serializer/serializer.hpp"
+#include "duckdb/common/serializer/deserializer.hpp"
+#include "duckdb/storage/table/column_data.hpp"
 
 namespace duckdb {
 
@@ -141,9 +144,22 @@ void UncompressedStringStorage::StringFetchRow(ColumnSegment &segment, ColumnFet
 //===--------------------------------------------------------------------===//
 // Append
 //===--------------------------------------------------------------------===//
+struct SerializedStringSegmentState : public ColumnSegmentState {
+	SerializedStringSegmentState() {
+	}
+	explicit SerializedStringSegmentState(vector<block_id_t> blocks_p) : blocks(std::move(blocks_p)) {
+	}
+
+	vector<block_id_t> blocks;
+
+	void Serialize(Serializer &serializer) const override {
+		serializer.WriteProperty(1, "overflow_blocks", blocks);
+	}
+};
 
-unique_ptr<CompressedSegmentState>
-UncompressedStringStorage::StringInitSegment(ColumnSegment &segment, block_id_t block_id) {
+unique_ptr<CompressedSegmentState>
+UncompressedStringStorage::StringInitSegment(ColumnSegment &segment, block_id_t block_id,
+                                             optional_ptr<ColumnSegmentState> segment_state) {
 	auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
 	if (block_id == INVALID_BLOCK) {
 		auto handle = buffer_manager.Pin(segment.block);
@@ -152,7 +168,12 @@ unique_ptr<CompressedSegmentState> UncompressedStringStorage::StringInitSegment(
 		dictionary.end = segment.SegmentSize();
 		SetDictionary(segment, handle, dictionary);
 	}
-	return make_uniq<UncompressedStringSegmentState>();
+	auto result = make_uniq<UncompressedStringSegmentState>();
+	if (segment_state) {
+		auto &serialized_state = segment_state->Cast<SerializedStringSegmentState>();
+		result->on_disk_blocks = std::move(serialized_state.blocks);
+	}
+	return std::move(result);
 }
 
 idx_t UncompressedStringStorage::FinalizeAppend(ColumnSegment &segment, SegmentStatistics &stats) {
@@ -179,6 +200,32 @@ idx_t UncompressedStringStorage::FinalizeAppend(ColumnSegment &segment, SegmentS
 	return total_size;
 }
 
+//===--------------------------------------------------------------------===//
+// Serialization & Cleanup
+//===--------------------------------------------------------------------===//
+unique_ptr<ColumnSegmentState> UncompressedStringStorage::SerializeState(ColumnSegment &segment) {
+	auto &state = segment.GetSegmentState()->Cast<UncompressedStringSegmentState>();
+	if (state.on_disk_blocks.empty()) {
+		// no on-disk blocks - nothing to write
+		return nullptr;
+	}
+	return make_uniq<SerializedStringSegmentState>(state.on_disk_blocks);
+}
+
+unique_ptr<ColumnSegmentState> UncompressedStringStorage::DeserializeState(Deserializer &deserializer) {
+	auto result = make_uniq<SerializedStringSegmentState>();
+	deserializer.ReadProperty(1, "overflow_blocks", result->blocks);
+	return std::move(result);
+}
+
+void UncompressedStringStorage::CleanupState(ColumnSegment &segment) {
+	auto &state = segment.GetSegmentState()->Cast<UncompressedStringSegmentState>();
+	auto &block_manager = segment.GetBlockManager();
+	for (auto &block_id : state.on_disk_blocks) {
+		block_manager.MarkBlockAsModified(block_id);
+	}
+}
+
 //===--------------------------------------------------------------------===//
 // Get Function
 //===--------------------------------------------------------------------===//
@@ -192,7 +239,9 @@ CompressionFunction StringUncompressed::GetFunction(PhysicalType data_type) {
 	                           UncompressedStringStorage::StringScanPartial, UncompressedStringStorage::StringFetchRow,
 	                           UncompressedFunctions::EmptySkip, UncompressedStringStorage::StringInitSegment,
 	                           UncompressedStringStorage::StringInitAppend, UncompressedStringStorage::StringAppend,
-	                           UncompressedStringStorage::FinalizeAppend);
+	                           UncompressedStringStorage::FinalizeAppend, nullptr,
+	                           UncompressedStringStorage::SerializeState, UncompressedStringStorage::DeserializeState,
+	                           UncompressedStringStorage::CleanupState);
 }
 
 //===--------------------------------------------------------------------===//
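Taken together, these hunks give a compression function a way to persist per-segment state across checkpoints: SerializeState converts the in-memory UncompressedStringSegmentState into a SerializedStringSegmentState that travels with the column's DataPointer, DeserializeState rebuilds it when the database is loaded, and CleanupState releases the overflow blocks when the segment is dropped. The three callbacks are registered as the new trailing arguments of the CompressionFunction above. Below is a minimal sketch of the same pattern for a hypothetical compression scheme; the My* names are illustrative, and the callback signatures are inferred from the calls visible elsewhere in this diff (FlushSegment, ColumnSegmentState::Deserialize, CommitDropSegment), not from compression_function.hpp, whose hunk is not shown here.

#include "duckdb/common/serializer/deserializer.hpp"
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/function/compression_function.hpp"
#include "duckdb/storage/data_pointer.hpp"
#include "duckdb/storage/table/column_segment.hpp"

namespace duckdb {

// In-memory state kept while the segment is live (illustrative).
struct MyRuntimeState : public CompressedSegmentState {
	vector<block_id_t> extra_blocks; // blocks this segment owns besides its main block
};

// Persisted form, written next to the DataPointer at checkpoint time (illustrative).
struct MySerializedState : public ColumnSegmentState {
	vector<block_id_t> blocks;

	void Serialize(Serializer &serializer) const override {
		serializer.WriteProperty(1, "blocks", blocks);
	}
};

// serialize_state: called while a segment is checkpointed (FlushSegment, later in this diff).
unique_ptr<ColumnSegmentState> MySerializeState(ColumnSegment &segment) {
	auto &state = segment.GetSegmentState()->Cast<MyRuntimeState>();
	if (state.extra_blocks.empty()) {
		return nullptr; // nothing to persist for this segment
	}
	auto result = make_uniq<MySerializedState>();
	result->blocks = state.extra_blocks;
	return std::move(result);
}

// deserialize_state: called from ColumnSegmentState::Deserialize when the pointer is read back.
unique_ptr<ColumnSegmentState> MyDeserializeState(Deserializer &deserializer) {
	auto result = make_uniq<MySerializedState>();
	deserializer.ReadProperty(1, "blocks", result->blocks);
	return std::move(result);
}

// cleanup_state: called from ColumnSegment::CommitDropSegment to release the extra blocks.
void MyCleanupState(ColumnSegment &segment) {
	auto &state = segment.GetSegmentState()->Cast<MyRuntimeState>();
	for (auto block_id : state.extra_blocks) {
		segment.GetBlockManager().MarkBlockAsModified(block_id);
	}
}

} // namespace duckdb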
@@ -226,7 +275,7 @@ void UncompressedStringStorage::WriteString(ColumnSegment &segment, string_t str
 	auto &state = segment.GetSegmentState()->Cast<UncompressedStringSegmentState>();
 	if (state.overflow_writer) {
 		// overflow writer is set: write string there
-		state.overflow_writer->WriteString(string, result_block, result_offset);
+		state.overflow_writer->WriteString(state, string, result_block, result_offset);
 	} else {
 		// default overflow behavior: use in-memory buffer to store the overflow string
 		WriteStringMemory(segment, string, result_block, result_offset);
@@ -251,7 +300,7 @@ void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string
 	new_block->size = alloc_size;
 	// allocate an in-memory buffer for it
 	handle = buffer_manager.Allocate(alloc_size, false, &block);
-	state.overflow_blocks …
+	state.overflow_blocks.insert(make_pair(block->BlockId(), reference<StringBlock>(*new_block)));
 	new_block->block = std::move(block);
 	new_block->next = std::move(state.head);
 	state.head = std::move(new_block);
@@ -282,7 +331,7 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
 	if (block < MAXIMUM_BLOCK) {
 		// read the overflow string from disk
 		// pin the initial handle and read the length
-		auto block_handle = …
+		auto block_handle = state.GetHandle(block_manager, block);
 		auto handle = buffer_manager.Pin(block_handle);
 
 		// read header
@@ -295,7 +344,7 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
 		unsafe_unique_array<data_t> decompression_buffer;
 
 		// If string is in single block we decompress straight from it, else we copy first
-		if (remaining <= …
+		if (remaining <= WriteOverflowStringsToDisk::STRING_SPACE - offset) {
 			decompression_ptr = handle.Ptr() + offset;
 		} else {
 			decompression_buffer = make_unsafe_uniq_array<data_t>(compressed_size);
@@ -303,7 +352,7 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
 
 		// now append the string to the single buffer
 		while (remaining > 0) {
-			idx_t to_write = MinValue<idx_t>(remaining, …
+			idx_t to_write = MinValue<idx_t>(remaining, WriteOverflowStringsToDisk::STRING_SPACE - offset);
 			memcpy(target_ptr, handle.Ptr() + offset, to_write);
 
 			remaining -= to_write;
@@ -311,8 +360,9 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
 			target_ptr += to_write;
 			if (remaining > 0) {
 				// read the next block
-				…
-				…
+				D_ASSERT(offset == WriteOverflowStringsToDisk::STRING_SPACE);
+				block_id_t next_block = Load<block_id_t>(handle.Ptr() + WriteOverflowStringsToDisk::STRING_SPACE);
+				block_handle = state.GetHandle(block_manager, next_block);
 				handle = buffer_manager.Pin(block_handle);
 				offset = 0;
 			}
@@ -336,7 +386,7 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
 		// first pin the handle, if it is not pinned yet
 		auto entry = state.overflow_blocks.find(block);
 		D_ASSERT(entry != state.overflow_blocks.end());
-		auto handle = buffer_manager.Pin(entry->second …
+		auto handle = buffer_manager.Pin(entry->second.get().block);
 		auto final_buffer = handle.Ptr();
 		StringVector::AddHandle(result, std::move(handle));
 		return ReadStringWithLength(final_buffer, offset);
package/src/duckdb/src/storage/compression/validity_uncompressed.cpp
CHANGED
@@ -398,7 +398,8 @@ static unique_ptr<CompressionAppendState> ValidityInitAppend(ColumnSegment &segm
 	return make_uniq<CompressionAppendState>(std::move(handle));
 }
 
-unique_ptr<CompressedSegmentState> ValidityInitSegment(ColumnSegment &segment, block_id_t block_id) {
+unique_ptr<CompressedSegmentState> ValidityInitSegment(ColumnSegment &segment, block_id_t block_id,
+                                                       optional_ptr<ColumnSegmentState> segment_state) {
 	auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
 	if (block_id == INVALID_BLOCK) {
 		auto handle = buffer_manager.Pin(segment.block);
package/src/duckdb/src/storage/data_pointer.cpp
ADDED
@@ -0,0 +1,20 @@
+#include "duckdb/storage/data_pointer.hpp"
+#include "duckdb/common/serializer/serializer.hpp"
+#include "duckdb/common/serializer/deserializer.hpp"
+#include "duckdb/main/config.hpp"
+#include "duckdb/function/compression_function.hpp"
+
+namespace duckdb {
+
+unique_ptr<ColumnSegmentState> ColumnSegmentState::Deserialize(Deserializer &deserializer) {
+	auto compression_type = deserializer.Get<CompressionType>();
+	auto &db = deserializer.Get<DatabaseInstance &>();
+	auto &type = deserializer.Get<LogicalType &>();
+	auto compression_function = DBConfig::GetConfig(db).GetCompressionFunction(compression_type, type.InternalType());
+	if (!compression_function || !compression_function->deserialize_state) {
+		throw SerializationException("Deserializing a ColumnSegmentState but could not find deserialize method");
+	}
+	return compression_function->deserialize_state(deserializer);
+}
+
+} // namespace duckdb
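ColumnSegmentState::Deserialize above takes nothing but the Deserializer; it picks up the compression type, the database and the column type from typed context that callers attach to the deserializer before reading the nested property and detach afterwards (see the deserialization_data.hpp entry in the file list and the DataPointer::Deserialize and ColumnData::DeserializeColumn hunks further down). A rough, self-contained sketch of that Set/Get/Unset idea, assuming nothing about DuckDB's actual implementation:

#include <any>
#include <stdexcept>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>

// Toy typed context: one slot per C++ type, set by the caller before a nested
// Deserialize call and removed again afterwards. DuckDB's real version lives in
// deserialization_data.hpp and differs in detail (for instance it can hold
// references, as the Set<LogicalType &> calls further down show).
class ToyContext {
public:
	template <class T>
	void Set(T value) {
		data[std::type_index(typeid(T))] = std::move(value);
	}

	template <class T>
	T Get() const {
		auto entry = data.find(std::type_index(typeid(T)));
		if (entry == data.end()) {
			throw std::runtime_error("context value of this type was not set");
		}
		return std::any_cast<T>(entry->second);
	}

	template <class T>
	void Unset() {
		data.erase(std::type_index(typeid(T)));
	}

private:
	std::unordered_map<std::type_index, std::any> data;
};

// Usage mirroring the pattern in this diff:
//   ctx.Set<int>(42);        // e.g. the compression type, set before the nested read
//   auto v = ctx.Get<int>(); // nested code reads it back
//   ctx.Unset<int>();        // detached once the nested read is done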
package/src/duckdb/src/storage/local_storage.cpp
CHANGED
@@ -74,17 +74,13 @@ LocalTableStorage::~LocalTableStorage() {
 
 void LocalTableStorage::InitializeScan(CollectionScanState &state, optional_ptr<TableFilterSet> table_filters) {
 	if (row_groups->GetTotalRows() == 0) {
-		…
-		return;
+		throw InternalException("No rows in LocalTableStorage row group for scan");
 	}
 	row_groups->InitializeScan(state, state.GetColumnIds(), table_filters.get());
 }
 
 idx_t LocalTableStorage::EstimatedSize() {
 	idx_t appended_rows = row_groups->GetTotalRows() - deleted_rows;
-	if (appended_rows == 0) {
-		return 0;
-	}
 	idx_t row_size = 0;
 	auto &types = row_groups->GetTypes();
 	for (auto &type : types) {
@@ -169,10 +165,10 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen
 		} catch (Exception &ex) {
 			error = PreservedError(ex);
 			return false;
-		} catch (std::exception &ex) {
+		} catch (std::exception &ex) { // LCOV_EXCL_START
 			error = PreservedError(ex);
 			return false;
-		}
+		} // LCOV_EXCL_STOP
 
 		current_row += chunk.size();
 		if (current_row >= append_state.current_row) {
package/src/duckdb/src/storage/metadata/metadata_manager.cpp
CHANGED
@@ -3,6 +3,7 @@
 #include "duckdb/storage/buffer/block_handle.hpp"
 #include "duckdb/common/serializer/write_stream.hpp"
 #include "duckdb/common/serializer/read_stream.hpp"
+#include "duckdb/storage/database_size.hpp"
 
 namespace duckdb {
 
@@ -81,11 +82,13 @@ block_id_t MetadataManager::AllocateNewBlock() {
 	auto new_block_id = GetNextBlockId();
 
 	MetadataBlock new_block;
-	buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block.block);
+	auto handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block.block);
 	new_block.block_id = new_block_id;
 	for (idx_t i = 0; i < METADATA_BLOCK_COUNT; i++) {
 		new_block.free_blocks.push_back(METADATA_BLOCK_COUNT - i - 1);
 	}
+	// zero-initialize the handle
+	memset(handle.Ptr(), 0, Storage::BLOCK_SIZE);
 	AddBlock(std::move(new_block));
 	return new_block_id;
 }
@@ -176,11 +179,6 @@ void MetadataManager::Flush() {
 	for (auto &kv : blocks) {
 		auto &block = kv.second;
 		auto handle = buffer_manager.Pin(block.block);
-		// zero-initialize any free blocks
-		for (auto free_block : block.free_blocks) {
-			memset(handle.Ptr() + free_block * MetadataManager::METADATA_BLOCK_SIZE, 0,
-			       MetadataManager::METADATA_BLOCK_SIZE);
-		}
 		// there are a few bytes left-over at the end of the block, zero-initialize them
 		memset(handle.Ptr() + total_metadata_size, 0, Storage::BLOCK_SIZE - total_metadata_size);
 		D_ASSERT(kv.first == block.block_id);
@@ -255,7 +253,6 @@ void MetadataBlock::FreeBlocksFromInteger(idx_t free_list) {
 }
 
 void MetadataManager::MarkBlocksAsModified() {
-
 	// for any blocks that were modified in the last checkpoint - set them to free blocks currently
 	for (auto &kv : modified_blocks) {
 		auto block_id = kv.first;
@@ -266,14 +263,14 @@ void MetadataManager::MarkBlocksAsModified() {
 		idx_t current_free_blocks = block.FreeBlocksToInteger();
 		// merge the current set of free blocks with the modified blocks
 		idx_t new_free_blocks = current_free_blocks | modified_list;
-		…
-		…
-		…
-		…
-		…
-		…
-		…
-		…
+		if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
+			// if new free_blocks is all blocks - mark entire block as modified
+			blocks.erase(entry);
+			block_manager.MarkBlockAsModified(block_id);
+		} else {
+			// set the new set of free blocks
+			block.FreeBlocksFromInteger(new_free_blocks);
+		}
 	}
 
 	modified_blocks.clear();
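The merge above treats a metadata block's free list as a bitmask with one bit per sub-block (FreeBlocksToInteger / FreeBlocksFromInteger): OR-ing in the sub-blocks that were modified since the last checkpoint gives the new free set, and once every bit is set the whole block can be handed back to the outer block manager instead of being kept around. A standalone illustration of that arithmetic, assuming 64 sub-blocks per metadata block, which is what the comparison against NumericLimits<idx_t>::Maximum() implies:

#include <cstdint>
#include <cstdio>
#include <limits>

// One bit per metadata sub-block; a set bit means "free".
int main() {
	using idx_t = uint64_t;

	idx_t current_free_blocks = 0xFFFFFFFF00000000ULL; // upper 32 sub-blocks were already free
	idx_t modified_list = 0x00000000FFFFFFFFULL;       // lower 32 were rewritten in the last checkpoint

	idx_t new_free_blocks = current_free_blocks | modified_list;
	if (new_free_blocks == std::numeric_limits<idx_t>::max()) {
		// every sub-block is free again: drop the whole metadata block and
		// return it to the block manager (blocks.erase + MarkBlockAsModified above)
		std::printf("whole block can be reused\n");
	} else {
		// otherwise only the free-list bitmask of the block is updated
		std::printf("partially free: %016llx\n", (unsigned long long)new_free_blocks);
	}
	return 0;
}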
@@ -301,6 +298,23 @@ void MetadataManager::ClearModifiedBlocks(const vector<MetaBlockPointer> &pointe
 	}
 }
 
+vector<MetadataBlockInfo> MetadataManager::GetMetadataInfo() const {
+	vector<MetadataBlockInfo> result;
+	for (auto &block : blocks) {
+		MetadataBlockInfo block_info;
+		block_info.block_id = block.second.block_id;
+		block_info.total_blocks = MetadataManager::METADATA_BLOCK_COUNT;
+		for (auto free_block : block.second.free_blocks) {
+			block_info.free_list.push_back(free_block);
+		}
+		std::sort(block_info.free_list.begin(), block_info.free_list.end());
+		result.push_back(std::move(block_info));
+	}
+	std::sort(result.begin(), result.end(),
+	          [](const MetadataBlockInfo &a, const MetadataBlockInfo &b) { return a.block_id < b.block_id; });
+	return result;
+}
+
 block_id_t MetadataManager::GetNextBlockId() {
 	return block_manager.GetFreeBlockId();
 }
package/src/duckdb/src/storage/serialization/serialize_storage.cpp
CHANGED
@@ -29,6 +29,7 @@ void DataPointer::Serialize(Serializer &serializer) const {
 	serializer.WriteProperty(102, "block_pointer", block_pointer);
 	serializer.WriteProperty(103, "compression_type", compression_type);
 	serializer.WriteProperty(104, "statistics", statistics);
+	serializer.WriteProperty(105, "segment_state", segment_state);
 }
 
 DataPointer DataPointer::Deserialize(Deserializer &deserializer) {
@@ -42,6 +43,9 @@ DataPointer DataPointer::Deserialize(Deserializer &deserializer) {
 	result.tuple_count = tuple_count;
 	result.block_pointer = block_pointer;
 	result.compression_type = compression_type;
+	deserializer.Set<CompressionType>(compression_type);
+	deserializer.ReadProperty(105, "segment_state", result.segment_state);
+	deserializer.Unset<CompressionType>();
 	return result;
 }
 
package/src/duckdb/src/storage/single_file_block_manager.cpp
CHANGED
@@ -391,18 +391,23 @@ void SingleFileBlockManager::Truncate() {
 vector<MetadataHandle> SingleFileBlockManager::GetFreeListBlocks() {
 	vector<MetadataHandle> free_list_blocks;
 
-	…
-	auto multi_use_blocks_size = sizeof(uint64_t) + (sizeof(block_id_t) + sizeof(uint32_t)) * multi_use_blocks.size();
-	auto metadata_blocks = sizeof(uint64_t) + (sizeof(idx_t) * 2) * GetMetadataManager().BlockCount();
-	auto total_size = free_list_size + multi_use_blocks_size + metadata_blocks;
-
-	// reserve the blocks that we are going to write
+	// reserve all blocks that we are going to write the free list to
 	// since these blocks are no longer free we cannot just include them in the free list!
 	auto block_size = MetadataManager::METADATA_BLOCK_SIZE - sizeof(idx_t);
-	…
+	idx_t allocated_size = 0;
+	while (true) {
+		auto free_list_size = sizeof(uint64_t) + sizeof(block_id_t) * (free_list.size() + modified_blocks.size());
+		auto multi_use_blocks_size =
+		    sizeof(uint64_t) + (sizeof(block_id_t) + sizeof(uint32_t)) * multi_use_blocks.size();
+		auto metadata_blocks =
+		    sizeof(uint64_t) + (sizeof(block_id_t) + sizeof(idx_t)) * GetMetadataManager().BlockCount();
+		auto total_size = free_list_size + multi_use_blocks_size + metadata_blocks;
+		if (total_size < allocated_size) {
+			break;
+		}
 		auto free_list_handle = GetMetadataManager().AllocateHandle();
 		free_list_blocks.push_back(std::move(free_list_handle));
-		…
+		allocated_size += block_size;
 	}
 
 	return free_list_blocks;
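The rewritten GetFreeListBlocks recomputes the space it needs on every iteration, since reserving a metadata handle can itself grow the structures that have to be written out (the free list, the modified blocks and the metadata block count), and it stops once the space reserved so far exceeds the current estimate. A toy version of that fixed-point style loop, with made-up sizes standing in for DuckDB's real constants:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
	const uint64_t block_size = 4096; // made-up stand-in for METADATA_BLOCK_SIZE - sizeof(idx_t)
	const uint64_t entries = 10000;   // entries that must be written to the free list

	std::vector<int> reserved;        // stand-in for the reserved MetadataHandles
	uint64_t allocated_size = 0;
	while (true) {
		// every reserved block adds one more entry that must be written as well,
		// so the required size is recomputed each time around the loop
		uint64_t total_size = (entries + reserved.size()) * sizeof(uint64_t);
		if (total_size < allocated_size) {
			break;
		}
		reserved.push_back(0);
		allocated_size += block_size;
	}
	std::printf("reserved %zu blocks for %llu entries\n", reserved.size(), (unsigned long long)entries);
	return 0;
}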
@@ -434,13 +439,14 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
 	auto free_list_blocks = GetFreeListBlocks();
 
 	// now handle the free list
+	auto &metadata_manager = GetMetadataManager();
 	// add all modified blocks to the free list: they can now be written to again
+	metadata_manager.MarkBlocksAsModified();
 	for (auto &block : modified_blocks) {
 		free_list.insert(block);
 	}
 	modified_blocks.clear();
 
-	auto &metadata_manager = GetMetadataManager();
 	if (!free_list_blocks.empty()) {
 		// there are blocks to write, either in the free_list or in the modified_blocks
 		// we write these blocks specifically to the free_list_blocks
package/src/duckdb/src/storage/storage_manager.cpp
CHANGED
@@ -260,6 +260,11 @@ DatabaseSize SingleFileStorageManager::GetDatabaseSize() {
 	return ds;
 }
 
+vector<MetadataBlockInfo> SingleFileStorageManager::GetMetadataInfo() {
+	auto &metadata_manager = block_manager->GetMetadataManager();
+	return metadata_manager.GetMetadataInfo();
+}
+
 bool SingleFileStorageManager::AutomaticCheckpoint(idx_t estimated_wal_bytes) {
 	auto log = GetWriteAheadLog();
 	if (!log) {
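GetMetadataInfo is what backs the new pragma_metadata_info table function added in this release (see the pragma_metadata_info.cpp and pragma_queries.cpp entries in the file list above). A minimal sketch of querying it through the C++ API; the database path is hypothetical, and SELECT * is used rather than naming columns, since the table function's exact column set is not shown in this diff:

#include "duckdb.hpp"

#include <cstdio>

int main() {
	duckdb::DuckDB db("my_database.db"); // hypothetical on-disk database
	duckdb::Connection con(db);

	// one row per metadata block, including its free sub-block list
	auto result = con.Query("SELECT * FROM pragma_metadata_info()");
	if (result->HasError()) {
		// builds that predate this change will not have the function
		std::fprintf(stderr, "%s\n", result->GetError().c_str());
		return 1;
	}
	result->Print();
	return 0;
}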
package/src/duckdb/src/storage/table/column_checkpoint_state.cpp
CHANGED
@@ -181,6 +181,9 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
 	}
 	data_pointer.tuple_count = tuple_count;
 	data_pointer.compression_type = segment->function.get().type;
+	if (segment->function.get().serialize_state) {
+		data_pointer.segment_state = segment->function.get().serialize_state(*segment);
+	}
 
 	// append the segment to the new segment tree
 	new_tree.AppendSegment(std::move(segment));
package/src/duckdb/src/storage/table/column_data.cpp
CHANGED
@@ -402,12 +402,7 @@ void ColumnData::AppendTransientSegment(SegmentLock &l, idx_t start_row) {
 void ColumnData::CommitDropColumn() {
 	for (auto &segment_p : data.Segments()) {
 		auto &segment = segment_p;
-
-		auto block_id = segment.GetBlockId();
-		if (block_id != INVALID_BLOCK) {
-			block_manager.MarkBlockAsModified(block_id);
-		}
-	}
+		segment.CommitDropSegment();
 	}
 }
 
@@ -453,12 +448,18 @@ unique_ptr<ColumnCheckpointState> ColumnData::Checkpoint(RowGroup &row_group,
 
 void ColumnData::DeserializeColumn(Deserializer &deserializer) {
 	// load the data pointers for the column
-	…
+	deserializer.Set<DatabaseInstance &>(info.db.GetDatabase());
 	deserializer.Set<LogicalType &>(type);
 
-	…
-	…
+	vector<DataPointer> data_pointers;
+	deserializer.ReadProperty(100, "data_pointers", data_pointers);
+
+	deserializer.Unset<DatabaseInstance>();
+	deserializer.Unset<LogicalType>();
 
+	// construct the segments based on the data pointers
+	this->count = 0;
+	for (auto &data_pointer : data_pointers) {
 		// Update the count and statistics
 		this->count += data_pointer.tuple_count;
 		if (stats) {
@@ -469,12 +470,10 @@ void ColumnData::DeserializeColumn(Deserializer &deserializer) {
 		auto segment = ColumnSegment::CreatePersistentSegment(
 		    GetDatabase(), block_manager, data_pointer.block_pointer.block_id, data_pointer.block_pointer.offset, type,
 		    data_pointer.row_start, data_pointer.tuple_count, data_pointer.compression_type,
-		    std::move(data_pointer.statistics));
+		    std::move(data_pointer.statistics), std::move(data_pointer.segment_state));
 
 		data.AppendSegment(std::move(segment));
-	}
-
-	deserializer.Unset<LogicalType>();
+	}
 }
 
 shared_ptr<ColumnData> ColumnData::Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
@@ -530,10 +529,14 @@ void ColumnData::GetColumnSegmentInfo(idx_t row_group_index, vector<idx_t> col_p
 		} else {
 			column_info.persistent = false;
 		}
+		auto segment_state = segment->GetSegmentState();
+		if (segment_state) {
+			column_info.segment_info = segment_state->GetSegmentInfo();
+		}
 		result.emplace_back(column_info);
 
 		segment_idx++;
-		segment = …
+		segment = data.GetNextSegment(segment);
 	}
 }
 
package/src/duckdb/src/storage/table/column_data_checkpointer.cpp
CHANGED
@@ -168,16 +168,9 @@ void ColumnDataCheckpointer::WriteToDisk() {
 	// first we check the current segments
 	// if there are any persistent segments, we will mark their old block ids as modified
 	// since the segments will be rewritten their old on disk data is no longer required
-	auto &block_manager = col_data.GetBlockManager();
 	for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
 		auto segment = nodes[segment_idx].node.get();
-
-		// persistent segment has updates: mark it as modified and rewrite the block with the merged updates
-		auto block_id = segment->GetBlockId();
-		if (block_id != INVALID_BLOCK) {
-			block_manager.MarkBlockAsModified(block_id);
-		}
-	}
+		segment->CommitDropSegment();
 	}
 
 	// now we need to write our segment
@@ -231,6 +224,9 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
 	pointer.row_start = segment->start;
 	pointer.tuple_count = segment->count;
 	pointer.compression_type = segment->function.get().type;
+	if (segment->function.get().serialize_state) {
+		pointer.segment_state = segment->function.get().serialize_state(*segment);
+	}
 
 	// merge the persistent stats into the global column stats
 	state.global_stats->Merge(segment->stats.statistics);
@@ -231,6 +224,9 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
|
|
231
224
|
pointer.row_start = segment->start;
|
232
225
|
pointer.tuple_count = segment->count;
|
233
226
|
pointer.compression_type = segment->function.get().type;
|
227
|
+
if (segment->function.get().serialize_state) {
|
228
|
+
pointer.segment_state = segment->function.get().serialize_state(*segment);
|
229
|
+
}
|
234
230
|
|
235
231
|
// merge the persistent stats into the global column stats
|
236
232
|
state.global_stats->Merge(segment->stats.statistics);
|
package/src/duckdb/src/storage/table/column_segment.cpp
CHANGED
@@ -9,6 +9,7 @@
 #include "duckdb/planner/filter/constant_filter.hpp"
 #include "duckdb/main/config.hpp"
 #include "duckdb/storage/table/scan_state.hpp"
+#include "duckdb/storage/data_pointer.hpp"
 
 #include <cstring>
 
@@ -18,7 +19,8 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
                                                                  block_id_t block_id, idx_t offset,
                                                                  const LogicalType &type, idx_t start, idx_t count,
                                                                  CompressionType compression_type,
-                                                                 BaseStatistics statistics) {
+                                                                 BaseStatistics statistics,
+                                                                 unique_ptr<ColumnSegmentState> segment_state) {
 	auto &config = DBConfig::GetConfig(db);
 	optional_ptr<CompressionFunction> function;
 	shared_ptr<BlockHandle> block;
@@ -31,7 +33,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
 	}
 	auto segment_size = Storage::BLOCK_SIZE;
 	return make_uniq<ColumnSegment>(db, std::move(block), type, ColumnSegmentType::PERSISTENT, start, count, *function,
-	                                std::move(statistics), block_id, offset, segment_size);
+	                                std::move(statistics), block_id, offset, segment_size, std::move(segment_state));
 }
 
 unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance &db, const LogicalType &type,
@@ -56,13 +58,14 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx
 
 ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
                              ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction &function_p,
-                             BaseStatistics statistics, block_id_t block_id_p, idx_t offset_p, idx_t segment_size_p)
+                             BaseStatistics statistics, block_id_t block_id_p, idx_t offset_p, idx_t segment_size_p,
+                             unique_ptr<ColumnSegmentState> segment_state)
     : SegmentBase<ColumnSegment>(start, count), db(db), type(std::move(type_p)),
       type_size(GetTypeIdSize(type.InternalType())), segment_type(segment_type), function(function_p),
      stats(std::move(statistics)), block(std::move(block)), block_id(block_id_p), offset(offset_p),
      segment_size(segment_size_p) {
 	if (function.get().init_segment) {
-		segment_state = function.get().init_segment(*this, block_id);
+		this->segment_state = function.get().init_segment(*this, block_id, segment_state.get());
 	}
 }
 
@@ -190,11 +193,6 @@ void ColumnSegment::ConvertToPersistent(optional_ptr<BlockManager> block_manager
 		// instead of copying the data we alter some metadata so the buffer points to an on-disk block
 		block = block_manager->ConvertToPersistent(block_id, std::move(block));
 	}
-
-	segment_state.reset();
-	if (function.get().init_segment) {
-		segment_state = function.get().init_segment(*this, block_id);
-	}
 }
 
 void ColumnSegment::MarkAsPersistent(shared_ptr<BlockHandle> block_p, uint32_t offset_p) {
@@ -204,10 +202,21 @@ void ColumnSegment::MarkAsPersistent(shared_ptr<BlockHandle> block_p, uint32_t o
 	block_id = block_p->BlockId();
 	offset = offset_p;
 	block = std::move(block_p);
+}
 
-	segment_state.reset();
-	if (function.get().init_segment) {
-		segment_state = function.get().init_segment(*this, block_id);
+//===--------------------------------------------------------------------===//
+// Drop Segment
+//===--------------------------------------------------------------------===//
+void ColumnSegment::CommitDropSegment() {
+	if (segment_type != ColumnSegmentType::PERSISTENT) {
+		// not persistent
+		return;
+	}
+	if (block_id != INVALID_BLOCK) {
+		GetBlockManager().MarkBlockAsModified(block_id);
+	}
+	if (function.get().cleanup_state) {
+		function.get().cleanup_state(*this);
 	}
 }
 
package/src/duckdb/ub_src_function_table_system.cpp
CHANGED
@@ -32,6 +32,8 @@
 
 #include "src/function/table/system/pragma_database_size.cpp"
 
+#include "src/function/table/system/pragma_metadata_info.cpp"
+
 #include "src/function/table/system/pragma_storage_info.cpp"
 
 #include "src/function/table/system/pragma_table_info.cpp"
package/src/duckdb_node.hpp
CHANGED
@@ -138,6 +138,7 @@ public:
 	static Napi::Object NewInstance(const Napi::Value &db);
 
 public:
+	Napi::Value Close(const Napi::CallbackInfo &info);
 	Napi::Value Prepare(const Napi::CallbackInfo &info);
 	Napi::Value Exec(const Napi::CallbackInfo &info);
 	Napi::Value RegisterUdf(const Napi::CallbackInfo &info);
package/test/close_hang.test.ts
ADDED
@@ -0,0 +1,39 @@
+import 'chai-as-promised';
+import {exec as _exec} from "child_process";
+import fs from "fs-extra";
+import path from "path";
+import {Database, OPEN_READWRITE} from "..";
+import {promisify} from "util";
+import {expect} from "chai";
+
+const exec = promisify(_exec);
+
+it("close hang", async function main() {
+  if (process.platform == 'win32') this.skip();
+
+  const databasePath = path.join(__dirname, "tmp", "close_hang.db");
+  const pathExists = await fs.pathExists(databasePath);
+  if (pathExists) {
+    await fs.remove(databasePath);
+  }
+  await fs.mkdirp(path.dirname(databasePath));
+
+  const db = await new Promise<Database>((resolve, reject) => {
+    let db: Database = new Database(
+      databasePath,
+      OPEN_READWRITE,
+      (err: unknown) => (err ? reject(err) : resolve(db)),
+    );
+  });
+  try {
+    await new Promise((resolve) =>
+      db.exec("create table foo(bar int)", resolve),
+    );
+  } finally {
+    await new Promise((resolve) => db.close(resolve));
+  }
+
+  // exit code 1 and stdout blank means no open handles
+  await expect(exec(
+    `lsof ${databasePath}`)).to.eventually.be.rejected.and.to.include({ 'code': 1 ,'stdout': ''});
+});