duckdb 0.8.2-dev4514.0 → 0.8.2-dev4623.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/lib/duckdb.js +11 -1
  2. package/package.json +3 -1
  3. package/src/connection.cpp +48 -7
  4. package/src/duckdb/src/catalog/catalog.cpp +5 -0
  5. package/src/duckdb/src/catalog/duck_catalog.cpp +4 -0
  6. package/src/duckdb/src/common/enum_util.cpp +24 -0
  7. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +213 -2
  8. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +59 -38
  9. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  10. package/src/duckdb/src/function/table/arrow.cpp +18 -13
  11. package/src/duckdb/src/function/table/read_csv.cpp +3 -130
  12. package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +83 -0
  13. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +5 -0
  14. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  15. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  16. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
  17. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -0
  18. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +1 -1
  19. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  20. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +36 -0
  21. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +24 -0
  22. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +36 -4
  23. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
  24. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  25. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -1
  26. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +10 -4
  27. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +3 -3
  28. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
  29. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +27 -4
  30. package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +4 -2
  31. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +22 -1
  32. package/src/duckdb/src/include/duckdb/storage/database_size.hpp +6 -0
  33. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +6 -1
  36. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -3
  37. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
  38. package/src/duckdb/src/main/connection.cpp +4 -6
  39. package/src/duckdb/src/main/extension/extension_install.cpp +2 -1
  40. package/src/duckdb/src/main/relation/read_csv_relation.cpp +28 -9
  41. package/src/duckdb/src/main/relation/table_function_relation.cpp +8 -2
  42. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -4
  43. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -4
  44. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +47 -10
  45. package/src/duckdb/src/storage/checkpoint_manager.cpp +0 -2
  46. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +6 -1
  47. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +62 -12
  48. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -1
  49. package/src/duckdb/src/storage/data_pointer.cpp +20 -0
  50. package/src/duckdb/src/storage/local_storage.cpp +3 -7
  51. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -15
  52. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +4 -0
  53. package/src/duckdb/src/storage/single_file_block_manager.cpp +15 -9
  54. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  55. package/src/duckdb/src/storage/storage_manager.cpp +5 -0
  56. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -0
  57. package/src/duckdb/src/storage/table/column_data.cpp +17 -14
  58. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -8
  59. package/src/duckdb/src/storage/table/column_segment.cpp +21 -12
  60. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  61. package/src/duckdb/ub_src_storage.cpp +2 -0
  62. package/src/duckdb_node.hpp +1 -0
  63. package/test/close_hang.test.ts +39 -0
@@ -3,6 +3,9 @@
3
3
  #include "duckdb/common/pair.hpp"
4
4
  #include "duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp"
5
5
  #include "miniz_wrapper.hpp"
6
+ #include "duckdb/common/serializer/serializer.hpp"
7
+ #include "duckdb/common/serializer/deserializer.hpp"
8
+ #include "duckdb/storage/table/column_data.hpp"
6
9
 
7
10
  namespace duckdb {
8
11
 
@@ -141,9 +144,22 @@ void UncompressedStringStorage::StringFetchRow(ColumnSegment &segment, ColumnFet
141
144
  //===--------------------------------------------------------------------===//
142
145
  // Append
143
146
  //===--------------------------------------------------------------------===//
147
+ struct SerializedStringSegmentState : public ColumnSegmentState {
148
+ SerializedStringSegmentState() {
149
+ }
150
+ explicit SerializedStringSegmentState(vector<block_id_t> blocks_p) : blocks(std::move(blocks_p)) {
151
+ }
152
+
153
+ vector<block_id_t> blocks;
154
+
155
+ void Serialize(Serializer &serializer) const override {
156
+ serializer.WriteProperty(1, "overflow_blocks", blocks);
157
+ }
158
+ };
144
159
 
145
- unique_ptr<CompressedSegmentState> UncompressedStringStorage::StringInitSegment(ColumnSegment &segment,
146
- block_id_t block_id) {
160
+ unique_ptr<CompressedSegmentState>
161
+ UncompressedStringStorage::StringInitSegment(ColumnSegment &segment, block_id_t block_id,
162
+ optional_ptr<ColumnSegmentState> segment_state) {
147
163
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
148
164
  if (block_id == INVALID_BLOCK) {
149
165
  auto handle = buffer_manager.Pin(segment.block);
@@ -152,7 +168,12 @@ unique_ptr<CompressedSegmentState> UncompressedStringStorage::StringInitSegment(
152
168
  dictionary.end = segment.SegmentSize();
153
169
  SetDictionary(segment, handle, dictionary);
154
170
  }
155
- return make_uniq<UncompressedStringSegmentState>();
171
+ auto result = make_uniq<UncompressedStringSegmentState>();
172
+ if (segment_state) {
173
+ auto &serialized_state = segment_state->Cast<SerializedStringSegmentState>();
174
+ result->on_disk_blocks = std::move(serialized_state.blocks);
175
+ }
176
+ return std::move(result);
156
177
  }
157
178
 
158
179
  idx_t UncompressedStringStorage::FinalizeAppend(ColumnSegment &segment, SegmentStatistics &stats) {
@@ -179,6 +200,32 @@ idx_t UncompressedStringStorage::FinalizeAppend(ColumnSegment &segment, SegmentS
179
200
  return total_size;
180
201
  }
181
202
 
203
+ //===--------------------------------------------------------------------===//
204
+ // Serialization & Cleanup
205
+ //===--------------------------------------------------------------------===//
206
+ unique_ptr<ColumnSegmentState> UncompressedStringStorage::SerializeState(ColumnSegment &segment) {
207
+ auto &state = segment.GetSegmentState()->Cast<UncompressedStringSegmentState>();
208
+ if (state.on_disk_blocks.empty()) {
209
+ // no on-disk blocks - nothing to write
210
+ return nullptr;
211
+ }
212
+ return make_uniq<SerializedStringSegmentState>(state.on_disk_blocks);
213
+ }
214
+
215
+ unique_ptr<ColumnSegmentState> UncompressedStringStorage::DeserializeState(Deserializer &deserializer) {
216
+ auto result = make_uniq<SerializedStringSegmentState>();
217
+ deserializer.ReadProperty(1, "overflow_blocks", result->blocks);
218
+ return std::move(result);
219
+ }
220
+
221
+ void UncompressedStringStorage::CleanupState(ColumnSegment &segment) {
222
+ auto &state = segment.GetSegmentState()->Cast<UncompressedStringSegmentState>();
223
+ auto &block_manager = segment.GetBlockManager();
224
+ for (auto &block_id : state.on_disk_blocks) {
225
+ block_manager.MarkBlockAsModified(block_id);
226
+ }
227
+ }
228
+
182
229
  //===--------------------------------------------------------------------===//
183
230
  // Get Function
184
231
  //===--------------------------------------------------------------------===//
@@ -192,7 +239,9 @@ CompressionFunction StringUncompressed::GetFunction(PhysicalType data_type) {
192
239
  UncompressedStringStorage::StringScanPartial, UncompressedStringStorage::StringFetchRow,
193
240
  UncompressedFunctions::EmptySkip, UncompressedStringStorage::StringInitSegment,
194
241
  UncompressedStringStorage::StringInitAppend, UncompressedStringStorage::StringAppend,
195
- UncompressedStringStorage::FinalizeAppend);
242
+ UncompressedStringStorage::FinalizeAppend, nullptr,
243
+ UncompressedStringStorage::SerializeState, UncompressedStringStorage::DeserializeState,
244
+ UncompressedStringStorage::CleanupState);
196
245
  }
197
246
 
198
247
  //===--------------------------------------------------------------------===//
@@ -226,7 +275,7 @@ void UncompressedStringStorage::WriteString(ColumnSegment &segment, string_t str
226
275
  auto &state = segment.GetSegmentState()->Cast<UncompressedStringSegmentState>();
227
276
  if (state.overflow_writer) {
228
277
  // overflow writer is set: write string there
229
- state.overflow_writer->WriteString(string, result_block, result_offset);
278
+ state.overflow_writer->WriteString(state, string, result_block, result_offset);
230
279
  } else {
231
280
  // default overflow behavior: use in-memory buffer to store the overflow string
232
281
  WriteStringMemory(segment, string, result_block, result_offset);
@@ -251,7 +300,7 @@ void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string
251
300
  new_block->size = alloc_size;
252
301
  // allocate an in-memory buffer for it
253
302
  handle = buffer_manager.Allocate(alloc_size, false, &block);
254
- state.overflow_blocks[block->BlockId()] = new_block.get();
303
+ state.overflow_blocks.insert(make_pair(block->BlockId(), reference<StringBlock>(*new_block)));
255
304
  new_block->block = std::move(block);
256
305
  new_block->next = std::move(state.head);
257
306
  state.head = std::move(new_block);
@@ -282,7 +331,7 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
282
331
  if (block < MAXIMUM_BLOCK) {
283
332
  // read the overflow string from disk
284
333
  // pin the initial handle and read the length
285
- auto block_handle = block_manager.RegisterBlock(block);
334
+ auto block_handle = state.GetHandle(block_manager, block);
286
335
  auto handle = buffer_manager.Pin(block_handle);
287
336
 
288
337
  // read header
@@ -295,7 +344,7 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
295
344
  unsafe_unique_array<data_t> decompression_buffer;
296
345
 
297
346
  // If string is in single block we decompress straight from it, else we copy first
298
- if (remaining <= Storage::BLOCK_SIZE - sizeof(block_id_t) - offset) {
347
+ if (remaining <= WriteOverflowStringsToDisk::STRING_SPACE - offset) {
299
348
  decompression_ptr = handle.Ptr() + offset;
300
349
  } else {
301
350
  decompression_buffer = make_unsafe_uniq_array<data_t>(compressed_size);
@@ -303,7 +352,7 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
303
352
 
304
353
  // now append the string to the single buffer
305
354
  while (remaining > 0) {
306
- idx_t to_write = MinValue<idx_t>(remaining, Storage::BLOCK_SIZE - sizeof(block_id_t) - offset);
355
+ idx_t to_write = MinValue<idx_t>(remaining, WriteOverflowStringsToDisk::STRING_SPACE - offset);
307
356
  memcpy(target_ptr, handle.Ptr() + offset, to_write);
308
357
 
309
358
  remaining -= to_write;
@@ -311,8 +360,9 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
311
360
  target_ptr += to_write;
312
361
  if (remaining > 0) {
313
362
  // read the next block
314
- block_id_t next_block = Load<block_id_t>(handle.Ptr() + offset);
315
- block_handle = block_manager.RegisterBlock(next_block);
363
+ D_ASSERT(offset == WriteOverflowStringsToDisk::STRING_SPACE);
364
+ block_id_t next_block = Load<block_id_t>(handle.Ptr() + WriteOverflowStringsToDisk::STRING_SPACE);
365
+ block_handle = state.GetHandle(block_manager, next_block);
316
366
  handle = buffer_manager.Pin(block_handle);
317
367
  offset = 0;
318
368
  }
@@ -336,7 +386,7 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
336
386
  // first pin the handle, if it is not pinned yet
337
387
  auto entry = state.overflow_blocks.find(block);
338
388
  D_ASSERT(entry != state.overflow_blocks.end());
339
- auto handle = buffer_manager.Pin(entry->second->block);
389
+ auto handle = buffer_manager.Pin(entry->second.get().block);
340
390
  auto final_buffer = handle.Ptr();
341
391
  StringVector::AddHandle(result, std::move(handle));
342
392
  return ReadStringWithLength(final_buffer, offset);
@@ -398,7 +398,8 @@ static unique_ptr<CompressionAppendState> ValidityInitAppend(ColumnSegment &segm
398
398
  return make_uniq<CompressionAppendState>(std::move(handle));
399
399
  }
400
400
 
401
- unique_ptr<CompressedSegmentState> ValidityInitSegment(ColumnSegment &segment, block_id_t block_id) {
401
+ unique_ptr<CompressedSegmentState> ValidityInitSegment(ColumnSegment &segment, block_id_t block_id,
402
+ optional_ptr<ColumnSegmentState> segment_state) {
402
403
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
403
404
  if (block_id == INVALID_BLOCK) {
404
405
  auto handle = buffer_manager.Pin(segment.block);
@@ -0,0 +1,20 @@
1
+ #include "duckdb/storage/data_pointer.hpp"
2
+ #include "duckdb/common/serializer/serializer.hpp"
3
+ #include "duckdb/common/serializer/deserializer.hpp"
4
+ #include "duckdb/main/config.hpp"
5
+ #include "duckdb/function/compression_function.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ unique_ptr<ColumnSegmentState> ColumnSegmentState::Deserialize(Deserializer &deserializer) {
10
+ auto compression_type = deserializer.Get<CompressionType>();
11
+ auto &db = deserializer.Get<DatabaseInstance &>();
12
+ auto &type = deserializer.Get<LogicalType &>();
13
+ auto compression_function = DBConfig::GetConfig(db).GetCompressionFunction(compression_type, type.InternalType());
14
+ if (!compression_function || !compression_function->deserialize_state) {
15
+ throw SerializationException("Deserializing a ColumnSegmentState but could not find deserialize method");
16
+ }
17
+ return compression_function->deserialize_state(deserializer);
18
+ }
19
+
20
+ } // namespace duckdb
@@ -74,17 +74,13 @@ LocalTableStorage::~LocalTableStorage() {
74
74
 
75
75
  void LocalTableStorage::InitializeScan(CollectionScanState &state, optional_ptr<TableFilterSet> table_filters) {
76
76
  if (row_groups->GetTotalRows() == 0) {
77
- // nothing to scan
78
- return;
77
+ throw InternalException("No rows in LocalTableStorage row group for scan");
79
78
  }
80
79
  row_groups->InitializeScan(state, state.GetColumnIds(), table_filters.get());
81
80
  }
82
81
 
83
82
  idx_t LocalTableStorage::EstimatedSize() {
84
83
  idx_t appended_rows = row_groups->GetTotalRows() - deleted_rows;
85
- if (appended_rows == 0) {
86
- return 0;
87
- }
88
84
  idx_t row_size = 0;
89
85
  auto &types = row_groups->GetTypes();
90
86
  for (auto &type : types) {
@@ -169,10 +165,10 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen
169
165
  } catch (Exception &ex) {
170
166
  error = PreservedError(ex);
171
167
  return false;
172
- } catch (std::exception &ex) {
168
+ } catch (std::exception &ex) { // LCOV_EXCL_START
173
169
  error = PreservedError(ex);
174
170
  return false;
175
- }
171
+ } // LCOV_EXCL_STOP
176
172
 
177
173
  current_row += chunk.size();
178
174
  if (current_row >= append_state.current_row) {
@@ -3,6 +3,7 @@
3
3
  #include "duckdb/storage/buffer/block_handle.hpp"
4
4
  #include "duckdb/common/serializer/write_stream.hpp"
5
5
  #include "duckdb/common/serializer/read_stream.hpp"
6
+ #include "duckdb/storage/database_size.hpp"
6
7
 
7
8
  namespace duckdb {
8
9
 
@@ -81,11 +82,13 @@ block_id_t MetadataManager::AllocateNewBlock() {
81
82
  auto new_block_id = GetNextBlockId();
82
83
 
83
84
  MetadataBlock new_block;
84
- buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block.block);
85
+ auto handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block.block);
85
86
  new_block.block_id = new_block_id;
86
87
  for (idx_t i = 0; i < METADATA_BLOCK_COUNT; i++) {
87
88
  new_block.free_blocks.push_back(METADATA_BLOCK_COUNT - i - 1);
88
89
  }
90
+ // zero-initialize the handle
91
+ memset(handle.Ptr(), 0, Storage::BLOCK_SIZE);
89
92
  AddBlock(std::move(new_block));
90
93
  return new_block_id;
91
94
  }
@@ -176,11 +179,6 @@ void MetadataManager::Flush() {
176
179
  for (auto &kv : blocks) {
177
180
  auto &block = kv.second;
178
181
  auto handle = buffer_manager.Pin(block.block);
179
- // zero-initialize any free blocks
180
- for (auto free_block : block.free_blocks) {
181
- memset(handle.Ptr() + free_block * MetadataManager::METADATA_BLOCK_SIZE, 0,
182
- MetadataManager::METADATA_BLOCK_SIZE);
183
- }
184
182
  // there are a few bytes left-over at the end of the block, zero-initialize them
185
183
  memset(handle.Ptr() + total_metadata_size, 0, Storage::BLOCK_SIZE - total_metadata_size);
186
184
  D_ASSERT(kv.first == block.block_id);
@@ -255,7 +253,6 @@ void MetadataBlock::FreeBlocksFromInteger(idx_t free_list) {
255
253
  }
256
254
 
257
255
  void MetadataManager::MarkBlocksAsModified() {
258
-
259
256
  // for any blocks that were modified in the last checkpoint - set them to free blocks currently
260
257
  for (auto &kv : modified_blocks) {
261
258
  auto block_id = kv.first;
@@ -266,14 +263,14 @@ void MetadataManager::MarkBlocksAsModified() {
266
263
  idx_t current_free_blocks = block.FreeBlocksToInteger();
267
264
  // merge the current set of free blocks with the modified blocks
268
265
  idx_t new_free_blocks = current_free_blocks | modified_list;
269
- // if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
270
- // // if new free_blocks is all blocks - mark entire block as modified
271
- // blocks.erase(entry);
272
- // block_manager.MarkBlockAsModified(block_id);
273
- // } else {
274
- // set the new set of free blocks
275
- block.FreeBlocksFromInteger(new_free_blocks);
276
- // }
266
+ if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
267
+ // if new free_blocks is all blocks - mark entire block as modified
268
+ blocks.erase(entry);
269
+ block_manager.MarkBlockAsModified(block_id);
270
+ } else {
271
+ // set the new set of free blocks
272
+ block.FreeBlocksFromInteger(new_free_blocks);
273
+ }
277
274
  }
278
275
 
279
276
  modified_blocks.clear();
@@ -301,6 +298,23 @@ void MetadataManager::ClearModifiedBlocks(const vector<MetaBlockPointer> &pointe
301
298
  }
302
299
  }
303
300
 
301
+ vector<MetadataBlockInfo> MetadataManager::GetMetadataInfo() const {
302
+ vector<MetadataBlockInfo> result;
303
+ for (auto &block : blocks) {
304
+ MetadataBlockInfo block_info;
305
+ block_info.block_id = block.second.block_id;
306
+ block_info.total_blocks = MetadataManager::METADATA_BLOCK_COUNT;
307
+ for (auto free_block : block.second.free_blocks) {
308
+ block_info.free_list.push_back(free_block);
309
+ }
310
+ std::sort(block_info.free_list.begin(), block_info.free_list.end());
311
+ result.push_back(std::move(block_info));
312
+ }
313
+ std::sort(result.begin(), result.end(),
314
+ [](const MetadataBlockInfo &a, const MetadataBlockInfo &b) { return a.block_id < b.block_id; });
315
+ return result;
316
+ }
317
+
304
318
  block_id_t MetadataManager::GetNextBlockId() {
305
319
  return block_manager.GetFreeBlockId();
306
320
  }
@@ -29,6 +29,7 @@ void DataPointer::Serialize(Serializer &serializer) const {
29
29
  serializer.WriteProperty(102, "block_pointer", block_pointer);
30
30
  serializer.WriteProperty(103, "compression_type", compression_type);
31
31
  serializer.WriteProperty(104, "statistics", statistics);
32
+ serializer.WriteProperty(105, "segment_state", segment_state);
32
33
  }
33
34
 
34
35
  DataPointer DataPointer::Deserialize(Deserializer &deserializer) {
@@ -42,6 +43,9 @@ DataPointer DataPointer::Deserialize(Deserializer &deserializer) {
42
43
  result.tuple_count = tuple_count;
43
44
  result.block_pointer = block_pointer;
44
45
  result.compression_type = compression_type;
46
+ deserializer.Set<CompressionType>(compression_type);
47
+ deserializer.ReadProperty(105, "segment_state", result.segment_state);
48
+ deserializer.Unset<CompressionType>();
45
49
  return result;
46
50
  }
47
51
 
@@ -391,18 +391,23 @@ void SingleFileBlockManager::Truncate() {
391
391
  vector<MetadataHandle> SingleFileBlockManager::GetFreeListBlocks() {
392
392
  vector<MetadataHandle> free_list_blocks;
393
393
 
394
- auto free_list_size = sizeof(uint64_t) + sizeof(block_id_t) * (free_list.size() + modified_blocks.size());
395
- auto multi_use_blocks_size = sizeof(uint64_t) + (sizeof(block_id_t) + sizeof(uint32_t)) * multi_use_blocks.size();
396
- auto metadata_blocks = sizeof(uint64_t) + (sizeof(idx_t) * 2) * GetMetadataManager().BlockCount();
397
- auto total_size = free_list_size + multi_use_blocks_size + metadata_blocks;
398
-
399
- // reserve the blocks that we are going to write
394
+ // reserve all blocks that we are going to write the free list to
400
395
  // since these blocks are no longer free we cannot just include them in the free list!
401
396
  auto block_size = MetadataManager::METADATA_BLOCK_SIZE - sizeof(idx_t);
402
- while (total_size > 0) {
397
+ idx_t allocated_size = 0;
398
+ while (true) {
399
+ auto free_list_size = sizeof(uint64_t) + sizeof(block_id_t) * (free_list.size() + modified_blocks.size());
400
+ auto multi_use_blocks_size =
401
+ sizeof(uint64_t) + (sizeof(block_id_t) + sizeof(uint32_t)) * multi_use_blocks.size();
402
+ auto metadata_blocks =
403
+ sizeof(uint64_t) + (sizeof(block_id_t) + sizeof(idx_t)) * GetMetadataManager().BlockCount();
404
+ auto total_size = free_list_size + multi_use_blocks_size + metadata_blocks;
405
+ if (total_size < allocated_size) {
406
+ break;
407
+ }
403
408
  auto free_list_handle = GetMetadataManager().AllocateHandle();
404
409
  free_list_blocks.push_back(std::move(free_list_handle));
405
- total_size -= MinValue<idx_t>(total_size, block_size);
410
+ allocated_size += block_size;
406
411
  }
407
412
 
408
413
  return free_list_blocks;
@@ -434,13 +439,14 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
434
439
  auto free_list_blocks = GetFreeListBlocks();
435
440
 
436
441
  // now handle the free list
442
+ auto &metadata_manager = GetMetadataManager();
437
443
  // add all modified blocks to the free list: they can now be written to again
444
+ metadata_manager.MarkBlocksAsModified();
438
445
  for (auto &block : modified_blocks) {
439
446
  free_list.insert(block);
440
447
  }
441
448
  modified_blocks.clear();
442
449
 
443
- auto &metadata_manager = GetMetadataManager();
444
450
  if (!free_list_blocks.empty()) {
445
451
  // there are blocks to write, either in the free_list or in the modified_blocks
446
452
  // we write these blocks specifically to the free_list_blocks
@@ -2,7 +2,7 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- const uint64_t VERSION_NUMBER = 60;
5
+ const uint64_t VERSION_NUMBER = 61;
6
6
 
7
7
  struct StorageVersionInfo {
8
8
  const char *version_name;
@@ -260,6 +260,11 @@ DatabaseSize SingleFileStorageManager::GetDatabaseSize() {
260
260
  return ds;
261
261
  }
262
262
 
263
+ vector<MetadataBlockInfo> SingleFileStorageManager::GetMetadataInfo() {
264
+ auto &metadata_manager = block_manager->GetMetadataManager();
265
+ return metadata_manager.GetMetadataInfo();
266
+ }
267
+
263
268
  bool SingleFileStorageManager::AutomaticCheckpoint(idx_t estimated_wal_bytes) {
264
269
  auto log = GetWriteAheadLog();
265
270
  if (!log) {
@@ -181,6 +181,9 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
181
181
  }
182
182
  data_pointer.tuple_count = tuple_count;
183
183
  data_pointer.compression_type = segment->function.get().type;
184
+ if (segment->function.get().serialize_state) {
185
+ data_pointer.segment_state = segment->function.get().serialize_state(*segment);
186
+ }
184
187
 
185
188
  // append the segment to the new segment tree
186
189
  new_tree.AppendSegment(std::move(segment));
@@ -402,12 +402,7 @@ void ColumnData::AppendTransientSegment(SegmentLock &l, idx_t start_row) {
402
402
  void ColumnData::CommitDropColumn() {
403
403
  for (auto &segment_p : data.Segments()) {
404
404
  auto &segment = segment_p;
405
- if (segment.segment_type == ColumnSegmentType::PERSISTENT) {
406
- auto block_id = segment.GetBlockId();
407
- if (block_id != INVALID_BLOCK) {
408
- block_manager.MarkBlockAsModified(block_id);
409
- }
410
- }
405
+ segment.CommitDropSegment();
411
406
  }
412
407
  }
413
408
 
@@ -453,12 +448,18 @@ unique_ptr<ColumnCheckpointState> ColumnData::Checkpoint(RowGroup &row_group,
453
448
 
454
449
  void ColumnData::DeserializeColumn(Deserializer &deserializer) {
455
450
  // load the data pointers for the column
456
- this->count = 0;
451
+ deserializer.Set<DatabaseInstance &>(info.db.GetDatabase());
457
452
  deserializer.Set<LogicalType &>(type);
458
453
 
459
- deserializer.ReadList(100, "data_pointers", [&](Deserializer::List &list, idx_t i) {
460
- auto data_pointer = list.ReadElement<DataPointer>();
454
+ vector<DataPointer> data_pointers;
455
+ deserializer.ReadProperty(100, "data_pointers", data_pointers);
456
+
457
+ deserializer.Unset<DatabaseInstance>();
458
+ deserializer.Unset<LogicalType>();
461
459
 
460
+ // construct the segments based on the data pointers
461
+ this->count = 0;
462
+ for (auto &data_pointer : data_pointers) {
462
463
  // Update the count and statistics
463
464
  this->count += data_pointer.tuple_count;
464
465
  if (stats) {
@@ -469,12 +470,10 @@ void ColumnData::DeserializeColumn(Deserializer &deserializer) {
469
470
  auto segment = ColumnSegment::CreatePersistentSegment(
470
471
  GetDatabase(), block_manager, data_pointer.block_pointer.block_id, data_pointer.block_pointer.offset, type,
471
472
  data_pointer.row_start, data_pointer.tuple_count, data_pointer.compression_type,
472
- std::move(data_pointer.statistics));
473
+ std::move(data_pointer.statistics), std::move(data_pointer.segment_state));
473
474
 
474
475
  data.AppendSegment(std::move(segment));
475
- });
476
-
477
- deserializer.Unset<LogicalType>();
476
+ }
478
477
  }
479
478
 
480
479
  shared_ptr<ColumnData> ColumnData::Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
@@ -530,10 +529,14 @@ void ColumnData::GetColumnSegmentInfo(idx_t row_group_index, vector<idx_t> col_p
530
529
  } else {
531
530
  column_info.persistent = false;
532
531
  }
532
+ auto segment_state = segment->GetSegmentState();
533
+ if (segment_state) {
534
+ column_info.segment_info = segment_state->GetSegmentInfo();
535
+ }
533
536
  result.emplace_back(column_info);
534
537
 
535
538
  segment_idx++;
536
- segment = (ColumnSegment *)data.GetNextSegment(segment);
539
+ segment = data.GetNextSegment(segment);
537
540
  }
538
541
  }
539
542
 
@@ -168,16 +168,9 @@ void ColumnDataCheckpointer::WriteToDisk() {
168
168
  // first we check the current segments
169
169
  // if there are any persistent segments, we will mark their old block ids as modified
170
170
  // since the segments will be rewritten their old on disk data is no longer required
171
- auto &block_manager = col_data.GetBlockManager();
172
171
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
173
172
  auto segment = nodes[segment_idx].node.get();
174
- if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
175
- // persistent segment has updates: mark it as modified and rewrite the block with the merged updates
176
- auto block_id = segment->GetBlockId();
177
- if (block_id != INVALID_BLOCK) {
178
- block_manager.MarkBlockAsModified(block_id);
179
- }
180
- }
173
+ segment->CommitDropSegment();
181
174
  }
182
175
 
183
176
  // now we need to write our segment
@@ -231,6 +224,9 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
231
224
  pointer.row_start = segment->start;
232
225
  pointer.tuple_count = segment->count;
233
226
  pointer.compression_type = segment->function.get().type;
227
+ if (segment->function.get().serialize_state) {
228
+ pointer.segment_state = segment->function.get().serialize_state(*segment);
229
+ }
234
230
 
235
231
  // merge the persistent stats into the global column stats
236
232
  state.global_stats->Merge(segment->stats.statistics);
@@ -9,6 +9,7 @@
9
9
  #include "duckdb/planner/filter/constant_filter.hpp"
10
10
  #include "duckdb/main/config.hpp"
11
11
  #include "duckdb/storage/table/scan_state.hpp"
12
+ #include "duckdb/storage/data_pointer.hpp"
12
13
 
13
14
  #include <cstring>
14
15
 
@@ -18,7 +19,8 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
18
19
  block_id_t block_id, idx_t offset,
19
20
  const LogicalType &type, idx_t start, idx_t count,
20
21
  CompressionType compression_type,
21
- BaseStatistics statistics) {
22
+ BaseStatistics statistics,
23
+ unique_ptr<ColumnSegmentState> segment_state) {
22
24
  auto &config = DBConfig::GetConfig(db);
23
25
  optional_ptr<CompressionFunction> function;
24
26
  shared_ptr<BlockHandle> block;
@@ -31,7 +33,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
31
33
  }
32
34
  auto segment_size = Storage::BLOCK_SIZE;
33
35
  return make_uniq<ColumnSegment>(db, std::move(block), type, ColumnSegmentType::PERSISTENT, start, count, *function,
34
- std::move(statistics), block_id, offset, segment_size);
36
+ std::move(statistics), block_id, offset, segment_size, std::move(segment_state));
35
37
  }
36
38
 
37
39
  unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance &db, const LogicalType &type,
@@ -56,13 +58,14 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx
56
58
 
57
59
  ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
58
60
  ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction &function_p,
59
- BaseStatistics statistics, block_id_t block_id_p, idx_t offset_p, idx_t segment_size_p)
61
+ BaseStatistics statistics, block_id_t block_id_p, idx_t offset_p, idx_t segment_size_p,
62
+ unique_ptr<ColumnSegmentState> segment_state)
60
63
  : SegmentBase<ColumnSegment>(start, count), db(db), type(std::move(type_p)),
61
64
  type_size(GetTypeIdSize(type.InternalType())), segment_type(segment_type), function(function_p),
62
65
  stats(std::move(statistics)), block(std::move(block)), block_id(block_id_p), offset(offset_p),
63
66
  segment_size(segment_size_p) {
64
67
  if (function.get().init_segment) {
65
- segment_state = function.get().init_segment(*this, block_id);
68
+ this->segment_state = function.get().init_segment(*this, block_id, segment_state.get());
66
69
  }
67
70
  }
68
71
 
@@ -190,11 +193,6 @@ void ColumnSegment::ConvertToPersistent(optional_ptr<BlockManager> block_manager
190
193
  // instead of copying the data we alter some metadata so the buffer points to an on-disk block
191
194
  block = block_manager->ConvertToPersistent(block_id, std::move(block));
192
195
  }
193
-
194
- segment_state.reset();
195
- if (function.get().init_segment) {
196
- segment_state = function.get().init_segment(*this, block_id);
197
- }
198
196
  }
199
197
 
200
198
  void ColumnSegment::MarkAsPersistent(shared_ptr<BlockHandle> block_p, uint32_t offset_p) {
@@ -204,10 +202,21 @@ void ColumnSegment::MarkAsPersistent(shared_ptr<BlockHandle> block_p, uint32_t o
204
202
  block_id = block_p->BlockId();
205
203
  offset = offset_p;
206
204
  block = std::move(block_p);
205
+ }
207
206
 
208
- segment_state.reset();
209
- if (function.get().init_segment) {
210
- segment_state = function.get().init_segment(*this, block_id);
207
+ //===--------------------------------------------------------------------===//
208
+ // Drop Segment
209
+ //===--------------------------------------------------------------------===//
210
+ void ColumnSegment::CommitDropSegment() {
211
+ if (segment_type != ColumnSegmentType::PERSISTENT) {
212
+ // not persistent
213
+ return;
214
+ }
215
+ if (block_id != INVALID_BLOCK) {
216
+ GetBlockManager().MarkBlockAsModified(block_id);
217
+ }
218
+ if (function.get().cleanup_state) {
219
+ function.get().cleanup_state(*this);
211
220
  }
212
221
  }
213
222
 
@@ -32,6 +32,8 @@
32
32
 
33
33
  #include "src/function/table/system/pragma_database_size.cpp"
34
34
 
35
+ #include "src/function/table/system/pragma_metadata_info.cpp"
36
+
35
37
  #include "src/function/table/system/pragma_storage_info.cpp"
36
38
 
37
39
  #include "src/function/table/system/pragma_table_info.cpp"
@@ -6,6 +6,8 @@
6
6
 
7
7
  #include "src/storage/block.cpp"
8
8
 
9
+ #include "src/storage/data_pointer.cpp"
10
+
9
11
  #include "src/storage/data_table.cpp"
10
12
 
11
13
  #include "src/storage/index.cpp"
@@ -138,6 +138,7 @@ public:
138
138
  static Napi::Object NewInstance(const Napi::Value &db);
139
139
 
140
140
  public:
141
+ Napi::Value Close(const Napi::CallbackInfo &info);
141
142
  Napi::Value Prepare(const Napi::CallbackInfo &info);
142
143
  Napi::Value Exec(const Napi::CallbackInfo &info);
143
144
  Napi::Value RegisterUdf(const Napi::CallbackInfo &info);
@@ -0,0 +1,39 @@
1
+ import 'chai-as-promised';
2
+ import {exec as _exec} from "child_process";
3
+ import fs from "fs-extra";
4
+ import path from "path";
5
+ import {Database, OPEN_READWRITE} from "..";
6
+ import {promisify} from "util";
7
+ import {expect} from "chai";
8
+
9
+ const exec = promisify(_exec);
10
+
11
+ it("close hang", async function main() {
12
+ if (process.platform == 'win32') this.skip();
13
+
14
+ const databasePath = path.join(__dirname, "tmp", "close_hang.db");
15
+ const pathExists = await fs.pathExists(databasePath);
16
+ if (pathExists) {
17
+ await fs.remove(databasePath);
18
+ }
19
+ await fs.mkdirp(path.dirname(databasePath));
20
+
21
+ const db = await new Promise<Database>((resolve, reject) => {
22
+ let db: Database = new Database(
23
+ databasePath,
24
+ OPEN_READWRITE,
25
+ (err: unknown) => (err ? reject(err) : resolve(db)),
26
+ );
27
+ });
28
+ try {
29
+ await new Promise((resolve) =>
30
+ db.exec("create table foo(bar int)", resolve),
31
+ );
32
+ } finally {
33
+ await new Promise((resolve) => db.close(resolve));
34
+ }
35
+
36
+ // exit code 1 and stdout blank means no open handles
37
+ await expect(exec(
38
+ `lsof ${databasePath}`)).to.eventually.be.rejected.and.to.include({ 'code': 1 ,'stdout': ''});
39
+ });