duckdb 0.8.2-dev4314.0 → 0.8.2-dev4424.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
  3. package/src/duckdb/src/common/enum_util.cpp +5 -0
  4. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  5. package/src/duckdb/src/common/types/date.cpp +1 -1
  6. package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
  7. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +3 -10
  8. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +6 -3
  9. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
  10. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  11. package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
  12. package/src/duckdb/src/function/table/read_csv.cpp +5 -22
  13. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  14. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
  15. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
  16. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  17. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
  18. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  19. package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
  20. package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
  21. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
  22. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  23. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
  24. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
  25. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
  26. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
  27. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  28. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
  29. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
  30. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
  31. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
  32. package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
  33. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
  34. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
  35. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
  36. package/src/duckdb/src/main/settings/settings.cpp +5 -10
  37. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
  38. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
  39. package/src/duckdb/src/storage/checkpoint_manager.cpp +37 -36
  40. package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
  41. package/src/duckdb/src/storage/data_table.cpp +1 -1
  42. package/src/duckdb/src/storage/local_storage.cpp +9 -2
  43. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
  44. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
  45. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
  46. package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
  47. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  48. package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
  49. package/src/duckdb/src/storage/table/column_data.cpp +14 -9
  50. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
  51. package/src/duckdb/src/storage/table/row_group.cpp +102 -192
  52. package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
  53. package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
  54. package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
  55. package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
  56. package/src/duckdb/src/transaction/commit_state.cpp +5 -4
  57. package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
  58. package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
  59. package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
  60. package/src/duckdb/ub_src_storage_table.cpp +2 -0
  61. package/test/prepare.test.ts +10 -1
  62. package/test/test_all_types.test.ts +4 -4
@@ -2,9 +2,14 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- MetadataReader::MetadataReader(MetadataManager &manager, MetaBlockPointer pointer, BlockReaderType type)
6
- : manager(manager), type(type), next_pointer(FromDiskPointer(pointer)), has_next_block(true), index(0), offset(0),
7
- next_offset(pointer.offset), capacity(0) {
5
+ MetadataReader::MetadataReader(MetadataManager &manager, MetaBlockPointer pointer,
6
+ optional_ptr<vector<MetaBlockPointer>> read_pointers_p, BlockReaderType type)
7
+ : manager(manager), type(type), next_pointer(FromDiskPointer(pointer)), has_next_block(true),
8
+ read_pointers(read_pointers_p), index(0), offset(0), next_offset(pointer.offset), capacity(0) {
9
+ if (read_pointers) {
10
+ D_ASSERT(read_pointers->empty());
11
+ read_pointers->push_back(pointer);
12
+ }
8
13
  }
9
14
 
10
15
  MetadataReader::MetadataReader(MetadataManager &manager, BlockPointer pointer)
@@ -57,6 +62,10 @@ void MetadataReader::ReadNextBlock() {
57
62
  has_next_block = false;
58
63
  } else {
59
64
  next_pointer = FromDiskPointer(MetaBlockPointer(next_block, 0));
65
+ MetaBlockPointer next_block_pointer(next_block, 0);
66
+ if (read_pointers) {
67
+ read_pointers->push_back(next_block_pointer);
68
+ }
60
69
  }
61
70
  if (next_offset < sizeof(block_id_t)) {
62
71
  next_offset = sizeof(block_id_t);
@@ -3,7 +3,9 @@
3
3
 
4
4
  namespace duckdb {
5
5
 
6
- MetadataWriter::MetadataWriter(MetadataManager &manager) : manager(manager), capacity(0), offset(0) {
6
+ MetadataWriter::MetadataWriter(MetadataManager &manager, optional_ptr<vector<MetaBlockPointer>> written_pointers_p)
7
+ : manager(manager), written_pointers(written_pointers_p), capacity(0), offset(0) {
8
+ D_ASSERT(!written_pointers || written_pointers->empty());
7
9
  }
8
10
 
9
11
  MetadataWriter::~MetadataWriter() {
@@ -38,7 +40,8 @@ void MetadataWriter::NextBlock() {
38
40
 
39
41
  // write the block id of the new block to the start of the current block
40
42
  if (capacity > 0) {
41
- Store<idx_t>(manager.GetDiskPointer(new_handle.pointer).block_pointer, BasePtr());
43
+ auto disk_block = manager.GetDiskPointer(new_handle.pointer);
44
+ Store<idx_t>(disk_block.block_pointer, BasePtr());
42
45
  }
43
46
  // now update the block id of the block
44
47
  block = std::move(new_handle);
@@ -46,6 +49,9 @@ void MetadataWriter::NextBlock() {
46
49
  offset = sizeof(idx_t);
47
50
  capacity = MetadataManager::METADATA_BLOCK_SIZE;
48
51
  Store<idx_t>(-1, BasePtr());
52
+ if (written_pointers) {
53
+ written_pointers->push_back(manager.GetDiskPointer(current_pointer));
54
+ }
49
55
  }
50
56
 
51
57
  void MetadataWriter::WriteData(const_data_ptr_t buffer, idx_t write_size) {
@@ -240,8 +240,7 @@ void SingleFileBlockManager::LoadFreeList() {
240
240
  // no free list
241
241
  return;
242
242
  }
243
-
244
- MetadataReader reader(GetMetadataManager(), free_pointer, BlockReaderType::REGISTER_BLOCKS);
243
+ MetadataReader reader(GetMetadataManager(), free_pointer, nullptr, BlockReaderType::REGISTER_BLOCKS);
245
244
  auto free_list_count = reader.Read<uint64_t>();
246
245
  free_list.clear();
247
246
  for (idx_t i = 0; i < free_list_count; i++) {
@@ -2,7 +2,7 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- const uint64_t VERSION_NUMBER = 59;
5
+ const uint64_t VERSION_NUMBER = 60;
6
6
 
7
7
  struct StorageVersionInfo {
8
8
  const char *version_name;
@@ -2,6 +2,7 @@
2
2
  #include "duckdb/transaction/transaction.hpp"
3
3
  #include "duckdb/common/serializer/serializer.hpp"
4
4
  #include "duckdb/common/serializer/deserializer.hpp"
5
+ #include "duckdb/common/serializer/memory_stream.hpp"
5
6
 
6
7
  namespace duckdb {
7
8
 
@@ -29,15 +30,19 @@ static bool UseVersion(TransactionData transaction, transaction_t id) {
29
30
  return TransactionVersionOperator::UseInsertedVersion(transaction.start_time, transaction.transaction_id, id);
30
31
  }
31
32
 
32
- unique_ptr<ChunkInfo> ChunkInfo::Deserialize(Deserializer &deserializer) {
33
- auto type = deserializer.ReadProperty<ChunkInfoType>(100, "type");
33
+ void ChunkInfo::Write(WriteStream &writer) const {
34
+ writer.Write<ChunkInfoType>(type);
35
+ }
36
+
37
+ unique_ptr<ChunkInfo> ChunkInfo::Read(ReadStream &reader) {
38
+ auto type = reader.Read<ChunkInfoType>();
34
39
  switch (type) {
35
40
  case ChunkInfoType::EMPTY_INFO:
36
41
  return nullptr;
37
42
  case ChunkInfoType::CONSTANT_INFO:
38
- return ChunkConstantInfo::Deserialize(deserializer);
43
+ return ChunkConstantInfo::Read(reader);
39
44
  case ChunkInfoType::VECTOR_INFO:
40
- return ChunkVectorInfo::Deserialize(deserializer);
45
+ return ChunkVectorInfo::Read(reader);
41
46
  default:
42
47
  throw SerializationException("Could not deserialize Chunk Info Type: unrecognized type");
43
48
  }
@@ -79,22 +84,23 @@ void ChunkConstantInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t
79
84
  insert_id = commit_id;
80
85
  }
81
86
 
87
+ bool ChunkConstantInfo::HasDeletes() const {
88
+ bool is_deleted = insert_id >= TRANSACTION_ID_START || delete_id < TRANSACTION_ID_START;
89
+ return is_deleted;
90
+ }
91
+
82
92
  idx_t ChunkConstantInfo::GetCommittedDeletedCount(idx_t max_count) {
83
93
  return delete_id < TRANSACTION_ID_START ? max_count : 0;
84
94
  }
85
95
 
86
- void ChunkConstantInfo::Serialize(Serializer &serializer) const {
87
- bool is_deleted = insert_id >= TRANSACTION_ID_START || delete_id < TRANSACTION_ID_START;
88
- if (!is_deleted) {
89
- serializer.WriteProperty(100, "type", ChunkInfoType::EMPTY_INFO);
90
- return;
91
- }
92
- serializer.WriteProperty(100, "type", type);
93
- serializer.WriteProperty(200, "start", start);
96
+ void ChunkConstantInfo::Write(WriteStream &writer) const {
97
+ D_ASSERT(HasDeletes());
98
+ ChunkInfo::Write(writer);
99
+ writer.Write<idx_t>(start);
94
100
  }
95
101
 
96
- unique_ptr<ChunkInfo> ChunkConstantInfo::Deserialize(Deserializer &deserializer) {
97
- auto start = deserializer.ReadProperty<idx_t>(200, "start");
102
+ unique_ptr<ChunkInfo> ChunkConstantInfo::Read(ReadStream &reader) {
103
+ auto start = reader.Read<idx_t>();
98
104
  auto info = make_uniq<ChunkConstantInfo>(start);
99
105
  info->insert_id = 0;
100
106
  info->delete_id = 0;
@@ -218,6 +224,10 @@ void ChunkVectorInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t e
218
224
  }
219
225
  }
220
226
 
227
+ bool ChunkVectorInfo::HasDeletes() const {
228
+ return any_deleted;
229
+ }
230
+
221
231
  idx_t ChunkVectorInfo::GetCommittedDeletedCount(idx_t max_count) {
222
232
  if (!any_deleted) {
223
233
  return 0;
@@ -231,45 +241,41 @@ idx_t ChunkVectorInfo::GetCommittedDeletedCount(idx_t max_count) {
231
241
  return delete_count;
232
242
  }
233
243
 
234
- void ChunkVectorInfo::Serialize(Serializer &serializer) const {
244
+ void ChunkVectorInfo::Write(WriteStream &writer) const {
235
245
  SelectionVector sel(STANDARD_VECTOR_SIZE);
236
246
  transaction_t start_time = TRANSACTION_ID_START - 1;
237
247
  transaction_t transaction_id = DConstants::INVALID_INDEX;
238
248
  idx_t count = GetSelVector(start_time, transaction_id, sel, STANDARD_VECTOR_SIZE);
239
249
  if (count == STANDARD_VECTOR_SIZE) {
240
250
  // nothing is deleted: skip writing anything
241
- serializer.WriteProperty(100, "type", ChunkInfoType::EMPTY_INFO);
251
+ writer.Write<ChunkInfoType>(ChunkInfoType::EMPTY_INFO);
242
252
  return;
243
253
  }
244
254
  if (count == 0) {
245
255
  // everything is deleted: write a constant vector
246
- serializer.WriteProperty(100, "type", ChunkInfoType::CONSTANT_INFO);
247
- serializer.WriteProperty(200, "start", start);
256
+ writer.Write<ChunkInfoType>(ChunkInfoType::CONSTANT_INFO);
257
+ writer.Write<idx_t>(start);
248
258
  return;
249
259
  }
250
260
  // write a boolean vector
251
- serializer.WriteProperty(100, "type", ChunkInfoType::VECTOR_INFO);
252
- serializer.WriteProperty(200, "start", start);
253
- bool deleted_tuples[STANDARD_VECTOR_SIZE];
254
- for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) {
255
- deleted_tuples[i] = true;
256
- }
261
+ ChunkInfo::Write(writer);
262
+ writer.Write<idx_t>(start);
263
+ ValidityMask mask(STANDARD_VECTOR_SIZE);
264
+ mask.Initialize(STANDARD_VECTOR_SIZE);
257
265
  for (idx_t i = 0; i < count; i++) {
258
- deleted_tuples[sel.get_index(i)] = false;
266
+ mask.SetInvalid(sel.get_index(i));
259
267
  }
260
- serializer.WriteProperty(201, "deleted_tuples", data_ptr_cast(deleted_tuples), sizeof(bool) * STANDARD_VECTOR_SIZE);
268
+ mask.Write(writer, STANDARD_VECTOR_SIZE);
261
269
  }
262
270
 
263
- unique_ptr<ChunkInfo> ChunkVectorInfo::Deserialize(Deserializer &deserializer) {
264
- auto start = deserializer.ReadProperty<idx_t>(200, "start");
265
-
271
+ unique_ptr<ChunkInfo> ChunkVectorInfo::Read(ReadStream &reader) {
272
+ auto start = reader.Read<idx_t>();
266
273
  auto result = make_uniq<ChunkVectorInfo>(start);
267
274
  result->any_deleted = true;
268
- bool deleted_tuples[STANDARD_VECTOR_SIZE];
269
- deserializer.ReadProperty(201, "deleted_tuples", data_ptr_cast(deleted_tuples),
270
- sizeof(bool) * STANDARD_VECTOR_SIZE);
275
+ ValidityMask mask;
276
+ mask.Read(reader, STANDARD_VECTOR_SIZE);
271
277
  for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) {
272
- if (deleted_tuples[i]) {
278
+ if (mask.RowIsValid(i)) {
273
279
  result->deleted[i] = 0;
274
280
  }
275
281
  }
@@ -87,7 +87,7 @@ void ColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx)
87
87
  state.last_offset = 0;
88
88
  }
89
89
 
90
- idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remaining) {
90
+ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remaining, bool has_updates) {
91
91
  state.previous_states.clear();
92
92
  if (state.version != version) {
93
93
  InitializeScanWithOffset(state, state.row_index);
@@ -113,7 +113,8 @@ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remai
113
113
  idx_t scan_count = MinValue<idx_t>(remaining, state.current->start + state.current->count - state.row_index);
114
114
  idx_t result_offset = initial_remaining - remaining;
115
115
  if (scan_count > 0) {
116
- state.current->Scan(state, scan_count, result, result_offset, scan_count == initial_remaining);
116
+ state.current->Scan(state, scan_count, result, result_offset,
117
+ !has_updates && scan_count == initial_remaining);
117
118
 
118
119
  state.row_index += scan_count;
119
120
  remaining -= scan_count;
@@ -138,10 +139,14 @@ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remai
138
139
 
139
140
  template <bool SCAN_COMMITTED, bool ALLOW_UPDATES>
140
141
  idx_t ColumnData::ScanVector(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result) {
141
- auto scan_count = ScanVector(state, result, STANDARD_VECTOR_SIZE);
142
-
143
- lock_guard<mutex> update_guard(update_lock);
144
- if (updates) {
142
+ bool has_updates;
143
+ {
144
+ lock_guard<mutex> update_guard(update_lock);
145
+ has_updates = updates ? true : false;
146
+ }
147
+ auto scan_count = ScanVector(state, result, STANDARD_VECTOR_SIZE, has_updates);
148
+ if (has_updates) {
149
+ lock_guard<mutex> update_guard(update_lock);
145
150
  if (!ALLOW_UPDATES && updates->HasUncommittedUpdates(vector_index)) {
146
151
  throw TransactionException("Cannot create index with outstanding updates");
147
152
  }
@@ -179,7 +184,7 @@ idx_t ColumnData::ScanCommitted(idx_t vector_index, ColumnScanState &state, Vect
179
184
  void ColumnData::ScanCommittedRange(idx_t row_group_start, idx_t offset_in_row_group, idx_t count, Vector &result) {
180
185
  ColumnScanState child_state;
181
186
  InitializeScanWithOffset(child_state, row_group_start + offset_in_row_group);
182
- auto scan_count = ScanVector(child_state, result, count);
187
+ auto scan_count = ScanVector(child_state, result, count, updates ? true : false);
183
188
  if (updates) {
184
189
  result.Flatten(scan_count);
185
190
  updates->FetchCommittedRange(offset_in_row_group, count, result);
@@ -192,7 +197,7 @@ idx_t ColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count)
192
197
  }
193
198
  // ScanCount can only be used if there are no updates
194
199
  D_ASSERT(!updates);
195
- return ScanVector(state, result, count);
200
+ return ScanVector(state, result, count, false);
196
201
  }
197
202
 
198
203
  void ColumnData::Select(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result,
@@ -339,7 +344,7 @@ idx_t ColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) {
339
344
  state.row_index = start + ((row_id - start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE);
340
345
  state.current = data.GetSegment(state.row_index);
341
346
  state.internal_index = state.current->start;
342
- return ScanVector(state, result, STANDARD_VECTOR_SIZE);
347
+ return ScanVector(state, result, STANDARD_VECTOR_SIZE, false);
343
348
  }
344
349
 
345
350
  void ColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
@@ -86,7 +86,7 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co
86
86
  D_ASSERT(!updates);
87
87
 
88
88
  Vector offset_vector(LogicalType::UBIGINT, count);
89
- idx_t scan_count = ScanVector(state, offset_vector, count);
89
+ idx_t scan_count = ScanVector(state, offset_vector, count, false);
90
90
  D_ASSERT(scan_count > 0);
91
91
  validity.ScanCount(state.child_states[0], result, count);
92
92
 
@@ -132,7 +132,7 @@ void ListColumnData::Skip(ColumnScanState &state, idx_t count) {
132
132
  // note that we only need to read the first and last entry
133
133
  // however, let's just read all "count" entries for now
134
134
  Vector result(LogicalType::UBIGINT, count);
135
- idx_t scan_count = ScanVector(state, result, count);
135
+ idx_t scan_count = ScanVector(state, result, count, false);
136
136
  if (scan_count == 0) {
137
137
  return;
138
138
  }