duckdb 0.7.2-dev832.0 → 0.7.2-dev886.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/package.json +1 -1
  2. package/src/duckdb/src/catalog/catalog.cpp +21 -5
  3. package/src/duckdb/src/catalog/default/default_functions.cpp +3 -0
  4. package/src/duckdb/src/catalog/duck_catalog.cpp +34 -7
  5. package/src/duckdb/src/common/box_renderer.cpp +109 -23
  6. package/src/duckdb/src/common/types/value.cpp +0 -93
  7. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +3 -0
  8. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +5 -8
  9. package/src/duckdb/src/function/scalar/date/date_part.cpp +2 -2
  10. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +2 -2
  11. package/src/duckdb/src/function/scalar/list/list_aggregates.cpp +1 -1
  12. package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +4 -0
  13. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +8 -8
  14. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  15. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +3 -0
  16. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -1
  17. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +8 -2
  18. package/src/duckdb/src/include/duckdb/common/types/value.hpp +0 -31
  19. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  20. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +9 -52
  21. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +62 -0
  22. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  23. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +6 -3
  24. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +3 -2
  25. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +5 -3
  26. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +4 -1
  27. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -3
  28. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +5 -3
  29. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +37 -0
  30. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +8 -1
  31. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +4 -3
  32. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +271 -26
  33. package/src/duckdb/src/optimizer/filter_pushdown.cpp +11 -7
  34. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -10
  35. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +1 -1
  36. package/src/duckdb/src/optimizer/pushdown/pushdown_single_join.cpp +1 -1
  37. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +11 -0
  38. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +31 -6
  39. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +22 -4
  40. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +1 -1
  41. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -11
  42. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +6 -0
  43. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -0
  44. package/src/duckdb/src/storage/compression/numeric_constant.cpp +2 -2
  45. package/src/duckdb/src/storage/data_table.cpp +1 -1
  46. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +145 -83
  47. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +65 -0
  48. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  49. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +1 -6
  50. package/src/duckdb/src/storage/table/column_data.cpp +29 -35
  51. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +5 -5
  52. package/src/duckdb/src/storage/table/column_segment.cpp +8 -7
  53. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -1
  54. package/src/duckdb/src/storage/table/persistent_table_data.cpp +2 -1
  55. package/src/duckdb/src/storage/table/row_group.cpp +9 -9
  56. package/src/duckdb/src/storage/table/row_group_collection.cpp +82 -66
  57. package/src/duckdb/src/storage/table/scan_state.cpp +22 -3
  58. package/src/duckdb/src/storage/table/standard_column_data.cpp +1 -0
  59. package/src/duckdb/src/storage/table/struct_column_data.cpp +1 -0
  60. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11578 -11222
  61. package/src/duckdb/ub_src_storage_statistics.cpp +2 -0
  62. package/src/duckdb/ub_src_storage_table.cpp +0 -2
  63. package/src/duckdb/src/storage/table/segment_tree.cpp +0 -179
@@ -4,7 +4,7 @@
4
4
  #include "duckdb/common/exception.hpp"
5
5
  #include "duckdb/common/field_writer.hpp"
6
6
  #include "duckdb/storage/table/column_data.hpp"
7
- #include "duckdb/storage/table/standard_column_data.hpp"
7
+ #include "duckdb/storage/table/column_checkpoint_state.hpp"
8
8
  #include "duckdb/storage/table/update_segment.hpp"
9
9
  #include "duckdb/common/chrono.hpp"
10
10
  #include "duckdb/planner/table_filter.hpp"
@@ -23,14 +23,14 @@ constexpr const idx_t RowGroup::ROW_GROUP_SIZE;
23
23
 
24
24
  RowGroup::RowGroup(AttachedDatabase &db, BlockManager &block_manager, DataTableInfo &table_info, idx_t start,
25
25
  idx_t count)
26
- : SegmentBase(start, count), db(db), block_manager(block_manager), table_info(table_info) {
26
+ : SegmentBase<RowGroup>(start, count), db(db), block_manager(block_manager), table_info(table_info) {
27
27
 
28
28
  Verify();
29
29
  }
30
30
 
31
31
  RowGroup::RowGroup(AttachedDatabase &db, BlockManager &block_manager, DataTableInfo &table_info,
32
32
  const vector<LogicalType> &types, RowGroupPointer &&pointer)
33
- : SegmentBase(pointer.row_start, pointer.tuple_count), db(db), block_manager(block_manager),
33
+ : SegmentBase<RowGroup>(pointer.row_start, pointer.tuple_count), db(db), block_manager(block_manager),
34
34
  table_info(table_info) {
35
35
  // deserialize the columns
36
36
  if (pointer.data_pointers.size() != types.size()) {
@@ -54,7 +54,7 @@ RowGroup::RowGroup(AttachedDatabase &db, BlockManager &block_manager, DataTableI
54
54
  }
55
55
 
56
56
  RowGroup::RowGroup(RowGroup &row_group, idx_t start)
57
- : SegmentBase(start, row_group.count), db(row_group.db), block_manager(row_group.block_manager),
57
+ : SegmentBase<RowGroup>(start, row_group.count.load()), db(row_group.db), block_manager(row_group.block_manager),
58
58
  table_info(row_group.table_info), version_info(std::move(row_group.version_info)),
59
59
  stats(std::move(row_group.stats)) {
60
60
  for (auto &column : row_group.columns) {
@@ -822,22 +822,22 @@ void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &main_serializer)
822
822
  writer.Finalize();
823
823
  }
824
824
 
825
- RowGroupPointer RowGroup::Deserialize(Deserializer &main_source, const ColumnList &columns) {
825
+ RowGroupPointer RowGroup::Deserialize(Deserializer &main_source, const vector<LogicalType> &columns) {
826
826
  RowGroupPointer result;
827
827
 
828
828
  FieldReader reader(main_source);
829
829
  result.row_start = reader.ReadRequired<uint64_t>();
830
830
  result.tuple_count = reader.ReadRequired<uint64_t>();
831
831
 
832
- auto physical_columns = columns.PhysicalColumnCount();
832
+ auto physical_columns = columns.size();
833
833
  result.data_pointers.reserve(physical_columns);
834
834
  result.statistics.reserve(physical_columns);
835
835
 
836
836
  auto &source = reader.GetSource();
837
- for (auto &col : columns.Physical()) {
838
- result.statistics.push_back(BaseStatistics::Deserialize(source, col.Type()));
837
+ for (auto &col_type : columns) {
838
+ result.statistics.push_back(BaseStatistics::Deserialize(source, col_type));
839
839
  }
840
- for (idx_t i = 0; i < columns.PhysicalColumnCount(); i++) {
840
+ for (idx_t i = 0; i < columns.size(); i++) {
841
841
  BlockPointer pointer;
842
842
  pointer.block_id = source.Read<block_id_t>();
843
843
  pointer.offset = source.Read<uint64_t>();
@@ -6,14 +6,48 @@
6
6
  #include "duckdb/transaction/transaction.hpp"
7
7
  #include "duckdb/planner/constraints/bound_not_null_constraint.hpp"
8
8
  #include "duckdb/storage/checkpoint/table_data_writer.hpp"
9
+ #include "duckdb/storage/table/row_group_segment_tree.hpp"
10
+ #include "duckdb/storage/meta_block_reader.hpp"
9
11
 
10
12
  namespace duckdb {
11
13
 
14
+ //===--------------------------------------------------------------------===//
15
+ // Row Group Segment Tree
16
+ //===--------------------------------------------------------------------===//
17
+ RowGroupSegmentTree::RowGroupSegmentTree(DataTableInfo &table_info_p, BlockManager &block_manager_p,
18
+ vector<LogicalType> column_types_p)
19
+ : SegmentTree<RowGroup, true>(), info(table_info_p), block_manager(block_manager_p),
20
+ column_types(std::move(column_types_p)), current_row_group(0), max_row_group(0) {
21
+ }
22
+ RowGroupSegmentTree::~RowGroupSegmentTree() {
23
+ }
24
+
25
+ void RowGroupSegmentTree::Initialize(PersistentTableData &data) {
26
+ D_ASSERT(data.row_group_count > 0);
27
+ current_row_group = 0;
28
+ max_row_group = data.row_group_count;
29
+ finished_loading = false;
30
+ reader = make_unique<MetaBlockReader>(block_manager, data.block_id);
31
+ reader->offset = data.offset;
32
+ }
33
+
34
+ unique_ptr<RowGroup> RowGroupSegmentTree::LoadSegment() {
35
+ if (current_row_group >= max_row_group) {
36
+ finished_loading = true;
37
+ return nullptr;
38
+ }
39
+ auto row_group_pointer = RowGroup::Deserialize(*reader, column_types);
40
+ current_row_group++;
41
+ return make_unique<RowGroup>(info.db, block_manager, info, column_types, std::move(row_group_pointer));
42
+ }
43
+ //===--------------------------------------------------------------------===//
44
+ // Row Group Collection
45
+ //===--------------------------------------------------------------------===//
12
46
  RowGroupCollection::RowGroupCollection(shared_ptr<DataTableInfo> info_p, BlockManager &block_manager,
13
47
  vector<LogicalType> types_p, idx_t row_start_p, idx_t total_rows_p)
14
48
  : block_manager(block_manager), total_rows(total_rows_p), info(std::move(info_p)), types(std::move(types_p)),
15
49
  row_start(row_start_p) {
16
- row_groups = make_shared<SegmentTree>();
50
+ row_groups = make_shared<RowGroupSegmentTree>(*info, block_manager, types);
17
51
  }
18
52
 
19
53
  idx_t RowGroupCollection::GetTotalRows() const {
@@ -34,14 +68,8 @@ Allocator &RowGroupCollection::GetAllocator() const {
34
68
  void RowGroupCollection::Initialize(PersistentTableData &data) {
35
69
  D_ASSERT(this->row_start == 0);
36
70
  auto l = row_groups->Lock();
37
- for (auto &row_group_pointer : data.row_groups) {
38
- auto new_row_group = make_unique<RowGroup>(info->db, block_manager, *info, types, std::move(row_group_pointer));
39
- auto row_group_count = new_row_group->start + new_row_group->count;
40
- if (row_group_count > this->total_rows) {
41
- this->total_rows = row_group_count;
42
- }
43
- row_groups->AppendSegment(l, std::move(new_row_group));
44
- }
71
+ this->total_rows = data.total_rows;
72
+ row_groups->Initialize(data);
45
73
  stats.Initialize(types, data);
46
74
  }
47
75
 
@@ -64,8 +92,7 @@ void RowGroupCollection::Verify() {
64
92
  #ifdef DEBUG
65
93
  idx_t current_total_rows = 0;
66
94
  row_groups->Verify();
67
- for (auto segment = row_groups->GetRootSegment(); segment; segment = segment->Next()) {
68
- auto &row_group = (RowGroup &)*segment;
95
+ for (auto &row_group : row_groups->Segments()) {
69
96
  row_group.Verify();
70
97
  D_ASSERT(row_group.start == this->row_start + current_total_rows);
71
98
  current_total_rows += row_group.count;
@@ -79,11 +106,12 @@ void RowGroupCollection::Verify() {
79
106
  //===--------------------------------------------------------------------===//
80
107
  void RowGroupCollection::InitializeScan(CollectionScanState &state, const vector<column_t> &column_ids,
81
108
  TableFilterSet *table_filters) {
82
- auto row_group = (RowGroup *)row_groups->GetRootSegment();
109
+ auto row_group = row_groups->GetRootSegment();
83
110
  D_ASSERT(row_group);
111
+ state.row_groups = row_groups.get();
84
112
  state.max_row = row_start + total_rows;
85
113
  while (row_group && !row_group->InitializeScan(state.row_group_state)) {
86
- row_group = (RowGroup *)row_group->Next();
114
+ row_group = row_groups->GetNextSegment(row_group);
87
115
  }
88
116
  }
89
117
 
@@ -93,8 +121,9 @@ void RowGroupCollection::InitializeCreateIndexScan(CreateIndexScanState &state)
93
121
 
94
122
  void RowGroupCollection::InitializeScanWithOffset(CollectionScanState &state, const vector<column_t> &column_ids,
95
123
  idx_t start_row, idx_t end_row) {
96
- auto row_group = (RowGroup *)row_groups->GetSegment(start_row);
124
+ auto row_group = row_groups->GetSegment(start_row);
97
125
  D_ASSERT(row_group);
126
+ state.row_groups = row_groups.get();
98
127
  state.max_row = end_row;
99
128
  idx_t start_vector = (start_row - row_group->start) / STANDARD_VECTOR_SIZE;
100
129
  if (!row_group->InitializeScanWithOffset(state.row_group_state, start_vector)) {
@@ -102,14 +131,17 @@ void RowGroupCollection::InitializeScanWithOffset(CollectionScanState &state, co
102
131
  }
103
132
  }
104
133
 
105
- bool RowGroupCollection::InitializeScanInRowGroup(CollectionScanState &state, RowGroup *row_group, idx_t vector_index,
134
+ bool RowGroupCollection::InitializeScanInRowGroup(CollectionScanState &state,
135
+ ParallelCollectionScanState &parallel_state, idx_t vector_index,
106
136
  idx_t max_row) {
107
137
  state.max_row = max_row;
108
- return row_group->InitializeScanWithOffset(state.row_group_state, vector_index);
138
+ state.row_groups = parallel_state.collection->row_groups.get();
139
+ return parallel_state.current_row_group->InitializeScanWithOffset(state.row_group_state, vector_index);
109
140
  }
110
141
 
111
142
  void RowGroupCollection::InitializeParallelScan(ParallelCollectionScanState &state) {
112
- state.current_row_group = (RowGroup *)row_groups->GetRootSegment();
143
+ state.collection = this;
144
+ state.current_row_group = row_groups->GetRootSegment();
113
145
  state.vector_index = 0;
114
146
  state.max_row = row_start + total_rows;
115
147
  state.batch_index = 0;
@@ -131,15 +163,15 @@ bool RowGroupCollection::NextParallelScan(ClientContext &context, ParallelCollec
131
163
  max_row = state.current_row_group->start + state.current_row_group->count;
132
164
  }
133
165
  max_row = MinValue<idx_t>(max_row, state.max_row);
134
- bool need_to_scan = InitializeScanInRowGroup(scan_state, state.current_row_group, vector_index, max_row);
166
+ bool need_to_scan = InitializeScanInRowGroup(scan_state, state, vector_index, max_row);
135
167
  if (ClientConfig::GetConfig(context).verify_parallelism) {
136
168
  state.vector_index++;
137
169
  if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
138
- state.current_row_group = (RowGroup *)state.current_row_group->Next();
170
+ state.current_row_group = row_groups->GetNextSegment(state.current_row_group);
139
171
  state.vector_index = 0;
140
172
  }
141
173
  } else {
142
- state.current_row_group = (RowGroup *)state.current_row_group->Next();
174
+ state.current_row_group = row_groups->GetNextSegment(state.current_row_group);
143
175
  }
144
176
  scan_state.batch_index = ++state.batch_index;
145
177
  if (!need_to_scan) {
@@ -204,7 +236,7 @@ void RowGroupCollection::Fetch(TransactionData transaction, DataChunk &result, c
204
236
  // in parallel append scenarios it is possible for the row_id
205
237
  continue;
206
238
  }
207
- row_group = (RowGroup *)row_groups->GetSegmentByIndex(l, segment_index);
239
+ row_group = row_groups->GetSegmentByIndex(l, segment_index);
208
240
  }
209
241
  if (!row_group->Fetch(transaction, row_id - row_group->start)) {
210
242
  continue;
@@ -246,7 +278,7 @@ void RowGroupCollection::InitializeAppend(TransactionData transaction, TableAppe
246
278
  // empty row group collection: empty first row group
247
279
  AppendRowGroup(l, row_start);
248
280
  }
249
- state.start_row_group = (RowGroup *)row_groups->GetLastSegment(l);
281
+ state.start_row_group = row_groups->GetLastSegment(l);
250
282
  D_ASSERT(this->row_start + total_rows == state.start_row_group->start + state.start_row_group->count);
251
283
  state.start_row_group->InitializeAppend(state.row_group_append_state);
252
284
  state.remaining = append_count;
@@ -306,7 +338,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
306
338
  auto l = row_groups->Lock();
307
339
  AppendRowGroup(l, next_start);
308
340
  // set up the append state for this row_group
309
- auto last_row_group = (RowGroup *)row_groups->GetLastSegment(l);
341
+ auto last_row_group = row_groups->GetLastSegment(l);
310
342
  last_row_group->InitializeAppend(state.row_group_append_state);
311
343
  if (state.remaining > 0) {
312
344
  last_row_group->AppendVersionInfo(state.transaction, state.remaining);
@@ -331,7 +363,7 @@ void RowGroupCollection::FinalizeAppend(TransactionData transaction, TableAppend
331
363
  auto append_count = MinValue<idx_t>(remaining, RowGroup::ROW_GROUP_SIZE - row_group->count);
332
364
  row_group->AppendVersionInfo(transaction, append_count);
333
365
  remaining -= append_count;
334
- row_group = (RowGroup *)row_group->Next();
366
+ row_group = row_groups->GetNextSegment(row_group);
335
367
  }
336
368
  total_rows += state.total_append_count;
337
369
 
@@ -342,7 +374,7 @@ void RowGroupCollection::FinalizeAppend(TransactionData transaction, TableAppend
342
374
  }
343
375
 
344
376
  void RowGroupCollection::CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count) {
345
- auto row_group = (RowGroup *)row_groups->GetSegment(row_start);
377
+ auto row_group = row_groups->GetSegment(row_start);
346
378
  D_ASSERT(row_group);
347
379
  idx_t current_row = row_start;
348
380
  idx_t remaining = count;
@@ -357,7 +389,7 @@ void RowGroupCollection::CommitAppend(transaction_t commit_id, idx_t row_start,
357
389
  if (remaining == 0) {
358
390
  break;
359
391
  }
360
- row_group = (RowGroup *)row_group->Next();
392
+ row_group = row_groups->GetNextSegment(row_group);
361
393
  }
362
394
  }
363
395
 
@@ -371,7 +403,7 @@ void RowGroupCollection::RevertAppendInternal(idx_t start_row, idx_t count) {
371
403
  // find the segment index that the current row belongs to
372
404
  idx_t segment_index = row_groups->GetSegmentIndex(l, start_row);
373
405
  auto segment = row_groups->GetSegmentByIndex(l, segment_index);
374
- auto &info = (RowGroup &)*segment;
406
+ auto &info = *segment;
375
407
 
376
408
  // remove any segments AFTER this segment: they should be deleted entirely
377
409
  row_groups->EraseSegments(l, segment_index);
@@ -383,8 +415,7 @@ void RowGroupCollection::RevertAppendInternal(idx_t start_row, idx_t count) {
383
415
  void RowGroupCollection::MergeStorage(RowGroupCollection &data) {
384
416
  D_ASSERT(data.types == types);
385
417
  auto index = row_start + total_rows.load();
386
- for (auto segment = data.row_groups->GetRootSegment(); segment; segment = segment->Next()) {
387
- auto &row_group = (RowGroup &)*segment;
418
+ for (auto &row_group : data.row_groups->Segments()) {
388
419
  auto new_group = make_unique<RowGroup>(row_group, index);
389
420
  index += new_group->count;
390
421
  row_groups->AppendSegment(std::move(new_group));
@@ -405,7 +436,7 @@ idx_t RowGroupCollection::Delete(TransactionData transaction, DataTable *table,
405
436
  idx_t pos = 0;
406
437
  do {
407
438
  idx_t start = pos;
408
- auto row_group = (RowGroup *)row_groups->GetSegment(ids[start]);
439
+ auto row_group = row_groups->GetSegment(ids[start]);
409
440
  for (pos++; pos < count; pos++) {
410
441
  D_ASSERT(ids[pos] >= 0);
411
442
  // check if this id still belongs to this row group
@@ -431,7 +462,7 @@ void RowGroupCollection::Update(TransactionData transaction, row_t *ids, const v
431
462
  idx_t pos = 0;
432
463
  do {
433
464
  idx_t start = pos;
434
- auto row_group = (RowGroup *)row_groups->GetSegment(ids[pos]);
465
+ auto row_group = row_groups->GetSegment(ids[pos]);
435
466
  row_t base_id =
436
467
  row_group->start + ((ids[pos] - row_group->start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE);
437
468
  row_t max_id = MinValue<row_t>(base_id + STANDARD_VECTOR_SIZE, row_group->start + row_group->count);
@@ -461,7 +492,7 @@ void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_
461
492
  auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
462
493
 
463
494
  // figure out which row_group to fetch from
464
- auto row_group = (RowGroup *)row_groups->GetSegment(row_ids[0]);
495
+ auto row_group = row_groups->GetSegment(row_ids[0]);
465
496
  auto row_group_vector_idx = (row_ids[0] - row_group->start) / STANDARD_VECTOR_SIZE;
466
497
  auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
467
498
 
@@ -506,7 +537,7 @@ void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_i
506
537
  }
507
538
  // find the row_group this id belongs to
508
539
  auto primary_column_idx = column_path[0];
509
- auto row_group = (RowGroup *)row_groups->GetSegment(first_id);
540
+ auto row_group = row_groups->GetSegment(first_id);
510
541
  row_group->UpdateColumn(transaction, updates, row_ids, column_path);
511
542
 
512
543
  row_group->MergeIntoStatistics(primary_column_idx, stats.GetStats(primary_column_idx).Statistics());
@@ -516,10 +547,9 @@ void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_i
516
547
  // Checkpoint
517
548
  //===--------------------------------------------------------------------===//
518
549
  void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &global_stats) {
519
- for (auto row_group = (RowGroup *)row_groups->GetRootSegment(); row_group;
520
- row_group = (RowGroup *)row_group->Next()) {
521
- auto rowg_writer = writer.GetRowGroupWriter(*row_group);
522
- auto pointer = row_group->Checkpoint(*rowg_writer, global_stats);
550
+ for (auto &row_group : row_groups->Segments()) {
551
+ auto rowg_writer = writer.GetRowGroupWriter(row_group);
552
+ auto pointer = row_group.Checkpoint(*rowg_writer, global_stats);
523
553
  writer.AddRowGroup(std::move(pointer), std::move(rowg_writer));
524
554
  }
525
555
  }
@@ -528,18 +558,14 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl
528
558
  // CommitDrop
529
559
  //===--------------------------------------------------------------------===//
530
560
  void RowGroupCollection::CommitDropColumn(idx_t index) {
531
- auto segment = (RowGroup *)row_groups->GetRootSegment();
532
- while (segment) {
533
- segment->CommitDropColumn(index);
534
- segment = (RowGroup *)segment->Next();
561
+ for (auto &row_group : row_groups->Segments()) {
562
+ row_group.CommitDropColumn(index);
535
563
  }
536
564
  }
537
565
 
538
566
  void RowGroupCollection::CommitDropTable() {
539
- auto segment = (RowGroup *)row_groups->GetRootSegment();
540
- while (segment) {
541
- segment->CommitDrop();
542
- segment = (RowGroup *)segment->Next();
567
+ for (auto &row_group : row_groups->Segments()) {
568
+ row_group.CommitDrop();
543
569
  }
544
570
  }
545
571
 
@@ -547,13 +573,8 @@ void RowGroupCollection::CommitDropTable() {
547
573
  // GetStorageInfo
548
574
  //===--------------------------------------------------------------------===//
549
575
  void RowGroupCollection::GetStorageInfo(TableStorageInfo &result) {
550
- auto row_group = (RowGroup *)row_groups->GetRootSegment();
551
- idx_t row_group_index = 0;
552
- while (row_group) {
553
- row_group->GetStorageInfo(row_group_index, result);
554
- row_group_index++;
555
-
556
- row_group = (RowGroup *)row_group->Next();
576
+ for (auto &row_group : row_groups->Segments()) {
577
+ row_group.GetStorageInfo(row_group.index, result);
557
578
  }
558
579
  }
559
580
 
@@ -582,14 +603,12 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont
582
603
 
583
604
  // fill the column with its DEFAULT value, or NULL if none is specified
584
605
  auto new_stats = make_unique<SegmentStatistics>(new_column.GetType());
585
- auto current_row_group = (RowGroup *)row_groups->GetRootSegment();
586
- while (current_row_group) {
587
- auto new_row_group = current_row_group->AddColumn(new_column, executor, default_value, default_vector);
606
+ for (auto &current_row_group : row_groups->Segments()) {
607
+ auto new_row_group = current_row_group.AddColumn(new_column, executor, default_value, default_vector);
588
608
  // merge in the statistics
589
609
  new_row_group->MergeIntoStatistics(new_column_idx, new_column_stats.Statistics());
590
610
 
591
611
  result->row_groups->AppendSegment(std::move(new_row_group));
592
- current_row_group = (RowGroup *)current_row_group->Next();
593
612
  }
594
613
  return result;
595
614
  }
@@ -603,11 +622,9 @@ shared_ptr<RowGroupCollection> RowGroupCollection::RemoveColumn(idx_t col_idx) {
603
622
  make_shared<RowGroupCollection>(info, block_manager, std::move(new_types), row_start, total_rows.load());
604
623
  result->stats.InitializeRemoveColumn(stats, col_idx);
605
624
 
606
- auto current_row_group = (RowGroup *)row_groups->GetRootSegment();
607
- while (current_row_group) {
608
- auto new_row_group = current_row_group->RemoveColumn(col_idx);
625
+ for (auto &current_row_group : row_groups->Segments()) {
626
+ auto new_row_group = current_row_group.RemoveColumn(col_idx);
609
627
  result->row_groups->AppendSegment(std::move(new_row_group));
610
- current_row_group = (RowGroup *)current_row_group->Next();
611
628
  }
612
629
  return result;
613
630
  }
@@ -642,14 +659,12 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(ClientContext &cont
642
659
  scan_state.table_state.max_row = row_start + total_rows;
643
660
 
644
661
  // now alter the type of the column within all of the row_groups individually
645
- auto current_row_group = (RowGroup *)row_groups->GetRootSegment();
646
662
  auto &changed_stats = result->stats.GetStats(changed_idx);
647
- while (current_row_group) {
648
- auto new_row_group = current_row_group->AlterType(target_type, changed_idx, executor,
649
- scan_state.table_state.row_group_state, scan_chunk);
663
+ for (auto &current_row_group : row_groups->Segments()) {
664
+ auto new_row_group = current_row_group.AlterType(target_type, changed_idx, executor,
665
+ scan_state.table_state.row_group_state, scan_chunk);
650
666
  new_row_group->MergeIntoStatistics(changed_idx, changed_stats.Statistics());
651
667
  result->row_groups->AppendSegment(std::move(new_row_group));
652
- current_row_group = (RowGroup *)current_row_group->Next();
653
668
  }
654
669
 
655
670
  return result;
@@ -677,7 +692,8 @@ void RowGroupCollection::VerifyNewConstraint(DataTable &parent, const BoundConst
677
692
  InitializeCreateIndexScan(state);
678
693
  while (true) {
679
694
  scan_chunk.Reset();
680
- state.table_state.ScanCommitted(scan_chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED);
695
+ state.table_state.ScanCommitted(scan_chunk, state.segment_lock,
696
+ TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED);
681
697
  if (scan_chunk.size() == 0) {
682
698
  break;
683
699
  }
@@ -2,6 +2,9 @@
2
2
  #include "duckdb/storage/table/row_group.hpp"
3
3
  #include "duckdb/storage/table/column_segment.hpp"
4
4
  #include "duckdb/transaction/duck_transaction.hpp"
5
+ #include "duckdb/storage/table/column_data.hpp"
6
+ #include "duckdb/storage/table/row_group_collection.hpp"
7
+ #include "duckdb/storage/table/row_group_segment_tree.hpp"
5
8
 
6
9
  namespace duckdb {
7
10
 
@@ -35,7 +38,7 @@ void ColumnScanState::NextInternal(idx_t count) {
35
38
  }
36
39
  row_index += count;
37
40
  while (row_index >= current->start + current->count) {
38
- current = (ColumnSegment *)current->Next();
41
+ current = segment_tree->GetNextSegment(current);
39
42
  initialized = false;
40
43
  segment_checked = false;
41
44
  if (!current) {
@@ -92,7 +95,7 @@ bool CollectionScanState::Scan(DuckTransaction &transaction, DataChunk &result)
92
95
  return true;
93
96
  } else {
94
97
  do {
95
- current_row_group = row_group_state.row_group = (RowGroup *)current_row_group->Next();
98
+ current_row_group = row_group_state.row_group = row_groups->GetNextSegment(current_row_group);
96
99
  if (current_row_group) {
97
100
  bool scan_row_group = current_row_group->InitializeScan(row_group_state);
98
101
  if (scan_row_group) {
@@ -106,6 +109,22 @@ bool CollectionScanState::Scan(DuckTransaction &transaction, DataChunk &result)
106
109
  return false;
107
110
  }
108
111
 
112
+ bool CollectionScanState::ScanCommitted(DataChunk &result, SegmentLock &l, TableScanType type) {
113
+ auto current_row_group = row_group_state.row_group;
114
+ while (current_row_group) {
115
+ current_row_group->ScanCommitted(row_group_state, result, type);
116
+ if (result.size() > 0) {
117
+ return true;
118
+ } else {
119
+ current_row_group = row_group_state.row_group = row_groups->GetNextSegment(l, current_row_group);
120
+ if (current_row_group) {
121
+ current_row_group->InitializeScan(row_group_state);
122
+ }
123
+ }
124
+ }
125
+ return false;
126
+ }
127
+
109
128
  bool CollectionScanState::ScanCommitted(DataChunk &result, TableScanType type) {
110
129
  auto current_row_group = row_group_state.row_group;
111
130
  while (current_row_group) {
@@ -113,7 +132,7 @@ bool CollectionScanState::ScanCommitted(DataChunk &result, TableScanType type) {
113
132
  if (result.size() > 0) {
114
133
  return true;
115
134
  } else {
116
- current_row_group = row_group_state.row_group = (RowGroup *)current_row_group->Next();
135
+ current_row_group = row_group_state.row_group = row_groups->GetNextSegment(current_row_group);
117
136
  if (current_row_group) {
118
137
  current_row_group->InitializeScan(row_group_state);
119
138
  }
@@ -5,6 +5,7 @@
5
5
  #include "duckdb/storage/data_table.hpp"
6
6
  #include "duckdb/planner/table_filter.hpp"
7
7
  #include "duckdb/transaction/transaction.hpp"
8
+ #include "duckdb/storage/table/column_checkpoint_state.hpp"
8
9
 
9
10
  namespace duckdb {
10
11
 
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/storage/table/struct_column_data.hpp"
2
2
  #include "duckdb/storage/statistics/struct_stats.hpp"
3
3
  #include "duckdb/transaction/transaction.hpp"
4
+ #include "duckdb/storage/table/column_checkpoint_state.hpp"
4
5
 
5
6
  namespace duckdb {
6
7