duckdb 0.7.2-dev1034.0 → 0.7.2-dev1138.0

This diff shows the content of publicly released package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (91)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  3. package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
  4. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
  5. package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
  6. package/src/duckdb/src/common/sort/comparators.cpp +14 -5
  7. package/src/duckdb/src/common/types/interval.cpp +0 -41
  8. package/src/duckdb/src/common/types/list_segment.cpp +658 -0
  9. package/src/duckdb/src/common/types/string_heap.cpp +1 -1
  10. package/src/duckdb/src/common/types/string_type.cpp +1 -1
  11. package/src/duckdb/src/common/types/vector.cpp +1 -1
  12. package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
  13. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
  14. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
  15. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  16. package/src/duckdb/src/execution/index/art/art.cpp +13 -0
  17. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
  18. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
  19. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
  20. package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
  21. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
  22. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
  23. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
  24. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
  25. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
  26. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
  27. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
  28. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
  29. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
  30. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
  31. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -0
  32. package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
  33. package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
  34. package/src/duckdb/src/function/table/read_csv.cpp +5 -0
  35. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
  39. package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
  40. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
  41. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
  42. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
  43. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
  44. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
  45. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  46. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  50. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
  53. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
  54. package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
  55. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
  56. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
  57. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
  58. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
  59. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
  60. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
  61. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
  62. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
  63. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
  64. package/src/duckdb/src/include/duckdb.h +21 -0
  65. package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
  66. package/src/duckdb/src/main/settings/settings.cpp +20 -8
  67. package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
  68. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
  69. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
  70. package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
  71. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
  72. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
  73. package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
  74. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
  75. package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
  76. package/src/duckdb/src/storage/compression/rle.cpp +1 -0
  77. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
  78. package/src/duckdb/src/storage/data_table.cpp +3 -3
  79. package/src/duckdb/src/storage/local_storage.cpp +7 -0
  80. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  81. package/src/duckdb/src/storage/table/column_data.cpp +75 -18
  82. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
  83. package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
  84. package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
  85. package/src/duckdb/src/storage/table/row_group.cpp +200 -136
  86. package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
  87. package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
  88. package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
  89. package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
  90. package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
  91. package/src/duckdb/ub_src_common_types.cpp +2 -0
@@ -3,21 +3,20 @@
  #include "duckdb/execution/expression_executor.hpp"
  #include "duckdb/main/client_context.hpp"
  #include "duckdb/storage/data_table.hpp"
- #include "duckdb/transaction/transaction.hpp"
  #include "duckdb/planner/constraints/bound_not_null_constraint.hpp"
  #include "duckdb/storage/checkpoint/table_data_writer.hpp"
  #include "duckdb/storage/table/row_group_segment_tree.hpp"
  #include "duckdb/storage/meta_block_reader.hpp"
+ #include "duckdb/storage/table/append_state.hpp"
+ #include "duckdb/storage/table/scan_state.hpp"

  namespace duckdb {

  //===--------------------------------------------------------------------===//
  // Row Group Segment Tree
  //===--------------------------------------------------------------------===//
- RowGroupSegmentTree::RowGroupSegmentTree(DataTableInfo &table_info_p, BlockManager &block_manager_p,
-                                          vector<LogicalType> column_types_p)
-     : SegmentTree<RowGroup, true>(), info(table_info_p), block_manager(block_manager_p),
-       column_types(std::move(column_types_p)), current_row_group(0), max_row_group(0) {
+ RowGroupSegmentTree::RowGroupSegmentTree(RowGroupCollection &collection)
+     : SegmentTree<RowGroup, true>(), collection(collection), current_row_group(0), max_row_group(0) {
  }
  RowGroupSegmentTree::~RowGroupSegmentTree() {
  }
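Note (not part of the package diff): the constructor change above removes RowGroupSegmentTree's duplicated members (table info, block manager, column types) in favour of a single back-reference to the owning RowGroupCollection, which later hunks query through collection.GetBlockManager() and collection.GetTypes(). A minimal standalone C++ sketch of that back-reference pattern, using hypothetical Collection/Tree types rather than DuckDB's classes:

#include <memory>
#include <string>
#include <vector>

struct Collection;

struct Tree {
    // one reference to the owner replaces copies of its metadata
    explicit Tree(Collection &collection) : collection(collection) {
    }
    Collection &collection;
};

struct Collection {
    std::vector<std::string> types {"INTEGER", "VARCHAR"};
    std::shared_ptr<Tree> tree;
    Collection() {
        // the owner hands itself to the tree it creates
        tree = std::make_shared<Tree>(*this);
    }
};

int main() {
    Collection collection;
    // the tree reads metadata through its owner instead of storing its own copy
    return collection.tree->collection.types.size() == 2 ? 0 : 1;
}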
@@ -27,7 +26,7 @@ void RowGroupSegmentTree::Initialize(PersistentTableData &data) {
      current_row_group = 0;
      max_row_group = data.row_group_count;
      finished_loading = false;
-     reader = make_unique<MetaBlockReader>(block_manager, data.block_id);
+     reader = make_unique<MetaBlockReader>(collection.GetBlockManager(), data.block_id);
      reader->offset = data.offset;
  }

@@ -36,10 +35,11 @@ unique_ptr<RowGroup> RowGroupSegmentTree::LoadSegment() {
          finished_loading = true;
          return nullptr;
      }
-     auto row_group_pointer = RowGroup::Deserialize(*reader, column_types);
+     auto row_group_pointer = RowGroup::Deserialize(*reader, collection.GetTypes());
      current_row_group++;
-     return make_unique<RowGroup>(info.db, block_manager, info, column_types, std::move(row_group_pointer));
+     return make_unique<RowGroup>(collection, std::move(row_group_pointer));
  }
+
  //===--------------------------------------------------------------------===//
  // Row Group Collection
  //===--------------------------------------------------------------------===//
@@ -47,7 +47,7 @@ RowGroupCollection::RowGroupCollection(shared_ptr<DataTableInfo> info_p, BlockMa
                                         vector<LogicalType> types_p, idx_t row_start_p, idx_t total_rows_p)
      : block_manager(block_manager), total_rows(total_rows_p), info(std::move(info_p)), types(std::move(types_p)),
        row_start(row_start_p) {
-     row_groups = make_shared<RowGroupSegmentTree>(*info, block_manager, types);
+     row_groups = make_shared<RowGroupSegmentTree>(*this);
  }

  idx_t RowGroupCollection::GetTotalRows() const {
@@ -62,6 +62,14 @@ Allocator &RowGroupCollection::GetAllocator() const {
      return Allocator::Get(info->db);
  }

+ AttachedDatabase &RowGroupCollection::GetAttached() {
+     return GetTableInfo().db;
+ }
+
+ DatabaseInstance &RowGroupCollection::GetDatabase() {
+     return GetAttached().GetDatabase();
+ }
+

  //===--------------------------------------------------------------------===//
  // Initialize
@@ -79,7 +87,7 @@ void RowGroupCollection::InitializeEmpty() {

  void RowGroupCollection::AppendRowGroup(SegmentLock &l, idx_t start_row) {
      D_ASSERT(start_row >= row_start);
-     auto new_row_group = make_unique<RowGroup>(info->db, block_manager, *info, start_row, 0);
+     auto new_row_group = make_unique<RowGroup>(*this, start_row, 0);
      new_row_group->InitializeEmpty(types);
      row_groups->AppendSegment(l, std::move(new_row_group));
  }
@@ -94,6 +102,7 @@ void RowGroupCollection::Verify() {
      row_groups->Verify();
      for (auto &row_group : row_groups->Segments()) {
          row_group.Verify();
+         D_ASSERT(&row_group.GetCollection() == this);
          D_ASSERT(row_group.start == this->row_start + current_total_rows);
          current_total_rows += row_group.count;
      }
@@ -110,7 +119,8 @@ void RowGroupCollection::InitializeScan(CollectionScanState &state, const vector
      D_ASSERT(row_group);
      state.row_groups = row_groups.get();
      state.max_row = row_start + total_rows;
-     while (row_group && !row_group->InitializeScan(state.row_group_state)) {
+     state.Initialize(GetTypes());
+     while (row_group && !row_group->InitializeScan(state)) {
          row_group = row_groups->GetNextSegment(row_group);
      }
  }
@@ -125,18 +135,22 @@ void RowGroupCollection::InitializeScanWithOffset(CollectionScanState &state, co
      D_ASSERT(row_group);
      state.row_groups = row_groups.get();
      state.max_row = end_row;
+     state.Initialize(GetTypes());
      idx_t start_vector = (start_row - row_group->start) / STANDARD_VECTOR_SIZE;
-     if (!row_group->InitializeScanWithOffset(state.row_group_state, start_vector)) {
+     if (!row_group->InitializeScanWithOffset(state, start_vector)) {
          throw InternalException("Failed to initialize row group scan with offset");
      }
  }

- bool RowGroupCollection::InitializeScanInRowGroup(CollectionScanState &state,
-                                                   ParallelCollectionScanState &parallel_state, idx_t vector_index,
-                                                   idx_t max_row) {
+ bool RowGroupCollection::InitializeScanInRowGroup(CollectionScanState &state, RowGroupCollection &collection,
+                                                   RowGroup &row_group, idx_t vector_index, idx_t max_row) {
      state.max_row = max_row;
-     state.row_groups = parallel_state.collection->row_groups.get();
-     return parallel_state.current_row_group->InitializeScanWithOffset(state.row_group_state, vector_index);
+     state.row_groups = collection.row_groups.get();
+     if (!state.column_scans) {
+         // initialize the scan state
+         state.Initialize(collection.GetTypes());
+     }
+     return row_group.InitializeScanWithOffset(state, vector_index);
  }

  void RowGroupCollection::InitializeParallelScan(ParallelCollectionScanState &state) {
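Note (not part of the package diff): InitializeScanInRowGroup now receives the collection and row group explicitly and only calls state.Initialize() when the state's column scans have not been set up yet, so a scan state that is reused across row groups is allocated once. A standalone sketch of that lazy-initialization guard, with hypothetical ScanState/column_scans names:

#include <cstddef>
#include <memory>
#include <vector>

struct ScanState {
    // null until the first row group initializes it (stand-in for per-column scan states)
    std::unique_ptr<std::vector<int>> column_scans;

    void Initialize(std::size_t column_count) {
        column_scans = std::make_unique<std::vector<int>>(column_count);
    }
};

// called once per row group; only the first call pays for the allocation
static void InitializeScanInRowGroup(ScanState &state, std::size_t column_count) {
    if (!state.column_scans) {
        state.Initialize(column_count);
    }
    // later calls reuse the existing per-column state
}

int main() {
    ScanState state;
    InitializeScanInRowGroup(state, 3);
    InitializeScanInRowGroup(state, 3);
    return state.column_scans && state.column_scans->size() == 3 ? 0 : 1;
}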
@@ -145,37 +159,52 @@ void RowGroupCollection::InitializeParallelScan(ParallelCollectionScanState &sta
      state.vector_index = 0;
      state.max_row = row_start + total_rows;
      state.batch_index = 0;
+     state.processed_rows = 0;
  }

  bool RowGroupCollection::NextParallelScan(ClientContext &context, ParallelCollectionScanState &state,
                                            CollectionScanState &scan_state) {
-     while (state.current_row_group && state.current_row_group->count > 0) {
+     while (true) {
          idx_t vector_index;
          idx_t max_row;
-         if (ClientConfig::GetConfig(context).verify_parallelism) {
-             vector_index = state.vector_index;
-             max_row = state.current_row_group->start +
-                       MinValue<idx_t>(state.current_row_group->count,
-                                       STANDARD_VECTOR_SIZE * state.vector_index + STANDARD_VECTOR_SIZE);
-             D_ASSERT(vector_index * STANDARD_VECTOR_SIZE < state.current_row_group->count);
-         } else {
-             vector_index = 0;
-             max_row = state.current_row_group->start + state.current_row_group->count;
-         }
-         max_row = MinValue<idx_t>(max_row, state.max_row);
-         bool need_to_scan = InitializeScanInRowGroup(scan_state, state, vector_index, max_row);
-         if (ClientConfig::GetConfig(context).verify_parallelism) {
-             state.vector_index++;
-             if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
+         RowGroupCollection *collection;
+         RowGroup *row_group;
+         {
+             // select the next row group to scan from the parallel state
+             lock_guard<mutex> l(state.lock);
+             if (!state.current_row_group || state.current_row_group->count == 0) {
+                 // no more data left to scan
+                 break;
+             }
+             collection = state.collection;
+             row_group = state.current_row_group;
+             if (ClientConfig::GetConfig(context).verify_parallelism) {
+                 vector_index = state.vector_index;
+                 max_row = state.current_row_group->start +
+                           MinValue<idx_t>(state.current_row_group->count,
+                                           STANDARD_VECTOR_SIZE * state.vector_index + STANDARD_VECTOR_SIZE);
+                 D_ASSERT(vector_index * STANDARD_VECTOR_SIZE < state.current_row_group->count);
+                 state.vector_index++;
+                 if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
+                     state.current_row_group = row_groups->GetNextSegment(state.current_row_group);
+                     state.vector_index = 0;
+                 }
+             } else {
+                 state.processed_rows += state.current_row_group->count;
+                 vector_index = 0;
+                 max_row = state.current_row_group->start + state.current_row_group->count;
                  state.current_row_group = row_groups->GetNextSegment(state.current_row_group);
-                 state.vector_index = 0;
              }
-         } else {
-             state.current_row_group = row_groups->GetNextSegment(state.current_row_group);
+             max_row = MinValue<idx_t>(max_row, state.max_row);
+             scan_state.batch_index = ++state.batch_index;
          }
-         scan_state.batch_index = ++state.batch_index;
+         D_ASSERT(collection);
+         D_ASSERT(row_group);
+
+         // initialize the scan for this row group
+         bool need_to_scan = InitializeScanInRowGroup(scan_state, *collection, *row_group, vector_index, max_row);
          if (!need_to_scan) {
-             // filters allow us to skip this row group: move to the next row group
+             // skip this row group
              continue;
          }
          return true;
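Note (not part of the package diff): the rewritten NextParallelScan holds state.lock only while it picks the next row group and advances the shared cursor; initializing the scan for that row group then happens outside the critical section. A standalone sketch of that pattern (hypothetical SharedState/NextTask names, not DuckDB's API):

#include <atomic>
#include <cstddef>
#include <mutex>
#include <thread>
#include <vector>

struct SharedState {
    std::mutex lock;
    std::size_t next = 0;                              // shared cursor, guarded by lock
    std::vector<int> row_groups {1, 2, 3, 4, 5, 6, 7, 8};
};

static bool NextTask(SharedState &state, int &out) {
    int picked;
    {
        std::lock_guard<std::mutex> guard(state.lock); // short critical section: pick the work
        if (state.next >= state.row_groups.size()) {
            return false;                              // no more data left to scan
        }
        picked = state.row_groups[state.next++];
    }
    out = picked;                                      // "initialize the scan" outside the lock
    return true;
}

int main() {
    SharedState state;
    std::atomic<int> total{0};
    auto worker = [&]() {
        int row_group;
        while (NextTask(state, row_group)) {
            total += row_group;                        // stand-in for scanning the row group
        }
    };
    std::thread a(worker), b(worker);
    a.join();
    b.join();
    return total == 36 ? 0 : 1;                        // 1 + 2 + ... + 8
}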
@@ -416,7 +445,7 @@ void RowGroupCollection::MergeStorage(RowGroupCollection &data) {
      D_ASSERT(data.types == types);
      auto index = row_start + total_rows.load();
      for (auto &row_group : data.row_groups->Segments()) {
-         auto new_group = make_unique<RowGroup>(row_group, index);
+         auto new_group = make_unique<RowGroup>(row_group, *this, index);
          index += new_group->count;
          row_groups->AppendSegment(std::move(new_group));
      }
@@ -519,8 +548,9 @@ void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_
      DataChunk result;
      result.Initialize(GetAllocator(), types);

-     row_group->InitializeScanWithOffset(state.table_state.row_group_state, row_group_vector_idx);
-     row_group->ScanCommitted(state.table_state.row_group_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
+     state.table_state.Initialize(GetTypes());
+     row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
+     row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
      result.Slice(sel, count);

      indexes.Scan([&](Index &index) {
@@ -604,7 +634,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont
      // fill the column with its DEFAULT value, or NULL if none is specified
      auto new_stats = make_unique<SegmentStatistics>(new_column.GetType());
      for (auto &current_row_group : row_groups->Segments()) {
-         auto new_row_group = current_row_group.AddColumn(new_column, executor, default_value, default_vector);
+         auto new_row_group = current_row_group.AddColumn(*result, new_column, executor, default_value, default_vector);
          // merge in the statistics
          new_row_group->MergeIntoStatistics(new_column_idx, new_column_stats.Statistics());

@@ -623,7 +653,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::RemoveColumn(idx_t col_idx) {
      result->stats.InitializeRemoveColumn(stats, col_idx);

      for (auto &current_row_group : row_groups->Segments()) {
-         auto new_row_group = current_row_group.RemoveColumn(col_idx);
+         auto new_row_group = current_row_group.RemoveColumn(*result, col_idx);
          result->row_groups->AppendSegment(std::move(new_row_group));
      }
      return result;
@@ -661,8 +691,8 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(ClientContext &cont
      // now alter the type of the column within all of the row_groups individually
      auto &changed_stats = result->stats.GetStats(changed_idx);
      for (auto &current_row_group : row_groups->Segments()) {
-         auto new_row_group = current_row_group.AlterType(target_type, changed_idx, executor,
-                                                          scan_state.table_state.row_group_state, scan_chunk);
+         auto new_row_group = current_row_group.AlterType(*result, target_type, changed_idx, executor,
+                                                          scan_state.table_state, scan_chunk);
          new_row_group->MergeIntoStatistics(changed_idx, changed_stats.Statistics());
          result->row_groups->AppendSegment(std::move(new_row_group));
      }
@@ -55,70 +55,64 @@ void ColumnScanState::Next(idx_t count) {
      }
  }

- void ColumnScanState::NextVector() {
-     Next(STANDARD_VECTOR_SIZE);
- }
-
- const vector<column_t> &RowGroupScanState::GetColumnIds() {
+ const vector<column_t> &CollectionScanState::GetColumnIds() {
      return parent.GetColumnIds();
  }

- TableFilterSet *RowGroupScanState::GetFilters() {
+ TableFilterSet *CollectionScanState::GetFilters() {
      return parent.GetFilters();
  }

- AdaptiveFilter *RowGroupScanState::GetAdaptiveFilter() {
+ AdaptiveFilter *CollectionScanState::GetAdaptiveFilter() {
      return parent.GetAdaptiveFilter();
  }

- idx_t RowGroupScanState::GetParentMaxRow() {
-     return parent.max_row;
+ ParallelCollectionScanState::ParallelCollectionScanState()
+     : collection(nullptr), current_row_group(nullptr), processed_rows(0) {
  }

- const vector<column_t> &CollectionScanState::GetColumnIds() {
-     return parent.GetColumnIds();
- }
-
- TableFilterSet *CollectionScanState::GetFilters() {
-     return parent.GetFilters();
- }
-
- AdaptiveFilter *CollectionScanState::GetAdaptiveFilter() {
-     return parent.GetAdaptiveFilter();
+ CollectionScanState::CollectionScanState(TableScanState &parent_p)
+     : row_group(nullptr), vector_index(0), max_row_group_row(0), row_groups(nullptr), max_row(0), batch_index(0),
+       parent(parent_p) {
  }

  bool CollectionScanState::Scan(DuckTransaction &transaction, DataChunk &result) {
-     auto current_row_group = row_group_state.row_group;
-     while (current_row_group) {
-         current_row_group->Scan(transaction, row_group_state, result);
+     while (row_group) {
+         row_group->Scan(transaction, *this, result);
          if (result.size() > 0) {
              return true;
+         } else if (max_row <= row_group->start + row_group->count) {
+             row_group = nullptr;
+             return false;
          } else {
              do {
-                 current_row_group = row_group_state.row_group = row_groups->GetNextSegment(current_row_group);
-                 if (current_row_group) {
-                     bool scan_row_group = current_row_group->InitializeScan(row_group_state);
+                 row_group = row_groups->GetNextSegment(row_group);
+                 if (row_group) {
+                     if (row_group->start >= max_row) {
+                         row_group = nullptr;
+                         break;
+                     }
+                     bool scan_row_group = row_group->InitializeScan(*this);
                      if (scan_row_group) {
                          // scan this row group
                          break;
                      }
                  }
-             } while (current_row_group);
+             } while (row_group);
          }
      }
      return false;
  }

  bool CollectionScanState::ScanCommitted(DataChunk &result, SegmentLock &l, TableScanType type) {
-     auto current_row_group = row_group_state.row_group;
-     while (current_row_group) {
-         current_row_group->ScanCommitted(row_group_state, result, type);
+     while (row_group) {
+         row_group->ScanCommitted(*this, result, type);
          if (result.size() > 0) {
              return true;
          } else {
-             current_row_group = row_group_state.row_group = row_groups->GetNextSegment(l, current_row_group);
-             if (current_row_group) {
-                 current_row_group->InitializeScan(row_group_state);
+             row_group = row_groups->GetNextSegment(l, row_group);
+             if (row_group) {
+                 row_group->InitializeScan(*this);
              }
          }
      }
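Note (not part of the package diff): with RowGroupScanState folded into CollectionScanState, Scan() keeps the current row group as a direct member and now stops as soon as max_row has been reached instead of walking every remaining row group. A standalone sketch of that early-exit iteration (hypothetical RowGroup/ScanState types):

#include <cstddef>
#include <vector>

struct RowGroup {
    long start;
    long count;
};

struct ScanState {
    const std::vector<RowGroup> *groups;
    std::size_t index;
    long max_row;                                     // scan no rows at or beyond this position

    const RowGroup *Next() {
        if (index >= groups->size()) {
            return nullptr;                           // past the last row group
        }
        const RowGroup &rg = (*groups)[index++];
        if (rg.start >= max_row) {
            return nullptr;                           // everything from here on starts past max_row
        }
        return &rg;
    }
};

int main() {
    std::vector<RowGroup> groups {{0, 100}, {100, 100}, {200, 100}};
    ScanState state {&groups, 0, 150};
    int scanned = 0;
    while (state.Next()) {
        scanned++;
    }
    return scanned == 2 ? 0 : 1;                      // the group starting at 200 is skipped
}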
@@ -126,15 +120,14 @@ bool CollectionScanState::ScanCommitted(DataChunk &result, SegmentLock &l, Table
  }

  bool CollectionScanState::ScanCommitted(DataChunk &result, TableScanType type) {
-     auto current_row_group = row_group_state.row_group;
-     while (current_row_group) {
-         current_row_group->ScanCommitted(row_group_state, result, type);
+     while (row_group) {
+         row_group->ScanCommitted(*this, result, type);
          if (result.size() > 0) {
              return true;
          } else {
-             current_row_group = row_group_state.row_group = row_groups->GetNextSegment(current_row_group);
-             if (current_row_group) {
-                 current_row_group->InitializeScan(row_group_state);
+             row_group = row_groups->GetNextSegment(row_group);
+             if (row_group) {
+                 row_group->InitializeScan(*this);
              }
          }
      }
@@ -45,18 +45,16 @@ void StandardColumnData::InitializeScan(ColumnScanState &state) {
      ColumnData::InitializeScan(state);

      // initialize the validity segment
-     ColumnScanState child_state;
-     validity.InitializeScan(child_state);
-     state.child_states.push_back(std::move(child_state));
+     D_ASSERT(state.child_states.size() == 1);
+     validity.InitializeScan(state.child_states[0]);
  }

  void StandardColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
      ColumnData::InitializeScanWithOffset(state, row_idx);

      // initialize the validity segment
-     ColumnScanState child_state;
-     validity.InitializeScanWithOffset(child_state, row_idx);
-     state.child_states.push_back(std::move(child_state));
+     D_ASSERT(state.child_states.size() == 1);
+     validity.InitializeScanWithOffset(state.child_states[0], row_idx);
  }

  idx_t StandardColumnData::Scan(TransactionData transaction, idx_t vector_index, ColumnScanState &state,
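Note (not part of the package diff): the column-data hunks above and below stop pushing a fresh child ColumnScanState on every initialization and instead write into slots that the collection scan state has already sized (validity in slot 0, struct sub-columns in slots 1..n). A standalone sketch of that pre-sized, reusable child-state layout, with hypothetical names:

#include <cstddef>
#include <vector>

// stand-in for a per-column scan state whose child slots are sized once, up front
struct ScanState {
    std::vector<int> child_states;

    void Initialize(std::size_t sub_column_count) {
        child_states.resize(sub_column_count + 1);    // slot 0: validity, slots 1..n: sub-columns
    }
};

// re-initialization writes into the existing slots instead of appending new ones
static void InitializeScan(ScanState &state, std::size_t sub_column_count) {
    state.child_states[0] = -1;                       // validity child
    for (std::size_t i = 0; i < sub_column_count; i++) {
        state.child_states[i + 1] = static_cast<int>(i);
    }
}

int main() {
    ScanState state;
    state.Initialize(2);
    InitializeScan(state, 2);
    InitializeScan(state, 2);                         // safe to repeat: slots are reused, no growth
    return state.child_states.size() == 3 ? 0 : 1;
}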
@@ -2,6 +2,8 @@
  #include "duckdb/storage/statistics/struct_stats.hpp"
  #include "duckdb/transaction/transaction.hpp"
  #include "duckdb/storage/table/column_checkpoint_state.hpp"
+ #include "duckdb/storage/table/append_state.hpp"
+ #include "duckdb/storage/table/scan_state.hpp"

  namespace duckdb {

@@ -39,40 +41,30 @@ idx_t StructColumnData::GetMaxEntry() {
  }

  void StructColumnData::InitializeScan(ColumnScanState &state) {
-     D_ASSERT(state.child_states.empty());
-
+     D_ASSERT(state.child_states.size() == sub_columns.size() + 1);
      state.row_index = 0;
      state.current = nullptr;

      // initialize the validity segment
-     ColumnScanState validity_state;
-     validity.InitializeScan(validity_state);
-     state.child_states.push_back(std::move(validity_state));
+     validity.InitializeScan(state.child_states[0]);

      // initialize the sub-columns
-     for (auto &sub_column : sub_columns) {
-         ColumnScanState child_state;
-         sub_column->InitializeScan(child_state);
-         state.child_states.push_back(std::move(child_state));
+     for (idx_t i = 0; i < sub_columns.size(); i++) {
+         sub_columns[i]->InitializeScan(state.child_states[i + 1]);
      }
  }

  void StructColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
-     D_ASSERT(state.child_states.empty());
-
+     D_ASSERT(state.child_states.size() == sub_columns.size() + 1);
      state.row_index = row_idx;
      state.current = nullptr;

      // initialize the validity segment
-     ColumnScanState validity_state;
-     validity.InitializeScanWithOffset(validity_state, row_idx);
-     state.child_states.push_back(std::move(validity_state));
+     validity.InitializeScanWithOffset(state.child_states[0], row_idx);

      // initialize the sub-columns
-     for (auto &sub_column : sub_columns) {
-         ColumnScanState child_state;
-         sub_column->InitializeScanWithOffset(child_state, row_idx);
-         state.child_states.push_back(std::move(child_state));
+     for (idx_t i = 0; i < sub_columns.size(); i++) {
+         sub_columns[i]->InitializeScanWithOffset(state.child_states[i + 1], row_idx);
      }
  }

@@ -284,6 +276,7 @@ void StructColumnData::DeserializeColumn(Deserializer &source) {
      for (auto &sub_column : sub_columns) {
          sub_column->DeserializeColumn(source);
      }
+     this->count = validity.count;
  }

  void StructColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, TableStorageInfo &result) {
@@ -5,6 +5,9 @@
  #include "duckdb/storage/table/column_data.hpp"
  #include "duckdb/transaction/duck_transaction.hpp"
  #include "duckdb/transaction/update_info.hpp"
+ #include "duckdb/common/printer.hpp"
+
+ #include <algorithm>

  namespace duckdb {

@@ -36,6 +36,8 @@

  #include "src/common/types/interval.cpp"

+ #include "src/common/types/list_segment.cpp"
+
  #include "src/common/types/partitioned_column_data.cpp"

  #include "src/common/types/row_data_collection.cpp"