duckdb 0.7.2-dev1034.0 → 0.7.2-dev1146.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -1
- package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
- package/src/duckdb/src/common/sort/comparators.cpp +14 -5
- package/src/duckdb/src/common/types/column_data_collection_segment.cpp +1 -4
- package/src/duckdb/src/common/types/interval.cpp +0 -41
- package/src/duckdb/src/common/types/list_segment.cpp +658 -0
- package/src/duckdb/src/common/types/string_heap.cpp +1 -1
- package/src/duckdb/src/common/types/string_type.cpp +1 -1
- package/src/duckdb/src/common/types/validity_mask.cpp +24 -7
- package/src/duckdb/src/common/types/vector.cpp +3 -7
- package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
- package/src/duckdb/src/execution/index/art/art.cpp +13 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +18 -10
- package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
- package/src/duckdb/src/function/cast_rules.cpp +9 -4
- package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
- package/src/duckdb/src/function/table/read_csv.cpp +5 -0
- package/src/duckdb/src/function/table/table_scan.cpp +8 -11
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
- package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
- package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
- package/src/duckdb/src/include/duckdb.h +21 -0
- package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
- package/src/duckdb/src/main/settings/settings.cpp +20 -8
- package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
- package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
- package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +1 -0
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/local_storage.cpp +7 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_data.cpp +75 -18
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
- package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
- package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
- package/src/duckdb/src/storage/table/row_group.cpp +200 -136
- package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
- package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
- package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
- package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
- package/src/duckdb/ub_src_common_types.cpp +2 -0
| @@ -3,21 +3,20 @@ | |
| 3 3 | 
             
            #include "duckdb/execution/expression_executor.hpp"
         | 
| 4 4 | 
             
            #include "duckdb/main/client_context.hpp"
         | 
| 5 5 | 
             
            #include "duckdb/storage/data_table.hpp"
         | 
| 6 | 
            -
            #include "duckdb/transaction/transaction.hpp"
         | 
| 7 6 | 
             
            #include "duckdb/planner/constraints/bound_not_null_constraint.hpp"
         | 
| 8 7 | 
             
            #include "duckdb/storage/checkpoint/table_data_writer.hpp"
         | 
| 9 8 | 
             
            #include "duckdb/storage/table/row_group_segment_tree.hpp"
         | 
| 10 9 | 
             
            #include "duckdb/storage/meta_block_reader.hpp"
         | 
| 10 | 
            +
            #include "duckdb/storage/table/append_state.hpp"
         | 
| 11 | 
            +
            #include "duckdb/storage/table/scan_state.hpp"
         | 
| 11 12 |  | 
| 12 13 | 
             
            namespace duckdb {
         | 
| 13 14 |  | 
| 14 15 | 
             
            //===--------------------------------------------------------------------===//
         | 
| 15 16 | 
             
            // Row Group Segment Tree
         | 
| 16 17 | 
             
            //===--------------------------------------------------------------------===//
         | 
| 17 | 
            -
            RowGroupSegmentTree::RowGroupSegmentTree( | 
| 18 | 
            -
             | 
| 19 | 
            -
                : SegmentTree<RowGroup, true>(), info(table_info_p), block_manager(block_manager_p),
         | 
| 20 | 
            -
                  column_types(std::move(column_types_p)), current_row_group(0), max_row_group(0) {
         | 
| 18 | 
            +
            RowGroupSegmentTree::RowGroupSegmentTree(RowGroupCollection &collection)
         | 
| 19 | 
            +
                : SegmentTree<RowGroup, true>(), collection(collection), current_row_group(0), max_row_group(0) {
         | 
| 21 20 | 
             
            }
         | 
| 22 21 | 
             
            RowGroupSegmentTree::~RowGroupSegmentTree() {
         | 
| 23 22 | 
             
            }
         | 
| @@ -27,7 +26,7 @@ void RowGroupSegmentTree::Initialize(PersistentTableData &data) { | |
| 27 26 | 
             
            	current_row_group = 0;
         | 
| 28 27 | 
             
            	max_row_group = data.row_group_count;
         | 
| 29 28 | 
             
            	finished_loading = false;
         | 
| 30 | 
            -
            	reader = make_unique<MetaBlockReader>( | 
| 29 | 
            +
            	reader = make_unique<MetaBlockReader>(collection.GetBlockManager(), data.block_id);
         | 
| 31 30 | 
             
            	reader->offset = data.offset;
         | 
| 32 31 | 
             
            }
         | 
| 33 32 |  | 
| @@ -36,10 +35,11 @@ unique_ptr<RowGroup> RowGroupSegmentTree::LoadSegment() { | |
| 36 35 | 
             
            		finished_loading = true;
         | 
| 37 36 | 
             
            		return nullptr;
         | 
| 38 37 | 
             
            	}
         | 
| 39 | 
            -
            	auto row_group_pointer = RowGroup::Deserialize(*reader,  | 
| 38 | 
            +
            	auto row_group_pointer = RowGroup::Deserialize(*reader, collection.GetTypes());
         | 
| 40 39 | 
             
            	current_row_group++;
         | 
| 41 | 
            -
            	return make_unique<RowGroup>( | 
| 40 | 
            +
            	return make_unique<RowGroup>(collection, std::move(row_group_pointer));
         | 
| 42 41 | 
             
            }
         | 
| 42 | 
            +
             | 
| 43 43 | 
             
            //===--------------------------------------------------------------------===//
         | 
| 44 44 | 
             
            // Row Group Collection
         | 
| 45 45 | 
             
            //===--------------------------------------------------------------------===//
         | 
| @@ -47,7 +47,7 @@ RowGroupCollection::RowGroupCollection(shared_ptr<DataTableInfo> info_p, BlockMa | |
| 47 47 | 
             
                                                   vector<LogicalType> types_p, idx_t row_start_p, idx_t total_rows_p)
         | 
| 48 48 | 
             
                : block_manager(block_manager), total_rows(total_rows_p), info(std::move(info_p)), types(std::move(types_p)),
         | 
| 49 49 | 
             
                  row_start(row_start_p) {
         | 
| 50 | 
            -
            	row_groups = make_shared<RowGroupSegmentTree>(* | 
| 50 | 
            +
            	row_groups = make_shared<RowGroupSegmentTree>(*this);
         | 
| 51 51 | 
             
            }
         | 
| 52 52 |  | 
| 53 53 | 
             
            idx_t RowGroupCollection::GetTotalRows() const {
         | 
| @@ -62,6 +62,14 @@ Allocator &RowGroupCollection::GetAllocator() const { | |
| 62 62 | 
             
            	return Allocator::Get(info->db);
         | 
| 63 63 | 
             
            }
         | 
| 64 64 |  | 
| 65 | 
            +
            AttachedDatabase &RowGroupCollection::GetAttached() {
         | 
| 66 | 
            +
            	return GetTableInfo().db;
         | 
| 67 | 
            +
            }
         | 
| 68 | 
            +
             | 
| 69 | 
            +
            DatabaseInstance &RowGroupCollection::GetDatabase() {
         | 
| 70 | 
            +
            	return GetAttached().GetDatabase();
         | 
| 71 | 
            +
            }
         | 
| 72 | 
            +
             | 
| 65 73 | 
             
            //===--------------------------------------------------------------------===//
         | 
| 66 74 | 
             
            // Initialize
         | 
| 67 75 | 
             
            //===--------------------------------------------------------------------===//
         | 
| @@ -79,7 +87,7 @@ void RowGroupCollection::InitializeEmpty() { | |
| 79 87 |  | 
| 80 88 | 
             
            void RowGroupCollection::AppendRowGroup(SegmentLock &l, idx_t start_row) {
         | 
| 81 89 | 
             
            	D_ASSERT(start_row >= row_start);
         | 
| 82 | 
            -
            	auto new_row_group = make_unique<RowGroup>( | 
| 90 | 
            +
            	auto new_row_group = make_unique<RowGroup>(*this, start_row, 0);
         | 
| 83 91 | 
             
            	new_row_group->InitializeEmpty(types);
         | 
| 84 92 | 
             
            	row_groups->AppendSegment(l, std::move(new_row_group));
         | 
| 85 93 | 
             
            }
         | 
| @@ -94,6 +102,7 @@ void RowGroupCollection::Verify() { | |
| 94 102 | 
             
            	row_groups->Verify();
         | 
| 95 103 | 
             
            	for (auto &row_group : row_groups->Segments()) {
         | 
| 96 104 | 
             
            		row_group.Verify();
         | 
| 105 | 
            +
            		D_ASSERT(&row_group.GetCollection() == this);
         | 
| 97 106 | 
             
            		D_ASSERT(row_group.start == this->row_start + current_total_rows);
         | 
| 98 107 | 
             
            		current_total_rows += row_group.count;
         | 
| 99 108 | 
             
            	}
         | 
| @@ -110,7 +119,8 @@ void RowGroupCollection::InitializeScan(CollectionScanState &state, const vector | |
| 110 119 | 
             
            	D_ASSERT(row_group);
         | 
| 111 120 | 
             
            	state.row_groups = row_groups.get();
         | 
| 112 121 | 
             
            	state.max_row = row_start + total_rows;
         | 
| 113 | 
            -
            	 | 
| 122 | 
            +
            	state.Initialize(GetTypes());
         | 
| 123 | 
            +
            	while (row_group && !row_group->InitializeScan(state)) {
         | 
| 114 124 | 
             
            		row_group = row_groups->GetNextSegment(row_group);
         | 
| 115 125 | 
             
            	}
         | 
| 116 126 | 
             
            }
         | 
| @@ -125,18 +135,22 @@ void RowGroupCollection::InitializeScanWithOffset(CollectionScanState &state, co | |
| 125 135 | 
             
            	D_ASSERT(row_group);
         | 
| 126 136 | 
             
            	state.row_groups = row_groups.get();
         | 
| 127 137 | 
             
            	state.max_row = end_row;
         | 
| 138 | 
            +
            	state.Initialize(GetTypes());
         | 
| 128 139 | 
             
            	idx_t start_vector = (start_row - row_group->start) / STANDARD_VECTOR_SIZE;
         | 
| 129 | 
            -
            	if (!row_group->InitializeScanWithOffset(state | 
| 140 | 
            +
            	if (!row_group->InitializeScanWithOffset(state, start_vector)) {
         | 
| 130 141 | 
             
            		throw InternalException("Failed to initialize row group scan with offset");
         | 
| 131 142 | 
             
            	}
         | 
| 132 143 | 
             
            }
         | 
| 133 144 |  | 
| 134 | 
            -
            bool RowGroupCollection::InitializeScanInRowGroup(CollectionScanState &state,
         | 
| 135 | 
            -
                                                               | 
| 136 | 
            -
                                                              idx_t max_row) {
         | 
| 145 | 
            +
            bool RowGroupCollection::InitializeScanInRowGroup(CollectionScanState &state, RowGroupCollection &collection,
         | 
| 146 | 
            +
                                                              RowGroup &row_group, idx_t vector_index, idx_t max_row) {
         | 
| 137 147 | 
             
            	state.max_row = max_row;
         | 
| 138 | 
            -
            	state.row_groups =  | 
| 139 | 
            -
            	 | 
| 148 | 
            +
            	state.row_groups = collection.row_groups.get();
         | 
| 149 | 
            +
            	if (!state.column_scans) {
         | 
| 150 | 
            +
            		// initialize the scan state
         | 
| 151 | 
            +
            		state.Initialize(collection.GetTypes());
         | 
| 152 | 
            +
            	}
         | 
| 153 | 
            +
            	return row_group.InitializeScanWithOffset(state, vector_index);
         | 
| 140 154 | 
             
            }
         | 
| 141 155 |  | 
| 142 156 | 
             
            void RowGroupCollection::InitializeParallelScan(ParallelCollectionScanState &state) {
         | 
| @@ -145,37 +159,52 @@ void RowGroupCollection::InitializeParallelScan(ParallelCollectionScanState &sta | |
| 145 159 | 
             
            	state.vector_index = 0;
         | 
| 146 160 | 
             
            	state.max_row = row_start + total_rows;
         | 
| 147 161 | 
             
            	state.batch_index = 0;
         | 
| 162 | 
            +
            	state.processed_rows = 0;
         | 
| 148 163 | 
             
            }
         | 
| 149 164 |  | 
| 150 165 | 
             
            bool RowGroupCollection::NextParallelScan(ClientContext &context, ParallelCollectionScanState &state,
         | 
| 151 166 | 
             
                                                      CollectionScanState &scan_state) {
         | 
| 152 | 
            -
            	while ( | 
| 167 | 
            +
            	while (true) {
         | 
| 153 168 | 
             
            		idx_t vector_index;
         | 
| 154 169 | 
             
            		idx_t max_row;
         | 
| 155 | 
            -
            		 | 
| 156 | 
            -
             | 
| 157 | 
            -
             | 
| 158 | 
            -
             | 
| 159 | 
            -
             | 
| 160 | 
            -
            			 | 
| 161 | 
            -
             | 
| 162 | 
            -
             | 
| 163 | 
            -
            			 | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 167 | 
            -
             | 
| 168 | 
            -
             | 
| 169 | 
            -
             | 
| 170 | 
            +
            		RowGroupCollection *collection;
         | 
| 171 | 
            +
            		RowGroup *row_group;
         | 
| 172 | 
            +
            		{
         | 
| 173 | 
            +
            			// select the next row group to scan from the parallel state
         | 
| 174 | 
            +
            			lock_guard<mutex> l(state.lock);
         | 
| 175 | 
            +
            			if (!state.current_row_group || state.current_row_group->count == 0) {
         | 
| 176 | 
            +
            				// no more data left to scan
         | 
| 177 | 
            +
            				break;
         | 
| 178 | 
            +
            			}
         | 
| 179 | 
            +
            			collection = state.collection;
         | 
| 180 | 
            +
            			row_group = state.current_row_group;
         | 
| 181 | 
            +
            			if (ClientConfig::GetConfig(context).verify_parallelism) {
         | 
| 182 | 
            +
            				vector_index = state.vector_index;
         | 
| 183 | 
            +
            				max_row = state.current_row_group->start +
         | 
| 184 | 
            +
            				          MinValue<idx_t>(state.current_row_group->count,
         | 
| 185 | 
            +
            				                          STANDARD_VECTOR_SIZE * state.vector_index + STANDARD_VECTOR_SIZE);
         | 
| 186 | 
            +
            				D_ASSERT(vector_index * STANDARD_VECTOR_SIZE < state.current_row_group->count);
         | 
| 187 | 
            +
            				state.vector_index++;
         | 
| 188 | 
            +
            				if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
         | 
| 189 | 
            +
            					state.current_row_group = row_groups->GetNextSegment(state.current_row_group);
         | 
| 190 | 
            +
            					state.vector_index = 0;
         | 
| 191 | 
            +
            				}
         | 
| 192 | 
            +
            			} else {
         | 
| 193 | 
            +
            				state.processed_rows += state.current_row_group->count;
         | 
| 194 | 
            +
            				vector_index = 0;
         | 
| 195 | 
            +
            				max_row = state.current_row_group->start + state.current_row_group->count;
         | 
| 170 196 | 
             
            				state.current_row_group = row_groups->GetNextSegment(state.current_row_group);
         | 
| 171 | 
            -
            				state.vector_index = 0;
         | 
| 172 197 | 
             
            			}
         | 
| 173 | 
            -
             | 
| 174 | 
            -
            			 | 
| 198 | 
            +
            			max_row = MinValue<idx_t>(max_row, state.max_row);
         | 
| 199 | 
            +
            			scan_state.batch_index = ++state.batch_index;
         | 
| 175 200 | 
             
            		}
         | 
| 176 | 
            -
            		 | 
| 201 | 
            +
            		D_ASSERT(collection);
         | 
| 202 | 
            +
            		D_ASSERT(row_group);
         | 
| 203 | 
            +
             | 
| 204 | 
            +
            		// initialize the scan for this row group
         | 
| 205 | 
            +
            		bool need_to_scan = InitializeScanInRowGroup(scan_state, *collection, *row_group, vector_index, max_row);
         | 
| 177 206 | 
             
            		if (!need_to_scan) {
         | 
| 178 | 
            -
            			//  | 
| 207 | 
            +
            			// skip this row group
         | 
| 179 208 | 
             
            			continue;
         | 
| 180 209 | 
             
            		}
         | 
| 181 210 | 
             
            		return true;
         | 
| @@ -416,7 +445,7 @@ void RowGroupCollection::MergeStorage(RowGroupCollection &data) { | |
| 416 445 | 
             
            	D_ASSERT(data.types == types);
         | 
| 417 446 | 
             
            	auto index = row_start + total_rows.load();
         | 
| 418 447 | 
             
            	for (auto &row_group : data.row_groups->Segments()) {
         | 
| 419 | 
            -
            		auto new_group = make_unique<RowGroup>(row_group, index);
         | 
| 448 | 
            +
            		auto new_group = make_unique<RowGroup>(row_group, *this, index);
         | 
| 420 449 | 
             
            		index += new_group->count;
         | 
| 421 450 | 
             
            		row_groups->AppendSegment(std::move(new_group));
         | 
| 422 451 | 
             
            	}
         | 
| @@ -519,8 +548,9 @@ void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_ | |
| 519 548 | 
             
            	DataChunk result;
         | 
| 520 549 | 
             
            	result.Initialize(GetAllocator(), types);
         | 
| 521 550 |  | 
| 522 | 
            -
            	 | 
| 523 | 
            -
            	row_group-> | 
| 551 | 
            +
            	state.table_state.Initialize(GetTypes());
         | 
| 552 | 
            +
            	row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
         | 
| 553 | 
            +
            	row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
         | 
| 524 554 | 
             
            	result.Slice(sel, count);
         | 
| 525 555 |  | 
| 526 556 | 
             
            	indexes.Scan([&](Index &index) {
         | 
| @@ -604,7 +634,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont | |
| 604 634 | 
             
            	// fill the column with its DEFAULT value, or NULL if none is specified
         | 
| 605 635 | 
             
            	auto new_stats = make_unique<SegmentStatistics>(new_column.GetType());
         | 
| 606 636 | 
             
            	for (auto &current_row_group : row_groups->Segments()) {
         | 
| 607 | 
            -
            		auto new_row_group = current_row_group.AddColumn(new_column, executor, default_value, default_vector);
         | 
| 637 | 
            +
            		auto new_row_group = current_row_group.AddColumn(*result, new_column, executor, default_value, default_vector);
         | 
| 608 638 | 
             
            		// merge in the statistics
         | 
| 609 639 | 
             
            		new_row_group->MergeIntoStatistics(new_column_idx, new_column_stats.Statistics());
         | 
| 610 640 |  | 
| @@ -623,7 +653,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::RemoveColumn(idx_t col_idx) { | |
| 623 653 | 
             
            	result->stats.InitializeRemoveColumn(stats, col_idx);
         | 
| 624 654 |  | 
| 625 655 | 
             
            	for (auto &current_row_group : row_groups->Segments()) {
         | 
| 626 | 
            -
            		auto new_row_group = current_row_group.RemoveColumn(col_idx);
         | 
| 656 | 
            +
            		auto new_row_group = current_row_group.RemoveColumn(*result, col_idx);
         | 
| 627 657 | 
             
            		result->row_groups->AppendSegment(std::move(new_row_group));
         | 
| 628 658 | 
             
            	}
         | 
| 629 659 | 
             
            	return result;
         | 
| @@ -661,8 +691,8 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(ClientContext &cont | |
| 661 691 | 
             
            	// now alter the type of the column within all of the row_groups individually
         | 
| 662 692 | 
             
            	auto &changed_stats = result->stats.GetStats(changed_idx);
         | 
| 663 693 | 
             
            	for (auto &current_row_group : row_groups->Segments()) {
         | 
| 664 | 
            -
            		auto new_row_group = current_row_group.AlterType(target_type, changed_idx, executor,
         | 
| 665 | 
            -
            		                                                 scan_state.table_state | 
| 694 | 
            +
            		auto new_row_group = current_row_group.AlterType(*result, target_type, changed_idx, executor,
         | 
| 695 | 
            +
            		                                                 scan_state.table_state, scan_chunk);
         | 
| 666 696 | 
             
            		new_row_group->MergeIntoStatistics(changed_idx, changed_stats.Statistics());
         | 
| 667 697 | 
             
            		result->row_groups->AppendSegment(std::move(new_row_group));
         | 
| 668 698 | 
             
            	}
         | 
| @@ -55,70 +55,64 @@ void ColumnScanState::Next(idx_t count) { | |
| 55 55 | 
             
            	}
         | 
| 56 56 | 
             
            }
         | 
| 57 57 |  | 
| 58 | 
            -
             | 
| 59 | 
            -
            	Next(STANDARD_VECTOR_SIZE);
         | 
| 60 | 
            -
            }
         | 
| 61 | 
            -
             | 
| 62 | 
            -
            const vector<column_t> &RowGroupScanState::GetColumnIds() {
         | 
| 58 | 
            +
            const vector<column_t> &CollectionScanState::GetColumnIds() {
         | 
| 63 59 | 
             
            	return parent.GetColumnIds();
         | 
| 64 60 | 
             
            }
         | 
| 65 61 |  | 
| 66 | 
            -
            TableFilterSet * | 
| 62 | 
            +
            TableFilterSet *CollectionScanState::GetFilters() {
         | 
| 67 63 | 
             
            	return parent.GetFilters();
         | 
| 68 64 | 
             
            }
         | 
| 69 65 |  | 
| 70 | 
            -
            AdaptiveFilter * | 
| 66 | 
            +
            AdaptiveFilter *CollectionScanState::GetAdaptiveFilter() {
         | 
| 71 67 | 
             
            	return parent.GetAdaptiveFilter();
         | 
| 72 68 | 
             
            }
         | 
| 73 69 |  | 
| 74 | 
            -
             | 
| 75 | 
            -
             | 
| 70 | 
            +
            ParallelCollectionScanState::ParallelCollectionScanState()
         | 
| 71 | 
            +
                : collection(nullptr), current_row_group(nullptr), processed_rows(0) {
         | 
| 76 72 | 
             
            }
         | 
| 77 73 |  | 
| 78 | 
            -
             | 
| 79 | 
            -
             | 
| 80 | 
            -
             | 
| 81 | 
            -
             | 
| 82 | 
            -
            TableFilterSet *CollectionScanState::GetFilters() {
         | 
| 83 | 
            -
            	return parent.GetFilters();
         | 
| 84 | 
            -
            }
         | 
| 85 | 
            -
             | 
| 86 | 
            -
            AdaptiveFilter *CollectionScanState::GetAdaptiveFilter() {
         | 
| 87 | 
            -
            	return parent.GetAdaptiveFilter();
         | 
| 74 | 
            +
            CollectionScanState::CollectionScanState(TableScanState &parent_p)
         | 
| 75 | 
            +
                : row_group(nullptr), vector_index(0), max_row_group_row(0), row_groups(nullptr), max_row(0), batch_index(0),
         | 
| 76 | 
            +
                  parent(parent_p) {
         | 
| 88 77 | 
             
            }
         | 
| 89 78 |  | 
| 90 79 | 
             
            bool CollectionScanState::Scan(DuckTransaction &transaction, DataChunk &result) {
         | 
| 91 | 
            -
            	 | 
| 92 | 
            -
             | 
| 93 | 
            -
            		current_row_group->Scan(transaction, row_group_state, result);
         | 
| 80 | 
            +
            	while (row_group) {
         | 
| 81 | 
            +
            		row_group->Scan(transaction, *this, result);
         | 
| 94 82 | 
             
            		if (result.size() > 0) {
         | 
| 95 83 | 
             
            			return true;
         | 
| 84 | 
            +
            		} else if (max_row <= row_group->start + row_group->count) {
         | 
| 85 | 
            +
            			row_group = nullptr;
         | 
| 86 | 
            +
            			return false;
         | 
| 96 87 | 
             
            		} else {
         | 
| 97 88 | 
             
            			do {
         | 
| 98 | 
            -
            				 | 
| 99 | 
            -
            				if ( | 
| 100 | 
            -
            					 | 
| 89 | 
            +
            				row_group = row_groups->GetNextSegment(row_group);
         | 
| 90 | 
            +
            				if (row_group) {
         | 
| 91 | 
            +
            					if (row_group->start >= max_row) {
         | 
| 92 | 
            +
            						row_group = nullptr;
         | 
| 93 | 
            +
            						break;
         | 
| 94 | 
            +
            					}
         | 
| 95 | 
            +
            					bool scan_row_group = row_group->InitializeScan(*this);
         | 
| 101 96 | 
             
            					if (scan_row_group) {
         | 
| 102 97 | 
             
            						// scan this row group
         | 
| 103 98 | 
             
            						break;
         | 
| 104 99 | 
             
            					}
         | 
| 105 100 | 
             
            				}
         | 
| 106 | 
            -
            			} while ( | 
| 101 | 
            +
            			} while (row_group);
         | 
| 107 102 | 
             
            		}
         | 
| 108 103 | 
             
            	}
         | 
| 109 104 | 
             
            	return false;
         | 
| 110 105 | 
             
            }
         | 
| 111 106 |  | 
| 112 107 | 
             
            bool CollectionScanState::ScanCommitted(DataChunk &result, SegmentLock &l, TableScanType type) {
         | 
| 113 | 
            -
            	 | 
| 114 | 
            -
             | 
| 115 | 
            -
            		current_row_group->ScanCommitted(row_group_state, result, type);
         | 
| 108 | 
            +
            	while (row_group) {
         | 
| 109 | 
            +
            		row_group->ScanCommitted(*this, result, type);
         | 
| 116 110 | 
             
            		if (result.size() > 0) {
         | 
| 117 111 | 
             
            			return true;
         | 
| 118 112 | 
             
            		} else {
         | 
| 119 | 
            -
            			 | 
| 120 | 
            -
            			if ( | 
| 121 | 
            -
            				 | 
| 113 | 
            +
            			row_group = row_groups->GetNextSegment(l, row_group);
         | 
| 114 | 
            +
            			if (row_group) {
         | 
| 115 | 
            +
            				row_group->InitializeScan(*this);
         | 
| 122 116 | 
             
            			}
         | 
| 123 117 | 
             
            		}
         | 
| 124 118 | 
             
            	}
         | 
| @@ -126,15 +120,14 @@ bool CollectionScanState::ScanCommitted(DataChunk &result, SegmentLock &l, Table | |
| 126 120 | 
             
            }
         | 
| 127 121 |  | 
| 128 122 | 
             
            bool CollectionScanState::ScanCommitted(DataChunk &result, TableScanType type) {
         | 
| 129 | 
            -
            	 | 
| 130 | 
            -
             | 
| 131 | 
            -
            		current_row_group->ScanCommitted(row_group_state, result, type);
         | 
| 123 | 
            +
            	while (row_group) {
         | 
| 124 | 
            +
            		row_group->ScanCommitted(*this, result, type);
         | 
| 132 125 | 
             
            		if (result.size() > 0) {
         | 
| 133 126 | 
             
            			return true;
         | 
| 134 127 | 
             
            		} else {
         | 
| 135 | 
            -
            			 | 
| 136 | 
            -
            			if ( | 
| 137 | 
            -
            				 | 
| 128 | 
            +
            			row_group = row_groups->GetNextSegment(row_group);
         | 
| 129 | 
            +
            			if (row_group) {
         | 
| 130 | 
            +
            				row_group->InitializeScan(*this);
         | 
| 138 131 | 
             
            			}
         | 
| 139 132 | 
             
            		}
         | 
| 140 133 | 
             
            	}
         | 
| @@ -45,18 +45,16 @@ void StandardColumnData::InitializeScan(ColumnScanState &state) { | |
| 45 45 | 
             
            	ColumnData::InitializeScan(state);
         | 
| 46 46 |  | 
| 47 47 | 
             
            	// initialize the validity segment
         | 
| 48 | 
            -
            	 | 
| 49 | 
            -
            	validity.InitializeScan( | 
| 50 | 
            -
            	state.child_states.push_back(std::move(child_state));
         | 
| 48 | 
            +
            	D_ASSERT(state.child_states.size() == 1);
         | 
| 49 | 
            +
            	validity.InitializeScan(state.child_states[0]);
         | 
| 51 50 | 
             
            }
         | 
| 52 51 |  | 
| 53 52 | 
             
            void StandardColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
         | 
| 54 53 | 
             
            	ColumnData::InitializeScanWithOffset(state, row_idx);
         | 
| 55 54 |  | 
| 56 55 | 
             
            	// initialize the validity segment
         | 
| 57 | 
            -
            	 | 
| 58 | 
            -
            	validity.InitializeScanWithOffset( | 
| 59 | 
            -
            	state.child_states.push_back(std::move(child_state));
         | 
| 56 | 
            +
            	D_ASSERT(state.child_states.size() == 1);
         | 
| 57 | 
            +
            	validity.InitializeScanWithOffset(state.child_states[0], row_idx);
         | 
| 60 58 | 
             
            }
         | 
| 61 59 |  | 
| 62 60 | 
             
            idx_t StandardColumnData::Scan(TransactionData transaction, idx_t vector_index, ColumnScanState &state,
         | 
| @@ -2,6 +2,8 @@ | |
| 2 2 | 
             
            #include "duckdb/storage/statistics/struct_stats.hpp"
         | 
| 3 3 | 
             
            #include "duckdb/transaction/transaction.hpp"
         | 
| 4 4 | 
             
            #include "duckdb/storage/table/column_checkpoint_state.hpp"
         | 
| 5 | 
            +
            #include "duckdb/storage/table/append_state.hpp"
         | 
| 6 | 
            +
            #include "duckdb/storage/table/scan_state.hpp"
         | 
| 5 7 |  | 
| 6 8 | 
             
            namespace duckdb {
         | 
| 7 9 |  | 
| @@ -39,40 +41,30 @@ idx_t StructColumnData::GetMaxEntry() { | |
| 39 41 | 
             
            }
         | 
| 40 42 |  | 
| 41 43 | 
             
            void StructColumnData::InitializeScan(ColumnScanState &state) {
         | 
| 42 | 
            -
            	D_ASSERT(state.child_states. | 
| 43 | 
            -
             | 
| 44 | 
            +
            	D_ASSERT(state.child_states.size() == sub_columns.size() + 1);
         | 
| 44 45 | 
             
            	state.row_index = 0;
         | 
| 45 46 | 
             
            	state.current = nullptr;
         | 
| 46 47 |  | 
| 47 48 | 
             
            	// initialize the validity segment
         | 
| 48 | 
            -
            	 | 
| 49 | 
            -
            	validity.InitializeScan(validity_state);
         | 
| 50 | 
            -
            	state.child_states.push_back(std::move(validity_state));
         | 
| 49 | 
            +
            	validity.InitializeScan(state.child_states[0]);
         | 
| 51 50 |  | 
| 52 51 | 
             
            	// initialize the sub-columns
         | 
| 53 | 
            -
            	for ( | 
| 54 | 
            -
            		 | 
| 55 | 
            -
            		sub_column->InitializeScan(child_state);
         | 
| 56 | 
            -
            		state.child_states.push_back(std::move(child_state));
         | 
| 52 | 
            +
            	for (idx_t i = 0; i < sub_columns.size(); i++) {
         | 
| 53 | 
            +
            		sub_columns[i]->InitializeScan(state.child_states[i + 1]);
         | 
| 57 54 | 
             
            	}
         | 
| 58 55 | 
             
            }
         | 
| 59 56 |  | 
| 60 57 | 
             
            void StructColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
         | 
| 61 | 
            -
            	D_ASSERT(state.child_states. | 
| 62 | 
            -
             | 
| 58 | 
            +
            	D_ASSERT(state.child_states.size() == sub_columns.size() + 1);
         | 
| 63 59 | 
             
            	state.row_index = row_idx;
         | 
| 64 60 | 
             
            	state.current = nullptr;
         | 
| 65 61 |  | 
| 66 62 | 
             
            	// initialize the validity segment
         | 
| 67 | 
            -
            	 | 
| 68 | 
            -
            	validity.InitializeScanWithOffset(validity_state, row_idx);
         | 
| 69 | 
            -
            	state.child_states.push_back(std::move(validity_state));
         | 
| 63 | 
            +
            	validity.InitializeScanWithOffset(state.child_states[0], row_idx);
         | 
| 70 64 |  | 
| 71 65 | 
             
            	// initialize the sub-columns
         | 
| 72 | 
            -
            	for ( | 
| 73 | 
            -
            		 | 
| 74 | 
            -
            		sub_column->InitializeScanWithOffset(child_state, row_idx);
         | 
| 75 | 
            -
            		state.child_states.push_back(std::move(child_state));
         | 
| 66 | 
            +
            	for (idx_t i = 0; i < sub_columns.size(); i++) {
         | 
| 67 | 
            +
            		sub_columns[i]->InitializeScanWithOffset(state.child_states[i + 1], row_idx);
         | 
| 76 68 | 
             
            	}
         | 
| 77 69 | 
             
            }
         | 
| 78 70 |  | 
| @@ -284,6 +276,7 @@ void StructColumnData::DeserializeColumn(Deserializer &source) { | |
| 284 276 | 
             
            	for (auto &sub_column : sub_columns) {
         | 
| 285 277 | 
             
            		sub_column->DeserializeColumn(source);
         | 
| 286 278 | 
             
            	}
         | 
| 279 | 
            +
            	this->count = validity.count;
         | 
| 287 280 | 
             
            }
         | 
| 288 281 |  | 
| 289 282 | 
             
            void StructColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, TableStorageInfo &result) {
         |