duckdb 0.7.2-dev1034.0 → 0.7.2-dev1138.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
- package/src/duckdb/src/common/sort/comparators.cpp +14 -5
- package/src/duckdb/src/common/types/interval.cpp +0 -41
- package/src/duckdb/src/common/types/list_segment.cpp +658 -0
- package/src/duckdb/src/common/types/string_heap.cpp +1 -1
- package/src/duckdb/src/common/types/string_type.cpp +1 -1
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
- package/src/duckdb/src/execution/index/art/art.cpp +13 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -0
- package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
- package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
- package/src/duckdb/src/function/table/read_csv.cpp +5 -0
- package/src/duckdb/src/function/table/table_scan.cpp +8 -11
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
- package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
- package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
- package/src/duckdb/src/include/duckdb.h +21 -0
- package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
- package/src/duckdb/src/main/settings/settings.cpp +20 -8
- package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
- package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
- package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +1 -0
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/local_storage.cpp +7 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_data.cpp +75 -18
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
- package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
- package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
- package/src/duckdb/src/storage/table/row_group.cpp +200 -136
- package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
- package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
- package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
- package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
- package/src/duckdb/ub_src_common_types.cpp +2 -0
|
@@ -3,21 +3,20 @@
|
|
|
3
3
|
#include "duckdb/execution/expression_executor.hpp"
|
|
4
4
|
#include "duckdb/main/client_context.hpp"
|
|
5
5
|
#include "duckdb/storage/data_table.hpp"
|
|
6
|
-
#include "duckdb/transaction/transaction.hpp"
|
|
7
6
|
#include "duckdb/planner/constraints/bound_not_null_constraint.hpp"
|
|
8
7
|
#include "duckdb/storage/checkpoint/table_data_writer.hpp"
|
|
9
8
|
#include "duckdb/storage/table/row_group_segment_tree.hpp"
|
|
10
9
|
#include "duckdb/storage/meta_block_reader.hpp"
|
|
10
|
+
#include "duckdb/storage/table/append_state.hpp"
|
|
11
|
+
#include "duckdb/storage/table/scan_state.hpp"
|
|
11
12
|
|
|
12
13
|
namespace duckdb {
|
|
13
14
|
|
|
14
15
|
//===--------------------------------------------------------------------===//
|
|
15
16
|
// Row Group Segment Tree
|
|
16
17
|
//===--------------------------------------------------------------------===//
|
|
17
|
-
RowGroupSegmentTree::RowGroupSegmentTree(
|
|
18
|
-
|
|
19
|
-
: SegmentTree<RowGroup, true>(), info(table_info_p), block_manager(block_manager_p),
|
|
20
|
-
column_types(std::move(column_types_p)), current_row_group(0), max_row_group(0) {
|
|
18
|
+
RowGroupSegmentTree::RowGroupSegmentTree(RowGroupCollection &collection)
|
|
19
|
+
: SegmentTree<RowGroup, true>(), collection(collection), current_row_group(0), max_row_group(0) {
|
|
21
20
|
}
|
|
22
21
|
RowGroupSegmentTree::~RowGroupSegmentTree() {
|
|
23
22
|
}
|
|
@@ -27,7 +26,7 @@ void RowGroupSegmentTree::Initialize(PersistentTableData &data) {
|
|
|
27
26
|
current_row_group = 0;
|
|
28
27
|
max_row_group = data.row_group_count;
|
|
29
28
|
finished_loading = false;
|
|
30
|
-
reader = make_unique<MetaBlockReader>(
|
|
29
|
+
reader = make_unique<MetaBlockReader>(collection.GetBlockManager(), data.block_id);
|
|
31
30
|
reader->offset = data.offset;
|
|
32
31
|
}
|
|
33
32
|
|
|
@@ -36,10 +35,11 @@ unique_ptr<RowGroup> RowGroupSegmentTree::LoadSegment() {
|
|
|
36
35
|
finished_loading = true;
|
|
37
36
|
return nullptr;
|
|
38
37
|
}
|
|
39
|
-
auto row_group_pointer = RowGroup::Deserialize(*reader,
|
|
38
|
+
auto row_group_pointer = RowGroup::Deserialize(*reader, collection.GetTypes());
|
|
40
39
|
current_row_group++;
|
|
41
|
-
return make_unique<RowGroup>(
|
|
40
|
+
return make_unique<RowGroup>(collection, std::move(row_group_pointer));
|
|
42
41
|
}
|
|
42
|
+
|
|
43
43
|
//===--------------------------------------------------------------------===//
|
|
44
44
|
// Row Group Collection
|
|
45
45
|
//===--------------------------------------------------------------------===//
|
|
@@ -47,7 +47,7 @@ RowGroupCollection::RowGroupCollection(shared_ptr<DataTableInfo> info_p, BlockMa
|
|
|
47
47
|
vector<LogicalType> types_p, idx_t row_start_p, idx_t total_rows_p)
|
|
48
48
|
: block_manager(block_manager), total_rows(total_rows_p), info(std::move(info_p)), types(std::move(types_p)),
|
|
49
49
|
row_start(row_start_p) {
|
|
50
|
-
row_groups = make_shared<RowGroupSegmentTree>(*
|
|
50
|
+
row_groups = make_shared<RowGroupSegmentTree>(*this);
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
idx_t RowGroupCollection::GetTotalRows() const {
|
|
@@ -62,6 +62,14 @@ Allocator &RowGroupCollection::GetAllocator() const {
|
|
|
62
62
|
return Allocator::Get(info->db);
|
|
63
63
|
}
|
|
64
64
|
|
|
65
|
+
AttachedDatabase &RowGroupCollection::GetAttached() {
|
|
66
|
+
return GetTableInfo().db;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
DatabaseInstance &RowGroupCollection::GetDatabase() {
|
|
70
|
+
return GetAttached().GetDatabase();
|
|
71
|
+
}
|
|
72
|
+
|
|
65
73
|
//===--------------------------------------------------------------------===//
|
|
66
74
|
// Initialize
|
|
67
75
|
//===--------------------------------------------------------------------===//
|
|
@@ -79,7 +87,7 @@ void RowGroupCollection::InitializeEmpty() {
|
|
|
79
87
|
|
|
80
88
|
void RowGroupCollection::AppendRowGroup(SegmentLock &l, idx_t start_row) {
|
|
81
89
|
D_ASSERT(start_row >= row_start);
|
|
82
|
-
auto new_row_group = make_unique<RowGroup>(
|
|
90
|
+
auto new_row_group = make_unique<RowGroup>(*this, start_row, 0);
|
|
83
91
|
new_row_group->InitializeEmpty(types);
|
|
84
92
|
row_groups->AppendSegment(l, std::move(new_row_group));
|
|
85
93
|
}
|
|
@@ -94,6 +102,7 @@ void RowGroupCollection::Verify() {
|
|
|
94
102
|
row_groups->Verify();
|
|
95
103
|
for (auto &row_group : row_groups->Segments()) {
|
|
96
104
|
row_group.Verify();
|
|
105
|
+
D_ASSERT(&row_group.GetCollection() == this);
|
|
97
106
|
D_ASSERT(row_group.start == this->row_start + current_total_rows);
|
|
98
107
|
current_total_rows += row_group.count;
|
|
99
108
|
}
|
|
@@ -110,7 +119,8 @@ void RowGroupCollection::InitializeScan(CollectionScanState &state, const vector
|
|
|
110
119
|
D_ASSERT(row_group);
|
|
111
120
|
state.row_groups = row_groups.get();
|
|
112
121
|
state.max_row = row_start + total_rows;
|
|
113
|
-
|
|
122
|
+
state.Initialize(GetTypes());
|
|
123
|
+
while (row_group && !row_group->InitializeScan(state)) {
|
|
114
124
|
row_group = row_groups->GetNextSegment(row_group);
|
|
115
125
|
}
|
|
116
126
|
}
|
|
@@ -125,18 +135,22 @@ void RowGroupCollection::InitializeScanWithOffset(CollectionScanState &state, co
|
|
|
125
135
|
D_ASSERT(row_group);
|
|
126
136
|
state.row_groups = row_groups.get();
|
|
127
137
|
state.max_row = end_row;
|
|
138
|
+
state.Initialize(GetTypes());
|
|
128
139
|
idx_t start_vector = (start_row - row_group->start) / STANDARD_VECTOR_SIZE;
|
|
129
|
-
if (!row_group->InitializeScanWithOffset(state
|
|
140
|
+
if (!row_group->InitializeScanWithOffset(state, start_vector)) {
|
|
130
141
|
throw InternalException("Failed to initialize row group scan with offset");
|
|
131
142
|
}
|
|
132
143
|
}
|
|
133
144
|
|
|
134
|
-
bool RowGroupCollection::InitializeScanInRowGroup(CollectionScanState &state,
|
|
135
|
-
|
|
136
|
-
idx_t max_row) {
|
|
145
|
+
bool RowGroupCollection::InitializeScanInRowGroup(CollectionScanState &state, RowGroupCollection &collection,
|
|
146
|
+
RowGroup &row_group, idx_t vector_index, idx_t max_row) {
|
|
137
147
|
state.max_row = max_row;
|
|
138
|
-
state.row_groups =
|
|
139
|
-
|
|
148
|
+
state.row_groups = collection.row_groups.get();
|
|
149
|
+
if (!state.column_scans) {
|
|
150
|
+
// initialize the scan state
|
|
151
|
+
state.Initialize(collection.GetTypes());
|
|
152
|
+
}
|
|
153
|
+
return row_group.InitializeScanWithOffset(state, vector_index);
|
|
140
154
|
}
|
|
141
155
|
|
|
142
156
|
void RowGroupCollection::InitializeParallelScan(ParallelCollectionScanState &state) {
|
|
@@ -145,37 +159,52 @@ void RowGroupCollection::InitializeParallelScan(ParallelCollectionScanState &sta
|
|
|
145
159
|
state.vector_index = 0;
|
|
146
160
|
state.max_row = row_start + total_rows;
|
|
147
161
|
state.batch_index = 0;
|
|
162
|
+
state.processed_rows = 0;
|
|
148
163
|
}
|
|
149
164
|
|
|
150
165
|
bool RowGroupCollection::NextParallelScan(ClientContext &context, ParallelCollectionScanState &state,
|
|
151
166
|
CollectionScanState &scan_state) {
|
|
152
|
-
while (
|
|
167
|
+
while (true) {
|
|
153
168
|
idx_t vector_index;
|
|
154
169
|
idx_t max_row;
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
+
RowGroupCollection *collection;
|
|
171
|
+
RowGroup *row_group;
|
|
172
|
+
{
|
|
173
|
+
// select the next row group to scan from the parallel state
|
|
174
|
+
lock_guard<mutex> l(state.lock);
|
|
175
|
+
if (!state.current_row_group || state.current_row_group->count == 0) {
|
|
176
|
+
// no more data left to scan
|
|
177
|
+
break;
|
|
178
|
+
}
|
|
179
|
+
collection = state.collection;
|
|
180
|
+
row_group = state.current_row_group;
|
|
181
|
+
if (ClientConfig::GetConfig(context).verify_parallelism) {
|
|
182
|
+
vector_index = state.vector_index;
|
|
183
|
+
max_row = state.current_row_group->start +
|
|
184
|
+
MinValue<idx_t>(state.current_row_group->count,
|
|
185
|
+
STANDARD_VECTOR_SIZE * state.vector_index + STANDARD_VECTOR_SIZE);
|
|
186
|
+
D_ASSERT(vector_index * STANDARD_VECTOR_SIZE < state.current_row_group->count);
|
|
187
|
+
state.vector_index++;
|
|
188
|
+
if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
|
|
189
|
+
state.current_row_group = row_groups->GetNextSegment(state.current_row_group);
|
|
190
|
+
state.vector_index = 0;
|
|
191
|
+
}
|
|
192
|
+
} else {
|
|
193
|
+
state.processed_rows += state.current_row_group->count;
|
|
194
|
+
vector_index = 0;
|
|
195
|
+
max_row = state.current_row_group->start + state.current_row_group->count;
|
|
170
196
|
state.current_row_group = row_groups->GetNextSegment(state.current_row_group);
|
|
171
|
-
state.vector_index = 0;
|
|
172
197
|
}
|
|
173
|
-
|
|
174
|
-
|
|
198
|
+
max_row = MinValue<idx_t>(max_row, state.max_row);
|
|
199
|
+
scan_state.batch_index = ++state.batch_index;
|
|
175
200
|
}
|
|
176
|
-
|
|
201
|
+
D_ASSERT(collection);
|
|
202
|
+
D_ASSERT(row_group);
|
|
203
|
+
|
|
204
|
+
// initialize the scan for this row group
|
|
205
|
+
bool need_to_scan = InitializeScanInRowGroup(scan_state, *collection, *row_group, vector_index, max_row);
|
|
177
206
|
if (!need_to_scan) {
|
|
178
|
-
//
|
|
207
|
+
// skip this row group
|
|
179
208
|
continue;
|
|
180
209
|
}
|
|
181
210
|
return true;
|
|
@@ -416,7 +445,7 @@ void RowGroupCollection::MergeStorage(RowGroupCollection &data) {
|
|
|
416
445
|
D_ASSERT(data.types == types);
|
|
417
446
|
auto index = row_start + total_rows.load();
|
|
418
447
|
for (auto &row_group : data.row_groups->Segments()) {
|
|
419
|
-
auto new_group = make_unique<RowGroup>(row_group, index);
|
|
448
|
+
auto new_group = make_unique<RowGroup>(row_group, *this, index);
|
|
420
449
|
index += new_group->count;
|
|
421
450
|
row_groups->AppendSegment(std::move(new_group));
|
|
422
451
|
}
|
|
@@ -519,8 +548,9 @@ void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_
|
|
|
519
548
|
DataChunk result;
|
|
520
549
|
result.Initialize(GetAllocator(), types);
|
|
521
550
|
|
|
522
|
-
|
|
523
|
-
row_group->
|
|
551
|
+
state.table_state.Initialize(GetTypes());
|
|
552
|
+
row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
|
|
553
|
+
row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
|
|
524
554
|
result.Slice(sel, count);
|
|
525
555
|
|
|
526
556
|
indexes.Scan([&](Index &index) {
|
|
@@ -604,7 +634,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont
|
|
|
604
634
|
// fill the column with its DEFAULT value, or NULL if none is specified
|
|
605
635
|
auto new_stats = make_unique<SegmentStatistics>(new_column.GetType());
|
|
606
636
|
for (auto &current_row_group : row_groups->Segments()) {
|
|
607
|
-
auto new_row_group = current_row_group.AddColumn(new_column, executor, default_value, default_vector);
|
|
637
|
+
auto new_row_group = current_row_group.AddColumn(*result, new_column, executor, default_value, default_vector);
|
|
608
638
|
// merge in the statistics
|
|
609
639
|
new_row_group->MergeIntoStatistics(new_column_idx, new_column_stats.Statistics());
|
|
610
640
|
|
|
@@ -623,7 +653,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::RemoveColumn(idx_t col_idx) {
|
|
|
623
653
|
result->stats.InitializeRemoveColumn(stats, col_idx);
|
|
624
654
|
|
|
625
655
|
for (auto &current_row_group : row_groups->Segments()) {
|
|
626
|
-
auto new_row_group = current_row_group.RemoveColumn(col_idx);
|
|
656
|
+
auto new_row_group = current_row_group.RemoveColumn(*result, col_idx);
|
|
627
657
|
result->row_groups->AppendSegment(std::move(new_row_group));
|
|
628
658
|
}
|
|
629
659
|
return result;
|
|
@@ -661,8 +691,8 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(ClientContext &cont
|
|
|
661
691
|
// now alter the type of the column within all of the row_groups individually
|
|
662
692
|
auto &changed_stats = result->stats.GetStats(changed_idx);
|
|
663
693
|
for (auto &current_row_group : row_groups->Segments()) {
|
|
664
|
-
auto new_row_group = current_row_group.AlterType(target_type, changed_idx, executor,
|
|
665
|
-
scan_state.table_state
|
|
694
|
+
auto new_row_group = current_row_group.AlterType(*result, target_type, changed_idx, executor,
|
|
695
|
+
scan_state.table_state, scan_chunk);
|
|
666
696
|
new_row_group->MergeIntoStatistics(changed_idx, changed_stats.Statistics());
|
|
667
697
|
result->row_groups->AppendSegment(std::move(new_row_group));
|
|
668
698
|
}
|
|
@@ -55,70 +55,64 @@ void ColumnScanState::Next(idx_t count) {
|
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
|
|
59
|
-
Next(STANDARD_VECTOR_SIZE);
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
const vector<column_t> &RowGroupScanState::GetColumnIds() {
|
|
58
|
+
const vector<column_t> &CollectionScanState::GetColumnIds() {
|
|
63
59
|
return parent.GetColumnIds();
|
|
64
60
|
}
|
|
65
61
|
|
|
66
|
-
TableFilterSet *
|
|
62
|
+
TableFilterSet *CollectionScanState::GetFilters() {
|
|
67
63
|
return parent.GetFilters();
|
|
68
64
|
}
|
|
69
65
|
|
|
70
|
-
AdaptiveFilter *
|
|
66
|
+
AdaptiveFilter *CollectionScanState::GetAdaptiveFilter() {
|
|
71
67
|
return parent.GetAdaptiveFilter();
|
|
72
68
|
}
|
|
73
69
|
|
|
74
|
-
|
|
75
|
-
|
|
70
|
+
ParallelCollectionScanState::ParallelCollectionScanState()
|
|
71
|
+
: collection(nullptr), current_row_group(nullptr), processed_rows(0) {
|
|
76
72
|
}
|
|
77
73
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
TableFilterSet *CollectionScanState::GetFilters() {
|
|
83
|
-
return parent.GetFilters();
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
AdaptiveFilter *CollectionScanState::GetAdaptiveFilter() {
|
|
87
|
-
return parent.GetAdaptiveFilter();
|
|
74
|
+
CollectionScanState::CollectionScanState(TableScanState &parent_p)
|
|
75
|
+
: row_group(nullptr), vector_index(0), max_row_group_row(0), row_groups(nullptr), max_row(0), batch_index(0),
|
|
76
|
+
parent(parent_p) {
|
|
88
77
|
}
|
|
89
78
|
|
|
90
79
|
bool CollectionScanState::Scan(DuckTransaction &transaction, DataChunk &result) {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
current_row_group->Scan(transaction, row_group_state, result);
|
|
80
|
+
while (row_group) {
|
|
81
|
+
row_group->Scan(transaction, *this, result);
|
|
94
82
|
if (result.size() > 0) {
|
|
95
83
|
return true;
|
|
84
|
+
} else if (max_row <= row_group->start + row_group->count) {
|
|
85
|
+
row_group = nullptr;
|
|
86
|
+
return false;
|
|
96
87
|
} else {
|
|
97
88
|
do {
|
|
98
|
-
|
|
99
|
-
if (
|
|
100
|
-
|
|
89
|
+
row_group = row_groups->GetNextSegment(row_group);
|
|
90
|
+
if (row_group) {
|
|
91
|
+
if (row_group->start >= max_row) {
|
|
92
|
+
row_group = nullptr;
|
|
93
|
+
break;
|
|
94
|
+
}
|
|
95
|
+
bool scan_row_group = row_group->InitializeScan(*this);
|
|
101
96
|
if (scan_row_group) {
|
|
102
97
|
// scan this row group
|
|
103
98
|
break;
|
|
104
99
|
}
|
|
105
100
|
}
|
|
106
|
-
} while (
|
|
101
|
+
} while (row_group);
|
|
107
102
|
}
|
|
108
103
|
}
|
|
109
104
|
return false;
|
|
110
105
|
}
|
|
111
106
|
|
|
112
107
|
bool CollectionScanState::ScanCommitted(DataChunk &result, SegmentLock &l, TableScanType type) {
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
current_row_group->ScanCommitted(row_group_state, result, type);
|
|
108
|
+
while (row_group) {
|
|
109
|
+
row_group->ScanCommitted(*this, result, type);
|
|
116
110
|
if (result.size() > 0) {
|
|
117
111
|
return true;
|
|
118
112
|
} else {
|
|
119
|
-
|
|
120
|
-
if (
|
|
121
|
-
|
|
113
|
+
row_group = row_groups->GetNextSegment(l, row_group);
|
|
114
|
+
if (row_group) {
|
|
115
|
+
row_group->InitializeScan(*this);
|
|
122
116
|
}
|
|
123
117
|
}
|
|
124
118
|
}
|
|
@@ -126,15 +120,14 @@ bool CollectionScanState::ScanCommitted(DataChunk &result, SegmentLock &l, Table
|
|
|
126
120
|
}
|
|
127
121
|
|
|
128
122
|
bool CollectionScanState::ScanCommitted(DataChunk &result, TableScanType type) {
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
current_row_group->ScanCommitted(row_group_state, result, type);
|
|
123
|
+
while (row_group) {
|
|
124
|
+
row_group->ScanCommitted(*this, result, type);
|
|
132
125
|
if (result.size() > 0) {
|
|
133
126
|
return true;
|
|
134
127
|
} else {
|
|
135
|
-
|
|
136
|
-
if (
|
|
137
|
-
|
|
128
|
+
row_group = row_groups->GetNextSegment(row_group);
|
|
129
|
+
if (row_group) {
|
|
130
|
+
row_group->InitializeScan(*this);
|
|
138
131
|
}
|
|
139
132
|
}
|
|
140
133
|
}
|
|
@@ -45,18 +45,16 @@ void StandardColumnData::InitializeScan(ColumnScanState &state) {
|
|
|
45
45
|
ColumnData::InitializeScan(state);
|
|
46
46
|
|
|
47
47
|
// initialize the validity segment
|
|
48
|
-
|
|
49
|
-
validity.InitializeScan(
|
|
50
|
-
state.child_states.push_back(std::move(child_state));
|
|
48
|
+
D_ASSERT(state.child_states.size() == 1);
|
|
49
|
+
validity.InitializeScan(state.child_states[0]);
|
|
51
50
|
}
|
|
52
51
|
|
|
53
52
|
void StandardColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
|
|
54
53
|
ColumnData::InitializeScanWithOffset(state, row_idx);
|
|
55
54
|
|
|
56
55
|
// initialize the validity segment
|
|
57
|
-
|
|
58
|
-
validity.InitializeScanWithOffset(
|
|
59
|
-
state.child_states.push_back(std::move(child_state));
|
|
56
|
+
D_ASSERT(state.child_states.size() == 1);
|
|
57
|
+
validity.InitializeScanWithOffset(state.child_states[0], row_idx);
|
|
60
58
|
}
|
|
61
59
|
|
|
62
60
|
idx_t StandardColumnData::Scan(TransactionData transaction, idx_t vector_index, ColumnScanState &state,
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
#include "duckdb/storage/statistics/struct_stats.hpp"
|
|
3
3
|
#include "duckdb/transaction/transaction.hpp"
|
|
4
4
|
#include "duckdb/storage/table/column_checkpoint_state.hpp"
|
|
5
|
+
#include "duckdb/storage/table/append_state.hpp"
|
|
6
|
+
#include "duckdb/storage/table/scan_state.hpp"
|
|
5
7
|
|
|
6
8
|
namespace duckdb {
|
|
7
9
|
|
|
@@ -39,40 +41,30 @@ idx_t StructColumnData::GetMaxEntry() {
|
|
|
39
41
|
}
|
|
40
42
|
|
|
41
43
|
void StructColumnData::InitializeScan(ColumnScanState &state) {
|
|
42
|
-
D_ASSERT(state.child_states.
|
|
43
|
-
|
|
44
|
+
D_ASSERT(state.child_states.size() == sub_columns.size() + 1);
|
|
44
45
|
state.row_index = 0;
|
|
45
46
|
state.current = nullptr;
|
|
46
47
|
|
|
47
48
|
// initialize the validity segment
|
|
48
|
-
|
|
49
|
-
validity.InitializeScan(validity_state);
|
|
50
|
-
state.child_states.push_back(std::move(validity_state));
|
|
49
|
+
validity.InitializeScan(state.child_states[0]);
|
|
51
50
|
|
|
52
51
|
// initialize the sub-columns
|
|
53
|
-
for (
|
|
54
|
-
|
|
55
|
-
sub_column->InitializeScan(child_state);
|
|
56
|
-
state.child_states.push_back(std::move(child_state));
|
|
52
|
+
for (idx_t i = 0; i < sub_columns.size(); i++) {
|
|
53
|
+
sub_columns[i]->InitializeScan(state.child_states[i + 1]);
|
|
57
54
|
}
|
|
58
55
|
}
|
|
59
56
|
|
|
60
57
|
void StructColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
|
|
61
|
-
D_ASSERT(state.child_states.
|
|
62
|
-
|
|
58
|
+
D_ASSERT(state.child_states.size() == sub_columns.size() + 1);
|
|
63
59
|
state.row_index = row_idx;
|
|
64
60
|
state.current = nullptr;
|
|
65
61
|
|
|
66
62
|
// initialize the validity segment
|
|
67
|
-
|
|
68
|
-
validity.InitializeScanWithOffset(validity_state, row_idx);
|
|
69
|
-
state.child_states.push_back(std::move(validity_state));
|
|
63
|
+
validity.InitializeScanWithOffset(state.child_states[0], row_idx);
|
|
70
64
|
|
|
71
65
|
// initialize the sub-columns
|
|
72
|
-
for (
|
|
73
|
-
|
|
74
|
-
sub_column->InitializeScanWithOffset(child_state, row_idx);
|
|
75
|
-
state.child_states.push_back(std::move(child_state));
|
|
66
|
+
for (idx_t i = 0; i < sub_columns.size(); i++) {
|
|
67
|
+
sub_columns[i]->InitializeScanWithOffset(state.child_states[i + 1], row_idx);
|
|
76
68
|
}
|
|
77
69
|
}
|
|
78
70
|
|
|
@@ -284,6 +276,7 @@ void StructColumnData::DeserializeColumn(Deserializer &source) {
|
|
|
284
276
|
for (auto &sub_column : sub_columns) {
|
|
285
277
|
sub_column->DeserializeColumn(source);
|
|
286
278
|
}
|
|
279
|
+
this->count = validity.count;
|
|
287
280
|
}
|
|
288
281
|
|
|
289
282
|
void StructColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, TableStorageInfo &result) {
|