duckdb 0.7.2-dev832.0 → 0.7.2-dev886.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/catalog/catalog.cpp +21 -5
- package/src/duckdb/src/catalog/default/default_functions.cpp +3 -0
- package/src/duckdb/src/catalog/duck_catalog.cpp +34 -7
- package/src/duckdb/src/common/box_renderer.cpp +109 -23
- package/src/duckdb/src/common/types/value.cpp +0 -93
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +3 -0
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +5 -8
- package/src/duckdb/src/function/scalar/date/date_part.cpp +2 -2
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +2 -2
- package/src/duckdb/src/function/scalar/list/list_aggregates.cpp +1 -1
- package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +4 -0
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +8 -8
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +3 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +8 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +0 -31
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +9 -52
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +62 -0
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +6 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +5 -3
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +4 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +5 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +37 -0
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +8 -1
- package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +271 -26
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +11 -7
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -10
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_single_join.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +11 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +31 -6
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +22 -4
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -11
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +6 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -0
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +2 -2
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +145 -83
- package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +65 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +1 -6
- package/src/duckdb/src/storage/table/column_data.cpp +29 -35
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +5 -5
- package/src/duckdb/src/storage/table/column_segment.cpp +8 -7
- package/src/duckdb/src/storage/table/list_column_data.cpp +2 -1
- package/src/duckdb/src/storage/table/persistent_table_data.cpp +2 -1
- package/src/duckdb/src/storage/table/row_group.cpp +9 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +82 -66
- package/src/duckdb/src/storage/table/scan_state.cpp +22 -3
- package/src/duckdb/src/storage/table/standard_column_data.cpp +1 -0
- package/src/duckdb/src/storage/table/struct_column_data.cpp +1 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11578 -11222
- package/src/duckdb/ub_src_storage_statistics.cpp +2 -0
- package/src/duckdb/ub_src_storage_table.cpp +0 -2
- package/src/duckdb/src/storage/table/segment_tree.cpp +0 -179
@@ -2,6 +2,7 @@
|
|
2
2
|
#include "duckdb/storage/statistics/base_statistics.hpp"
|
3
3
|
#include "duckdb/common/field_writer.hpp"
|
4
4
|
#include "duckdb/common/types/vector.hpp"
|
5
|
+
#include "duckdb/common/operator/comparison_operators.hpp"
|
5
6
|
|
6
7
|
namespace duckdb {
|
7
8
|
|
@@ -13,6 +14,9 @@ template <>
|
|
13
14
|
void NumericStats::Update<list_entry_t>(BaseStatistics &stats, list_entry_t new_value) {
|
14
15
|
}
|
15
16
|
|
17
|
+
//===--------------------------------------------------------------------===//
|
18
|
+
// NumericStats
|
19
|
+
//===--------------------------------------------------------------------===//
|
16
20
|
BaseStatistics NumericStats::CreateUnknown(LogicalType type) {
|
17
21
|
BaseStatistics result(std::move(type));
|
18
22
|
result.InitializeUnknown();
|
@@ -62,73 +66,149 @@ void NumericStats::Merge(BaseStatistics &stats, const BaseStatistics &other) {
|
|
62
66
|
}
|
63
67
|
}
|
64
68
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
69
|
+
struct GetNumericValueUnion {
|
70
|
+
template <class T>
|
71
|
+
static T Operation(const NumericValueUnion &v);
|
72
|
+
};
|
73
|
+
|
74
|
+
template <>
|
75
|
+
int8_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
76
|
+
return v.value_.tinyint;
|
77
|
+
}
|
78
|
+
|
79
|
+
template <>
|
80
|
+
int16_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
81
|
+
return v.value_.smallint;
|
82
|
+
}
|
83
|
+
|
84
|
+
template <>
|
85
|
+
int32_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
86
|
+
return v.value_.integer;
|
87
|
+
}
|
88
|
+
|
89
|
+
template <>
|
90
|
+
int64_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
91
|
+
return v.value_.bigint;
|
92
|
+
}
|
93
|
+
|
94
|
+
template <>
|
95
|
+
hugeint_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
96
|
+
return v.value_.hugeint;
|
97
|
+
}
|
98
|
+
|
99
|
+
template <>
|
100
|
+
uint8_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
101
|
+
return v.value_.utinyint;
|
102
|
+
}
|
103
|
+
|
104
|
+
template <>
|
105
|
+
uint16_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
106
|
+
return v.value_.usmallint;
|
107
|
+
}
|
108
|
+
|
109
|
+
template <>
|
110
|
+
uint32_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
111
|
+
return v.value_.uinteger;
|
112
|
+
}
|
113
|
+
|
114
|
+
template <>
|
115
|
+
uint64_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
116
|
+
return v.value_.ubigint;
|
117
|
+
}
|
118
|
+
|
119
|
+
template <>
|
120
|
+
float GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
121
|
+
return v.value_.float_;
|
122
|
+
}
|
123
|
+
|
124
|
+
template <>
|
125
|
+
double GetNumericValueUnion::Operation(const NumericValueUnion &v) {
|
126
|
+
return v.value_.double_;
|
127
|
+
}
|
128
|
+
|
129
|
+
template <class T>
|
130
|
+
T NumericStats::GetMinUnsafe(const BaseStatistics &stats) {
|
131
|
+
return GetNumericValueUnion::Operation<T>(NumericStats::GetDataUnsafe(stats).min);
|
132
|
+
}
|
133
|
+
|
134
|
+
template <class T>
|
135
|
+
T NumericStats::GetMaxUnsafe(const BaseStatistics &stats) {
|
136
|
+
return GetNumericValueUnion::Operation<T>(NumericStats::GetDataUnsafe(stats).max);
|
137
|
+
}
|
138
|
+
|
139
|
+
template <class T>
|
140
|
+
bool ConstantExactRange(T min, T max, T constant) {
|
141
|
+
return Equals::Operation(constant, min) && Equals::Operation(constant, max);
|
142
|
+
}
|
143
|
+
|
144
|
+
template <class T>
|
145
|
+
bool ConstantValueInRange(T min, T max, T constant) {
|
146
|
+
return !(LessThan::Operation(constant, min) || GreaterThan::Operation(constant, max));
|
147
|
+
}
|
148
|
+
|
149
|
+
template <class T>
|
150
|
+
FilterPropagateResult CheckZonemapTemplated(const BaseStatistics &stats, ExpressionType comparison_type,
|
151
|
+
const Value &constant_value) {
|
152
|
+
T min_value = NumericStats::GetMinUnsafe<T>(stats);
|
153
|
+
T max_value = NumericStats::GetMaxUnsafe<T>(stats);
|
154
|
+
T constant = constant_value.GetValueUnsafe<T>();
|
75
155
|
switch (comparison_type) {
|
76
156
|
case ExpressionType::COMPARE_EQUAL:
|
77
|
-
if (
|
157
|
+
if (ConstantExactRange(min_value, max_value, constant)) {
|
78
158
|
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
79
|
-
}
|
159
|
+
}
|
160
|
+
if (ConstantValueInRange(min_value, max_value, constant)) {
|
80
161
|
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
81
|
-
} else {
|
82
|
-
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
83
162
|
}
|
163
|
+
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
84
164
|
case ExpressionType::COMPARE_NOTEQUAL:
|
85
|
-
if (
|
165
|
+
if (!ConstantValueInRange(min_value, max_value, constant)) {
|
86
166
|
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
87
|
-
} else if (min_value
|
167
|
+
} else if (ConstantExactRange(min_value, max_value, constant)) {
|
88
168
|
// corner case of a cluster with one numeric equal to the target constant
|
89
169
|
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
90
170
|
}
|
91
171
|
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
92
172
|
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
93
|
-
// X
|
173
|
+
// GreaterThanEquals::Operation(X, C)
|
94
174
|
// this can be true only if max(X) >= C
|
95
175
|
// if min(X) >= C, then this is always true
|
96
|
-
if (min_value
|
176
|
+
if (GreaterThanEquals::Operation(min_value, constant)) {
|
97
177
|
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
98
|
-
} else if (max_value
|
178
|
+
} else if (GreaterThanEquals::Operation(max_value, constant)) {
|
99
179
|
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
100
180
|
} else {
|
101
181
|
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
102
182
|
}
|
103
183
|
case ExpressionType::COMPARE_GREATERTHAN:
|
104
|
-
// X
|
184
|
+
// GreaterThan::Operation(X, C)
|
105
185
|
// this can be true only if max(X) > C
|
106
186
|
// if min(X) > C, then this is always true
|
107
|
-
if (min_value
|
187
|
+
if (GreaterThan::Operation(min_value, constant)) {
|
108
188
|
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
109
|
-
} else if (max_value
|
189
|
+
} else if (GreaterThan::Operation(max_value, constant)) {
|
110
190
|
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
111
191
|
} else {
|
112
192
|
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
113
193
|
}
|
114
194
|
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
115
|
-
// X
|
195
|
+
// LessThanEquals::Operation(X, C)
|
116
196
|
// this can be true only if min(X) <= C
|
117
197
|
// if max(X) <= C, then this is always true
|
118
|
-
if (max_value
|
198
|
+
if (LessThanEquals::Operation(max_value, constant)) {
|
119
199
|
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
120
|
-
} else if (min_value
|
200
|
+
} else if (LessThanEquals::Operation(min_value, constant)) {
|
121
201
|
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
122
202
|
} else {
|
123
203
|
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
124
204
|
}
|
125
205
|
case ExpressionType::COMPARE_LESSTHAN:
|
126
|
-
// X
|
206
|
+
// LessThan::Operation(X, C)
|
127
207
|
// this can be true only if min(X) < C
|
128
208
|
// if max(X) < C, then this is always true
|
129
|
-
if (max_value
|
209
|
+
if (LessThan::Operation(max_value, constant)) {
|
130
210
|
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
131
|
-
} else if (min_value
|
211
|
+
} else if (LessThan::Operation(min_value, constant)) {
|
132
212
|
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
133
213
|
} else {
|
134
214
|
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
@@ -138,6 +218,43 @@ FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, Ex
|
|
138
218
|
}
|
139
219
|
}
|
140
220
|
|
221
|
+
FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
|
222
|
+
const Value &constant) {
|
223
|
+
D_ASSERT(constant.type() == stats.GetType());
|
224
|
+
if (constant.IsNull()) {
|
225
|
+
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
226
|
+
}
|
227
|
+
if (!NumericStats::HasMinMax(stats)) {
|
228
|
+
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
229
|
+
}
|
230
|
+
switch (stats.GetType().InternalType()) {
|
231
|
+
case PhysicalType::INT8:
|
232
|
+
return CheckZonemapTemplated<int8_t>(stats, comparison_type, constant);
|
233
|
+
case PhysicalType::INT16:
|
234
|
+
return CheckZonemapTemplated<int16_t>(stats, comparison_type, constant);
|
235
|
+
case PhysicalType::INT32:
|
236
|
+
return CheckZonemapTemplated<int32_t>(stats, comparison_type, constant);
|
237
|
+
case PhysicalType::INT64:
|
238
|
+
return CheckZonemapTemplated<int64_t>(stats, comparison_type, constant);
|
239
|
+
case PhysicalType::UINT8:
|
240
|
+
return CheckZonemapTemplated<uint8_t>(stats, comparison_type, constant);
|
241
|
+
case PhysicalType::UINT16:
|
242
|
+
return CheckZonemapTemplated<uint16_t>(stats, comparison_type, constant);
|
243
|
+
case PhysicalType::UINT32:
|
244
|
+
return CheckZonemapTemplated<uint32_t>(stats, comparison_type, constant);
|
245
|
+
case PhysicalType::UINT64:
|
246
|
+
return CheckZonemapTemplated<uint64_t>(stats, comparison_type, constant);
|
247
|
+
case PhysicalType::INT128:
|
248
|
+
return CheckZonemapTemplated<hugeint_t>(stats, comparison_type, constant);
|
249
|
+
case PhysicalType::FLOAT:
|
250
|
+
return CheckZonemapTemplated<float>(stats, comparison_type, constant);
|
251
|
+
case PhysicalType::DOUBLE:
|
252
|
+
return CheckZonemapTemplated<double>(stats, comparison_type, constant);
|
253
|
+
default:
|
254
|
+
throw InternalException("Unsupported type for NumericStats::CheckZonemap");
|
255
|
+
}
|
256
|
+
}
|
257
|
+
|
141
258
|
bool NumericStats::IsConstant(const BaseStatistics &stats) {
|
142
259
|
return NumericStats::Max(stats) <= NumericStats::Min(stats);
|
143
260
|
}
|
@@ -471,59 +588,4 @@ void NumericStats::Verify(const BaseStatistics &stats, Vector &vector, const Sel
|
|
471
588
|
}
|
472
589
|
}
|
473
590
|
|
474
|
-
template <>
|
475
|
-
int8_t &NumericValueUnion::GetReferenceUnsafe() {
|
476
|
-
return value_.tinyint;
|
477
|
-
}
|
478
|
-
|
479
|
-
template <>
|
480
|
-
int16_t &NumericValueUnion::GetReferenceUnsafe() {
|
481
|
-
return value_.smallint;
|
482
|
-
}
|
483
|
-
|
484
|
-
template <>
|
485
|
-
int32_t &NumericValueUnion::GetReferenceUnsafe() {
|
486
|
-
return value_.integer;
|
487
|
-
}
|
488
|
-
|
489
|
-
template <>
|
490
|
-
int64_t &NumericValueUnion::GetReferenceUnsafe() {
|
491
|
-
return value_.bigint;
|
492
|
-
}
|
493
|
-
|
494
|
-
template <>
|
495
|
-
hugeint_t &NumericValueUnion::GetReferenceUnsafe() {
|
496
|
-
return value_.hugeint;
|
497
|
-
}
|
498
|
-
|
499
|
-
template <>
|
500
|
-
uint8_t &NumericValueUnion::GetReferenceUnsafe() {
|
501
|
-
return value_.utinyint;
|
502
|
-
}
|
503
|
-
|
504
|
-
template <>
|
505
|
-
uint16_t &NumericValueUnion::GetReferenceUnsafe() {
|
506
|
-
return value_.usmallint;
|
507
|
-
}
|
508
|
-
|
509
|
-
template <>
|
510
|
-
uint32_t &NumericValueUnion::GetReferenceUnsafe() {
|
511
|
-
return value_.uinteger;
|
512
|
-
}
|
513
|
-
|
514
|
-
template <>
|
515
|
-
uint64_t &NumericValueUnion::GetReferenceUnsafe() {
|
516
|
-
return value_.ubigint;
|
517
|
-
}
|
518
|
-
|
519
|
-
template <>
|
520
|
-
float &NumericValueUnion::GetReferenceUnsafe() {
|
521
|
-
return value_.float_;
|
522
|
-
}
|
523
|
-
|
524
|
-
template <>
|
525
|
-
double &NumericValueUnion::GetReferenceUnsafe() {
|
526
|
-
return value_.double_;
|
527
|
-
}
|
528
|
-
|
529
591
|
} // namespace duckdb
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#include "duckdb/storage/statistics/numeric_stats_union.hpp"
|
2
|
+
|
3
|
+
namespace duckdb {
|
4
|
+
|
5
|
+
template <>
|
6
|
+
bool &NumericValueUnion::GetReferenceUnsafe() {
|
7
|
+
return value_.boolean;
|
8
|
+
}
|
9
|
+
|
10
|
+
template <>
|
11
|
+
int8_t &NumericValueUnion::GetReferenceUnsafe() {
|
12
|
+
return value_.tinyint;
|
13
|
+
}
|
14
|
+
|
15
|
+
template <>
|
16
|
+
int16_t &NumericValueUnion::GetReferenceUnsafe() {
|
17
|
+
return value_.smallint;
|
18
|
+
}
|
19
|
+
|
20
|
+
template <>
|
21
|
+
int32_t &NumericValueUnion::GetReferenceUnsafe() {
|
22
|
+
return value_.integer;
|
23
|
+
}
|
24
|
+
|
25
|
+
template <>
|
26
|
+
int64_t &NumericValueUnion::GetReferenceUnsafe() {
|
27
|
+
return value_.bigint;
|
28
|
+
}
|
29
|
+
|
30
|
+
template <>
|
31
|
+
hugeint_t &NumericValueUnion::GetReferenceUnsafe() {
|
32
|
+
return value_.hugeint;
|
33
|
+
}
|
34
|
+
|
35
|
+
template <>
|
36
|
+
uint8_t &NumericValueUnion::GetReferenceUnsafe() {
|
37
|
+
return value_.utinyint;
|
38
|
+
}
|
39
|
+
|
40
|
+
template <>
|
41
|
+
uint16_t &NumericValueUnion::GetReferenceUnsafe() {
|
42
|
+
return value_.usmallint;
|
43
|
+
}
|
44
|
+
|
45
|
+
template <>
|
46
|
+
uint32_t &NumericValueUnion::GetReferenceUnsafe() {
|
47
|
+
return value_.uinteger;
|
48
|
+
}
|
49
|
+
|
50
|
+
template <>
|
51
|
+
uint64_t &NumericValueUnion::GetReferenceUnsafe() {
|
52
|
+
return value_.ubigint;
|
53
|
+
}
|
54
|
+
|
55
|
+
template <>
|
56
|
+
float &NumericValueUnion::GetReferenceUnsafe() {
|
57
|
+
return value_.float_;
|
58
|
+
}
|
59
|
+
|
60
|
+
template <>
|
61
|
+
double &NumericValueUnion::GetReferenceUnsafe() {
|
62
|
+
return value_.double_;
|
63
|
+
}
|
64
|
+
|
65
|
+
} // namespace duckdb
|
@@ -1,12 +1,7 @@
|
|
1
|
-
|
2
1
|
#include "duckdb/storage/table/column_data.hpp"
|
3
|
-
|
2
|
+
#include "duckdb/storage/table/column_checkpoint_state.hpp"
|
4
3
|
#include "duckdb/storage/table/column_segment.hpp"
|
5
4
|
#include "duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp"
|
6
|
-
#include "duckdb/storage/table/validity_column_data.hpp"
|
7
|
-
#include "duckdb/storage/table/standard_column_data.hpp"
|
8
|
-
#include "duckdb/storage/table/list_column_data.hpp"
|
9
|
-
#include "duckdb/transaction/transaction.hpp"
|
10
5
|
#include "duckdb/storage/table/row_group.hpp"
|
11
6
|
#include "duckdb/storage/checkpoint/table_data_writer.hpp"
|
12
7
|
|
@@ -32,10 +32,9 @@ ColumnData::ColumnData(ColumnData &other, idx_t start, ColumnData *parent)
|
|
32
32
|
updates = make_unique<UpdateSegment>(*other.updates, *this);
|
33
33
|
}
|
34
34
|
idx_t offset = 0;
|
35
|
-
for (auto segment
|
36
|
-
|
37
|
-
|
38
|
-
offset += segment->count;
|
35
|
+
for (auto &segment : other.data.Segments()) {
|
36
|
+
this->data.AppendSegment(ColumnSegment::CreateSegment(segment, start + offset));
|
37
|
+
offset += segment.count;
|
39
38
|
}
|
40
39
|
}
|
41
40
|
|
@@ -75,7 +74,8 @@ idx_t ColumnData::GetMaxEntry() {
|
|
75
74
|
}
|
76
75
|
|
77
76
|
void ColumnData::InitializeScan(ColumnScanState &state) {
|
78
|
-
state.current =
|
77
|
+
state.current = data.GetRootSegment();
|
78
|
+
state.segment_tree = &data;
|
79
79
|
state.row_index = state.current ? state.current->start : 0;
|
80
80
|
state.internal_index = state.row_index;
|
81
81
|
state.initialized = false;
|
@@ -84,7 +84,8 @@ void ColumnData::InitializeScan(ColumnScanState &state) {
|
|
84
84
|
}
|
85
85
|
|
86
86
|
void ColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
|
87
|
-
state.current =
|
87
|
+
state.current = data.GetSegment(row_idx);
|
88
|
+
state.segment_tree = &data;
|
88
89
|
state.row_index = row_idx;
|
89
90
|
state.internal_index = state.current->start;
|
90
91
|
state.initialized = false;
|
@@ -125,11 +126,12 @@ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remai
|
|
125
126
|
}
|
126
127
|
|
127
128
|
if (remaining > 0) {
|
128
|
-
|
129
|
+
auto next = data.GetNextSegment(state.current);
|
130
|
+
if (!next) {
|
129
131
|
break;
|
130
132
|
}
|
131
133
|
state.previous_states.emplace_back(std::move(state.scan_state));
|
132
|
-
state.current =
|
134
|
+
state.current = next;
|
133
135
|
state.current->InitializeScan(state);
|
134
136
|
state.segment_checked = false;
|
135
137
|
D_ASSERT(state.row_index >= state.current->start &&
|
@@ -234,14 +236,14 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) {
|
|
234
236
|
// no segments yet, append an empty segment
|
235
237
|
AppendTransientSegment(l, start);
|
236
238
|
}
|
237
|
-
auto segment =
|
239
|
+
auto segment = data.GetLastSegment(l);
|
238
240
|
if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
|
239
241
|
// no transient segments yet
|
240
242
|
auto total_rows = segment->start + segment->count;
|
241
243
|
AppendTransientSegment(l, total_rows);
|
242
|
-
state.current =
|
244
|
+
state.current = data.GetLastSegment(l);
|
243
245
|
} else {
|
244
|
-
state.current =
|
246
|
+
state.current = segment;
|
245
247
|
}
|
246
248
|
|
247
249
|
D_ASSERT(state.current->segment_type == ColumnSegmentType::TRANSIENT);
|
@@ -264,7 +266,7 @@ void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, Uni
|
|
264
266
|
{
|
265
267
|
auto l = data.Lock();
|
266
268
|
AppendTransientSegment(l, state.current->start + state.current->count);
|
267
|
-
state.current =
|
269
|
+
state.current = data.GetLastSegment(l);
|
268
270
|
state.current->InitializeAppend(state);
|
269
271
|
}
|
270
272
|
offset += copied_elements;
|
@@ -284,7 +286,7 @@ void ColumnData::RevertAppend(row_t start_row) {
|
|
284
286
|
// find the segment index that the current row belongs to
|
285
287
|
idx_t segment_index = data.GetSegmentIndex(l, start_row);
|
286
288
|
auto segment = data.GetSegmentByIndex(l, segment_index);
|
287
|
-
auto &transient =
|
289
|
+
auto &transient = *segment;
|
288
290
|
D_ASSERT(transient.segment_type == ColumnSegmentType::TRANSIENT);
|
289
291
|
|
290
292
|
// remove any segments AFTER this segment: they should be deleted entirely
|
@@ -299,14 +301,14 @@ idx_t ColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) {
|
|
299
301
|
D_ASSERT(idx_t(row_id) >= start);
|
300
302
|
// perform the fetch within the segment
|
301
303
|
state.row_index = start + ((row_id - start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE);
|
302
|
-
state.current =
|
304
|
+
state.current = data.GetSegment(state.row_index);
|
303
305
|
state.internal_index = state.current->start;
|
304
306
|
return ScanVector(state, result, STANDARD_VECTOR_SIZE);
|
305
307
|
}
|
306
308
|
|
307
309
|
void ColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
|
308
310
|
idx_t result_idx) {
|
309
|
-
auto segment =
|
311
|
+
auto segment = data.GetSegment(row_id);
|
310
312
|
|
311
313
|
// now perform the fetch within the segment
|
312
314
|
segment->FetchRow(state, row_id, result, result_idx);
|
@@ -357,15 +359,14 @@ void ColumnData::AppendTransientSegment(SegmentLock &l, idx_t start_row) {
|
|
357
359
|
}
|
358
360
|
|
359
361
|
void ColumnData::CommitDropColumn() {
|
360
|
-
auto
|
361
|
-
|
362
|
-
if (segment
|
363
|
-
auto block_id = segment
|
362
|
+
for (auto &segment_p : data.Segments()) {
|
363
|
+
auto &segment = segment_p;
|
364
|
+
if (segment.segment_type == ColumnSegmentType::PERSISTENT) {
|
365
|
+
auto block_id = segment.GetBlockId();
|
364
366
|
if (block_id != INVALID_BLOCK) {
|
365
367
|
block_manager.MarkBlockAsModified(block_id);
|
366
368
|
}
|
367
369
|
}
|
368
|
-
segment = (ColumnSegment *)segment->Next();
|
369
370
|
}
|
370
371
|
}
|
371
372
|
|
@@ -464,7 +465,6 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
|
|
464
465
|
while (segment) {
|
465
466
|
ColumnSegmentInfo column_info;
|
466
467
|
column_info.row_group_index = row_group_index;
|
467
|
-
;
|
468
468
|
column_info.column_id = col_path[0];
|
469
469
|
column_info.column_path = col_path_str;
|
470
470
|
column_info.segment_idx = segment_idx;
|
@@ -487,7 +487,7 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
|
|
487
487
|
result.column_segments.push_back(std::move(column_info));
|
488
488
|
|
489
489
|
segment_idx++;
|
490
|
-
segment = (ColumnSegment *)segment
|
490
|
+
segment = (ColumnSegment *)data.GetNextSegment(segment);
|
491
491
|
}
|
492
492
|
}
|
493
493
|
|
@@ -495,19 +495,13 @@ void ColumnData::Verify(RowGroup &parent) {
|
|
495
495
|
#ifdef DEBUG
|
496
496
|
D_ASSERT(this->start == parent.start);
|
497
497
|
data.Verify();
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
D_ASSERT(
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
prev_end = root->start + root->count;
|
506
|
-
if (!root->next) {
|
507
|
-
D_ASSERT(prev_end == parent.start + parent.count);
|
508
|
-
}
|
509
|
-
root = root->Next();
|
510
|
-
}
|
498
|
+
idx_t current_index = 0;
|
499
|
+
idx_t current_start = this->start;
|
500
|
+
for (auto &segment : data.Segments()) {
|
501
|
+
D_ASSERT(segment.index == current_index);
|
502
|
+
D_ASSERT(segment.start == current_start);
|
503
|
+
current_start += segment.count;
|
504
|
+
current_index++;
|
511
505
|
}
|
512
506
|
#endif
|
513
507
|
}
|
@@ -38,7 +38,7 @@ ColumnCheckpointState &ColumnDataCheckpointer::GetCheckpointState() {
|
|
38
38
|
void ColumnDataCheckpointer::ScanSegments(const std::function<void(Vector &, idx_t)> &callback) {
|
39
39
|
Vector scan_vector(intermediate.GetType(), nullptr);
|
40
40
|
for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
|
41
|
-
auto segment =
|
41
|
+
auto segment = nodes[segment_idx].node.get();
|
42
42
|
ColumnScanState scan_state;
|
43
43
|
scan_state.current = segment;
|
44
44
|
segment->InitializeScan(scan_state);
|
@@ -163,7 +163,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
|
|
163
163
|
// since the segments will be rewritten their old on disk data is no longer required
|
164
164
|
auto &block_manager = col_data.block_manager;
|
165
165
|
for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
|
166
|
-
auto segment =
|
166
|
+
auto segment = nodes[segment_idx].node.get();
|
167
167
|
if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
|
168
168
|
// persistent segment has updates: mark it as modified and rewrite the block with the merged updates
|
169
169
|
auto block_id = segment->GetBlockId();
|
@@ -194,7 +194,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
|
|
194
194
|
|
195
195
|
bool ColumnDataCheckpointer::HasChanges() {
|
196
196
|
for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
|
197
|
-
auto segment =
|
197
|
+
auto segment = nodes[segment_idx].node.get();
|
198
198
|
if (segment->segment_type == ColumnSegmentType::TRANSIENT) {
|
199
199
|
// transient segment: always need to write to disk
|
200
200
|
return true;
|
@@ -214,7 +214,7 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
|
|
214
214
|
// all segments are persistent and there are no updates
|
215
215
|
// we only need to write the metadata
|
216
216
|
for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
|
217
|
-
auto segment =
|
217
|
+
auto segment = nodes[segment_idx].node.get();
|
218
218
|
D_ASSERT(segment->segment_type == ColumnSegmentType::PERSISTENT);
|
219
219
|
|
220
220
|
// set up the data pointer directly using the data from the persistent segment
|
@@ -235,7 +235,7 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
|
|
235
235
|
}
|
236
236
|
}
|
237
237
|
|
238
|
-
void ColumnDataCheckpointer::Checkpoint(vector<SegmentNode
|
238
|
+
void ColumnDataCheckpointer::Checkpoint(vector<SegmentNode<ColumnSegment>> nodes) {
|
239
239
|
D_ASSERT(!nodes.empty());
|
240
240
|
this->nodes = std::move(nodes);
|
241
241
|
// first check if any of the segments have changes
|
@@ -58,9 +58,10 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx
|
|
58
58
|
ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
|
59
59
|
ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction *function_p,
|
60
60
|
BaseStatistics statistics, block_id_t block_id_p, idx_t offset_p, idx_t segment_size_p)
|
61
|
-
: SegmentBase(start, count), db(db), type(std::move(type_p)),
|
62
|
-
|
63
|
-
block_id(block_id_p), offset(offset_p),
|
61
|
+
: SegmentBase<ColumnSegment>(start, count), db(db), type(std::move(type_p)),
|
62
|
+
type_size(GetTypeIdSize(type.InternalType())), segment_type(segment_type), function(function_p),
|
63
|
+
stats(std::move(statistics)), block(std::move(block)), block_id(block_id_p), offset(offset_p),
|
64
|
+
segment_size(segment_size_p) {
|
64
65
|
D_ASSERT(function);
|
65
66
|
if (function->init_segment) {
|
66
67
|
segment_state = function->init_segment(*this, block_id);
|
@@ -68,10 +69,10 @@ ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block
|
|
68
69
|
}
|
69
70
|
|
70
71
|
ColumnSegment::ColumnSegment(ColumnSegment &other, idx_t start)
|
71
|
-
: SegmentBase(start, other.count), db(other.db), type(std::move(other.type)),
|
72
|
-
|
73
|
-
block(std::move(other.block)), block_id(other.block_id), offset(other.offset),
|
74
|
-
segment_state(std::move(other.segment_state)) {
|
72
|
+
: SegmentBase<ColumnSegment>(start, other.count.load()), db(other.db), type(std::move(other.type)),
|
73
|
+
type_size(other.type_size), segment_type(other.segment_type), function(other.function),
|
74
|
+
stats(std::move(other.stats)), block(std::move(other.block)), block_id(other.block_id), offset(other.offset),
|
75
|
+
segment_size(other.segment_size), segment_state(std::move(other.segment_state)) {
|
75
76
|
}
|
76
77
|
|
77
78
|
ColumnSegment::~ColumnSegment() {
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "duckdb/storage/table/list_column_data.hpp"
|
2
2
|
#include "duckdb/storage/statistics/list_stats.hpp"
|
3
3
|
#include "duckdb/transaction/transaction.hpp"
|
4
|
+
#include "duckdb/storage/table/column_checkpoint_state.hpp"
|
4
5
|
|
5
6
|
namespace duckdb {
|
6
7
|
|
@@ -40,7 +41,7 @@ void ListColumnData::InitializeScan(ColumnScanState &state) {
|
|
40
41
|
}
|
41
42
|
|
42
43
|
uint64_t ListColumnData::FetchListOffset(idx_t row_idx) {
|
43
|
-
auto segment =
|
44
|
+
auto segment = data.GetSegment(row_idx);
|
44
45
|
ColumnFetchState fetch_state;
|
45
46
|
Vector result(type, 1);
|
46
47
|
segment->FetchRow(fetch_state, row_idx, result, 0);
|
@@ -3,7 +3,8 @@
|
|
3
3
|
|
4
4
|
namespace duckdb {
|
5
5
|
|
6
|
-
PersistentTableData::PersistentTableData(idx_t column_count)
|
6
|
+
PersistentTableData::PersistentTableData(idx_t column_count)
|
7
|
+
: total_rows(0), row_group_count(0), block_id(INVALID_BLOCK), offset(0) {
|
7
8
|
}
|
8
9
|
|
9
10
|
PersistentTableData::~PersistentTableData() {
|