duckdb 0.7.2-dev832.0 → 0.7.2-dev886.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. package/package.json +1 -1
  2. package/src/duckdb/src/catalog/catalog.cpp +21 -5
  3. package/src/duckdb/src/catalog/default/default_functions.cpp +3 -0
  4. package/src/duckdb/src/catalog/duck_catalog.cpp +34 -7
  5. package/src/duckdb/src/common/box_renderer.cpp +109 -23
  6. package/src/duckdb/src/common/types/value.cpp +0 -93
  7. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +3 -0
  8. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +5 -8
  9. package/src/duckdb/src/function/scalar/date/date_part.cpp +2 -2
  10. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +2 -2
  11. package/src/duckdb/src/function/scalar/list/list_aggregates.cpp +1 -1
  12. package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +4 -0
  13. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +8 -8
  14. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  15. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +3 -0
  16. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -1
  17. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +8 -2
  18. package/src/duckdb/src/include/duckdb/common/types/value.hpp +0 -31
  19. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  20. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +9 -52
  21. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +62 -0
  22. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  23. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +6 -3
  24. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +3 -2
  25. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +5 -3
  26. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +4 -1
  27. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -3
  28. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +5 -3
  29. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +37 -0
  30. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +8 -1
  31. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +4 -3
  32. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +271 -26
  33. package/src/duckdb/src/optimizer/filter_pushdown.cpp +11 -7
  34. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -10
  35. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +1 -1
  36. package/src/duckdb/src/optimizer/pushdown/pushdown_single_join.cpp +1 -1
  37. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +11 -0
  38. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +31 -6
  39. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +22 -4
  40. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +1 -1
  41. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -11
  42. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +6 -0
  43. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -0
  44. package/src/duckdb/src/storage/compression/numeric_constant.cpp +2 -2
  45. package/src/duckdb/src/storage/data_table.cpp +1 -1
  46. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +145 -83
  47. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +65 -0
  48. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  49. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +1 -6
  50. package/src/duckdb/src/storage/table/column_data.cpp +29 -35
  51. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +5 -5
  52. package/src/duckdb/src/storage/table/column_segment.cpp +8 -7
  53. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -1
  54. package/src/duckdb/src/storage/table/persistent_table_data.cpp +2 -1
  55. package/src/duckdb/src/storage/table/row_group.cpp +9 -9
  56. package/src/duckdb/src/storage/table/row_group_collection.cpp +82 -66
  57. package/src/duckdb/src/storage/table/scan_state.cpp +22 -3
  58. package/src/duckdb/src/storage/table/standard_column_data.cpp +1 -0
  59. package/src/duckdb/src/storage/table/struct_column_data.cpp +1 -0
  60. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11578 -11222
  61. package/src/duckdb/ub_src_storage_statistics.cpp +2 -0
  62. package/src/duckdb/ub_src_storage_table.cpp +0 -2
  63. package/src/duckdb/src/storage/table/segment_tree.cpp +0 -179
@@ -2,6 +2,7 @@
2
2
  #include "duckdb/storage/statistics/base_statistics.hpp"
3
3
  #include "duckdb/common/field_writer.hpp"
4
4
  #include "duckdb/common/types/vector.hpp"
5
+ #include "duckdb/common/operator/comparison_operators.hpp"
5
6
 
6
7
  namespace duckdb {
7
8
 
@@ -13,6 +14,9 @@ template <>
13
14
  void NumericStats::Update<list_entry_t>(BaseStatistics &stats, list_entry_t new_value) {
14
15
  }
15
16
 
17
+ //===--------------------------------------------------------------------===//
18
+ // NumericStats
19
+ //===--------------------------------------------------------------------===//
16
20
  BaseStatistics NumericStats::CreateUnknown(LogicalType type) {
17
21
  BaseStatistics result(std::move(type));
18
22
  result.InitializeUnknown();
@@ -62,73 +66,149 @@ void NumericStats::Merge(BaseStatistics &stats, const BaseStatistics &other) {
62
66
  }
63
67
  }
64
68
 
65
- FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
66
- const Value &constant) {
67
- if (constant.IsNull()) {
68
- return FilterPropagateResult::FILTER_ALWAYS_FALSE;
69
- }
70
- if (!NumericStats::HasMinMax(stats)) {
71
- return FilterPropagateResult::NO_PRUNING_POSSIBLE;
72
- }
73
- auto min_value = NumericStats::Min(stats);
74
- auto max_value = NumericStats::Max(stats);
69
+ struct GetNumericValueUnion {
70
+ template <class T>
71
+ static T Operation(const NumericValueUnion &v);
72
+ };
73
+
74
+ template <>
75
+ int8_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
76
+ return v.value_.tinyint;
77
+ }
78
+
79
+ template <>
80
+ int16_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
81
+ return v.value_.smallint;
82
+ }
83
+
84
+ template <>
85
+ int32_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
86
+ return v.value_.integer;
87
+ }
88
+
89
+ template <>
90
+ int64_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
91
+ return v.value_.bigint;
92
+ }
93
+
94
+ template <>
95
+ hugeint_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
96
+ return v.value_.hugeint;
97
+ }
98
+
99
+ template <>
100
+ uint8_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
101
+ return v.value_.utinyint;
102
+ }
103
+
104
+ template <>
105
+ uint16_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
106
+ return v.value_.usmallint;
107
+ }
108
+
109
+ template <>
110
+ uint32_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
111
+ return v.value_.uinteger;
112
+ }
113
+
114
+ template <>
115
+ uint64_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
116
+ return v.value_.ubigint;
117
+ }
118
+
119
+ template <>
120
+ float GetNumericValueUnion::Operation(const NumericValueUnion &v) {
121
+ return v.value_.float_;
122
+ }
123
+
124
+ template <>
125
+ double GetNumericValueUnion::Operation(const NumericValueUnion &v) {
126
+ return v.value_.double_;
127
+ }
128
+
129
+ template <class T>
130
+ T NumericStats::GetMinUnsafe(const BaseStatistics &stats) {
131
+ return GetNumericValueUnion::Operation<T>(NumericStats::GetDataUnsafe(stats).min);
132
+ }
133
+
134
+ template <class T>
135
+ T NumericStats::GetMaxUnsafe(const BaseStatistics &stats) {
136
+ return GetNumericValueUnion::Operation<T>(NumericStats::GetDataUnsafe(stats).max);
137
+ }
138
+
139
+ template <class T>
140
+ bool ConstantExactRange(T min, T max, T constant) {
141
+ return Equals::Operation(constant, min) && Equals::Operation(constant, max);
142
+ }
143
+
144
+ template <class T>
145
+ bool ConstantValueInRange(T min, T max, T constant) {
146
+ return !(LessThan::Operation(constant, min) || GreaterThan::Operation(constant, max));
147
+ }
148
+
149
+ template <class T>
150
+ FilterPropagateResult CheckZonemapTemplated(const BaseStatistics &stats, ExpressionType comparison_type,
151
+ const Value &constant_value) {
152
+ T min_value = NumericStats::GetMinUnsafe<T>(stats);
153
+ T max_value = NumericStats::GetMaxUnsafe<T>(stats);
154
+ T constant = constant_value.GetValueUnsafe<T>();
75
155
  switch (comparison_type) {
76
156
  case ExpressionType::COMPARE_EQUAL:
77
- if (constant == min_value && constant == max_value) {
157
+ if (ConstantExactRange(min_value, max_value, constant)) {
78
158
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
79
- } else if (constant >= min_value && constant <= max_value) {
159
+ }
160
+ if (ConstantValueInRange(min_value, max_value, constant)) {
80
161
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
81
- } else {
82
- return FilterPropagateResult::FILTER_ALWAYS_FALSE;
83
162
  }
163
+ return FilterPropagateResult::FILTER_ALWAYS_FALSE;
84
164
  case ExpressionType::COMPARE_NOTEQUAL:
85
- if (constant < min_value || constant > max_value) {
165
+ if (!ConstantValueInRange(min_value, max_value, constant)) {
86
166
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
87
- } else if (min_value == max_value && min_value == constant) {
167
+ } else if (ConstantExactRange(min_value, max_value, constant)) {
88
168
  // corner case of a cluster with one numeric equal to the target constant
89
169
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
90
170
  }
91
171
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
92
172
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
93
- // X >= C
173
+ // GreaterThanEquals::Operation(X, C)
94
174
  // this can be true only if max(X) >= C
95
175
  // if min(X) >= C, then this is always true
96
- if (min_value >= constant) {
176
+ if (GreaterThanEquals::Operation(min_value, constant)) {
97
177
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
98
- } else if (max_value >= constant) {
178
+ } else if (GreaterThanEquals::Operation(max_value, constant)) {
99
179
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
100
180
  } else {
101
181
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
102
182
  }
103
183
  case ExpressionType::COMPARE_GREATERTHAN:
104
- // X > C
184
+ // GreaterThan::Operation(X, C)
105
185
  // this can be true only if max(X) > C
106
186
  // if min(X) > C, then this is always true
107
- if (min_value > constant) {
187
+ if (GreaterThan::Operation(min_value, constant)) {
108
188
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
109
- } else if (max_value > constant) {
189
+ } else if (GreaterThan::Operation(max_value, constant)) {
110
190
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
111
191
  } else {
112
192
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
113
193
  }
114
194
  case ExpressionType::COMPARE_LESSTHANOREQUALTO:
115
- // X <= C
195
+ // LessThanEquals::Operation(X, C)
116
196
  // this can be true only if min(X) <= C
117
197
  // if max(X) <= C, then this is always true
118
- if (max_value <= constant) {
198
+ if (LessThanEquals::Operation(max_value, constant)) {
119
199
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
120
- } else if (min_value <= constant) {
200
+ } else if (LessThanEquals::Operation(min_value, constant)) {
121
201
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
122
202
  } else {
123
203
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
124
204
  }
125
205
  case ExpressionType::COMPARE_LESSTHAN:
126
- // X < C
206
+ // LessThan::Operation(X, C)
127
207
  // this can be true only if min(X) < C
128
208
  // if max(X) < C, then this is always true
129
- if (max_value < constant) {
209
+ if (LessThan::Operation(max_value, constant)) {
130
210
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
131
- } else if (min_value < constant) {
211
+ } else if (LessThan::Operation(min_value, constant)) {
132
212
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
133
213
  } else {
134
214
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
@@ -138,6 +218,43 @@ FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, Ex
138
218
  }
139
219
  }
140
220
 
221
+ FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
222
+ const Value &constant) {
223
+ D_ASSERT(constant.type() == stats.GetType());
224
+ if (constant.IsNull()) {
225
+ return FilterPropagateResult::FILTER_ALWAYS_FALSE;
226
+ }
227
+ if (!NumericStats::HasMinMax(stats)) {
228
+ return FilterPropagateResult::NO_PRUNING_POSSIBLE;
229
+ }
230
+ switch (stats.GetType().InternalType()) {
231
+ case PhysicalType::INT8:
232
+ return CheckZonemapTemplated<int8_t>(stats, comparison_type, constant);
233
+ case PhysicalType::INT16:
234
+ return CheckZonemapTemplated<int16_t>(stats, comparison_type, constant);
235
+ case PhysicalType::INT32:
236
+ return CheckZonemapTemplated<int32_t>(stats, comparison_type, constant);
237
+ case PhysicalType::INT64:
238
+ return CheckZonemapTemplated<int64_t>(stats, comparison_type, constant);
239
+ case PhysicalType::UINT8:
240
+ return CheckZonemapTemplated<uint8_t>(stats, comparison_type, constant);
241
+ case PhysicalType::UINT16:
242
+ return CheckZonemapTemplated<uint16_t>(stats, comparison_type, constant);
243
+ case PhysicalType::UINT32:
244
+ return CheckZonemapTemplated<uint32_t>(stats, comparison_type, constant);
245
+ case PhysicalType::UINT64:
246
+ return CheckZonemapTemplated<uint64_t>(stats, comparison_type, constant);
247
+ case PhysicalType::INT128:
248
+ return CheckZonemapTemplated<hugeint_t>(stats, comparison_type, constant);
249
+ case PhysicalType::FLOAT:
250
+ return CheckZonemapTemplated<float>(stats, comparison_type, constant);
251
+ case PhysicalType::DOUBLE:
252
+ return CheckZonemapTemplated<double>(stats, comparison_type, constant);
253
+ default:
254
+ throw InternalException("Unsupported type for NumericStats::CheckZonemap");
255
+ }
256
+ }
257
+
141
258
  bool NumericStats::IsConstant(const BaseStatistics &stats) {
142
259
  return NumericStats::Max(stats) <= NumericStats::Min(stats);
143
260
  }
@@ -471,59 +588,4 @@ void NumericStats::Verify(const BaseStatistics &stats, Vector &vector, const Sel
471
588
  }
472
589
  }
473
590
 
474
- template <>
475
- int8_t &NumericValueUnion::GetReferenceUnsafe() {
476
- return value_.tinyint;
477
- }
478
-
479
- template <>
480
- int16_t &NumericValueUnion::GetReferenceUnsafe() {
481
- return value_.smallint;
482
- }
483
-
484
- template <>
485
- int32_t &NumericValueUnion::GetReferenceUnsafe() {
486
- return value_.integer;
487
- }
488
-
489
- template <>
490
- int64_t &NumericValueUnion::GetReferenceUnsafe() {
491
- return value_.bigint;
492
- }
493
-
494
- template <>
495
- hugeint_t &NumericValueUnion::GetReferenceUnsafe() {
496
- return value_.hugeint;
497
- }
498
-
499
- template <>
500
- uint8_t &NumericValueUnion::GetReferenceUnsafe() {
501
- return value_.utinyint;
502
- }
503
-
504
- template <>
505
- uint16_t &NumericValueUnion::GetReferenceUnsafe() {
506
- return value_.usmallint;
507
- }
508
-
509
- template <>
510
- uint32_t &NumericValueUnion::GetReferenceUnsafe() {
511
- return value_.uinteger;
512
- }
513
-
514
- template <>
515
- uint64_t &NumericValueUnion::GetReferenceUnsafe() {
516
- return value_.ubigint;
517
- }
518
-
519
- template <>
520
- float &NumericValueUnion::GetReferenceUnsafe() {
521
- return value_.float_;
522
- }
523
-
524
- template <>
525
- double &NumericValueUnion::GetReferenceUnsafe() {
526
- return value_.double_;
527
- }
528
-
529
591
  } // namespace duckdb
@@ -0,0 +1,65 @@
1
+ #include "duckdb/storage/statistics/numeric_stats_union.hpp"
2
+
3
+ namespace duckdb {
4
+
5
+ template <>
6
+ bool &NumericValueUnion::GetReferenceUnsafe() {
7
+ return value_.boolean;
8
+ }
9
+
10
+ template <>
11
+ int8_t &NumericValueUnion::GetReferenceUnsafe() {
12
+ return value_.tinyint;
13
+ }
14
+
15
+ template <>
16
+ int16_t &NumericValueUnion::GetReferenceUnsafe() {
17
+ return value_.smallint;
18
+ }
19
+
20
+ template <>
21
+ int32_t &NumericValueUnion::GetReferenceUnsafe() {
22
+ return value_.integer;
23
+ }
24
+
25
+ template <>
26
+ int64_t &NumericValueUnion::GetReferenceUnsafe() {
27
+ return value_.bigint;
28
+ }
29
+
30
+ template <>
31
+ hugeint_t &NumericValueUnion::GetReferenceUnsafe() {
32
+ return value_.hugeint;
33
+ }
34
+
35
+ template <>
36
+ uint8_t &NumericValueUnion::GetReferenceUnsafe() {
37
+ return value_.utinyint;
38
+ }
39
+
40
+ template <>
41
+ uint16_t &NumericValueUnion::GetReferenceUnsafe() {
42
+ return value_.usmallint;
43
+ }
44
+
45
+ template <>
46
+ uint32_t &NumericValueUnion::GetReferenceUnsafe() {
47
+ return value_.uinteger;
48
+ }
49
+
50
+ template <>
51
+ uint64_t &NumericValueUnion::GetReferenceUnsafe() {
52
+ return value_.ubigint;
53
+ }
54
+
55
+ template <>
56
+ float &NumericValueUnion::GetReferenceUnsafe() {
57
+ return value_.float_;
58
+ }
59
+
60
+ template <>
61
+ double &NumericValueUnion::GetReferenceUnsafe() {
62
+ return value_.double_;
63
+ }
64
+
65
+ } // namespace duckdb
@@ -2,7 +2,7 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- const uint64_t VERSION_NUMBER = 47;
5
+ const uint64_t VERSION_NUMBER = 48;
6
6
 
7
7
  struct StorageVersionInfo {
8
8
  const char *version_name;
@@ -1,12 +1,7 @@
1
-
2
1
  #include "duckdb/storage/table/column_data.hpp"
3
-
2
+ #include "duckdb/storage/table/column_checkpoint_state.hpp"
4
3
  #include "duckdb/storage/table/column_segment.hpp"
5
4
  #include "duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp"
6
- #include "duckdb/storage/table/validity_column_data.hpp"
7
- #include "duckdb/storage/table/standard_column_data.hpp"
8
- #include "duckdb/storage/table/list_column_data.hpp"
9
- #include "duckdb/transaction/transaction.hpp"
10
5
  #include "duckdb/storage/table/row_group.hpp"
11
6
  #include "duckdb/storage/checkpoint/table_data_writer.hpp"
12
7
 
@@ -32,10 +32,9 @@ ColumnData::ColumnData(ColumnData &other, idx_t start, ColumnData *parent)
32
32
  updates = make_unique<UpdateSegment>(*other.updates, *this);
33
33
  }
34
34
  idx_t offset = 0;
35
- for (auto segment = other.data.GetRootSegment(); segment; segment = segment->Next()) {
36
- auto &other = (ColumnSegment &)*segment;
37
- this->data.AppendSegment(ColumnSegment::CreateSegment(other, start + offset));
38
- offset += segment->count;
35
+ for (auto &segment : other.data.Segments()) {
36
+ this->data.AppendSegment(ColumnSegment::CreateSegment(segment, start + offset));
37
+ offset += segment.count;
39
38
  }
40
39
  }
41
40
 
@@ -75,7 +74,8 @@ idx_t ColumnData::GetMaxEntry() {
75
74
  }
76
75
 
77
76
  void ColumnData::InitializeScan(ColumnScanState &state) {
78
- state.current = (ColumnSegment *)data.GetRootSegment();
77
+ state.current = data.GetRootSegment();
78
+ state.segment_tree = &data;
79
79
  state.row_index = state.current ? state.current->start : 0;
80
80
  state.internal_index = state.row_index;
81
81
  state.initialized = false;
@@ -84,7 +84,8 @@ void ColumnData::InitializeScan(ColumnScanState &state) {
84
84
  }
85
85
 
86
86
  void ColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
87
- state.current = (ColumnSegment *)data.GetSegment(row_idx);
87
+ state.current = data.GetSegment(row_idx);
88
+ state.segment_tree = &data;
88
89
  state.row_index = row_idx;
89
90
  state.internal_index = state.current->start;
90
91
  state.initialized = false;
@@ -125,11 +126,12 @@ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remai
125
126
  }
126
127
 
127
128
  if (remaining > 0) {
128
- if (!state.current->next) {
129
+ auto next = data.GetNextSegment(state.current);
130
+ if (!next) {
129
131
  break;
130
132
  }
131
133
  state.previous_states.emplace_back(std::move(state.scan_state));
132
- state.current = (ColumnSegment *)state.current->Next();
134
+ state.current = next;
133
135
  state.current->InitializeScan(state);
134
136
  state.segment_checked = false;
135
137
  D_ASSERT(state.row_index >= state.current->start &&
@@ -234,14 +236,14 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) {
234
236
  // no segments yet, append an empty segment
235
237
  AppendTransientSegment(l, start);
236
238
  }
237
- auto segment = (ColumnSegment *)data.GetLastSegment(l);
239
+ auto segment = data.GetLastSegment(l);
238
240
  if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
239
241
  // no transient segments yet
240
242
  auto total_rows = segment->start + segment->count;
241
243
  AppendTransientSegment(l, total_rows);
242
- state.current = (ColumnSegment *)data.GetLastSegment(l);
244
+ state.current = data.GetLastSegment(l);
243
245
  } else {
244
- state.current = (ColumnSegment *)segment;
246
+ state.current = segment;
245
247
  }
246
248
 
247
249
  D_ASSERT(state.current->segment_type == ColumnSegmentType::TRANSIENT);
@@ -264,7 +266,7 @@ void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, Uni
264
266
  {
265
267
  auto l = data.Lock();
266
268
  AppendTransientSegment(l, state.current->start + state.current->count);
267
- state.current = (ColumnSegment *)data.GetLastSegment(l);
269
+ state.current = data.GetLastSegment(l);
268
270
  state.current->InitializeAppend(state);
269
271
  }
270
272
  offset += copied_elements;
@@ -284,7 +286,7 @@ void ColumnData::RevertAppend(row_t start_row) {
284
286
  // find the segment index that the current row belongs to
285
287
  idx_t segment_index = data.GetSegmentIndex(l, start_row);
286
288
  auto segment = data.GetSegmentByIndex(l, segment_index);
287
- auto &transient = (ColumnSegment &)*segment;
289
+ auto &transient = *segment;
288
290
  D_ASSERT(transient.segment_type == ColumnSegmentType::TRANSIENT);
289
291
 
290
292
  // remove any segments AFTER this segment: they should be deleted entirely
@@ -299,14 +301,14 @@ idx_t ColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) {
299
301
  D_ASSERT(idx_t(row_id) >= start);
300
302
  // perform the fetch within the segment
301
303
  state.row_index = start + ((row_id - start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE);
302
- state.current = (ColumnSegment *)data.GetSegment(state.row_index);
304
+ state.current = data.GetSegment(state.row_index);
303
305
  state.internal_index = state.current->start;
304
306
  return ScanVector(state, result, STANDARD_VECTOR_SIZE);
305
307
  }
306
308
 
307
309
  void ColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
308
310
  idx_t result_idx) {
309
- auto segment = (ColumnSegment *)data.GetSegment(row_id);
311
+ auto segment = data.GetSegment(row_id);
310
312
 
311
313
  // now perform the fetch within the segment
312
314
  segment->FetchRow(state, row_id, result, result_idx);
@@ -357,15 +359,14 @@ void ColumnData::AppendTransientSegment(SegmentLock &l, idx_t start_row) {
357
359
  }
358
360
 
359
361
  void ColumnData::CommitDropColumn() {
360
- auto segment = (ColumnSegment *)data.GetRootSegment();
361
- while (segment) {
362
- if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
363
- auto block_id = segment->GetBlockId();
362
+ for (auto &segment_p : data.Segments()) {
363
+ auto &segment = segment_p;
364
+ if (segment.segment_type == ColumnSegmentType::PERSISTENT) {
365
+ auto block_id = segment.GetBlockId();
364
366
  if (block_id != INVALID_BLOCK) {
365
367
  block_manager.MarkBlockAsModified(block_id);
366
368
  }
367
369
  }
368
- segment = (ColumnSegment *)segment->Next();
369
370
  }
370
371
  }
371
372
 
@@ -464,7 +465,6 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
464
465
  while (segment) {
465
466
  ColumnSegmentInfo column_info;
466
467
  column_info.row_group_index = row_group_index;
467
- ;
468
468
  column_info.column_id = col_path[0];
469
469
  column_info.column_path = col_path_str;
470
470
  column_info.segment_idx = segment_idx;
@@ -487,7 +487,7 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
487
487
  result.column_segments.push_back(std::move(column_info));
488
488
 
489
489
  segment_idx++;
490
- segment = (ColumnSegment *)segment->Next();
490
+ segment = (ColumnSegment *)data.GetNextSegment(segment);
491
491
  }
492
492
  }
493
493
 
@@ -495,19 +495,13 @@ void ColumnData::Verify(RowGroup &parent) {
495
495
  #ifdef DEBUG
496
496
  D_ASSERT(this->start == parent.start);
497
497
  data.Verify();
498
- auto root = data.GetRootSegment();
499
- if (root) {
500
- D_ASSERT(root != nullptr);
501
- D_ASSERT(root->start == this->start);
502
- idx_t prev_end = root->start;
503
- while (root) {
504
- D_ASSERT(prev_end == root->start);
505
- prev_end = root->start + root->count;
506
- if (!root->next) {
507
- D_ASSERT(prev_end == parent.start + parent.count);
508
- }
509
- root = root->Next();
510
- }
498
+ idx_t current_index = 0;
499
+ idx_t current_start = this->start;
500
+ for (auto &segment : data.Segments()) {
501
+ D_ASSERT(segment.index == current_index);
502
+ D_ASSERT(segment.start == current_start);
503
+ current_start += segment.count;
504
+ current_index++;
511
505
  }
512
506
  #endif
513
507
  }
@@ -38,7 +38,7 @@ ColumnCheckpointState &ColumnDataCheckpointer::GetCheckpointState() {
38
38
  void ColumnDataCheckpointer::ScanSegments(const std::function<void(Vector &, idx_t)> &callback) {
39
39
  Vector scan_vector(intermediate.GetType(), nullptr);
40
40
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
41
- auto segment = (ColumnSegment *)nodes[segment_idx].node.get();
41
+ auto segment = nodes[segment_idx].node.get();
42
42
  ColumnScanState scan_state;
43
43
  scan_state.current = segment;
44
44
  segment->InitializeScan(scan_state);
@@ -163,7 +163,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
163
163
  // since the segments will be rewritten their old on disk data is no longer required
164
164
  auto &block_manager = col_data.block_manager;
165
165
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
166
- auto segment = (ColumnSegment *)nodes[segment_idx].node.get();
166
+ auto segment = nodes[segment_idx].node.get();
167
167
  if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
168
168
  // persistent segment has updates: mark it as modified and rewrite the block with the merged updates
169
169
  auto block_id = segment->GetBlockId();
@@ -194,7 +194,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
194
194
 
195
195
  bool ColumnDataCheckpointer::HasChanges() {
196
196
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
197
- auto segment = (ColumnSegment *)nodes[segment_idx].node.get();
197
+ auto segment = nodes[segment_idx].node.get();
198
198
  if (segment->segment_type == ColumnSegmentType::TRANSIENT) {
199
199
  // transient segment: always need to write to disk
200
200
  return true;
@@ -214,7 +214,7 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
214
214
  // all segments are persistent and there are no updates
215
215
  // we only need to write the metadata
216
216
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
217
- auto segment = (ColumnSegment *)nodes[segment_idx].node.get();
217
+ auto segment = nodes[segment_idx].node.get();
218
218
  D_ASSERT(segment->segment_type == ColumnSegmentType::PERSISTENT);
219
219
 
220
220
  // set up the data pointer directly using the data from the persistent segment
@@ -235,7 +235,7 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
235
235
  }
236
236
  }
237
237
 
238
- void ColumnDataCheckpointer::Checkpoint(vector<SegmentNode> nodes) {
238
+ void ColumnDataCheckpointer::Checkpoint(vector<SegmentNode<ColumnSegment>> nodes) {
239
239
  D_ASSERT(!nodes.empty());
240
240
  this->nodes = std::move(nodes);
241
241
  // first check if any of the segments have changes
@@ -58,9 +58,10 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx
58
58
  ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
59
59
  ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction *function_p,
60
60
  BaseStatistics statistics, block_id_t block_id_p, idx_t offset_p, idx_t segment_size_p)
61
- : SegmentBase(start, count), db(db), type(std::move(type_p)), type_size(GetTypeIdSize(type.InternalType())),
62
- segment_type(segment_type), function(function_p), stats(std::move(statistics)), block(std::move(block)),
63
- block_id(block_id_p), offset(offset_p), segment_size(segment_size_p) {
61
+ : SegmentBase<ColumnSegment>(start, count), db(db), type(std::move(type_p)),
62
+ type_size(GetTypeIdSize(type.InternalType())), segment_type(segment_type), function(function_p),
63
+ stats(std::move(statistics)), block(std::move(block)), block_id(block_id_p), offset(offset_p),
64
+ segment_size(segment_size_p) {
64
65
  D_ASSERT(function);
65
66
  if (function->init_segment) {
66
67
  segment_state = function->init_segment(*this, block_id);
@@ -68,10 +69,10 @@ ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block
68
69
  }
69
70
 
70
71
  ColumnSegment::ColumnSegment(ColumnSegment &other, idx_t start)
71
- : SegmentBase(start, other.count), db(other.db), type(std::move(other.type)), type_size(other.type_size),
72
- segment_type(other.segment_type), function(other.function), stats(std::move(other.stats)),
73
- block(std::move(other.block)), block_id(other.block_id), offset(other.offset), segment_size(other.segment_size),
74
- segment_state(std::move(other.segment_state)) {
72
+ : SegmentBase<ColumnSegment>(start, other.count.load()), db(other.db), type(std::move(other.type)),
73
+ type_size(other.type_size), segment_type(other.segment_type), function(other.function),
74
+ stats(std::move(other.stats)), block(std::move(other.block)), block_id(other.block_id), offset(other.offset),
75
+ segment_size(other.segment_size), segment_state(std::move(other.segment_state)) {
75
76
  }
76
77
 
77
78
  ColumnSegment::~ColumnSegment() {
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/storage/table/list_column_data.hpp"
2
2
  #include "duckdb/storage/statistics/list_stats.hpp"
3
3
  #include "duckdb/transaction/transaction.hpp"
4
+ #include "duckdb/storage/table/column_checkpoint_state.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
@@ -40,7 +41,7 @@ void ListColumnData::InitializeScan(ColumnScanState &state) {
40
41
  }
41
42
 
42
43
  uint64_t ListColumnData::FetchListOffset(idx_t row_idx) {
43
- auto segment = (ColumnSegment *)data.GetSegment(row_idx);
44
+ auto segment = data.GetSegment(row_idx);
44
45
  ColumnFetchState fetch_state;
45
46
  Vector result(type, 1);
46
47
  segment->FetchRow(fetch_state, row_idx, result, 0);
@@ -3,7 +3,8 @@
3
3
 
4
4
  namespace duckdb {
5
5
 
6
- PersistentTableData::PersistentTableData(idx_t column_count) {
6
+ PersistentTableData::PersistentTableData(idx_t column_count)
7
+ : total_rows(0), row_group_count(0), block_id(INVALID_BLOCK), offset(0) {
7
8
  }
8
9
 
9
10
  PersistentTableData::~PersistentTableData() {