duckdb 0.7.2-dev717.0 → 0.7.2-dev865.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
Files changed (179)
  1. package/binding.gyp +2 -0
  2. package/lib/duckdb.d.ts +12 -1
  3. package/lib/duckdb.js +19 -0
  4. package/package.json +1 -1
  5. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  6. package/src/duckdb/extension/json/include/json_functions.hpp +1 -0
  7. package/src/duckdb/extension/json/include/json_serializer.hpp +77 -0
  8. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +147 -0
  9. package/src/duckdb/extension/json/json_functions.cpp +1 -0
  10. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  11. package/src/duckdb/extension/json/json_serializer.cpp +217 -0
  12. package/src/duckdb/src/catalog/catalog.cpp +21 -5
  13. package/src/duckdb/src/common/enums/expression_type.cpp +8 -222
  14. package/src/duckdb/src/common/enums/join_type.cpp +3 -22
  15. package/src/duckdb/src/common/exception.cpp +2 -2
  16. package/src/duckdb/src/common/serializer/enum_serializer.cpp +1172 -0
  17. package/src/duckdb/src/common/types/value.cpp +117 -93
  18. package/src/duckdb/src/common/types/vector.cpp +140 -1
  19. package/src/duckdb/src/common/types.cpp +166 -89
  20. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +3 -0
  21. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +5 -8
  22. package/src/duckdb/src/function/scalar/date/date_part.cpp +2 -2
  23. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +2 -2
  24. package/src/duckdb/src/function/scalar/list/list_aggregates.cpp +1 -1
  25. package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +4 -0
  26. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +8 -8
  27. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +243 -0
  28. package/src/duckdb/src/function/scalar/string/regexp/regexp_util.cpp +79 -0
  29. package/src/duckdb/src/function/scalar/string/regexp.cpp +21 -80
  30. package/src/duckdb/src/function/table/arrow_conversion.cpp +7 -1
  31. package/src/duckdb/src/function/table/table_scan.cpp +1 -1
  32. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  33. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +3 -0
  34. package/src/duckdb/src/include/duckdb/common/enums/aggregate_handling.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -3
  36. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -0
  37. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +2 -0
  38. package/src/duckdb/src/include/duckdb/common/enums/set_operation_type.hpp +2 -1
  39. package/src/duckdb/src/include/duckdb/common/exception.hpp +40 -9
  40. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +3 -0
  41. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +113 -0
  42. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +336 -0
  43. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +268 -0
  44. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +126 -0
  45. package/src/duckdb/src/include/duckdb/common/string_util.hpp +12 -0
  46. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -31
  47. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -0
  48. package/src/duckdb/src/include/duckdb/common/types.hpp +8 -2
  49. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +81 -1
  50. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  51. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  52. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  53. package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +2 -0
  54. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +5 -0
  55. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +2 -0
  56. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +2 -0
  57. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +2 -0
  58. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +2 -0
  60. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  61. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  62. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -0
  63. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +2 -0
  64. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -0
  65. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +2 -0
  66. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +2 -0
  67. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +2 -0
  68. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +2 -0
  69. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +5 -0
  70. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +2 -0
  71. package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +5 -0
  72. package/src/duckdb/src/include/duckdb/parser/query_node/recursive_cte_node.hpp +3 -0
  73. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +5 -0
  74. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +3 -0
  75. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +11 -1
  76. package/src/duckdb/src/include/duckdb/parser/result_modifier.hpp +24 -1
  77. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -1
  78. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +6 -1
  79. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +4 -0
  80. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -0
  81. package/src/duckdb/src/include/duckdb/parser/tableref/expressionlistref.hpp +3 -0
  82. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +3 -0
  83. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +9 -0
  84. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  85. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +3 -0
  86. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  87. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +9 -52
  88. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +62 -0
  89. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  90. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +6 -3
  91. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +3 -2
  92. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +5 -3
  93. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +4 -1
  94. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -3
  95. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +5 -3
  96. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +37 -0
  97. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +8 -1
  98. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +4 -3
  99. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +271 -26
  100. package/src/duckdb/src/main/extension/extension_install.cpp +7 -2
  101. package/src/duckdb/src/optimizer/deliminator.cpp +1 -1
  102. package/src/duckdb/src/optimizer/filter_combiner.cpp +1 -1
  103. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +3 -3
  104. package/src/duckdb/src/optimizer/rule/move_constants.cpp +2 -2
  105. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +1 -1
  106. package/src/duckdb/src/parser/common_table_expression_info.cpp +19 -0
  107. package/src/duckdb/src/parser/expression/between_expression.cpp +17 -0
  108. package/src/duckdb/src/parser/expression/case_expression.cpp +28 -0
  109. package/src/duckdb/src/parser/expression/cast_expression.cpp +17 -0
  110. package/src/duckdb/src/parser/expression/collate_expression.cpp +16 -0
  111. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -0
  112. package/src/duckdb/src/parser/expression/comparison_expression.cpp +16 -0
  113. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +15 -0
  114. package/src/duckdb/src/parser/expression/constant_expression.cpp +14 -0
  115. package/src/duckdb/src/parser/expression/default_expression.cpp +7 -0
  116. package/src/duckdb/src/parser/expression/function_expression.cpp +35 -0
  117. package/src/duckdb/src/parser/expression/lambda_expression.cpp +16 -0
  118. package/src/duckdb/src/parser/expression/operator_expression.cpp +15 -0
  119. package/src/duckdb/src/parser/expression/parameter_expression.cpp +15 -0
  120. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +14 -0
  121. package/src/duckdb/src/parser/expression/star_expression.cpp +20 -0
  122. package/src/duckdb/src/parser/expression/subquery_expression.cpp +20 -0
  123. package/src/duckdb/src/parser/expression/window_expression.cpp +43 -0
  124. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +22 -10
  125. package/src/duckdb/src/parser/parsed_expression.cpp +72 -0
  126. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +21 -0
  127. package/src/duckdb/src/parser/query_node/select_node.cpp +31 -0
  128. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +17 -0
  129. package/src/duckdb/src/parser/query_node.cpp +50 -0
  130. package/src/duckdb/src/parser/result_modifier.cpp +78 -0
  131. package/src/duckdb/src/parser/statement/select_statement.cpp +12 -0
  132. package/src/duckdb/src/parser/tableref/basetableref.cpp +21 -0
  133. package/src/duckdb/src/parser/tableref/emptytableref.cpp +4 -0
  134. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +17 -0
  135. package/src/duckdb/src/parser/tableref/joinref.cpp +25 -0
  136. package/src/duckdb/src/parser/tableref/pivotref.cpp +53 -0
  137. package/src/duckdb/src/parser/tableref/subqueryref.cpp +15 -0
  138. package/src/duckdb/src/parser/tableref/table_function.cpp +17 -0
  139. package/src/duckdb/src/parser/tableref.cpp +46 -0
  140. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +11 -0
  141. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -1
  142. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  143. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  144. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +22 -4
  145. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +4 -0
  146. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -1
  147. package/src/duckdb/src/planner/expression/bound_expression.cpp +4 -0
  148. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -11
  149. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +6 -0
  150. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -0
  151. package/src/duckdb/src/storage/compression/numeric_constant.cpp +2 -2
  152. package/src/duckdb/src/storage/data_table.cpp +1 -1
  153. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +145 -83
  154. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +65 -0
  155. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  156. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +1 -6
  157. package/src/duckdb/src/storage/table/column_data.cpp +29 -35
  158. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +5 -5
  159. package/src/duckdb/src/storage/table/column_segment.cpp +8 -7
  160. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -1
  161. package/src/duckdb/src/storage/table/persistent_table_data.cpp +2 -1
  162. package/src/duckdb/src/storage/table/row_group.cpp +9 -9
  163. package/src/duckdb/src/storage/table/row_group_collection.cpp +82 -66
  164. package/src/duckdb/src/storage/table/scan_state.cpp +22 -3
  165. package/src/duckdb/src/storage/table/standard_column_data.cpp +1 -0
  166. package/src/duckdb/src/storage/table/struct_column_data.cpp +1 -0
  167. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +0 -1
  168. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11780 -11512
  169. package/src/duckdb/third_party/re2/re2/re2.cc +9 -0
  170. package/src/duckdb/third_party/re2/re2/re2.h +2 -0
  171. package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
  172. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  173. package/src/duckdb/ub_src_function_scalar_string_regexp.cpp +4 -0
  174. package/src/duckdb/ub_src_parser.cpp +2 -0
  175. package/src/duckdb/ub_src_storage_statistics.cpp +2 -0
  176. package/src/duckdb/ub_src_storage_table.cpp +0 -2
  177. package/src/utils.cpp +12 -0
  178. package/test/extension.test.ts +44 -26
  179. package/src/duckdb/src/storage/table/segment_tree.cpp +0 -179
@@ -33,7 +33,7 @@ static bool CreateJoinCondition(Expression &expr, const unordered_set<idx_t> &le
33
33
  if (left_side == JoinSide::RIGHT) {
34
34
  // left = right, right = left, flip the comparison symbol and reverse sides
35
35
  swap(left, right);
36
- condition.comparison = FlipComparisionExpression(expr.type);
36
+ condition.comparison = FlipComparisonExpression(expr.type);
37
37
  }
38
38
  condition.left = std::move(left);
39
39
  condition.right = std::move(right);
@@ -28,4 +28,8 @@ void BoundExpression::Serialize(FieldWriter &writer) const {
28
28
  throw SerializationException("Cannot copy or serialize bound expression");
29
29
  }
30
30
 
31
+ void BoundExpression::FormatSerialize(FormatSerializer &serializer) const {
32
+ throw SerializationException("Cannot copy or serialize bound expression");
33
+ }
34
+
31
35
  } // namespace duckdb
@@ -1,7 +1,5 @@
1
1
  #include "duckdb/storage/checkpoint/table_data_reader.hpp"
2
2
  #include "duckdb/storage/meta_block_reader.hpp"
3
-
4
- #include "duckdb/common/vector_operations/vector_operations.hpp"
5
3
  #include "duckdb/common/types/null_value.hpp"
6
4
 
7
5
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
@@ -9,9 +7,6 @@
9
7
  #include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
10
8
 
11
9
  #include "duckdb/main/database.hpp"
12
- #include "duckdb/main/client_context.hpp"
13
-
14
- #include "duckdb/storage/table/row_group.hpp"
15
10
 
16
11
  namespace duckdb {
17
12
 
@@ -27,12 +22,9 @@ void TableDataReader::ReadTableData() {
27
22
  info.data->table_stats.Deserialize(reader, columns);
28
23
 
29
24
  // deserialize each of the individual row groups
30
- auto row_group_count = reader.Read<uint64_t>();
31
- info.data->row_groups.reserve(row_group_count);
32
- for (idx_t i = 0; i < row_group_count; i++) {
33
- auto row_group_pointer = RowGroup::Deserialize(reader, columns);
34
- info.data->row_groups.push_back(std::move(row_group_pointer));
35
- }
25
+ info.data->row_group_count = reader.Read<uint64_t>();
26
+ info.data->block_id = reader.block->BlockId();
27
+ info.data->offset = reader.offset;
36
28
  }
37
29
 
38
30
  } // namespace duckdb
@@ -50,13 +50,19 @@ void SingleFileTableDataWriter::FinalizeTable(TableStatistics &&global_stats, Da
50
50
 
51
51
  // now start writing the row group pointers to disk
52
52
  table_data_writer.Write<uint64_t>(row_group_pointers.size());
53
+ idx_t total_rows = 0;
53
54
  for (auto &row_group_pointer : row_group_pointers) {
55
+ auto row_group_count = row_group_pointer.row_start + row_group_pointer.tuple_count;
56
+ if (row_group_count > total_rows) {
57
+ total_rows = row_group_count;
58
+ }
54
59
  RowGroup::Serialize(row_group_pointer, table_data_writer);
55
60
  }
56
61
 
57
62
  // Pointer to the table itself goes to the metadata stream.
58
63
  meta_data_writer.Write<block_id_t>(pointer.block_id);
59
64
  meta_data_writer.Write<uint64_t>(pointer.offset);
65
+ meta_data_writer.Write<idx_t>(total_rows);
60
66
 
61
67
  // Now we serialize indexes in the table_metadata_writer
62
68
  std::vector<BlockPointer> index_pointers = info->indexes.SerializeIndexes(table_data_writer);
@@ -467,6 +467,7 @@ void CheckpointReader::ReadTableData(ClientContext &context, MetaBlockReader &re
467
467
  TableDataReader data_reader(table_data_reader, bound_info);
468
468
 
469
469
  data_reader.ReadTableData();
470
+ bound_info.data->total_rows = reader.Read<idx_t>();
470
471
 
471
472
  // Get any indexes block info
472
473
  idx_t num_indexes = reader.Read<idx_t>();
@@ -33,7 +33,7 @@ void ConstantFillFunction(ColumnSegment &segment, Vector &result, idx_t start_id
33
33
  auto &nstats = segment.stats.statistics;
34
34
 
35
35
  auto data = FlatVector::GetData<T>(result);
36
- auto constant_value = NumericStats::GetMinUnsafe<T>(nstats);
36
+ auto constant_value = NumericStats::GetMin<T>(nstats);
37
37
  for (idx_t i = 0; i < count; i++) {
38
38
  data[start_idx + i] = constant_value;
39
39
  }
@@ -71,7 +71,7 @@ void ConstantScanFunction(ColumnSegment &segment, ColumnScanState &state, idx_t
71
71
  auto &nstats = segment.stats.statistics;
72
72
 
73
73
  auto data = FlatVector::GetData<T>(result);
74
- data[0] = NumericStats::GetMinUnsafe<T>(nstats);
74
+ data[0] = NumericStats::GetMin<T>(nstats);
75
75
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
76
76
  }
77
77
 
@@ -45,7 +45,7 @@ DataTable::DataTable(AttachedDatabase &db, shared_ptr<TableIOManager> table_io_m
45
45
  auto types = GetTypes();
46
46
  this->row_groups =
47
47
  make_shared<RowGroupCollection>(info, TableIOManager::Get(*this).GetBlockManagerForRowData(), types, 0);
48
- if (data && !data->row_groups.empty()) {
48
+ if (data && data->row_group_count > 0) {
49
49
  this->row_groups->Initialize(*data);
50
50
  } else {
51
51
  this->row_groups->InitializeEmpty();
@@ -2,6 +2,7 @@
2
2
  #include "duckdb/storage/statistics/base_statistics.hpp"
3
3
  #include "duckdb/common/field_writer.hpp"
4
4
  #include "duckdb/common/types/vector.hpp"
5
+ #include "duckdb/common/operator/comparison_operators.hpp"
5
6
 
6
7
  namespace duckdb {
7
8
 
@@ -13,6 +14,9 @@ template <>
13
14
  void NumericStats::Update<list_entry_t>(BaseStatistics &stats, list_entry_t new_value) {
14
15
  }
15
16
 
17
+ //===--------------------------------------------------------------------===//
18
+ // NumericStats
19
+ //===--------------------------------------------------------------------===//
16
20
  BaseStatistics NumericStats::CreateUnknown(LogicalType type) {
17
21
  BaseStatistics result(std::move(type));
18
22
  result.InitializeUnknown();
@@ -62,73 +66,149 @@ void NumericStats::Merge(BaseStatistics &stats, const BaseStatistics &other) {
62
66
  }
63
67
  }
64
68
 
65
- FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
66
- const Value &constant) {
67
- if (constant.IsNull()) {
68
- return FilterPropagateResult::FILTER_ALWAYS_FALSE;
69
- }
70
- if (!NumericStats::HasMinMax(stats)) {
71
- return FilterPropagateResult::NO_PRUNING_POSSIBLE;
72
- }
73
- auto min_value = NumericStats::Min(stats);
74
- auto max_value = NumericStats::Max(stats);
69
+ struct GetNumericValueUnion {
70
+ template <class T>
71
+ static T Operation(const NumericValueUnion &v);
72
+ };
73
+
74
+ template <>
75
+ int8_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
76
+ return v.value_.tinyint;
77
+ }
78
+
79
+ template <>
80
+ int16_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
81
+ return v.value_.smallint;
82
+ }
83
+
84
+ template <>
85
+ int32_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
86
+ return v.value_.integer;
87
+ }
88
+
89
+ template <>
90
+ int64_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
91
+ return v.value_.bigint;
92
+ }
93
+
94
+ template <>
95
+ hugeint_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
96
+ return v.value_.hugeint;
97
+ }
98
+
99
+ template <>
100
+ uint8_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
101
+ return v.value_.utinyint;
102
+ }
103
+
104
+ template <>
105
+ uint16_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
106
+ return v.value_.usmallint;
107
+ }
108
+
109
+ template <>
110
+ uint32_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
111
+ return v.value_.uinteger;
112
+ }
113
+
114
+ template <>
115
+ uint64_t GetNumericValueUnion::Operation(const NumericValueUnion &v) {
116
+ return v.value_.ubigint;
117
+ }
118
+
119
+ template <>
120
+ float GetNumericValueUnion::Operation(const NumericValueUnion &v) {
121
+ return v.value_.float_;
122
+ }
123
+
124
+ template <>
125
+ double GetNumericValueUnion::Operation(const NumericValueUnion &v) {
126
+ return v.value_.double_;
127
+ }
128
+
129
+ template <class T>
130
+ T NumericStats::GetMinUnsafe(const BaseStatistics &stats) {
131
+ return GetNumericValueUnion::Operation<T>(NumericStats::GetDataUnsafe(stats).min);
132
+ }
133
+
134
+ template <class T>
135
+ T NumericStats::GetMaxUnsafe(const BaseStatistics &stats) {
136
+ return GetNumericValueUnion::Operation<T>(NumericStats::GetDataUnsafe(stats).max);
137
+ }
138
+
139
+ template <class T>
140
+ bool ConstantExactRange(T min, T max, T constant) {
141
+ return Equals::Operation(constant, min) && Equals::Operation(constant, max);
142
+ }
143
+
144
+ template <class T>
145
+ bool ConstantValueInRange(T min, T max, T constant) {
146
+ return !(LessThan::Operation(constant, min) || GreaterThan::Operation(constant, max));
147
+ }
148
+
149
+ template <class T>
150
+ FilterPropagateResult CheckZonemapTemplated(const BaseStatistics &stats, ExpressionType comparison_type,
151
+ const Value &constant_value) {
152
+ T min_value = NumericStats::GetMinUnsafe<T>(stats);
153
+ T max_value = NumericStats::GetMaxUnsafe<T>(stats);
154
+ T constant = constant_value.GetValueUnsafe<T>();
75
155
  switch (comparison_type) {
76
156
  case ExpressionType::COMPARE_EQUAL:
77
- if (constant == min_value && constant == max_value) {
157
+ if (ConstantExactRange(min_value, max_value, constant)) {
78
158
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
79
- } else if (constant >= min_value && constant <= max_value) {
159
+ }
160
+ if (ConstantValueInRange(min_value, max_value, constant)) {
80
161
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
81
- } else {
82
- return FilterPropagateResult::FILTER_ALWAYS_FALSE;
83
162
  }
163
+ return FilterPropagateResult::FILTER_ALWAYS_FALSE;
84
164
  case ExpressionType::COMPARE_NOTEQUAL:
85
- if (constant < min_value || constant > max_value) {
165
+ if (!ConstantValueInRange(min_value, max_value, constant)) {
86
166
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
87
- } else if (min_value == max_value && min_value == constant) {
167
+ } else if (ConstantExactRange(min_value, max_value, constant)) {
88
168
  // corner case of a cluster with one numeric equal to the target constant
89
169
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
90
170
  }
91
171
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
92
172
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
93
- // X >= C
173
+ // GreaterThanEquals::Operation(X, C)
94
174
  // this can be true only if max(X) >= C
95
175
  // if min(X) >= C, then this is always true
96
- if (min_value >= constant) {
176
+ if (GreaterThanEquals::Operation(min_value, constant)) {
97
177
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
98
- } else if (max_value >= constant) {
178
+ } else if (GreaterThanEquals::Operation(max_value, constant)) {
99
179
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
100
180
  } else {
101
181
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
102
182
  }
103
183
  case ExpressionType::COMPARE_GREATERTHAN:
104
- // X > C
184
+ // GreaterThan::Operation(X, C)
105
185
  // this can be true only if max(X) > C
106
186
  // if min(X) > C, then this is always true
107
- if (min_value > constant) {
187
+ if (GreaterThan::Operation(min_value, constant)) {
108
188
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
109
- } else if (max_value > constant) {
189
+ } else if (GreaterThan::Operation(max_value, constant)) {
110
190
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
111
191
  } else {
112
192
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
113
193
  }
114
194
  case ExpressionType::COMPARE_LESSTHANOREQUALTO:
115
- // X <= C
195
+ // LessThanEquals::Operation(X, C)
116
196
  // this can be true only if min(X) <= C
117
197
  // if max(X) <= C, then this is always true
118
- if (max_value <= constant) {
198
+ if (LessThanEquals::Operation(max_value, constant)) {
119
199
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
120
- } else if (min_value <= constant) {
200
+ } else if (LessThanEquals::Operation(min_value, constant)) {
121
201
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
122
202
  } else {
123
203
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
124
204
  }
125
205
  case ExpressionType::COMPARE_LESSTHAN:
126
- // X < C
206
+ // LessThan::Operation(X, C)
127
207
  // this can be true only if min(X) < C
128
208
  // if max(X) < C, then this is always true
129
- if (max_value < constant) {
209
+ if (LessThan::Operation(max_value, constant)) {
130
210
  return FilterPropagateResult::FILTER_ALWAYS_TRUE;
131
- } else if (min_value < constant) {
211
+ } else if (LessThan::Operation(min_value, constant)) {
132
212
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
133
213
  } else {
134
214
  return FilterPropagateResult::FILTER_ALWAYS_FALSE;
@@ -138,6 +218,43 @@ FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, Ex
138
218
  }
139
219
  }
140
220
 
221
+ FilterPropagateResult NumericStats::CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
222
+ const Value &constant) {
223
+ D_ASSERT(constant.type() == stats.GetType());
224
+ if (constant.IsNull()) {
225
+ return FilterPropagateResult::FILTER_ALWAYS_FALSE;
226
+ }
227
+ if (!NumericStats::HasMinMax(stats)) {
228
+ return FilterPropagateResult::NO_PRUNING_POSSIBLE;
229
+ }
230
+ switch (stats.GetType().InternalType()) {
231
+ case PhysicalType::INT8:
232
+ return CheckZonemapTemplated<int8_t>(stats, comparison_type, constant);
233
+ case PhysicalType::INT16:
234
+ return CheckZonemapTemplated<int16_t>(stats, comparison_type, constant);
235
+ case PhysicalType::INT32:
236
+ return CheckZonemapTemplated<int32_t>(stats, comparison_type, constant);
237
+ case PhysicalType::INT64:
238
+ return CheckZonemapTemplated<int64_t>(stats, comparison_type, constant);
239
+ case PhysicalType::UINT8:
240
+ return CheckZonemapTemplated<uint8_t>(stats, comparison_type, constant);
241
+ case PhysicalType::UINT16:
242
+ return CheckZonemapTemplated<uint16_t>(stats, comparison_type, constant);
243
+ case PhysicalType::UINT32:
244
+ return CheckZonemapTemplated<uint32_t>(stats, comparison_type, constant);
245
+ case PhysicalType::UINT64:
246
+ return CheckZonemapTemplated<uint64_t>(stats, comparison_type, constant);
247
+ case PhysicalType::INT128:
248
+ return CheckZonemapTemplated<hugeint_t>(stats, comparison_type, constant);
249
+ case PhysicalType::FLOAT:
250
+ return CheckZonemapTemplated<float>(stats, comparison_type, constant);
251
+ case PhysicalType::DOUBLE:
252
+ return CheckZonemapTemplated<double>(stats, comparison_type, constant);
253
+ default:
254
+ throw InternalException("Unsupported type for NumericStats::CheckZonemap");
255
+ }
256
+ }
257
+
141
258
  bool NumericStats::IsConstant(const BaseStatistics &stats) {
142
259
  return NumericStats::Max(stats) <= NumericStats::Min(stats);
143
260
  }
@@ -471,59 +588,4 @@ void NumericStats::Verify(const BaseStatistics &stats, Vector &vector, const Sel
471
588
  }
472
589
  }
473
590
 
474
- template <>
475
- int8_t &NumericValueUnion::GetReferenceUnsafe() {
476
- return value_.tinyint;
477
- }
478
-
479
- template <>
480
- int16_t &NumericValueUnion::GetReferenceUnsafe() {
481
- return value_.smallint;
482
- }
483
-
484
- template <>
485
- int32_t &NumericValueUnion::GetReferenceUnsafe() {
486
- return value_.integer;
487
- }
488
-
489
- template <>
490
- int64_t &NumericValueUnion::GetReferenceUnsafe() {
491
- return value_.bigint;
492
- }
493
-
494
- template <>
495
- hugeint_t &NumericValueUnion::GetReferenceUnsafe() {
496
- return value_.hugeint;
497
- }
498
-
499
- template <>
500
- uint8_t &NumericValueUnion::GetReferenceUnsafe() {
501
- return value_.utinyint;
502
- }
503
-
504
- template <>
505
- uint16_t &NumericValueUnion::GetReferenceUnsafe() {
506
- return value_.usmallint;
507
- }
508
-
509
- template <>
510
- uint32_t &NumericValueUnion::GetReferenceUnsafe() {
511
- return value_.uinteger;
512
- }
513
-
514
- template <>
515
- uint64_t &NumericValueUnion::GetReferenceUnsafe() {
516
- return value_.ubigint;
517
- }
518
-
519
- template <>
520
- float &NumericValueUnion::GetReferenceUnsafe() {
521
- return value_.float_;
522
- }
523
-
524
- template <>
525
- double &NumericValueUnion::GetReferenceUnsafe() {
526
- return value_.double_;
527
- }
528
-
529
591
  } // namespace duckdb
@@ -0,0 +1,65 @@
1
+ #include "duckdb/storage/statistics/numeric_stats_union.hpp"
2
+
3
+ namespace duckdb {
4
+
5
+ template <>
6
+ bool &NumericValueUnion::GetReferenceUnsafe() {
7
+ return value_.boolean;
8
+ }
9
+
10
+ template <>
11
+ int8_t &NumericValueUnion::GetReferenceUnsafe() {
12
+ return value_.tinyint;
13
+ }
14
+
15
+ template <>
16
+ int16_t &NumericValueUnion::GetReferenceUnsafe() {
17
+ return value_.smallint;
18
+ }
19
+
20
+ template <>
21
+ int32_t &NumericValueUnion::GetReferenceUnsafe() {
22
+ return value_.integer;
23
+ }
24
+
25
+ template <>
26
+ int64_t &NumericValueUnion::GetReferenceUnsafe() {
27
+ return value_.bigint;
28
+ }
29
+
30
+ template <>
31
+ hugeint_t &NumericValueUnion::GetReferenceUnsafe() {
32
+ return value_.hugeint;
33
+ }
34
+
35
+ template <>
36
+ uint8_t &NumericValueUnion::GetReferenceUnsafe() {
37
+ return value_.utinyint;
38
+ }
39
+
40
+ template <>
41
+ uint16_t &NumericValueUnion::GetReferenceUnsafe() {
42
+ return value_.usmallint;
43
+ }
44
+
45
+ template <>
46
+ uint32_t &NumericValueUnion::GetReferenceUnsafe() {
47
+ return value_.uinteger;
48
+ }
49
+
50
+ template <>
51
+ uint64_t &NumericValueUnion::GetReferenceUnsafe() {
52
+ return value_.ubigint;
53
+ }
54
+
55
+ template <>
56
+ float &NumericValueUnion::GetReferenceUnsafe() {
57
+ return value_.float_;
58
+ }
59
+
60
+ template <>
61
+ double &NumericValueUnion::GetReferenceUnsafe() {
62
+ return value_.double_;
63
+ }
64
+
65
+ } // namespace duckdb
@@ -2,7 +2,7 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- const uint64_t VERSION_NUMBER = 47;
5
+ const uint64_t VERSION_NUMBER = 48;
6
6
 
7
7
  struct StorageVersionInfo {
8
8
  const char *version_name;
@@ -1,12 +1,7 @@
1
-
2
1
  #include "duckdb/storage/table/column_data.hpp"
3
-
2
+ #include "duckdb/storage/table/column_checkpoint_state.hpp"
4
3
  #include "duckdb/storage/table/column_segment.hpp"
5
4
  #include "duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp"
6
- #include "duckdb/storage/table/validity_column_data.hpp"
7
- #include "duckdb/storage/table/standard_column_data.hpp"
8
- #include "duckdb/storage/table/list_column_data.hpp"
9
- #include "duckdb/transaction/transaction.hpp"
10
5
  #include "duckdb/storage/table/row_group.hpp"
11
6
  #include "duckdb/storage/checkpoint/table_data_writer.hpp"
12
7
 
@@ -32,10 +32,9 @@ ColumnData::ColumnData(ColumnData &other, idx_t start, ColumnData *parent)
32
32
  updates = make_unique<UpdateSegment>(*other.updates, *this);
33
33
  }
34
34
  idx_t offset = 0;
35
- for (auto segment = other.data.GetRootSegment(); segment; segment = segment->Next()) {
36
- auto &other = (ColumnSegment &)*segment;
37
- this->data.AppendSegment(ColumnSegment::CreateSegment(other, start + offset));
38
- offset += segment->count;
35
+ for (auto &segment : other.data.Segments()) {
36
+ this->data.AppendSegment(ColumnSegment::CreateSegment(segment, start + offset));
37
+ offset += segment.count;
39
38
  }
40
39
  }
41
40
 
@@ -75,7 +74,8 @@ idx_t ColumnData::GetMaxEntry() {
75
74
  }
76
75
 
77
76
  void ColumnData::InitializeScan(ColumnScanState &state) {
78
- state.current = (ColumnSegment *)data.GetRootSegment();
77
+ state.current = data.GetRootSegment();
78
+ state.segment_tree = &data;
79
79
  state.row_index = state.current ? state.current->start : 0;
80
80
  state.internal_index = state.row_index;
81
81
  state.initialized = false;
@@ -84,7 +84,8 @@ void ColumnData::InitializeScan(ColumnScanState &state) {
84
84
  }
85
85
 
86
86
  void ColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
87
- state.current = (ColumnSegment *)data.GetSegment(row_idx);
87
+ state.current = data.GetSegment(row_idx);
88
+ state.segment_tree = &data;
88
89
  state.row_index = row_idx;
89
90
  state.internal_index = state.current->start;
90
91
  state.initialized = false;
@@ -125,11 +126,12 @@ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remai
125
126
  }
126
127
 
127
128
  if (remaining > 0) {
128
- if (!state.current->next) {
129
+ auto next = data.GetNextSegment(state.current);
130
+ if (!next) {
129
131
  break;
130
132
  }
131
133
  state.previous_states.emplace_back(std::move(state.scan_state));
132
- state.current = (ColumnSegment *)state.current->Next();
134
+ state.current = next;
133
135
  state.current->InitializeScan(state);
134
136
  state.segment_checked = false;
135
137
  D_ASSERT(state.row_index >= state.current->start &&
@@ -234,14 +236,14 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) {
234
236
  // no segments yet, append an empty segment
235
237
  AppendTransientSegment(l, start);
236
238
  }
237
- auto segment = (ColumnSegment *)data.GetLastSegment(l);
239
+ auto segment = data.GetLastSegment(l);
238
240
  if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
239
241
  // no transient segments yet
240
242
  auto total_rows = segment->start + segment->count;
241
243
  AppendTransientSegment(l, total_rows);
242
- state.current = (ColumnSegment *)data.GetLastSegment(l);
244
+ state.current = data.GetLastSegment(l);
243
245
  } else {
244
- state.current = (ColumnSegment *)segment;
246
+ state.current = segment;
245
247
  }
246
248
 
247
249
  D_ASSERT(state.current->segment_type == ColumnSegmentType::TRANSIENT);
@@ -264,7 +266,7 @@ void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, Uni
264
266
  {
265
267
  auto l = data.Lock();
266
268
  AppendTransientSegment(l, state.current->start + state.current->count);
267
- state.current = (ColumnSegment *)data.GetLastSegment(l);
269
+ state.current = data.GetLastSegment(l);
268
270
  state.current->InitializeAppend(state);
269
271
  }
270
272
  offset += copied_elements;
@@ -284,7 +286,7 @@ void ColumnData::RevertAppend(row_t start_row) {
284
286
  // find the segment index that the current row belongs to
285
287
  idx_t segment_index = data.GetSegmentIndex(l, start_row);
286
288
  auto segment = data.GetSegmentByIndex(l, segment_index);
287
- auto &transient = (ColumnSegment &)*segment;
289
+ auto &transient = *segment;
288
290
  D_ASSERT(transient.segment_type == ColumnSegmentType::TRANSIENT);
289
291
 
290
292
  // remove any segments AFTER this segment: they should be deleted entirely
@@ -299,14 +301,14 @@ idx_t ColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) {
299
301
  D_ASSERT(idx_t(row_id) >= start);
300
302
  // perform the fetch within the segment
301
303
  state.row_index = start + ((row_id - start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE);
302
- state.current = (ColumnSegment *)data.GetSegment(state.row_index);
304
+ state.current = data.GetSegment(state.row_index);
303
305
  state.internal_index = state.current->start;
304
306
  return ScanVector(state, result, STANDARD_VECTOR_SIZE);
305
307
  }
306
308
 
307
309
  void ColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
308
310
  idx_t result_idx) {
309
- auto segment = (ColumnSegment *)data.GetSegment(row_id);
311
+ auto segment = data.GetSegment(row_id);
310
312
 
311
313
  // now perform the fetch within the segment
312
314
  segment->FetchRow(state, row_id, result, result_idx);
@@ -357,15 +359,14 @@ void ColumnData::AppendTransientSegment(SegmentLock &l, idx_t start_row) {
357
359
  }
358
360
 
359
361
  void ColumnData::CommitDropColumn() {
360
- auto segment = (ColumnSegment *)data.GetRootSegment();
361
- while (segment) {
362
- if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
363
- auto block_id = segment->GetBlockId();
362
+ for (auto &segment_p : data.Segments()) {
363
+ auto &segment = segment_p;
364
+ if (segment.segment_type == ColumnSegmentType::PERSISTENT) {
365
+ auto block_id = segment.GetBlockId();
364
366
  if (block_id != INVALID_BLOCK) {
365
367
  block_manager.MarkBlockAsModified(block_id);
366
368
  }
367
369
  }
368
- segment = (ColumnSegment *)segment->Next();
369
370
  }
370
371
  }
371
372
 
@@ -464,7 +465,6 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
464
465
  while (segment) {
465
466
  ColumnSegmentInfo column_info;
466
467
  column_info.row_group_index = row_group_index;
467
- ;
468
468
  column_info.column_id = col_path[0];
469
469
  column_info.column_path = col_path_str;
470
470
  column_info.segment_idx = segment_idx;
@@ -487,7 +487,7 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
487
487
  result.column_segments.push_back(std::move(column_info));
488
488
 
489
489
  segment_idx++;
490
- segment = (ColumnSegment *)segment->Next();
490
+ segment = (ColumnSegment *)data.GetNextSegment(segment);
491
491
  }
492
492
  }
493
493
 
@@ -495,19 +495,13 @@ void ColumnData::Verify(RowGroup &parent) {
495
495
  #ifdef DEBUG
496
496
  D_ASSERT(this->start == parent.start);
497
497
  data.Verify();
498
- auto root = data.GetRootSegment();
499
- if (root) {
500
- D_ASSERT(root != nullptr);
501
- D_ASSERT(root->start == this->start);
502
- idx_t prev_end = root->start;
503
- while (root) {
504
- D_ASSERT(prev_end == root->start);
505
- prev_end = root->start + root->count;
506
- if (!root->next) {
507
- D_ASSERT(prev_end == parent.start + parent.count);
508
- }
509
- root = root->Next();
510
- }
498
+ idx_t current_index = 0;
499
+ idx_t current_start = this->start;
500
+ for (auto &segment : data.Segments()) {
501
+ D_ASSERT(segment.index == current_index);
502
+ D_ASSERT(segment.start == current_start);
503
+ current_start += segment.count;
504
+ current_index++;
511
505
  }
512
506
  #endif
513
507
  }