duckdb 0.5.2-dev18.0 → 0.5.2-dev194.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "09248843b"
15
- #define DUCKDB_VERSION "v0.5.2-dev18"
14
+ #define DUCKDB_SOURCE_ID "a0fc1e9cd"
15
+ #define DUCKDB_VERSION "v0.5.2-dev194"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -1170,7 +1170,7 @@ struct UserType{
1170
1170
 
1171
1171
  struct EnumType{
1172
1172
  DUCKDB_API static const string &GetTypeName(const LogicalType &type);
1173
- DUCKDB_API static int64_t GetPos(const LogicalType &type, const string& key);
1173
+ DUCKDB_API static int64_t GetPos(const LogicalType &type, const string_t& key);
1174
1174
  DUCKDB_API static Vector &GetValuesInsertOrder(const LogicalType &type);
1175
1175
  DUCKDB_API static idx_t GetSize(const LogicalType &type);
1176
1176
  DUCKDB_API static const string GetValue(const Value &val);
@@ -2871,6 +2871,7 @@ public:
2871
2871
 
2872
2872
  namespace duckdb {
2873
2873
 
2874
+ class CastFunctionSet;
2874
2875
  class Deserializer;
2875
2876
  class Serializer;
2876
2877
 
@@ -3052,12 +3053,20 @@ public:
3052
3053
  DUCKDB_API uintptr_t GetPointer() const;
3053
3054
 
3054
3055
  //! Cast this value to another type, throws exception if its not possible
3055
- DUCKDB_API Value CastAs(const LogicalType &target_type, bool strict = false) const;
3056
+ DUCKDB_API Value CastAs(CastFunctionSet &set, const LogicalType &target_type, bool strict = false) const;
3057
+ DUCKDB_API Value CastAs(ClientContext &context, const LogicalType &target_type, bool strict = false) const;
3058
+ DUCKDB_API Value DefaultCastAs(const LogicalType &target_type, bool strict = false) const;
3056
3059
  //! Tries to cast this value to another type, and stores the result in "new_value"
3057
- DUCKDB_API bool TryCastAs(const LogicalType &target_type, Value &new_value, string *error_message,
3058
- bool strict = false) const;
3060
+ DUCKDB_API bool TryCastAs(CastFunctionSet &set, const LogicalType &target_type, Value &new_value,
3061
+ string *error_message, bool strict = false) const;
3062
+ DUCKDB_API bool TryCastAs(ClientContext &context, const LogicalType &target_type, Value &new_value,
3063
+ string *error_message, bool strict = false) const;
3064
+ DUCKDB_API bool DefaultTryCastAs(const LogicalType &target_type, Value &new_value, string *error_message,
3065
+ bool strict = false) const;
3059
3066
  //! Tries to cast this value to another type, and stores the result in THIS value again
3060
- DUCKDB_API bool TryCastAs(const LogicalType &target_type, bool strict = false);
3067
+ DUCKDB_API bool TryCastAs(CastFunctionSet &set, const LogicalType &target_type, bool strict = false);
3068
+ DUCKDB_API bool TryCastAs(ClientContext &context, const LogicalType &target_type, bool strict = false);
3069
+ DUCKDB_API bool DefaultTryCastAs(const LogicalType &target_type, bool strict = false);
3061
3070
 
3062
3071
  //! Serializes a Value to a stand-alone binary blob
3063
3072
  DUCKDB_API void Serialize(Serializer &serializer) const;
@@ -3098,7 +3107,9 @@ public:
3098
3107
 
3099
3108
  //! Returns true if the values are (approximately) equivalent. Note this is NOT the SQL equivalence. For this
3100
3109
  //! function, NULL values are equivalent and floating point values that are close are equivalent.
3101
- DUCKDB_API static bool ValuesAreEqual(const Value &result_value, const Value &value);
3110
+ DUCKDB_API static bool ValuesAreEqual(CastFunctionSet &set, const Value &result_value, const Value &value);
3111
+ DUCKDB_API static bool ValuesAreEqual(ClientContext &context, const Value &result_value, const Value &value);
3112
+ DUCKDB_API static bool DefaultValuesAreEqual(const Value &result_value, const Value &value);
3102
3113
  //! Returns true if the values are not distinct from each other, following SQL semantics for NOT DISTINCT FROM.
3103
3114
  DUCKDB_API static bool NotDistinctFrom(const Value &lvalue, const Value &rvalue);
3104
3115
 
@@ -4818,6 +4829,7 @@ private:
4818
4829
  #include <functional>
4819
4830
 
4820
4831
  namespace duckdb {
4832
+ class CastFunctionSet;
4821
4833
 
4822
4834
  // VectorOperations contains a set of operations that operate on sets of
4823
4835
  // vectors. In general, the operators must all have the same type, otherwise an
@@ -4959,10 +4971,16 @@ struct VectorOperations {
4959
4971
  //! Cast the data from the source type to the target type. Any elements that could not be converted are turned into
4960
4972
  //! NULLs. If any elements cannot be converted, returns false and fills in the error_message. If no error message is
4961
4973
  //! provided, an exception is thrown instead.
4962
- DUCKDB_API static bool TryCast(Vector &source, Vector &result, idx_t count, string *error_message,
4963
- bool strict = false);
4974
+ DUCKDB_API static bool TryCast(CastFunctionSet &set, Vector &source, Vector &result, idx_t count,
4975
+ string *error_message, bool strict = false);
4976
+ DUCKDB_API static bool DefaultTryCast(Vector &source, Vector &result, idx_t count, string *error_message,
4977
+ bool strict = false);
4978
+ DUCKDB_API static bool TryCast(ClientContext &context, Vector &source, Vector &result, idx_t count,
4979
+ string *error_message, bool strict = false);
4964
4980
  //! Cast the data from the source type to the target type. Throws an exception if the cast fails.
4965
- DUCKDB_API static void Cast(Vector &source, Vector &result, idx_t count, bool strict = false);
4981
+ DUCKDB_API static void Cast(ClientContext &context, Vector &source, Vector &result, idx_t count,
4982
+ bool strict = false);
4983
+ DUCKDB_API static void DefaultCast(Vector &source, Vector &result, idx_t count, bool strict = false);
4966
4984
 
4967
4985
  // Copy the data of <source> to the target vector
4968
4986
  static void Copy(const Vector &source, Vector &target, idx_t source_count, idx_t source_offset,
@@ -6987,28 +7005,6 @@ public:
6987
7005
  DUCKDB_API static string CallToString(const string &name, const vector<LogicalType> &arguments,
6988
7006
  const named_parameter_type_map_t &named_parameters);
6989
7007
 
6990
- //! Bind a scalar function from the set of functions and input arguments. Returns the index of the chosen function,
6991
- //! returns DConstants::INVALID_INDEX and sets error if none could be found
6992
- DUCKDB_API static idx_t BindFunction(const string &name, ScalarFunctionSet &functions,
6993
- const vector<LogicalType> &arguments, string &error);
6994
- DUCKDB_API static idx_t BindFunction(const string &name, ScalarFunctionSet &functions,
6995
- vector<unique_ptr<Expression>> &arguments, string &error);
6996
- //! Bind an aggregate function from the set of functions and input arguments. Returns the index of the chosen
6997
- //! function, returns DConstants::INVALID_INDEX and sets error if none could be found
6998
- DUCKDB_API static idx_t BindFunction(const string &name, AggregateFunctionSet &functions,
6999
- const vector<LogicalType> &arguments, string &error);
7000
- DUCKDB_API static idx_t BindFunction(const string &name, AggregateFunctionSet &functions,
7001
- vector<unique_ptr<Expression>> &arguments, string &error);
7002
- //! Bind a table function from the set of functions and input arguments. Returns the index of the chosen
7003
- //! function, returns DConstants::INVALID_INDEX and sets error if none could be found
7004
- DUCKDB_API static idx_t BindFunction(const string &name, TableFunctionSet &functions,
7005
- const vector<LogicalType> &arguments, string &error);
7006
- DUCKDB_API static idx_t BindFunction(const string &name, TableFunctionSet &functions,
7007
- vector<unique_ptr<Expression>> &arguments, string &error);
7008
- //! Bind a pragma function from the set of functions and input arguments
7009
- DUCKDB_API static idx_t BindFunction(const string &name, PragmaFunctionSet &functions, PragmaInfo &info,
7010
- string &error);
7011
-
7012
7008
  //! Used in the bind to erase an argument from a function
7013
7009
  DUCKDB_API static void EraseArgument(SimpleFunction &bound_function, vector<unique_ptr<Expression>> &arguments,
7014
7010
  idx_t argument_index);
@@ -7068,9 +7064,6 @@ public:
7068
7064
  public:
7069
7065
  DUCKDB_API hash_t Hash() const;
7070
7066
 
7071
- //! Cast a set of expressions to the arguments of this function
7072
- DUCKDB_API void CastToFunctionArguments(vector<unique_ptr<Expression>> &children);
7073
-
7074
7067
  DUCKDB_API string ToString() override;
7075
7068
  };
7076
7069
 
@@ -8032,20 +8025,6 @@ public:
8032
8025
  function_serialize_t serialize;
8033
8026
  function_deserialize_t deserialize;
8034
8027
 
8035
- DUCKDB_API static unique_ptr<Expression> BindScalarFunction(ClientContext &context, const string &schema,
8036
- const string &name,
8037
- vector<unique_ptr<Expression>> children, string &error,
8038
- bool is_operator = false, Binder *binder = nullptr);
8039
- DUCKDB_API static unique_ptr<Expression> BindScalarFunction(ClientContext &context,
8040
- ScalarFunctionCatalogEntry &function,
8041
- vector<unique_ptr<Expression>> children, string &error,
8042
- bool is_operator = false, Binder *binder = nullptr);
8043
-
8044
- DUCKDB_API static unique_ptr<BoundFunctionExpression> BindScalarFunction(ClientContext &context,
8045
- ScalarFunction bound_function,
8046
- vector<unique_ptr<Expression>> children,
8047
- bool is_operator = false);
8048
-
8049
8028
  DUCKDB_API bool operator==(const ScalarFunction &rhs) const;
8050
8029
  DUCKDB_API bool operator!=(const ScalarFunction &rhs) const;
8051
8030
 
@@ -9491,7 +9470,7 @@ struct ForeignKeyInfo {
9491
9470
  //! The set of main key table's column's index
9492
9471
  vector<storage_t> pk_keys;
9493
9472
  //! The set of foreign key table's column's index
9494
- vector<idx_t> fk_keys;
9473
+ vector<storage_t> fk_keys;
9495
9474
  };
9496
9475
 
9497
9476
  //! Constraint is the base class of any type of table constraint.
@@ -10705,16 +10684,6 @@ public:
10705
10684
  return !(*this == rhs);
10706
10685
  }
10707
10686
 
10708
- DUCKDB_API static unique_ptr<BoundAggregateExpression>
10709
- BindAggregateFunction(ClientContext &context, AggregateFunction bound_function,
10710
- vector<unique_ptr<Expression>> children, unique_ptr<Expression> filter = nullptr,
10711
- bool is_distinct = false, unique_ptr<BoundOrderModifier> order_bys = nullptr);
10712
-
10713
- DUCKDB_API static unique_ptr<FunctionData> BindSortedAggregate(AggregateFunction &bound_function,
10714
- vector<unique_ptr<Expression>> &children,
10715
- unique_ptr<FunctionData> bind_info,
10716
- unique_ptr<BoundOrderModifier> order_bys);
10717
-
10718
10687
  public:
10719
10688
  template <class STATE, class RESULT_TYPE, class OP>
10720
10689
  static AggregateFunction NullaryAggregate(LogicalType return_type) {
@@ -13806,6 +13775,7 @@ struct ParseInfo {
13806
13775
 
13807
13776
 
13808
13777
  namespace duckdb {
13778
+ struct AlterInfo;
13809
13779
 
13810
13780
  enum class OnCreateConflict : uint8_t {
13811
13781
  // Standard: throw error
@@ -13813,7 +13783,9 @@ enum class OnCreateConflict : uint8_t {
13813
13783
  // CREATE IF NOT EXISTS, silently do nothing on conflict
13814
13784
  IGNORE_ON_CONFLICT,
13815
13785
  // CREATE OR REPLACE
13816
- REPLACE_ON_CONFLICT
13786
+ REPLACE_ON_CONFLICT,
13787
+ // Update on conflict - only support for functions. Add a function overload if the function already exists.
13788
+ ALTER_ON_CONFLICT
13817
13789
  };
13818
13790
 
13819
13791
  struct CreateInfo : public ParseInfo {
@@ -13850,6 +13822,8 @@ public:
13850
13822
  virtual unique_ptr<CreateInfo> Copy() const = 0;
13851
13823
 
13852
13824
  DUCKDB_API void CopyProperties(CreateInfo &other) const;
13825
+ //! Generates an alter statement from the create statement - used for OnCreateConflict::ALTER_ON_CONFLICT
13826
+ DUCKDB_API virtual unique_ptr<AlterInfo> GetAlterInfo() const;
13853
13827
  };
13854
13828
 
13855
13829
  } // namespace duckdb
@@ -13927,6 +13901,15 @@ protected:
13927
13901
 
13928
13902
 
13929
13903
 
13904
+ //===----------------------------------------------------------------------===//
13905
+ // DuckDB
13906
+ //
13907
+ // duckdb/parser/parsed_data/alter_info.hpp
13908
+ //
13909
+ //
13910
+ //===----------------------------------------------------------------------===//
13911
+
13912
+
13930
13913
 
13931
13914
 
13932
13915
 
@@ -13939,14 +13922,13 @@ enum class AlterType : uint8_t {
13939
13922
  ALTER_TABLE = 1,
13940
13923
  ALTER_VIEW = 2,
13941
13924
  ALTER_SEQUENCE = 3,
13942
- CHANGE_OWNERSHIP = 4
13925
+ CHANGE_OWNERSHIP = 4,
13926
+ ALTER_FUNCTION = 5
13943
13927
  };
13944
13928
 
13945
- enum AlterForeignKeyType : uint8_t { AFT_ADD = 0, AFT_DELETE = 1 };
13946
-
13947
13929
  struct AlterInfo : public ParseInfo {
13948
13930
  AlterInfo(AlterType type, string schema, string name, bool if_exists);
13949
- ~AlterInfo() override;
13931
+ virtual ~AlterInfo() override;
13950
13932
 
13951
13933
  AlterType type;
13952
13934
  //! if exists
@@ -13964,6 +13946,16 @@ public:
13964
13946
  static unique_ptr<AlterInfo> Deserialize(Deserializer &source);
13965
13947
  };
13966
13948
 
13949
+ } // namespace duckdb
13950
+
13951
+
13952
+
13953
+
13954
+
13955
+ namespace duckdb {
13956
+
13957
+ enum AlterForeignKeyType : uint8_t { AFT_ADD = 0, AFT_DELETE = 1 };
13958
+
13967
13959
  //===--------------------------------------------------------------------===//
13968
13960
  // Change Ownership
13969
13961
  //===--------------------------------------------------------------------===//
@@ -14749,17 +14741,124 @@ public:
14749
14741
 
14750
14742
 
14751
14743
 
14744
+ //===----------------------------------------------------------------------===//
14745
+ // DuckDB
14746
+ //
14747
+ // duckdb/function/cast/default_casts.hpp
14748
+ //
14749
+ //
14750
+ //===----------------------------------------------------------------------===//
14751
+
14752
+
14753
+
14754
+
14755
+
14756
+
14757
+ namespace duckdb {
14758
+ class CastFunctionSet;
14759
+
14760
+ //! Extra data that can be attached to a bind function of a cast, and is available during binding
14761
+ struct BindCastInfo {
14762
+ DUCKDB_API virtual ~BindCastInfo();
14763
+ };
14764
+
14765
+ //! Extra data that can be returned by the bind of a cast, and is available during execution of a cast
14766
+ struct BoundCastData {
14767
+ DUCKDB_API virtual ~BoundCastData();
14768
+
14769
+ DUCKDB_API virtual unique_ptr<BoundCastData> Copy() const = 0;
14770
+ };
14771
+
14772
+ struct CastParameters {
14773
+ CastParameters() {
14774
+ }
14775
+ CastParameters(BoundCastData *cast_data, bool strict, string *error_message)
14776
+ : cast_data(cast_data), strict(strict), error_message(error_message) {
14777
+ }
14778
+ CastParameters(CastParameters &parent, BoundCastData *cast_data = nullptr)
14779
+ : cast_data(cast_data), strict(parent.strict), error_message(parent.error_message) {
14780
+ }
14781
+
14782
+ //! The bound cast data (if any)
14783
+ BoundCastData *cast_data = nullptr;
14784
+ //! whether or not to enable strict casting
14785
+ bool strict = false;
14786
+ // out: error message in case cast has failed
14787
+ string *error_message = nullptr;
14788
+ };
14789
+
14790
+ typedef bool (*cast_function_t)(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
14791
+
14792
+ struct BoundCastInfo {
14793
+ BoundCastInfo(cast_function_t function,
14794
+ unique_ptr<BoundCastData> cast_data = nullptr); // NOLINT: allow explicit cast from cast_function_t
14795
+
14796
+ cast_function_t function;
14797
+ unique_ptr<BoundCastData> cast_data;
14798
+
14799
+ public:
14800
+ BoundCastInfo Copy() const;
14801
+ };
14802
+
14803
+ struct BindCastInput {
14804
+ BindCastInput(CastFunctionSet &function_set, BindCastInfo *info) : function_set(function_set), info(info) {
14805
+ }
14806
+
14807
+ CastFunctionSet &function_set;
14808
+ BindCastInfo *info;
14809
+ };
14810
+
14811
+ struct DefaultCasts {
14812
+ static BoundCastInfo GetDefaultCastFunction(BindCastInput &input, const LogicalType &source,
14813
+ const LogicalType &target);
14814
+
14815
+ static bool NopCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
14816
+ static bool TryVectorNullCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
14817
+ static bool ReinterpretCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
14818
+
14819
+ private:
14820
+ static BoundCastInfo BlobCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14821
+ static BoundCastInfo DateCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14822
+ static BoundCastInfo DecimalCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14823
+ static BoundCastInfo EnumCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14824
+ static BoundCastInfo IntervalCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14825
+ static BoundCastInfo ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14826
+ static BoundCastInfo NumericCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14827
+ static BoundCastInfo MapCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14828
+ static BoundCastInfo PointerCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14829
+ static BoundCastInfo StringCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14830
+ static BoundCastInfo StructCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14831
+ static BoundCastInfo TimeCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14832
+ static BoundCastInfo TimeTzCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14833
+ static BoundCastInfo TimestampCastSwitch(BindCastInput &input, const LogicalType &source,
14834
+ const LogicalType &target);
14835
+ static BoundCastInfo TimestampTzCastSwitch(BindCastInput &input, const LogicalType &source,
14836
+ const LogicalType &target);
14837
+ static BoundCastInfo TimestampNsCastSwitch(BindCastInput &input, const LogicalType &source,
14838
+ const LogicalType &target);
14839
+ static BoundCastInfo TimestampMsCastSwitch(BindCastInput &input, const LogicalType &source,
14840
+ const LogicalType &target);
14841
+ static BoundCastInfo TimestampSecCastSwitch(BindCastInput &input, const LogicalType &source,
14842
+ const LogicalType &target);
14843
+ static BoundCastInfo UUIDCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target);
14844
+ };
14845
+
14846
+ } // namespace duckdb
14847
+
14752
14848
 
14753
14849
  namespace duckdb {
14754
14850
 
14755
14851
  class BoundCastExpression : public Expression {
14756
14852
  public:
14757
- BoundCastExpression(unique_ptr<Expression> child, LogicalType target_type, bool try_cast = false);
14853
+ BoundCastExpression(unique_ptr<Expression> child, LogicalType target_type, BoundCastInfo bound_cast,
14854
+ bool try_cast = false);
14758
14855
 
14759
14856
  //! The child type
14760
14857
  unique_ptr<Expression> child;
14761
14858
  //! Whether to use try_cast or not. try_cast converts cast failures into NULLs instead of throwing an error.
14762
14859
  bool try_cast;
14860
+ //! The bound cast info
14861
+ BoundCastInfo bound_cast;
14763
14862
 
14764
14863
  public:
14765
14864
  LogicalType source_type() {
@@ -14767,9 +14866,12 @@ public:
14767
14866
  return child->return_type;
14768
14867
  }
14769
14868
 
14869
+ //! Cast an expression to the specified SQL type, using only the built-in SQL casts
14870
+ static unique_ptr<Expression> AddDefaultCastToType(unique_ptr<Expression> expr, const LogicalType &target_type,
14871
+ bool try_cast = false);
14770
14872
  //! Cast an expression to the specified SQL type if required
14771
- static unique_ptr<Expression> AddCastToType(unique_ptr<Expression> expr, const LogicalType &target_type,
14772
- bool try_cast = false);
14873
+ static unique_ptr<Expression> AddCastToType(ClientContext &context, unique_ptr<Expression> expr,
14874
+ const LogicalType &target_type, bool try_cast = false);
14773
14875
  //! Returns true if a cast is invertible (i.e. CAST(s -> t -> s) = s for all values of s). This is not true for e.g.
14774
14876
  //! boolean casts, because that can be e.g. -1 -> TRUE -> 1. This is necessary to prevent some optimizer bugs.
14775
14877
  static bool CastIsInvertible(const LogicalType &source_type, const LogicalType &target_type);
@@ -15622,21 +15724,21 @@ class ScalarFunctionSet : public FunctionSet<ScalarFunction> {
15622
15724
  public:
15623
15725
  DUCKDB_API explicit ScalarFunctionSet(string name);
15624
15726
 
15625
- DUCKDB_API ScalarFunction GetFunctionByArguments(const vector<LogicalType> &arguments);
15727
+ DUCKDB_API ScalarFunction GetFunctionByArguments(ClientContext &context, const vector<LogicalType> &arguments);
15626
15728
  };
15627
15729
 
15628
15730
  class AggregateFunctionSet : public FunctionSet<AggregateFunction> {
15629
15731
  public:
15630
15732
  DUCKDB_API explicit AggregateFunctionSet(string name);
15631
15733
 
15632
- DUCKDB_API AggregateFunction GetFunctionByArguments(const vector<LogicalType> &arguments);
15734
+ DUCKDB_API AggregateFunction GetFunctionByArguments(ClientContext &context, const vector<LogicalType> &arguments);
15633
15735
  };
15634
15736
 
15635
15737
  class TableFunctionSet : public FunctionSet<TableFunction> {
15636
15738
  public:
15637
15739
  DUCKDB_API explicit TableFunctionSet(string name);
15638
15740
 
15639
- TableFunction GetFunctionByArguments(const vector<LogicalType> &arguments);
15741
+ TableFunction GetFunctionByArguments(ClientContext &context, const vector<LogicalType> &arguments);
15640
15742
  };
15641
15743
 
15642
15744
  class PragmaFunctionSet : public FunctionSet<PragmaFunction> {
@@ -16080,11 +16182,13 @@ public:
16080
16182
  //! TableBinding is exactly like the Binding, except it keeps track of which columns were bound in the linked LogicalGet
16081
16183
  //! node for projection pushdown purposes.
16082
16184
  struct TableBinding : public Binding {
16083
- TableBinding(const string &alias, vector<LogicalType> types, vector<string> names, LogicalGet &get, idx_t index,
16084
- bool add_row_id = false);
16185
+ TableBinding(const string &alias, vector<LogicalType> types, vector<string> names,
16186
+ vector<column_t> &bound_column_ids, StandardEntry *entry, idx_t index, bool add_row_id = false);
16085
16187
 
16086
- //! the underlying LogicalGet
16087
- LogicalGet &get;
16188
+ //! A reference to the set of bound column ids
16189
+ vector<column_t> &bound_column_ids;
16190
+ //! The underlying catalog entry (if any)
16191
+ StandardEntry *entry;
16088
16192
 
16089
16193
  public:
16090
16194
  unique_ptr<ParsedExpression> ExpandGeneratedColumn(const string &column_name);
@@ -16176,10 +16280,10 @@ public:
16176
16280
 
16177
16281
  //! Adds a base table with the given alias to the BindContext.
16178
16282
  void AddBaseTable(idx_t index, const string &alias, const vector<string> &names, const vector<LogicalType> &types,
16179
- LogicalGet &get);
16283
+ vector<column_t> &bound_column_ids, StandardEntry *entry);
16180
16284
  //! Adds a call to a table function with the given alias to the BindContext.
16181
16285
  void AddTableFunction(idx_t index, const string &alias, const vector<string> &names,
16182
- const vector<LogicalType> &types, LogicalGet &get);
16286
+ const vector<LogicalType> &types, vector<column_t> &bound_column_ids, StandardEntry *entry);
16183
16287
  //! Adds a table view with a given alias to the BindContext.
16184
16288
  void AddView(idx_t index, const string &alias, SubqueryRef &ref, BoundQueryNode &subquery, ViewCatalogEntry *view);
16185
16289
  //! Adds a subquery with a given alias to the BindContext.
@@ -21197,6 +21301,7 @@ public:
21197
21301
 
21198
21302
  } // namespace duckdb
21199
21303
 
21304
+
21200
21305
  //===----------------------------------------------------------------------===//
21201
21306
  // DuckDB
21202
21307
  //
@@ -21235,9 +21340,10 @@ public:
21235
21340
 
21236
21341
 
21237
21342
  namespace duckdb {
21343
+ class CastFunctionSet;
21238
21344
  class ClientContext;
21239
- class TableFunctionRef;
21240
21345
  class CompressionFunction;
21346
+ class TableFunctionRef;
21241
21347
 
21242
21348
  struct CompressionFunctionSet;
21243
21349
  struct DBConfig;
@@ -21329,6 +21435,7 @@ struct DBConfigOptions {
21329
21435
  //! Whether unsigned extensions should be loaded
21330
21436
  bool allow_unsigned_extensions = false;
21331
21437
  };
21438
+
21332
21439
  struct DBConfig {
21333
21440
  friend class DatabaseInstance;
21334
21441
  friend class StorageManager;
@@ -21348,9 +21455,9 @@ public:
21348
21455
  unique_ptr<Allocator> allocator;
21349
21456
  //! Database configuration options
21350
21457
  DBConfigOptions options;
21351
-
21352
21458
  //! Extensions made to the parser
21353
21459
  vector<ParserExtension> parser_extensions;
21460
+ //! Extensions made to the optimizer
21354
21461
  vector<OptimizerExtension> optimizer_extensions;
21355
21462
 
21356
21463
  DUCKDB_API void AddExtensionOption(string name, string description, LogicalType parameter,
@@ -21378,8 +21485,11 @@ public:
21378
21485
  //! Return the compression function for the specified compression type/physical type combo
21379
21486
  DUCKDB_API CompressionFunction *GetCompressionFunction(CompressionType type, PhysicalType data_type);
21380
21487
 
21488
+ DUCKDB_API CastFunctionSet &GetCastFunctions();
21489
+
21381
21490
  private:
21382
21491
  unique_ptr<CompressionFunctionSet> compression_functions;
21492
+ unique_ptr<CastFunctionSet> cast_functions;
21383
21493
  };
21384
21494
 
21385
21495
  } // namespace duckdb
@@ -22417,29 +22527,14 @@ public:
22417
22527
  namespace duckdb {
22418
22528
 
22419
22529
  struct CreateScalarFunctionInfo : public CreateFunctionInfo {
22420
- explicit CreateScalarFunctionInfo(ScalarFunction function)
22421
- : CreateFunctionInfo(CatalogType::SCALAR_FUNCTION_ENTRY), functions(function.name) {
22422
- name = function.name;
22423
- functions.AddFunction(move(function));
22424
- }
22425
- explicit CreateScalarFunctionInfo(ScalarFunctionSet set)
22426
- : CreateFunctionInfo(CatalogType::SCALAR_FUNCTION_ENTRY), functions(move(set)) {
22427
- name = functions.name;
22428
- for (auto &func : functions.functions) {
22429
- func.name = functions.name;
22430
- }
22431
- }
22530
+ DUCKDB_API explicit CreateScalarFunctionInfo(ScalarFunction function);
22531
+ DUCKDB_API explicit CreateScalarFunctionInfo(ScalarFunctionSet set);
22432
22532
 
22433
22533
  ScalarFunctionSet functions;
22434
22534
 
22435
22535
  public:
22436
- unique_ptr<CreateInfo> Copy() const override {
22437
- ScalarFunctionSet set(name);
22438
- set.functions = functions.functions;
22439
- auto result = make_unique<CreateScalarFunctionInfo>(move(set));
22440
- CopyProperties(*result);
22441
- return move(result);
22442
- }
22536
+ DUCKDB_API unique_ptr<CreateInfo> Copy() const override;
22537
+ DUCKDB_API unique_ptr<AlterInfo> GetAlterInfo() const override;
22443
22538
  };
22444
22539
 
22445
22540
  } // namespace duckdb
@@ -22450,12 +22545,13 @@ namespace duckdb {
22450
22545
  //! A table function in the catalog
22451
22546
  class ScalarFunctionCatalogEntry : public StandardEntry {
22452
22547
  public:
22453
- ScalarFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateScalarFunctionInfo *info)
22454
- : StandardEntry(CatalogType::SCALAR_FUNCTION_ENTRY, schema, catalog, info->name), functions(info->functions) {
22455
- }
22548
+ ScalarFunctionCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schema, CreateScalarFunctionInfo *info);
22456
22549
 
22457
22550
  //! The scalar functions
22458
22551
  ScalarFunctionSet functions;
22552
+
22553
+ public:
22554
+ unique_ptr<CatalogEntry> AlterEntry(ClientContext &context, AlterInfo *info) override;
22459
22555
  };
22460
22556
  } // namespace duckdb
22461
22557
  //===----------------------------------------------------------------------===//
@@ -23154,6 +23250,45 @@ enum IndexConstraintType : uint8_t {
23154
23250
  } // namespace duckdb
23155
23251
 
23156
23252
 
23253
+ //===----------------------------------------------------------------------===//
23254
+ // DuckDB
23255
+ //
23256
+ // duckdb/common/sort/sort.hpp
23257
+ //
23258
+ //
23259
+ //===----------------------------------------------------------------------===//
23260
+
23261
+
23262
+
23263
+ //===----------------------------------------------------------------------===//
23264
+ // DuckDB
23265
+ //
23266
+ // duckdb/common/sort/sorted_block.hpp
23267
+ //
23268
+ //
23269
+ //===----------------------------------------------------------------------===//
23270
+
23271
+
23272
+ //===----------------------------------------------------------------------===//
23273
+ // DuckDB
23274
+ //
23275
+ // duckdb/common/types/row_layout.hpp
23276
+ //
23277
+ //
23278
+ //===----------------------------------------------------------------------===//
23279
+
23280
+
23281
+
23282
+
23283
+
23284
+
23285
+ //===----------------------------------------------------------------------===//
23286
+ // DuckDB
23287
+ //
23288
+ // duckdb/execution/operator/aggregate/aggregate_object.hpp
23289
+ //
23290
+ //
23291
+ //===----------------------------------------------------------------------===//
23157
23292
 
23158
23293
 
23159
23294
 
@@ -23305,10 +23440,144 @@ private:
23305
23440
  };
23306
23441
  } // namespace duckdb
23307
23442
 
23308
- //===----------------------------------------------------------------------===//
23443
+
23444
+
23445
+ namespace duckdb {
23446
+
23447
+ class BoundAggregateExpression;
23448
+
23449
+ struct AggregateObject {
23450
+ AggregateObject(AggregateFunction function, FunctionData *bind_data, idx_t child_count, idx_t payload_size,
23451
+ bool distinct, PhysicalType return_type, Expression *filter = nullptr);
23452
+ AggregateObject(BoundAggregateExpression *aggr);
23453
+
23454
+ AggregateFunction function;
23455
+ FunctionData *bind_data;
23456
+ idx_t child_count;
23457
+ idx_t payload_size;
23458
+ bool distinct;
23459
+ PhysicalType return_type;
23460
+ Expression *filter = nullptr;
23461
+
23462
+ static vector<AggregateObject> CreateAggregateObjects(const vector<BoundAggregateExpression *> &bindings);
23463
+ };
23464
+
23465
+ struct AggregateFilterData {
23466
+ AggregateFilterData(Allocator &allocator, Expression &filter_expr, const vector<LogicalType> &payload_types);
23467
+
23468
+ idx_t ApplyFilter(DataChunk &payload);
23469
+
23470
+ ExpressionExecutor filter_executor;
23471
+ DataChunk filtered_payload;
23472
+ SelectionVector true_sel;
23473
+ };
23474
+
23475
+ struct AggregateFilterDataSet {
23476
+ AggregateFilterDataSet();
23477
+
23478
+ vector<unique_ptr<AggregateFilterData>> filter_data;
23479
+
23480
+ public:
23481
+ void Initialize(Allocator &allocator, const vector<AggregateObject> &aggregates,
23482
+ const vector<LogicalType> &payload_types);
23483
+
23484
+ AggregateFilterData &GetFilterData(idx_t aggr_idx);
23485
+ };
23486
+
23487
+ } // namespace duckdb
23488
+
23489
+
23490
+ namespace duckdb {
23491
+
23492
+ class RowLayout {
23493
+ public:
23494
+ using Aggregates = vector<AggregateObject>;
23495
+ using ValidityBytes = TemplatedValidityMask<uint8_t>;
23496
+
23497
+ //! Creates an empty RowLayout
23498
+ RowLayout();
23499
+
23500
+ public:
23501
+ //! Initializes the RowLayout with the specified types and aggregates to an empty RowLayout
23502
+ void Initialize(vector<LogicalType> types_p, Aggregates aggregates_p, bool align = true);
23503
+ //! Initializes the RowLayout with the specified types to an empty RowLayout
23504
+ void Initialize(vector<LogicalType> types, bool align = true);
23505
+ //! Initializes the RowLayout with the specified aggregates to an empty RowLayout
23506
+ void Initialize(Aggregates aggregates_p, bool align = true);
23507
+ //! Returns the number of data columns
23508
+ inline idx_t ColumnCount() const {
23509
+ return types.size();
23510
+ }
23511
+ //! Returns a list of the column types for this layout
23512
+ inline const vector<LogicalType> &GetTypes() const {
23513
+ return types;
23514
+ }
23515
+ //! Returns the number of aggregates
23516
+ inline idx_t AggregateCount() const {
23517
+ return aggregates.size();
23518
+ }
23519
+ //! Returns a list of the aggregates for this layout
23520
+ inline Aggregates &GetAggregates() {
23521
+ return aggregates;
23522
+ }
23523
+ //! Returns the total width required for each row, including padding
23524
+ inline idx_t GetRowWidth() const {
23525
+ return row_width;
23526
+ }
23527
+ //! Returns the offset to the start of the data
23528
+ inline idx_t GetDataOffset() const {
23529
+ return flag_width;
23530
+ }
23531
+ //! Returns the total width required for the data, including padding
23532
+ inline idx_t GetDataWidth() const {
23533
+ return data_width;
23534
+ }
23535
+ //! Returns the offset to the start of the aggregates
23536
+ inline idx_t GetAggrOffset() const {
23537
+ return flag_width + data_width;
23538
+ }
23539
+ //! Returns the total width required for the aggregates, including padding
23540
+ inline idx_t GetAggrWidth() const {
23541
+ return aggr_width;
23542
+ }
23543
+ //! Returns the column offsets into each row
23544
+ inline const vector<idx_t> &GetOffsets() const {
23545
+ return offsets;
23546
+ }
23547
+ //! Returns whether all columns in this layout are constant size
23548
+ inline bool AllConstant() const {
23549
+ return all_constant;
23550
+ }
23551
+ inline idx_t GetHeapOffset() const {
23552
+ return heap_pointer_offset;
23553
+ }
23554
+
23555
+ private:
23556
+ //! The types of the data columns
23557
+ vector<LogicalType> types;
23558
+ //! The aggregate functions
23559
+ Aggregates aggregates;
23560
+ //! The width of the validity header
23561
+ idx_t flag_width;
23562
+ //! The width of the data portion
23563
+ idx_t data_width;
23564
+ //! The width of the aggregate state portion
23565
+ idx_t aggr_width;
23566
+ //! The width of the entire row
23567
+ idx_t row_width;
23568
+ //! The offsets to the columns and aggregate data in each row
23569
+ vector<idx_t> offsets;
23570
+ //! Whether all columns in this layout are constant size
23571
+ bool all_constant;
23572
+ //! Offset to the pointer to the heap for each row
23573
+ idx_t heap_pointer_offset;
23574
+ };
23575
+
23576
+ } // namespace duckdb
23577
+
23309
23578
  // DuckDB
23310
23579
  //
23311
- // duckdb/storage/meta_block_writer.hpp
23580
+ // duckdb/common/fast_mem.hpp
23312
23581
  //
23313
23582
  //
23314
23583
  //===----------------------------------------------------------------------===//
@@ -23318,10 +23587,692 @@ private:
23318
23587
 
23319
23588
 
23320
23589
 
23590
+ template <size_t SIZE>
23591
+ static inline void MemcpyFixed(void *dest, const void *src) {
23592
+ memcpy(dest, src, SIZE);
23593
+ }
23594
+
23595
+ template <size_t SIZE>
23596
+ static inline int MemcmpFixed(const void *str1, const void *str2) {
23597
+ return memcmp(str1, str2, SIZE);
23598
+ }
23599
+
23600
+ namespace duckdb {
23601
+
23602
+ //! This templated memcpy is significantly faster than std::memcpy,
23603
+ //! but only when you are calling memcpy with a const size in a loop.
23604
+ //! For instance `while (<cond>) { memcpy(<dest>, <src>, const_size); ... }`
23605
+ static inline void FastMemcpy(void *dest, const void *src, const size_t size) {
23606
+ // LCOV_EXCL_START
23607
+ switch (size) {
23608
+ case 0:
23609
+ return;
23610
+ case 1:
23611
+ return MemcpyFixed<1>(dest, src);
23612
+ case 2:
23613
+ return MemcpyFixed<2>(dest, src);
23614
+ case 3:
23615
+ return MemcpyFixed<3>(dest, src);
23616
+ case 4:
23617
+ return MemcpyFixed<4>(dest, src);
23618
+ case 5:
23619
+ return MemcpyFixed<5>(dest, src);
23620
+ case 6:
23621
+ return MemcpyFixed<6>(dest, src);
23622
+ case 7:
23623
+ return MemcpyFixed<7>(dest, src);
23624
+ case 8:
23625
+ return MemcpyFixed<8>(dest, src);
23626
+ case 9:
23627
+ return MemcpyFixed<9>(dest, src);
23628
+ case 10:
23629
+ return MemcpyFixed<10>(dest, src);
23630
+ case 11:
23631
+ return MemcpyFixed<11>(dest, src);
23632
+ case 12:
23633
+ return MemcpyFixed<12>(dest, src);
23634
+ case 13:
23635
+ return MemcpyFixed<13>(dest, src);
23636
+ case 14:
23637
+ return MemcpyFixed<14>(dest, src);
23638
+ case 15:
23639
+ return MemcpyFixed<15>(dest, src);
23640
+ case 16:
23641
+ return MemcpyFixed<16>(dest, src);
23642
+ case 17:
23643
+ return MemcpyFixed<17>(dest, src);
23644
+ case 18:
23645
+ return MemcpyFixed<18>(dest, src);
23646
+ case 19:
23647
+ return MemcpyFixed<19>(dest, src);
23648
+ case 20:
23649
+ return MemcpyFixed<20>(dest, src);
23650
+ case 21:
23651
+ return MemcpyFixed<21>(dest, src);
23652
+ case 22:
23653
+ return MemcpyFixed<22>(dest, src);
23654
+ case 23:
23655
+ return MemcpyFixed<23>(dest, src);
23656
+ case 24:
23657
+ return MemcpyFixed<24>(dest, src);
23658
+ case 25:
23659
+ return MemcpyFixed<25>(dest, src);
23660
+ case 26:
23661
+ return MemcpyFixed<26>(dest, src);
23662
+ case 27:
23663
+ return MemcpyFixed<27>(dest, src);
23664
+ case 28:
23665
+ return MemcpyFixed<28>(dest, src);
23666
+ case 29:
23667
+ return MemcpyFixed<29>(dest, src);
23668
+ case 30:
23669
+ return MemcpyFixed<30>(dest, src);
23670
+ case 31:
23671
+ return MemcpyFixed<31>(dest, src);
23672
+ case 32:
23673
+ return MemcpyFixed<32>(dest, src);
23674
+ case 33:
23675
+ return MemcpyFixed<33>(dest, src);
23676
+ case 34:
23677
+ return MemcpyFixed<34>(dest, src);
23678
+ case 35:
23679
+ return MemcpyFixed<35>(dest, src);
23680
+ case 36:
23681
+ return MemcpyFixed<36>(dest, src);
23682
+ case 37:
23683
+ return MemcpyFixed<37>(dest, src);
23684
+ case 38:
23685
+ return MemcpyFixed<38>(dest, src);
23686
+ case 39:
23687
+ return MemcpyFixed<39>(dest, src);
23688
+ case 40:
23689
+ return MemcpyFixed<40>(dest, src);
23690
+ case 41:
23691
+ return MemcpyFixed<41>(dest, src);
23692
+ case 42:
23693
+ return MemcpyFixed<42>(dest, src);
23694
+ case 43:
23695
+ return MemcpyFixed<43>(dest, src);
23696
+ case 44:
23697
+ return MemcpyFixed<44>(dest, src);
23698
+ case 45:
23699
+ return MemcpyFixed<45>(dest, src);
23700
+ case 46:
23701
+ return MemcpyFixed<46>(dest, src);
23702
+ case 47:
23703
+ return MemcpyFixed<47>(dest, src);
23704
+ case 48:
23705
+ return MemcpyFixed<48>(dest, src);
23706
+ case 49:
23707
+ return MemcpyFixed<49>(dest, src);
23708
+ case 50:
23709
+ return MemcpyFixed<50>(dest, src);
23710
+ case 51:
23711
+ return MemcpyFixed<51>(dest, src);
23712
+ case 52:
23713
+ return MemcpyFixed<52>(dest, src);
23714
+ case 53:
23715
+ return MemcpyFixed<53>(dest, src);
23716
+ case 54:
23717
+ return MemcpyFixed<54>(dest, src);
23718
+ case 55:
23719
+ return MemcpyFixed<55>(dest, src);
23720
+ case 56:
23721
+ return MemcpyFixed<56>(dest, src);
23722
+ case 57:
23723
+ return MemcpyFixed<57>(dest, src);
23724
+ case 58:
23725
+ return MemcpyFixed<58>(dest, src);
23726
+ case 59:
23727
+ return MemcpyFixed<59>(dest, src);
23728
+ case 60:
23729
+ return MemcpyFixed<60>(dest, src);
23730
+ case 61:
23731
+ return MemcpyFixed<61>(dest, src);
23732
+ case 62:
23733
+ return MemcpyFixed<62>(dest, src);
23734
+ case 63:
23735
+ return MemcpyFixed<63>(dest, src);
23736
+ case 64:
23737
+ return MemcpyFixed<64>(dest, src);
23738
+ case 65:
23739
+ return MemcpyFixed<65>(dest, src);
23740
+ case 66:
23741
+ return MemcpyFixed<66>(dest, src);
23742
+ case 67:
23743
+ return MemcpyFixed<67>(dest, src);
23744
+ case 68:
23745
+ return MemcpyFixed<68>(dest, src);
23746
+ case 69:
23747
+ return MemcpyFixed<69>(dest, src);
23748
+ case 70:
23749
+ return MemcpyFixed<70>(dest, src);
23750
+ case 71:
23751
+ return MemcpyFixed<71>(dest, src);
23752
+ case 72:
23753
+ return MemcpyFixed<72>(dest, src);
23754
+ case 73:
23755
+ return MemcpyFixed<73>(dest, src);
23756
+ case 74:
23757
+ return MemcpyFixed<74>(dest, src);
23758
+ case 75:
23759
+ return MemcpyFixed<75>(dest, src);
23760
+ case 76:
23761
+ return MemcpyFixed<76>(dest, src);
23762
+ case 77:
23763
+ return MemcpyFixed<77>(dest, src);
23764
+ case 78:
23765
+ return MemcpyFixed<78>(dest, src);
23766
+ case 79:
23767
+ return MemcpyFixed<79>(dest, src);
23768
+ case 80:
23769
+ return MemcpyFixed<80>(dest, src);
23770
+ case 81:
23771
+ return MemcpyFixed<81>(dest, src);
23772
+ case 82:
23773
+ return MemcpyFixed<82>(dest, src);
23774
+ case 83:
23775
+ return MemcpyFixed<83>(dest, src);
23776
+ case 84:
23777
+ return MemcpyFixed<84>(dest, src);
23778
+ case 85:
23779
+ return MemcpyFixed<85>(dest, src);
23780
+ case 86:
23781
+ return MemcpyFixed<86>(dest, src);
23782
+ case 87:
23783
+ return MemcpyFixed<87>(dest, src);
23784
+ case 88:
23785
+ return MemcpyFixed<88>(dest, src);
23786
+ case 89:
23787
+ return MemcpyFixed<89>(dest, src);
23788
+ case 90:
23789
+ return MemcpyFixed<90>(dest, src);
23790
+ case 91:
23791
+ return MemcpyFixed<91>(dest, src);
23792
+ case 92:
23793
+ return MemcpyFixed<92>(dest, src);
23794
+ case 93:
23795
+ return MemcpyFixed<93>(dest, src);
23796
+ case 94:
23797
+ return MemcpyFixed<94>(dest, src);
23798
+ case 95:
23799
+ return MemcpyFixed<95>(dest, src);
23800
+ case 96:
23801
+ return MemcpyFixed<96>(dest, src);
23802
+ case 97:
23803
+ return MemcpyFixed<97>(dest, src);
23804
+ case 98:
23805
+ return MemcpyFixed<98>(dest, src);
23806
+ case 99:
23807
+ return MemcpyFixed<99>(dest, src);
23808
+ case 100:
23809
+ return MemcpyFixed<100>(dest, src);
23810
+ case 101:
23811
+ return MemcpyFixed<101>(dest, src);
23812
+ case 102:
23813
+ return MemcpyFixed<102>(dest, src);
23814
+ case 103:
23815
+ return MemcpyFixed<103>(dest, src);
23816
+ case 104:
23817
+ return MemcpyFixed<104>(dest, src);
23818
+ case 105:
23819
+ return MemcpyFixed<105>(dest, src);
23820
+ case 106:
23821
+ return MemcpyFixed<106>(dest, src);
23822
+ case 107:
23823
+ return MemcpyFixed<107>(dest, src);
23824
+ case 108:
23825
+ return MemcpyFixed<108>(dest, src);
23826
+ case 109:
23827
+ return MemcpyFixed<109>(dest, src);
23828
+ case 110:
23829
+ return MemcpyFixed<110>(dest, src);
23830
+ case 111:
23831
+ return MemcpyFixed<111>(dest, src);
23832
+ case 112:
23833
+ return MemcpyFixed<112>(dest, src);
23834
+ case 113:
23835
+ return MemcpyFixed<113>(dest, src);
23836
+ case 114:
23837
+ return MemcpyFixed<114>(dest, src);
23838
+ case 115:
23839
+ return MemcpyFixed<115>(dest, src);
23840
+ case 116:
23841
+ return MemcpyFixed<116>(dest, src);
23842
+ case 117:
23843
+ return MemcpyFixed<117>(dest, src);
23844
+ case 118:
23845
+ return MemcpyFixed<118>(dest, src);
23846
+ case 119:
23847
+ return MemcpyFixed<119>(dest, src);
23848
+ case 120:
23849
+ return MemcpyFixed<120>(dest, src);
23850
+ case 121:
23851
+ return MemcpyFixed<121>(dest, src);
23852
+ case 122:
23853
+ return MemcpyFixed<122>(dest, src);
23854
+ case 123:
23855
+ return MemcpyFixed<123>(dest, src);
23856
+ case 124:
23857
+ return MemcpyFixed<124>(dest, src);
23858
+ case 125:
23859
+ return MemcpyFixed<125>(dest, src);
23860
+ case 126:
23861
+ return MemcpyFixed<126>(dest, src);
23862
+ case 127:
23863
+ return MemcpyFixed<127>(dest, src);
23864
+ case 128:
23865
+ return MemcpyFixed<128>(dest, src);
23866
+ case 129:
23867
+ return MemcpyFixed<129>(dest, src);
23868
+ case 130:
23869
+ return MemcpyFixed<130>(dest, src);
23870
+ case 131:
23871
+ return MemcpyFixed<131>(dest, src);
23872
+ case 132:
23873
+ return MemcpyFixed<132>(dest, src);
23874
+ case 133:
23875
+ return MemcpyFixed<133>(dest, src);
23876
+ case 134:
23877
+ return MemcpyFixed<134>(dest, src);
23878
+ case 135:
23879
+ return MemcpyFixed<135>(dest, src);
23880
+ case 136:
23881
+ return MemcpyFixed<136>(dest, src);
23882
+ case 137:
23883
+ return MemcpyFixed<137>(dest, src);
23884
+ case 138:
23885
+ return MemcpyFixed<138>(dest, src);
23886
+ case 139:
23887
+ return MemcpyFixed<139>(dest, src);
23888
+ case 140:
23889
+ return MemcpyFixed<140>(dest, src);
23890
+ case 141:
23891
+ return MemcpyFixed<141>(dest, src);
23892
+ case 142:
23893
+ return MemcpyFixed<142>(dest, src);
23894
+ case 143:
23895
+ return MemcpyFixed<143>(dest, src);
23896
+ case 144:
23897
+ return MemcpyFixed<144>(dest, src);
23898
+ case 145:
23899
+ return MemcpyFixed<145>(dest, src);
23900
+ case 146:
23901
+ return MemcpyFixed<146>(dest, src);
23902
+ case 147:
23903
+ return MemcpyFixed<147>(dest, src);
23904
+ case 148:
23905
+ return MemcpyFixed<148>(dest, src);
23906
+ case 149:
23907
+ return MemcpyFixed<149>(dest, src);
23908
+ case 150:
23909
+ return MemcpyFixed<150>(dest, src);
23910
+ case 151:
23911
+ return MemcpyFixed<151>(dest, src);
23912
+ case 152:
23913
+ return MemcpyFixed<152>(dest, src);
23914
+ case 153:
23915
+ return MemcpyFixed<153>(dest, src);
23916
+ case 154:
23917
+ return MemcpyFixed<154>(dest, src);
23918
+ case 155:
23919
+ return MemcpyFixed<155>(dest, src);
23920
+ case 156:
23921
+ return MemcpyFixed<156>(dest, src);
23922
+ case 157:
23923
+ return MemcpyFixed<157>(dest, src);
23924
+ case 158:
23925
+ return MemcpyFixed<158>(dest, src);
23926
+ case 159:
23927
+ return MemcpyFixed<159>(dest, src);
23928
+ case 160:
23929
+ return MemcpyFixed<160>(dest, src);
23930
+ case 161:
23931
+ return MemcpyFixed<161>(dest, src);
23932
+ case 162:
23933
+ return MemcpyFixed<162>(dest, src);
23934
+ case 163:
23935
+ return MemcpyFixed<163>(dest, src);
23936
+ case 164:
23937
+ return MemcpyFixed<164>(dest, src);
23938
+ case 165:
23939
+ return MemcpyFixed<165>(dest, src);
23940
+ case 166:
23941
+ return MemcpyFixed<166>(dest, src);
23942
+ case 167:
23943
+ return MemcpyFixed<167>(dest, src);
23944
+ case 168:
23945
+ return MemcpyFixed<168>(dest, src);
23946
+ case 169:
23947
+ return MemcpyFixed<169>(dest, src);
23948
+ case 170:
23949
+ return MemcpyFixed<170>(dest, src);
23950
+ case 171:
23951
+ return MemcpyFixed<171>(dest, src);
23952
+ case 172:
23953
+ return MemcpyFixed<172>(dest, src);
23954
+ case 173:
23955
+ return MemcpyFixed<173>(dest, src);
23956
+ case 174:
23957
+ return MemcpyFixed<174>(dest, src);
23958
+ case 175:
23959
+ return MemcpyFixed<175>(dest, src);
23960
+ case 176:
23961
+ return MemcpyFixed<176>(dest, src);
23962
+ case 177:
23963
+ return MemcpyFixed<177>(dest, src);
23964
+ case 178:
23965
+ return MemcpyFixed<178>(dest, src);
23966
+ case 179:
23967
+ return MemcpyFixed<179>(dest, src);
23968
+ case 180:
23969
+ return MemcpyFixed<180>(dest, src);
23970
+ case 181:
23971
+ return MemcpyFixed<181>(dest, src);
23972
+ case 182:
23973
+ return MemcpyFixed<182>(dest, src);
23974
+ case 183:
23975
+ return MemcpyFixed<183>(dest, src);
23976
+ case 184:
23977
+ return MemcpyFixed<184>(dest, src);
23978
+ case 185:
23979
+ return MemcpyFixed<185>(dest, src);
23980
+ case 186:
23981
+ return MemcpyFixed<186>(dest, src);
23982
+ case 187:
23983
+ return MemcpyFixed<187>(dest, src);
23984
+ case 188:
23985
+ return MemcpyFixed<188>(dest, src);
23986
+ case 189:
23987
+ return MemcpyFixed<189>(dest, src);
23988
+ case 190:
23989
+ return MemcpyFixed<190>(dest, src);
23990
+ case 191:
23991
+ return MemcpyFixed<191>(dest, src);
23992
+ case 192:
23993
+ return MemcpyFixed<192>(dest, src);
23994
+ case 193:
23995
+ return MemcpyFixed<193>(dest, src);
23996
+ case 194:
23997
+ return MemcpyFixed<194>(dest, src);
23998
+ case 195:
23999
+ return MemcpyFixed<195>(dest, src);
24000
+ case 196:
24001
+ return MemcpyFixed<196>(dest, src);
24002
+ case 197:
24003
+ return MemcpyFixed<197>(dest, src);
24004
+ case 198:
24005
+ return MemcpyFixed<198>(dest, src);
24006
+ case 199:
24007
+ return MemcpyFixed<199>(dest, src);
24008
+ case 200:
24009
+ return MemcpyFixed<200>(dest, src);
24010
+ case 201:
24011
+ return MemcpyFixed<201>(dest, src);
24012
+ case 202:
24013
+ return MemcpyFixed<202>(dest, src);
24014
+ case 203:
24015
+ return MemcpyFixed<203>(dest, src);
24016
+ case 204:
24017
+ return MemcpyFixed<204>(dest, src);
24018
+ case 205:
24019
+ return MemcpyFixed<205>(dest, src);
24020
+ case 206:
24021
+ return MemcpyFixed<206>(dest, src);
24022
+ case 207:
24023
+ return MemcpyFixed<207>(dest, src);
24024
+ case 208:
24025
+ return MemcpyFixed<208>(dest, src);
24026
+ case 209:
24027
+ return MemcpyFixed<209>(dest, src);
24028
+ case 210:
24029
+ return MemcpyFixed<210>(dest, src);
24030
+ case 211:
24031
+ return MemcpyFixed<211>(dest, src);
24032
+ case 212:
24033
+ return MemcpyFixed<212>(dest, src);
24034
+ case 213:
24035
+ return MemcpyFixed<213>(dest, src);
24036
+ case 214:
24037
+ return MemcpyFixed<214>(dest, src);
24038
+ case 215:
24039
+ return MemcpyFixed<215>(dest, src);
24040
+ case 216:
24041
+ return MemcpyFixed<216>(dest, src);
24042
+ case 217:
24043
+ return MemcpyFixed<217>(dest, src);
24044
+ case 218:
24045
+ return MemcpyFixed<218>(dest, src);
24046
+ case 219:
24047
+ return MemcpyFixed<219>(dest, src);
24048
+ case 220:
24049
+ return MemcpyFixed<220>(dest, src);
24050
+ case 221:
24051
+ return MemcpyFixed<221>(dest, src);
24052
+ case 222:
24053
+ return MemcpyFixed<222>(dest, src);
24054
+ case 223:
24055
+ return MemcpyFixed<223>(dest, src);
24056
+ case 224:
24057
+ return MemcpyFixed<224>(dest, src);
24058
+ case 225:
24059
+ return MemcpyFixed<225>(dest, src);
24060
+ case 226:
24061
+ return MemcpyFixed<226>(dest, src);
24062
+ case 227:
24063
+ return MemcpyFixed<227>(dest, src);
24064
+ case 228:
24065
+ return MemcpyFixed<228>(dest, src);
24066
+ case 229:
24067
+ return MemcpyFixed<229>(dest, src);
24068
+ case 230:
24069
+ return MemcpyFixed<230>(dest, src);
24070
+ case 231:
24071
+ return MemcpyFixed<231>(dest, src);
24072
+ case 232:
24073
+ return MemcpyFixed<232>(dest, src);
24074
+ case 233:
24075
+ return MemcpyFixed<233>(dest, src);
24076
+ case 234:
24077
+ return MemcpyFixed<234>(dest, src);
24078
+ case 235:
24079
+ return MemcpyFixed<235>(dest, src);
24080
+ case 236:
24081
+ return MemcpyFixed<236>(dest, src);
24082
+ case 237:
24083
+ return MemcpyFixed<237>(dest, src);
24084
+ case 238:
24085
+ return MemcpyFixed<238>(dest, src);
24086
+ case 239:
24087
+ return MemcpyFixed<239>(dest, src);
24088
+ case 240:
24089
+ return MemcpyFixed<240>(dest, src);
24090
+ case 241:
24091
+ return MemcpyFixed<241>(dest, src);
24092
+ case 242:
24093
+ return MemcpyFixed<242>(dest, src);
24094
+ case 243:
24095
+ return MemcpyFixed<243>(dest, src);
24096
+ case 244:
24097
+ return MemcpyFixed<244>(dest, src);
24098
+ case 245:
24099
+ return MemcpyFixed<245>(dest, src);
24100
+ case 246:
24101
+ return MemcpyFixed<246>(dest, src);
24102
+ case 247:
24103
+ return MemcpyFixed<247>(dest, src);
24104
+ case 248:
24105
+ return MemcpyFixed<248>(dest, src);
24106
+ case 249:
24107
+ return MemcpyFixed<249>(dest, src);
24108
+ case 250:
24109
+ return MemcpyFixed<250>(dest, src);
24110
+ case 251:
24111
+ return MemcpyFixed<251>(dest, src);
24112
+ case 252:
24113
+ return MemcpyFixed<252>(dest, src);
24114
+ case 253:
24115
+ return MemcpyFixed<253>(dest, src);
24116
+ case 254:
24117
+ return MemcpyFixed<254>(dest, src);
24118
+ case 255:
24119
+ return MemcpyFixed<255>(dest, src);
24120
+ case 256:
24121
+ return MemcpyFixed<256>(dest, src);
24122
+ default:
24123
+ memcpy(dest, src, size);
24124
+ }
24125
+ // LCOV_EXCL_STOP
24126
+ }
24127
+
24128
+ //! This templated memcmp is significantly faster than std::memcmp,
24129
+ //! but only when you are calling memcmp with a const size in a loop.
24130
+ //! For instance `while (<cond>) { memcmp(<str1>, <str2>, const_size); ... }`
24131
+ static inline int FastMemcmp(const void *str1, const void *str2, const size_t size) {
24132
+ // LCOV_EXCL_START
24133
+ switch (size) {
24134
+ case 0:
24135
+ return 0;
24136
+ case 1:
24137
+ return MemcmpFixed<1>(str1, str2);
24138
+ case 2:
24139
+ return MemcmpFixed<2>(str1, str2);
24140
+ case 3:
24141
+ return MemcmpFixed<3>(str1, str2);
24142
+ case 4:
24143
+ return MemcmpFixed<4>(str1, str2);
24144
+ case 5:
24145
+ return MemcmpFixed<5>(str1, str2);
24146
+ case 6:
24147
+ return MemcmpFixed<6>(str1, str2);
24148
+ case 7:
24149
+ return MemcmpFixed<7>(str1, str2);
24150
+ case 8:
24151
+ return MemcmpFixed<8>(str1, str2);
24152
+ case 9:
24153
+ return MemcmpFixed<9>(str1, str2);
24154
+ case 10:
24155
+ return MemcmpFixed<10>(str1, str2);
24156
+ case 11:
24157
+ return MemcmpFixed<11>(str1, str2);
24158
+ case 12:
24159
+ return MemcmpFixed<12>(str1, str2);
24160
+ case 13:
24161
+ return MemcmpFixed<13>(str1, str2);
24162
+ case 14:
24163
+ return MemcmpFixed<14>(str1, str2);
24164
+ case 15:
24165
+ return MemcmpFixed<15>(str1, str2);
24166
+ case 16:
24167
+ return MemcmpFixed<16>(str1, str2);
24168
+ case 17:
24169
+ return MemcmpFixed<17>(str1, str2);
24170
+ case 18:
24171
+ return MemcmpFixed<18>(str1, str2);
24172
+ case 19:
24173
+ return MemcmpFixed<19>(str1, str2);
24174
+ case 20:
24175
+ return MemcmpFixed<20>(str1, str2);
24176
+ case 21:
24177
+ return MemcmpFixed<21>(str1, str2);
24178
+ case 22:
24179
+ return MemcmpFixed<22>(str1, str2);
24180
+ case 23:
24181
+ return MemcmpFixed<23>(str1, str2);
24182
+ case 24:
24183
+ return MemcmpFixed<24>(str1, str2);
24184
+ case 25:
24185
+ return MemcmpFixed<25>(str1, str2);
24186
+ case 26:
24187
+ return MemcmpFixed<26>(str1, str2);
24188
+ case 27:
24189
+ return MemcmpFixed<27>(str1, str2);
24190
+ case 28:
24191
+ return MemcmpFixed<28>(str1, str2);
24192
+ case 29:
24193
+ return MemcmpFixed<29>(str1, str2);
24194
+ case 30:
24195
+ return MemcmpFixed<30>(str1, str2);
24196
+ case 31:
24197
+ return MemcmpFixed<31>(str1, str2);
24198
+ case 32:
24199
+ return MemcmpFixed<32>(str1, str2);
24200
+ case 33:
24201
+ return MemcmpFixed<33>(str1, str2);
24202
+ case 34:
24203
+ return MemcmpFixed<34>(str1, str2);
24204
+ case 35:
24205
+ return MemcmpFixed<35>(str1, str2);
24206
+ case 36:
24207
+ return MemcmpFixed<36>(str1, str2);
24208
+ case 37:
24209
+ return MemcmpFixed<37>(str1, str2);
24210
+ case 38:
24211
+ return MemcmpFixed<38>(str1, str2);
24212
+ case 39:
24213
+ return MemcmpFixed<39>(str1, str2);
24214
+ case 40:
24215
+ return MemcmpFixed<40>(str1, str2);
24216
+ case 41:
24217
+ return MemcmpFixed<41>(str1, str2);
24218
+ case 42:
24219
+ return MemcmpFixed<42>(str1, str2);
24220
+ case 43:
24221
+ return MemcmpFixed<43>(str1, str2);
24222
+ case 44:
24223
+ return MemcmpFixed<44>(str1, str2);
24224
+ case 45:
24225
+ return MemcmpFixed<45>(str1, str2);
24226
+ case 46:
24227
+ return MemcmpFixed<46>(str1, str2);
24228
+ case 47:
24229
+ return MemcmpFixed<47>(str1, str2);
24230
+ case 48:
24231
+ return MemcmpFixed<48>(str1, str2);
24232
+ case 49:
24233
+ return MemcmpFixed<49>(str1, str2);
24234
+ case 50:
24235
+ return MemcmpFixed<50>(str1, str2);
24236
+ case 51:
24237
+ return MemcmpFixed<51>(str1, str2);
24238
+ case 52:
24239
+ return MemcmpFixed<52>(str1, str2);
24240
+ case 53:
24241
+ return MemcmpFixed<53>(str1, str2);
24242
+ case 54:
24243
+ return MemcmpFixed<54>(str1, str2);
24244
+ case 55:
24245
+ return MemcmpFixed<55>(str1, str2);
24246
+ case 56:
24247
+ return MemcmpFixed<56>(str1, str2);
24248
+ case 57:
24249
+ return MemcmpFixed<57>(str1, str2);
24250
+ case 58:
24251
+ return MemcmpFixed<58>(str1, str2);
24252
+ case 59:
24253
+ return MemcmpFixed<59>(str1, str2);
24254
+ case 60:
24255
+ return MemcmpFixed<60>(str1, str2);
24256
+ case 61:
24257
+ return MemcmpFixed<61>(str1, str2);
24258
+ case 62:
24259
+ return MemcmpFixed<62>(str1, str2);
24260
+ case 63:
24261
+ return MemcmpFixed<63>(str1, str2);
24262
+ case 64:
24263
+ return MemcmpFixed<64>(str1, str2);
24264
+ default:
24265
+ return memcmp(str1, str2, size);
24266
+ }
24267
+ // LCOV_EXCL_STOP
24268
+ }
24269
+
24270
+ } // namespace duckdb
24271
+
23321
24272
  //===----------------------------------------------------------------------===//
23322
24273
  // DuckDB
23323
24274
  //
23324
- // duckdb/storage/block_manager.hpp
24275
+ // duckdb/common/sort/comparators.hpp
23325
24276
  //
23326
24277
  //
23327
24278
  //===----------------------------------------------------------------------===//
@@ -23331,65 +24282,957 @@ private:
23331
24282
 
23332
24283
 
23333
24284
 
23334
-
23335
24285
  namespace duckdb {
23336
- class ClientContext;
23337
- class DatabaseInstance;
23338
24286
 
23339
- //! BlockManager is an abstract representation to manage blocks on DuckDB. When writing or reading blocks, the
23340
- //! BlockManager creates and accesses blocks. The concrete types implements how blocks are stored.
23341
- class BlockManager {
24287
+ struct SortLayout;
24288
+ struct SBScanState;
24289
+
24290
+ using ValidityBytes = RowLayout::ValidityBytes;
24291
+
24292
+ struct Comparators {
23342
24293
  public:
23343
- virtual ~BlockManager() = default;
24294
+ //! Whether a tie between two blobs can be broken
24295
+ static bool TieIsBreakable(const idx_t &col_idx, const data_ptr_t row_ptr, const RowLayout &row_layout);
24296
+ //! Compares the tuples that are being read from the 'left' and 'right' blocks during merge sort
24297
+ //! (only in case we cannot simply 'memcmp' - if there are blob columns)
24298
+ static int CompareTuple(const SBScanState &left, const SBScanState &right, const data_ptr_t &l_ptr,
24299
+ const data_ptr_t &r_ptr, const SortLayout &sort_layout, const bool &external_sort);
24300
+ //! Compare two blob values
24301
+ static int CompareVal(const data_ptr_t l_ptr, const data_ptr_t r_ptr, const LogicalType &type);
23344
24302
 
23345
- virtual void StartCheckpoint() = 0;
23346
- //! Creates a new block inside the block manager
23347
- virtual unique_ptr<Block> CreateBlock(block_id_t block_id) = 0;
23348
- //! Return the next free block id
23349
- virtual block_id_t GetFreeBlockId() = 0;
23350
- //! Returns whether or not a specified block is the root block
23351
- virtual bool IsRootBlock(block_id_t root) = 0;
23352
- //! Mark a block as "modified"; modified blocks are added to the free list after a checkpoint (i.e. their data is
23353
- //! assumed to be rewritten)
23354
- virtual void MarkBlockAsModified(block_id_t block_id) = 0;
23355
- //! Increase the reference count of a block. The block should hold at least one reference before this method is
23356
- //! called.
23357
- virtual void IncreaseBlockReferenceCount(block_id_t block_id) = 0;
23358
- //! Get the first meta block id
23359
- virtual block_id_t GetMetaBlock() = 0;
23360
- //! Read the content of the block from disk
23361
- virtual void Read(Block &block) = 0;
23362
- //! Writes the block to disk
23363
- virtual void Write(FileBuffer &block, block_id_t block_id) = 0;
23364
- //! Writes the block to disk
23365
- void Write(Block &block) {
23366
- Write(block, block.id);
23367
- }
23368
- //! Write the header; should be the final step of a checkpoint
23369
- virtual void WriteHeader(DatabaseHeader header) = 0;
24303
+ private:
24304
+ //! Compares two blob values that were initially tied by their prefix
24305
+ static int BreakBlobTie(const idx_t &tie_col, const SBScanState &left, const SBScanState &right,
24306
+ const SortLayout &sort_layout, const bool &external);
24307
+ //! Compare two fixed-size values
24308
+ template <class T>
24309
+ static int TemplatedCompareVal(const data_ptr_t &left_ptr, const data_ptr_t &right_ptr);
23370
24310
 
23371
- //! Returns the number of total blocks
23372
- virtual idx_t TotalBlocks() = 0;
23373
- //! Returns the number of free blocks
23374
- virtual idx_t FreeBlocks() = 0;
24311
+ //! Compare two values at the pointers (can be recursive if nested type)
24312
+ static int CompareValAndAdvance(data_ptr_t &l_ptr, data_ptr_t &r_ptr, const LogicalType &type);
24313
+ //! Compares two fixed-size values at the given pointers
24314
+ template <class T>
24315
+ static int TemplatedCompareAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr);
24316
+ //! Compares two string values at the given pointers
24317
+ static int CompareStringAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr);
24318
+ //! Compares two struct values at the given pointers (recursive)
24319
+ static int CompareStructAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr,
24320
+ const child_list_t<LogicalType> &types);
24321
+ //! Compare two list values at the pointers (can be recursive if nested type)
24322
+ static int CompareListAndAdvance(data_ptr_t &left_ptr, data_ptr_t &right_ptr, const LogicalType &type);
24323
+ //! Compares a list of fixed-size values
24324
+ template <class T>
24325
+ static int TemplatedCompareListLoop(data_ptr_t &left_ptr, data_ptr_t &right_ptr, const ValidityBytes &left_validity,
24326
+ const ValidityBytes &right_validity, const idx_t &count);
23375
24327
 
23376
- static BlockManager &GetBlockManager(ClientContext &context);
23377
- static BlockManager &GetBlockManager(DatabaseInstance &db);
24328
+ //! Unswizzles an offset into a pointer
24329
+ static void UnswizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type);
24330
+ //! Swizzles a pointer into an offset
24331
+ static void SwizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type);
23378
24332
  };
24333
+
23379
24334
  } // namespace duckdb
23380
24335
 
23381
24336
 
23382
24337
 
23383
24338
  namespace duckdb {
23384
- class DatabaseInstance;
23385
24339
 
23386
- //! This struct is responsible for writing metadata to disk
23387
- class MetaBlockWriter : public Serializer {
23388
- public:
23389
- MetaBlockWriter(DatabaseInstance &db, block_id_t initial_block_id = INVALID_BLOCK);
23390
- ~MetaBlockWriter() override;
24340
+ class BufferManager;
24341
+ struct RowDataBlock;
24342
+ struct SortLayout;
24343
+ struct GlobalSortState;
23391
24344
 
23392
- DatabaseInstance &db;
24345
+ enum class SortedDataType { BLOB, PAYLOAD };
24346
+
24347
+ //! Object that holds sorted rows, and an accompanying heap if there are blobs
24348
+ struct SortedData {
24349
+ public:
24350
+ SortedData(SortedDataType type, const RowLayout &layout, BufferManager &buffer_manager, GlobalSortState &state);
24351
+ //! Number of rows that this object holds
24352
+ idx_t Count();
24353
+ //! Initialize new block to write to
24354
+ void CreateBlock();
24355
+ //! Create a slice that holds the rows between the start and end indices
24356
+ unique_ptr<SortedData> CreateSlice(idx_t start_block_index, idx_t end_block_index, idx_t end_entry_index);
24357
+ //! Unswizzles all
24358
+ void Unswizzle();
24359
+
24360
+ public:
24361
+ const SortedDataType type;
24362
+ //! Layout of this data
24363
+ const RowLayout layout;
24364
+ //! Data and heap blocks
24365
+ vector<unique_ptr<RowDataBlock>> data_blocks;
24366
+ vector<unique_ptr<RowDataBlock>> heap_blocks;
24367
+ //! Whether the pointers in this sorted data are swizzled
24368
+ bool swizzled;
24369
+
24370
+ private:
24371
+ //! The buffer manager
24372
+ BufferManager &buffer_manager;
24373
+ //! The global state
24374
+ GlobalSortState &state;
24375
+ };
24376
+
24377
+ //! Block that holds sorted rows: radix, blob and payload data
24378
+ struct SortedBlock {
24379
+ public:
24380
+ SortedBlock(BufferManager &buffer_manager, GlobalSortState &gstate);
24381
+ //! Number of rows that this object holds
24382
+ idx_t Count() const;
24383
+ //! Initialize this block to write data to
24384
+ void InitializeWrite();
24385
+ //! Init new block to write to
24386
+ void CreateBlock();
24387
+ //! Fill this sorted block by appending the blocks held by a vector of sorted blocks
24388
+ void AppendSortedBlocks(vector<unique_ptr<SortedBlock>> &sorted_blocks);
24389
+ //! Locate the block and entry index of a row in this block,
24390
+ //! given an index between 0 and the total number of rows in this block
24391
+ void GlobalToLocalIndex(const idx_t &global_idx, idx_t &local_block_index, idx_t &local_entry_index);
24392
+ //! Create a slice that holds the rows between the start and end indices
24393
+ unique_ptr<SortedBlock> CreateSlice(const idx_t start, const idx_t end, idx_t &entry_idx);
24394
+
24395
+ //! Size (in bytes) of the heap of this block
24396
+ idx_t HeapSize() const;
24397
+ //! Total size (in bytes) of this block
24398
+ idx_t SizeInBytes() const;
24399
+
24400
+ public:
24401
+ //! Radix/memcmp sortable data
24402
+ vector<unique_ptr<RowDataBlock>> radix_sorting_data;
24403
+ //! Variable sized sorting data
24404
+ unique_ptr<SortedData> blob_sorting_data;
24405
+ //! Payload data
24406
+ unique_ptr<SortedData> payload_data;
24407
+
24408
+ private:
24409
+ //! Buffer manager, global state, and sorting layout constants
24410
+ BufferManager &buffer_manager;
24411
+ GlobalSortState &state;
24412
+ const SortLayout &sort_layout;
24413
+ const RowLayout &payload_layout;
24414
+ };
24415
+
24416
+ //! State used to scan a SortedBlock e.g. during merge sort
24417
+ struct SBScanState {
24418
+ public:
24419
+ SBScanState(BufferManager &buffer_manager, GlobalSortState &state);
24420
+
24421
+ void PinRadix(idx_t block_idx_to);
24422
+ void PinData(SortedData &sd);
24423
+
24424
+ data_ptr_t RadixPtr() const;
24425
+ data_ptr_t DataPtr(SortedData &sd) const;
24426
+ data_ptr_t HeapPtr(SortedData &sd) const;
24427
+ data_ptr_t BaseHeapPtr(SortedData &sd) const;
24428
+
24429
+ idx_t Remaining() const;
24430
+
24431
+ void SetIndices(idx_t block_idx_to, idx_t entry_idx_to);
24432
+
24433
+ public:
24434
+ BufferManager &buffer_manager;
24435
+ const SortLayout &sort_layout;
24436
+ GlobalSortState &state;
24437
+
24438
+ SortedBlock *sb;
24439
+
24440
+ idx_t block_idx;
24441
+ idx_t entry_idx;
24442
+
24443
+ BufferHandle radix_handle;
24444
+
24445
+ BufferHandle blob_sorting_data_handle;
24446
+ BufferHandle blob_sorting_heap_handle;
24447
+
24448
+ BufferHandle payload_data_handle;
24449
+ BufferHandle payload_heap_handle;
24450
+ };
24451
+
24452
+ //! Used to scan the data into DataChunks after sorting
24453
+ struct PayloadScanner {
24454
+ public:
24455
+ PayloadScanner(SortedData &sorted_data, GlobalSortState &global_sort_state, bool flush = true);
24456
+ explicit PayloadScanner(GlobalSortState &global_sort_state, bool flush = true);
24457
+
24458
+ //! Scan a single block
24459
+ PayloadScanner(GlobalSortState &global_sort_state, idx_t block_idx);
24460
+
24461
+ //! The type layout of the payload
24462
+ inline const vector<LogicalType> &GetPayloadTypes() const {
24463
+ return sorted_data.layout.GetTypes();
24464
+ }
24465
+
24466
+ //! The number of rows scanned so far
24467
+ inline idx_t Scanned() const {
24468
+ return total_scanned;
24469
+ }
24470
+
24471
+ //! The number of remaining rows
24472
+ inline idx_t Remaining() const {
24473
+ return total_count - total_scanned;
24474
+ }
24475
+
24476
+ //! Scans the next data chunk from the sorted data
24477
+ void Scan(DataChunk &chunk);
24478
+
24479
+ private:
24480
+ //! The sorted data being scanned
24481
+ SortedData &sorted_data;
24482
+ //! Read state
24483
+ SBScanState read_state;
24484
+ //! The total count of sorted_data
24485
+ const idx_t total_count;
24486
+ //! Addresses used to gather from the sorted data
24487
+ Vector addresses = Vector(LogicalType::POINTER);
24488
+ //! The number of rows scanned so far
24489
+ idx_t total_scanned;
24490
+ //! Whether to flush the blocks after scanning
24491
+ const bool flush;
24492
+ //! Whether we are unswizzling the blocks
24493
+ const bool unswizzling;
24494
+
24495
+ //! Checks that the newest block is valid
24496
+ void ValidateUnscannedBlock() const;
24497
+ };
24498
+
24499
+ struct SBIterator {
24500
+ static int ComparisonValue(ExpressionType comparison);
24501
+
24502
+ SBIterator(GlobalSortState &gss, ExpressionType comparison, idx_t entry_idx_p = 0);
24503
+
24504
+ inline idx_t GetIndex() const {
24505
+ return entry_idx;
24506
+ }
24507
+
24508
+ inline void SetIndex(idx_t entry_idx_p) {
24509
+ const auto new_block_idx = entry_idx_p / block_capacity;
24510
+ if (new_block_idx != scan.block_idx) {
24511
+ scan.SetIndices(new_block_idx, 0);
24512
+ if (new_block_idx < block_count) {
24513
+ scan.PinRadix(scan.block_idx);
24514
+ block_ptr = scan.RadixPtr();
24515
+ if (!all_constant) {
24516
+ scan.PinData(*scan.sb->blob_sorting_data);
24517
+ }
24518
+ }
24519
+ }
24520
+
24521
+ scan.entry_idx = entry_idx_p % block_capacity;
24522
+ entry_ptr = block_ptr + scan.entry_idx * entry_size;
24523
+ entry_idx = entry_idx_p;
24524
+ }
24525
+
24526
+ inline SBIterator &operator++() {
24527
+ if (++scan.entry_idx < block_capacity) {
24528
+ entry_ptr += entry_size;
24529
+ ++entry_idx;
24530
+ } else {
24531
+ SetIndex(entry_idx + 1);
24532
+ }
24533
+
24534
+ return *this;
24535
+ }
24536
+
24537
+ inline SBIterator &operator--() {
24538
+ if (scan.entry_idx) {
24539
+ --scan.entry_idx;
24540
+ --entry_idx;
24541
+ entry_ptr -= entry_size;
24542
+ } else {
24543
+ SetIndex(entry_idx - 1);
24544
+ }
24545
+
24546
+ return *this;
24547
+ }
24548
+
24549
+ inline bool Compare(const SBIterator &other) const {
24550
+ int comp_res;
24551
+ if (all_constant) {
24552
+ comp_res = FastMemcmp(entry_ptr, other.entry_ptr, cmp_size);
24553
+ } else {
24554
+ comp_res = Comparators::CompareTuple(scan, other.scan, entry_ptr, other.entry_ptr, sort_layout, external);
24555
+ }
24556
+
24557
+ return comp_res <= cmp;
24558
+ }
24559
+
24560
+ // Fixed comparison parameters
24561
+ const SortLayout &sort_layout;
24562
+ const idx_t block_count;
24563
+ const idx_t block_capacity;
24564
+ const size_t cmp_size;
24565
+ const size_t entry_size;
24566
+ const bool all_constant;
24567
+ const bool external;
24568
+ const int cmp;
24569
+
24570
+ // Iteration state
24571
+ SBScanState scan;
24572
+ idx_t entry_idx;
24573
+ data_ptr_t block_ptr;
24574
+ data_ptr_t entry_ptr;
24575
+ };
24576
+
24577
+ } // namespace duckdb
24578
+
24579
+ //===----------------------------------------------------------------------===//
24580
+ // DuckDB
24581
+ //
24582
+ // duckdb/common/types/row_data_collection.hpp
24583
+ //
24584
+ //
24585
+ //===----------------------------------------------------------------------===//
24586
+
24587
+
24588
+
24589
+
24590
+
24591
+ //===----------------------------------------------------------------------===//
24592
+ // DuckDB
24593
+ //
24594
+ // duckdb/storage/buffer_manager.hpp
24595
+ //
24596
+ //
24597
+ //===----------------------------------------------------------------------===//
24598
+
24599
+
24600
+
24601
+
24602
+
24603
+
24604
+
24605
+ //===----------------------------------------------------------------------===//
24606
+ // DuckDB
24607
+ //
24608
+ // duckdb/storage/block_manager.hpp
24609
+ //
24610
+ //
24611
+ //===----------------------------------------------------------------------===//
24612
+
24613
+
24614
+
24615
+
24616
+
24617
+
24618
+
24619
+ namespace duckdb {
24620
+ class ClientContext;
24621
+ class DatabaseInstance;
24622
+
24623
+ //! BlockManager is an abstract representation to manage blocks on DuckDB. When writing or reading blocks, the
24624
+ //! BlockManager creates and accesses blocks. The concrete types implement how blocks are stored.
24625
+ class BlockManager {
24626
+ public:
24627
+ virtual ~BlockManager() = default;
24628
+
24629
+ virtual void StartCheckpoint() = 0;
24630
+ //! Creates a new block inside the block manager
24631
+ virtual unique_ptr<Block> CreateBlock(block_id_t block_id) = 0;
24632
+ //! Return the next free block id
24633
+ virtual block_id_t GetFreeBlockId() = 0;
24634
+ //! Returns whether or not a specified block is the root block
24635
+ virtual bool IsRootBlock(block_id_t root) = 0;
24636
+ //! Mark a block as "modified"; modified blocks are added to the free list after a checkpoint (i.e. their data is
24637
+ //! assumed to be rewritten)
24638
+ virtual void MarkBlockAsModified(block_id_t block_id) = 0;
24639
+ //! Increase the reference count of a block. The block should hold at least one reference before this method is
24640
+ //! called.
24641
+ virtual void IncreaseBlockReferenceCount(block_id_t block_id) = 0;
24642
+ //! Get the first meta block id
24643
+ virtual block_id_t GetMetaBlock() = 0;
24644
+ //! Read the content of the block from disk
24645
+ virtual void Read(Block &block) = 0;
24646
+ //! Writes the block to disk
24647
+ virtual void Write(FileBuffer &block, block_id_t block_id) = 0;
24648
+ //! Writes the block to disk
24649
+ void Write(Block &block) {
24650
+ Write(block, block.id);
24651
+ }
24652
+ //! Write the header; should be the final step of a checkpoint
24653
+ virtual void WriteHeader(DatabaseHeader header) = 0;
24654
+
24655
+ //! Returns the number of total blocks
24656
+ virtual idx_t TotalBlocks() = 0;
24657
+ //! Returns the number of free blocks
24658
+ virtual idx_t FreeBlocks() = 0;
24659
+
24660
+ static BlockManager &GetBlockManager(ClientContext &context);
24661
+ static BlockManager &GetBlockManager(DatabaseInstance &db);
24662
+ };
24663
+ } // namespace duckdb
24664
+
24665
+ //===----------------------------------------------------------------------===//
24666
+ // DuckDB
24667
+ //
24668
+ // duckdb/storage/buffer/block_handle.hpp
24669
+ //
24670
+ //
24671
+ //===----------------------------------------------------------------------===//
24672
+
24673
+
24674
+
24675
+
24676
+
24677
+
24678
+
24679
+
24680
+ namespace duckdb {
24681
+ class BufferHandle;
24682
+ class BufferManager;
24683
+ class DatabaseInstance;
24684
+ class FileBuffer;
24685
+
24686
+ enum class BlockState : uint8_t { BLOCK_UNLOADED = 0, BLOCK_LOADED = 1 };
24687
+
24688
+ class BlockHandle {
24689
+ friend struct BufferEvictionNode;
24690
+ friend class BufferHandle;
24691
+ friend class BufferManager;
24692
+
24693
+ public:
24694
+ BlockHandle(DatabaseInstance &db, block_id_t block_id);
24695
+ BlockHandle(DatabaseInstance &db, block_id_t block_id, unique_ptr<FileBuffer> buffer, bool can_destroy,
24696
+ idx_t block_size);
24697
+ ~BlockHandle();
24698
+
24699
+ DatabaseInstance &db;
24700
+
24701
+ public:
24702
+ block_id_t BlockId() {
24703
+ return block_id;
24704
+ }
24705
+
24706
+ int32_t Readers() const {
24707
+ return readers;
24708
+ }
24709
+
24710
+ inline bool IsSwizzled() const {
24711
+ return !unswizzled;
24712
+ }
24713
+
24714
+ inline void SetSwizzling(const char *unswizzler) {
24715
+ unswizzled = unswizzler;
24716
+ }
24717
+
24718
+ private:
24719
+ static BufferHandle Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileBuffer> buffer = nullptr);
24720
+ unique_ptr<FileBuffer> UnloadAndTakeBlock();
24721
+ void Unload();
24722
+ bool CanUnload();
24723
+
24724
+ //! The block-level lock
24725
+ mutex lock;
24726
+ //! Whether or not the block is loaded/unloaded
24727
+ BlockState state;
24728
+ //! Amount of concurrent readers
24729
+ atomic<int32_t> readers;
24730
+ //! The block id of the block
24731
+ const block_id_t block_id;
24732
+ //! Pointer to loaded data (if any)
24733
+ unique_ptr<FileBuffer> buffer;
24734
+ //! Internal eviction timestamp
24735
+ atomic<idx_t> eviction_timestamp;
24736
+ //! Whether or not the buffer can be destroyed (only used for temporary buffers)
24737
+ const bool can_destroy;
24738
+ //! The memory usage of the block
24739
+ idx_t memory_usage;
24740
+ //! Does the block contain any memory pointers?
24741
+ const char *unswizzled;
24742
+ };
24743
+
24744
+ } // namespace duckdb
24745
+
24746
+
24747
+ //===----------------------------------------------------------------------===//
24748
+ // DuckDB
24749
+ //
24750
+ // duckdb/storage/buffer/managed_buffer.hpp
24751
+ //
24752
+ //
24753
+ //===----------------------------------------------------------------------===//
24754
+
24755
+
24756
+
24757
+
24758
+
24759
+
24760
+
24761
+ namespace duckdb {
24762
+ class DatabaseInstance;
24763
+
24764
+ //! Managed buffer is an arbitrarily-sized buffer whose size is at least BLOCK_SIZE
24765
+ class ManagedBuffer : public FileBuffer {
24766
+ public:
24767
+ ManagedBuffer(DatabaseInstance &db, idx_t size, bool can_destroy, block_id_t id);
24768
+ ManagedBuffer(DatabaseInstance &db, FileBuffer &source, bool can_destroy, block_id_t id);
24769
+
24770
+ DatabaseInstance &db;
24771
+ //! Whether or not the managed buffer can be freely destroyed when unpinned.
24772
+ //! - If can_destroy is true, the buffer can be destroyed when unpinned and hence be unrecoverable. After being
24773
+ //! destroyed, Pin() will return false.
24774
+ //! - If can_destroy is false, the buffer will instead be written to a temporary file on disk when unloaded from
24775
+ //! memory, and read back into memory when Pin() is called.
24776
+ bool can_destroy;
24777
+ //! The internal id of the buffer
24778
+ block_id_t id;
24779
+ };
24780
+
24781
+ } // namespace duckdb
24782
+
24783
+
24784
+
24785
+ namespace duckdb {
24786
+ class DatabaseInstance;
24787
+ class TemporaryDirectoryHandle;
24788
+ struct EvictionQueue;
24789
+
24790
+ //! The buffer manager is in charge of handling memory management for the database. It hands out memory buffers that can
24791
+ //! be used by the database internally.
24792
+ class BufferManager {
24793
+ friend class BufferHandle;
24794
+ friend class BlockHandle;
24795
+
24796
+ public:
24797
+ BufferManager(DatabaseInstance &db, string temp_directory, idx_t maximum_memory);
24798
+ ~BufferManager();
24799
+
24800
+ //! Register a block with the given block id in the base file
24801
+ shared_ptr<BlockHandle> RegisterBlock(block_id_t block_id);
24802
+
24803
+ //! Register an in-memory buffer of arbitrary size, as long as it is >= BLOCK_SIZE. can_destroy signifies whether or
24804
+ //! not the buffer can be destroyed when unpinned, or whether or not it needs to be written to a temporary file so
24805
+ //! it can be reloaded. The resulting buffer will already be allocated, but needs to be pinned in order to be used.
24806
+ shared_ptr<BlockHandle> RegisterMemory(idx_t block_size, bool can_destroy);
24807
+
24808
+ //! Convert an existing in-memory buffer into a persistent disk-backed block
24809
+ shared_ptr<BlockHandle> ConvertToPersistent(BlockManager &block_manager, block_id_t block_id,
24810
+ shared_ptr<BlockHandle> old_block);
24811
+
24812
+ //! Allocate an in-memory buffer with a single pin.
24813
+ //! The allocated memory is released when the buffer handle is destroyed.
24814
+ DUCKDB_API BufferHandle Allocate(idx_t block_size);
24815
+
24816
+ //! Reallocate an in-memory buffer that is pinned.
24817
+ void ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size);
24818
+
24819
+ BufferHandle Pin(shared_ptr<BlockHandle> &handle);
24820
+ void Unpin(shared_ptr<BlockHandle> &handle);
24821
+
24822
+ void UnregisterBlock(block_id_t block_id, bool can_destroy);
24823
+
24824
+ //! Set a new memory limit for the buffer manager; throws an exception if the new limit is too low and not enough
24825
+ //! blocks can be evicted
24826
+ void SetLimit(idx_t limit = (idx_t)-1);
24827
+
24828
+ static BufferManager &GetBufferManager(ClientContext &context);
24829
+ DUCKDB_API static BufferManager &GetBufferManager(DatabaseInstance &db);
24830
+
24831
+ idx_t GetUsedMemory() {
24832
+ return current_memory;
24833
+ }
24834
+ idx_t GetMaxMemory() {
24835
+ return maximum_memory;
24836
+ }
24837
+
24838
+ const string &GetTemporaryDirectory() {
24839
+ return temp_directory;
24840
+ }
24841
+
24842
+ void SetTemporaryDirectory(string new_dir);
24843
+
24844
+ DUCKDB_API Allocator &GetBufferAllocator();
24845
+
24846
+ private:
24847
+ //! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible
24848
+ //! (i.e. not enough blocks could be evicted)
24849
+ //! If the "buffer" argument is specified AND the system can find a buffer to re-use for the given allocation size
24850
+ //! "buffer" will be made to point to the re-usable memory. Note that this is not guaranteed.
24851
+ bool EvictBlocks(idx_t extra_memory, idx_t memory_limit, unique_ptr<FileBuffer> *buffer = nullptr);
24852
+
24853
+ //! Garbage collect eviction queue
24854
+ void PurgeQueue();
24855
+
24856
+ //! Write a temporary buffer to disk
24857
+ void WriteTemporaryBuffer(ManagedBuffer &buffer);
24858
+ //! Read a temporary buffer from disk
24859
+ unique_ptr<FileBuffer> ReadTemporaryBuffer(block_id_t id, unique_ptr<FileBuffer> buffer = nullptr);
24860
+ //! Get the path of the temporary buffer
24861
+ string GetTemporaryPath(block_id_t id);
24862
+
24863
+ void DeleteTemporaryFile(block_id_t id);
24864
+
24865
+ void RequireTemporaryDirectory();
24866
+
24867
+ void AddToEvictionQueue(shared_ptr<BlockHandle> &handle);
24868
+
24869
+ string InMemoryWarning();
24870
+
24871
+ static data_ptr_t BufferAllocatorAllocate(PrivateAllocatorData *private_data, idx_t size);
24872
+ static void BufferAllocatorFree(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size);
24873
+ static data_ptr_t BufferAllocatorRealloc(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t old_size,
24874
+ idx_t size);
24875
+
24876
+ private:
24877
+ //! The database instance
24878
+ DatabaseInstance &db;
24879
+ //! The lock for changing the memory limit
24880
+ mutex limit_lock;
24881
+ //! The current amount of memory that is occupied by the buffer manager (in bytes)
24882
+ atomic<idx_t> current_memory;
24883
+ //! The maximum amount of memory that the buffer manager can keep (in bytes)
24884
+ atomic<idx_t> maximum_memory;
24885
+ //! The directory name where temporary files are stored
24886
+ string temp_directory;
24887
+ //! Lock for creating the temp handle
24888
+ mutex temp_handle_lock;
24889
+ //! Handle for the temporary directory
24890
+ unique_ptr<TemporaryDirectoryHandle> temp_directory_handle;
24891
+ //! The lock for the set of blocks
24892
+ mutex blocks_lock;
24893
+ //! A mapping of block id -> BlockHandle
24894
+ unordered_map<block_id_t, weak_ptr<BlockHandle>> blocks;
24895
+ //! Eviction queue
24896
+ unique_ptr<EvictionQueue> queue;
24897
+ //! The temporary id used for managed buffers
24898
+ atomic<block_id_t> temporary_id;
24899
+ //! Allocator associated with the buffer manager, that passes all allocations through this buffer manager
24900
+ Allocator buffer_allocator;
24901
+ };
24902
+ } // namespace duckdb
24903
+
24904
+
24905
+ namespace duckdb {
24906
+
24907
+ struct RowDataBlock {
24908
+ public:
24909
+ RowDataBlock(BufferManager &buffer_manager, idx_t capacity, idx_t entry_size)
24910
+ : capacity(capacity), entry_size(entry_size), count(0), byte_offset(0) {
24911
+ block = buffer_manager.RegisterMemory(capacity * entry_size, false);
24912
+ }
24913
+ explicit RowDataBlock(idx_t entry_size) : entry_size(entry_size) {
24914
+ }
24915
+ //! The buffer block handle
24916
+ shared_ptr<BlockHandle> block;
24917
+ //! Capacity (number of entries) and entry size that fit in this block
24918
+ idx_t capacity;
24919
+ const idx_t entry_size;
24920
+ //! Number of entries currently in this block
24921
+ idx_t count;
24922
+ //! Write offset (if variable size entries)
24923
+ idx_t byte_offset;
24924
+
24925
+ private:
24926
+ //! Implicit copying is not allowed
24927
+ RowDataBlock(const RowDataBlock &) = delete;
24928
+
24929
+ public:
24930
+ unique_ptr<RowDataBlock> Copy() {
24931
+ auto result = make_unique<RowDataBlock>(entry_size);
24932
+ result->block = block;
24933
+ result->capacity = capacity;
24934
+ result->count = count;
24935
+ result->byte_offset = byte_offset;
24936
+ return result;
24937
+ }
24938
+ };
24939
+
24940
+ struct BlockAppendEntry {
24941
+ BlockAppendEntry(data_ptr_t baseptr, idx_t count) : baseptr(baseptr), count(count) {
24942
+ }
24943
+ data_ptr_t baseptr;
24944
+ idx_t count;
24945
+ };
24946
+
24947
+ class RowDataCollection {
24948
+ public:
24949
+ RowDataCollection(BufferManager &buffer_manager, idx_t block_capacity, idx_t entry_size, bool keep_pinned = false);
24950
+
24951
+ unique_ptr<RowDataCollection> CloneEmpty(bool keep_pinned = false) const {
24952
+ return make_unique<RowDataCollection>(buffer_manager, block_capacity, entry_size, keep_pinned);
24953
+ }
24954
+
24955
+ //! BufferManager
24956
+ BufferManager &buffer_manager;
24957
+ //! The total number of stored entries
24958
+ idx_t count;
24959
+ //! The number of entries per block
24960
+ idx_t block_capacity;
24961
+ //! Size of entries in the blocks
24962
+ idx_t entry_size;
24963
+ //! The blocks holding the main data
24964
+ vector<unique_ptr<RowDataBlock>> blocks;
24965
+ //! The blocks that this collection currently has pinned
24966
+ vector<BufferHandle> pinned_blocks;
24967
+ //! Whether the blocks should stay pinned (necessary for e.g. a heap)
24968
+ const bool keep_pinned;
24969
+
24970
+ public:
24971
+ idx_t AppendToBlock(RowDataBlock &block, BufferHandle &handle, vector<BlockAppendEntry> &append_entries,
24972
+ idx_t remaining, idx_t entry_sizes[]);
24973
+ RowDataBlock &CreateBlock();
24974
+ vector<BufferHandle> Build(idx_t added_count, data_ptr_t key_locations[], idx_t entry_sizes[],
24975
+ const SelectionVector *sel = FlatVector::IncrementalSelectionVector());
24976
+
24977
+ void Merge(RowDataCollection &other);
24978
+
24979
+ void Clear() {
24980
+ blocks.clear();
24981
+ pinned_blocks.clear();
24982
+ count = 0;
24983
+ }
24984
+
24985
+ //! The size (in bytes) of this RowDataCollection if it were stored in a single block
24986
+ idx_t SizeInBytes() const {
24987
+ idx_t bytes = 0;
24988
+ if (entry_size == 1) {
24989
+ for (auto &block : blocks) {
24990
+ bytes += block->byte_offset;
24991
+ }
24992
+ } else {
24993
+ bytes = count * entry_size;
24994
+ }
24995
+ return bytes;
24996
+ }
24997
+
24998
+ static inline idx_t EntriesPerBlock(idx_t width) {
24999
+ return (Storage::BLOCK_SIZE + width * STANDARD_VECTOR_SIZE - 1) / width;
25000
+ }
25001
+
25002
+ private:
25003
+ mutex rdc_lock;
25004
+
25005
+ //! Copying is not allowed
25006
+ RowDataCollection(const RowDataCollection &) = delete;
25007
+ };
25008
+
25009
+ } // namespace duckdb
25010
+
25011
+
25012
+
25013
+ namespace duckdb {
25014
+
25015
+ class RowLayout;
25016
+ struct LocalSortState;
25017
+
25018
+ struct SortConstants {
25019
+ static constexpr idx_t VALUES_PER_RADIX = 256;
25020
+ static constexpr idx_t MSD_RADIX_LOCATIONS = VALUES_PER_RADIX + 1;
25021
+ static constexpr idx_t INSERTION_SORT_THRESHOLD = 24;
25022
+ static constexpr idx_t MSD_RADIX_SORT_SIZE_THRESHOLD = 4;
25023
+ };
25024
+
25025
+ struct SortLayout {
25026
+ public:
25027
+ SortLayout() {
25028
+ }
25029
+ explicit SortLayout(const vector<BoundOrderByNode> &orders);
25030
+ SortLayout GetPrefixComparisonLayout(idx_t num_prefix_cols) const;
25031
+
25032
+ public:
25033
+ idx_t column_count;
25034
+ vector<OrderType> order_types;
25035
+ vector<OrderByNullType> order_by_null_types;
25036
+ vector<LogicalType> logical_types;
25037
+
25038
+ bool all_constant;
25039
+ vector<bool> constant_size;
25040
+ vector<idx_t> column_sizes;
25041
+ vector<idx_t> prefix_lengths;
25042
+ vector<BaseStatistics *> stats;
25043
+ vector<bool> has_null;
25044
+
25045
+ idx_t comparison_size;
25046
+ idx_t entry_size;
25047
+
25048
+ RowLayout blob_layout;
25049
+ unordered_map<idx_t, idx_t> sorting_to_blob_col;
25050
+ };
25051
+
25052
+ struct GlobalSortState { // Global state of a sort: receives sorted data from LocalSortStates and tracks merge progress
25053
+ public:
25054
+ GlobalSortState(BufferManager &buffer_manager, const vector<BoundOrderByNode> &orders, RowLayout &payload_layout);
25055
+
25056
+ //! Add the sorted data of a local state to this global state
25057
+ void AddLocalState(LocalSortState &local_sort_state);
25058
+ //! Prepares the GlobalSortState for the merge sort phase (after completing radix sort phase)
25059
+ void PrepareMergePhase();
25060
+ //! Initializes the global sort state for another round of merging
25061
+ void InitializeMergeRound();
25062
+ //! Completes the cascaded merge sort round.
25063
+ //! Pass true if you wish to use the radix data for further comparisons (defaults to false).
25064
+ void CompleteMergeRound(bool keep_radix_data = false);
25065
+ //! Print the sorted data to the console.
25066
+ void Print();
25067
+
25068
+ public:
25069
+ //! The lock for updating the global sort state
25070
+ mutex lock;
25071
+ //! The buffer manager (held by reference, not owned)
25072
+ BufferManager &buffer_manager;
25073
+
25074
+ //! Sorting and payload layouts (stored by value, const after construction)
25075
+ const SortLayout sort_layout;
25076
+ const RowLayout payload_layout;
25077
+
25078
+ //! Sorted data
25079
+ vector<unique_ptr<SortedBlock>> sorted_blocks;
25080
+ vector<vector<unique_ptr<SortedBlock>>> sorted_blocks_temp;
25081
+ unique_ptr<SortedBlock> odd_one_out;
25082
+
25083
+ //! Pinned heap data (if sorting in memory)
25084
+ vector<unique_ptr<RowDataBlock>> heap_blocks;
25085
+ vector<BufferHandle> pinned_blocks;
25086
+
25087
+ //! Capacity (number of rows) used to initialize blocks
25088
+ idx_t block_capacity;
25089
+ //! Whether we are doing an external sort
25090
+ bool external;
25091
+
25092
+ //! Progress in merge path stage
25093
+ idx_t pair_idx;
25094
+ idx_t num_pairs;
25095
+ idx_t l_start;
25096
+ idx_t r_start;
25097
+ };
25098
+
25099
+ struct LocalSortState { // Local sorting state: sinks DataChunks, sorts them, and is handed to GlobalSortState::AddLocalState
25100
+ public:
25101
+ LocalSortState();
25102
+
25103
+ //! Initialize the layouts and RowDataCollections
25104
+ void Initialize(GlobalSortState &global_sort_state, BufferManager &buffer_manager_p);
25105
+ //! Sink one DataChunk (sort columns plus payload columns) into the local sort state
25106
+ void SinkChunk(DataChunk &sort, DataChunk &payload);
25107
+ //! Size of accumulated data in bytes
25108
+ idx_t SizeInBytes() const;
25109
+ //! Sort the data accumulated so far
25110
+ void Sort(GlobalSortState &global_sort_state, bool reorder_heap);
25111
+ //! Concatenate the blocks held by a RowDataCollection into a single block
25112
+ static unique_ptr<RowDataBlock> ConcatenateBlocks(RowDataCollection &row_data);
25113
+
25114
+ private:
25115
+ //! Sorts the data in the newly created SortedBlock
25116
+ void SortInMemory();
25117
+ //! Re-order the local state after sorting
25118
+ void ReOrder(GlobalSortState &gstate, bool reorder_heap);
25119
+ //! Re-order a single SortedData object (and its heap) after sorting
25120
+ void ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataCollection &heap, GlobalSortState &gstate,
25121
+ bool reorder_heap);
25122
+
25123
+ public:
25124
+ //! Whether this local state has been initialized
25125
+ bool initialized;
25126
+ //! The buffer manager (raw pointer, not owned by this struct)
25127
+ BufferManager *buffer_manager;
25128
+ //! The sorting and payload layouts (raw pointers, not owned by this struct)
25129
+ const SortLayout *sort_layout;
25130
+ const RowLayout *payload_layout;
25131
+ //! Radix/memcmp sortable data
25132
+ unique_ptr<RowDataCollection> radix_sorting_data;
25133
+ //! Variable sized sorting data and accompanying heap
25134
+ unique_ptr<RowDataCollection> blob_sorting_data;
25135
+ unique_ptr<RowDataCollection> blob_sorting_heap;
25136
+ //! Payload data and accompanying heap
25137
+ unique_ptr<RowDataCollection> payload_data;
25138
+ unique_ptr<RowDataCollection> payload_heap;
25139
+ //! Sorted data
25140
+ vector<unique_ptr<SortedBlock>> sorted_blocks;
25141
+
25142
+ private:
25143
+ //! Selection vector and addresses for scattering the data to rows
25144
+ const SelectionVector &sel_ptr = *FlatVector::IncrementalSelectionVector();
25145
+ Vector addresses = Vector(LogicalType::POINTER);
25146
+ };
25147
+
25148
+ struct MergeSorter { // Merges SortedBlocks of a GlobalSortState; PerformInMergeRound is the only public operation
25149
+ public:
25150
+ MergeSorter(GlobalSortState &state, BufferManager &buffer_manager);
25151
+
25152
+ //! Finds and merges partitions until the current cascaded merge round is finished
25153
+ void PerformInMergeRound();
25154
+
25155
+ private:
25156
+ //! The global sorting state
25157
+ GlobalSortState &state;
25158
+ //! The buffer manager and the sorting layout (both held by reference)
25159
+ BufferManager &buffer_manager;
25160
+ const SortLayout &sort_layout;
25161
+
25162
+ //! The left and right reader
25163
+ unique_ptr<SBScanState> left;
25164
+ unique_ptr<SBScanState> right;
25165
+
25166
+ //! Input and output blocks (the result is a raw pointer, not owned here)
25167
+ unique_ptr<SortedBlock> left_input;
25168
+ unique_ptr<SortedBlock> right_input;
25169
+ SortedBlock *result;
25170
+
25171
+ private:
25172
+ //! Computes the left and right block that will be merged next (Merge Path partition)
25173
+ void GetNextPartition();
25174
+ //! Finds the boundary of the next partition using binary search; l_idx and r_idx are output parameters
25175
+ void GetIntersection(const idx_t diagonal, idx_t &l_idx, idx_t &r_idx);
25176
+ //! Compare values within SortedBlocks using a global index
25177
+ int CompareUsingGlobalIndex(SBScanState &l, SBScanState &r, const idx_t l_idx, const idx_t r_idx);
25178
+
25179
+ //! Finds the next partition and merges it
25180
+ void MergePartition();
25181
+
25182
+ //! Computes how the next 'count' tuples should be merged by setting the 'left_smaller' array
25183
+ void ComputeMerge(const idx_t &count, bool left_smaller[]);
25184
+
25185
+ //! Merges the radix sorting blocks according to the 'left_smaller' array
25186
+ void MergeRadix(const idx_t &count, const bool left_smaller[]);
25187
+ //! Merges SortedData according to the 'left_smaller' array
25188
+ void MergeData(SortedData &result_data, SortedData &l_data, SortedData &r_data, const idx_t &count,
25189
+ const bool left_smaller[], idx_t next_entry_sizes[], bool reset_indices);
25190
+ //! Merges constant size rows according to the 'left_smaller' array
25191
+ void MergeRows(data_ptr_t &l_ptr, idx_t &l_entry_idx, const idx_t &l_count, data_ptr_t &r_ptr, idx_t &r_entry_idx,
25192
+ const idx_t &r_count, RowDataBlock &target_block, data_ptr_t &target_ptr, const idx_t &entry_size,
25193
+ const bool left_smaller[], idx_t &copied, const idx_t &count);
25194
+ //! Flushes constant size rows into the result
25195
+ void FlushRows(data_ptr_t &source_ptr, idx_t &source_entry_idx, const idx_t &source_count,
25196
+ RowDataBlock &target_block, data_ptr_t &target_ptr, const idx_t &entry_size, idx_t &copied,
25197
+ const idx_t &count);
25198
+ //! Flushes blob rows and accompanying heap
25199
+ void FlushBlobs(const RowLayout &layout, const idx_t &source_count, data_ptr_t &source_data_ptr,
25200
+ idx_t &source_entry_idx, data_ptr_t &source_heap_ptr, RowDataBlock &target_data_block,
25201
+ data_ptr_t &target_data_ptr, RowDataBlock &target_heap_block, BufferHandle &target_heap_handle,
25202
+ data_ptr_t &target_heap_ptr, idx_t &copied, const idx_t &count);
25203
+ };
25204
+
25205
+ } // namespace duckdb
25206
+
25207
+
25208
+
25209
+
25210
+ //===----------------------------------------------------------------------===//
25211
+ // DuckDB
25212
+ //
25213
+ // duckdb/storage/meta_block_writer.hpp
25214
+ //
25215
+ //
25216
+ //===----------------------------------------------------------------------===//
25217
+
25218
+
25219
+
25220
+
25221
+
25222
+
25223
+
25224
+
25225
+
25226
+ namespace duckdb {
25227
+ class DatabaseInstance;
25228
+
25229
+ //! This class is responsible for writing metadata to disk
25230
+ class MetaBlockWriter : public Serializer {
25231
+ public:
25232
+ MetaBlockWriter(DatabaseInstance &db, block_id_t initial_block_id = INVALID_BLOCK);
25233
+ ~MetaBlockWriter() override;
25234
+
25235
+ DatabaseInstance &db;
23393
25236
  unique_ptr<Block> block;
23394
25237
  set<block_id_t> written_blocks;
23395
25238
  idx_t offset;
@@ -23407,6 +25250,7 @@ protected:
23407
25250
  } // namespace duckdb
23408
25251
 
23409
25252
 
25253
+
23410
25254
  namespace duckdb {
23411
25255
 
23412
25256
  class ClientContext;
@@ -23469,6 +25313,10 @@ public:
23469
25313
 
23470
25314
  //! Insert data into the index. Does not lock the index.
23471
25315
  virtual bool Insert(IndexLock &lock, DataChunk &input, Vector &row_identifiers) = 0;
25316
+ //! Construct an index from sorted chunks of keys.
25317
+ virtual void ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator &allocator) = 0;
25318
+ //! Merge other_index into this index.
25319
+ void MergeIndexes(Index *other_index);
23472
25320
 
23473
25321
  //! Returns true if the index is affected by updates on the specified column ids, and false otherwise
23474
25322
  bool IndexIsUpdated(const vector<column_t> &column_ids) const;
@@ -23558,224 +25406,36 @@ struct BoundCreateTableInfo {
23558
25406
  //! Column dependency manager of the table
23559
25407
  ColumnDependencyManager column_dependency_manager;
23560
25408
  //! List of constraints on the table
23561
- vector<unique_ptr<Constraint>> constraints;
23562
- //! List of bound constraints on the table
23563
- vector<unique_ptr<BoundConstraint>> bound_constraints;
23564
- //! Bound default values
23565
- vector<unique_ptr<Expression>> bound_defaults;
23566
- //! Dependents of the table (in e.g. default values)
23567
- unordered_set<CatalogEntry *> dependencies;
23568
- //! The existing table data on disk (if any)
23569
- unique_ptr<PersistentTableData> data;
23570
- //! CREATE TABLE from QUERY
23571
- unique_ptr<LogicalOperator> query;
23572
- //! Indexes created by this table <Block_ID, Offset>
23573
- vector<BlockPointer> indexes;
23574
-
23575
- //! Serializes a BoundCreateTableInfo to a stand-alone binary blob
23576
- void Serialize(Serializer &serializer) const;
23577
- //! Deserializes a blob back into a BoundCreateTableInfo
23578
- static unique_ptr<BoundCreateTableInfo> Deserialize(Deserializer &source, PlanDeserializationState &state);
23579
-
23580
- CreateTableInfo &Base() {
23581
- D_ASSERT(base);
23582
- return (CreateTableInfo &)*base;
23583
- }
23584
- };
23585
-
23586
- } // namespace duckdb
23587
- //===----------------------------------------------------------------------===//
23588
- // DuckDB
23589
- //
23590
- // duckdb/parser/constraints/not_null_constraint.hpp
23591
- //
23592
- //
23593
- //===----------------------------------------------------------------------===//
23594
-
23595
-
23596
-
23597
-
23598
-
23599
- namespace duckdb {
23600
-
23601
- class NotNullConstraint : public Constraint {
23602
- public:
23603
- DUCKDB_API explicit NotNullConstraint(column_t index);
23604
- DUCKDB_API ~NotNullConstraint() override;
23605
-
23606
- //! Column index this constraint pertains to
23607
- column_t index;
23608
-
23609
- public:
23610
- DUCKDB_API string ToString() const override;
23611
-
23612
- DUCKDB_API unique_ptr<Constraint> Copy() const override;
23613
-
23614
- //! Serialize to a stand-alone binary blob
23615
- DUCKDB_API void Serialize(FieldWriter &writer) const override;
23616
- //! Deserializes a NotNullConstraint
23617
- DUCKDB_API static unique_ptr<Constraint> Deserialize(FieldReader &source);
23618
- };
23619
-
23620
- } // namespace duckdb
23621
- //===----------------------------------------------------------------------===//
23622
- // DuckDB
23623
- //
23624
- // duckdb/storage/data_table.hpp
23625
- //
23626
- //
23627
- //===----------------------------------------------------------------------===//
23628
-
23629
-
23630
-
23631
-
23632
-
23633
-
23634
-
23635
-
23636
-
23637
-
23638
- //===----------------------------------------------------------------------===//
23639
- // DuckDB
23640
- //
23641
- // duckdb/storage/statistics/column_statistics.hpp
23642
- //
23643
- //
23644
- //===----------------------------------------------------------------------===//
23645
-
23646
-
23647
-
23648
-
23649
-
23650
- namespace duckdb {
23651
-
23652
- class ColumnStatistics {
23653
- public:
23654
- explicit ColumnStatistics(unique_ptr<BaseStatistics> stats_p);
23655
-
23656
- unique_ptr<BaseStatistics> stats;
23657
-
23658
- public:
23659
- static shared_ptr<ColumnStatistics> CreateEmptyStats(const LogicalType &type);
23660
- };
23661
-
23662
- } // namespace duckdb
23663
-
23664
- //===----------------------------------------------------------------------===//
23665
- // DuckDB
23666
- //
23667
- // duckdb/storage/table/column_segment.hpp
23668
- //
23669
- //
23670
- //===----------------------------------------------------------------------===//
23671
-
23672
-
23673
-
23674
-
23675
-
23676
-
23677
-
23678
- //===----------------------------------------------------------------------===//
23679
- // DuckDB
23680
- //
23681
- // duckdb/storage/buffer_manager.hpp
23682
- //
23683
- //
23684
- //===----------------------------------------------------------------------===//
23685
-
23686
-
23687
-
23688
-
23689
-
23690
-
23691
-
23692
-
23693
- //===----------------------------------------------------------------------===//
23694
- // DuckDB
23695
- //
23696
- // duckdb/storage/buffer/block_handle.hpp
23697
- //
23698
- //
23699
- //===----------------------------------------------------------------------===//
23700
-
23701
-
23702
-
23703
-
23704
-
23705
-
23706
-
23707
-
23708
- namespace duckdb {
23709
- class BufferHandle;
23710
- class BufferManager;
23711
- class DatabaseInstance;
23712
- class FileBuffer;
23713
-
23714
- enum class BlockState : uint8_t { BLOCK_UNLOADED = 0, BLOCK_LOADED = 1 };
23715
-
23716
- class BlockHandle {
23717
- friend struct BufferEvictionNode;
23718
- friend class BufferHandle;
23719
- friend class BufferManager;
23720
-
23721
- public:
23722
- BlockHandle(DatabaseInstance &db, block_id_t block_id);
23723
- BlockHandle(DatabaseInstance &db, block_id_t block_id, unique_ptr<FileBuffer> buffer, bool can_destroy,
23724
- idx_t block_size);
23725
- ~BlockHandle();
23726
-
23727
- DatabaseInstance &db;
23728
-
23729
- public:
23730
- block_id_t BlockId() {
23731
- return block_id;
23732
- }
23733
-
23734
- int32_t Readers() const {
23735
- return readers;
23736
- }
25409
+ vector<unique_ptr<Constraint>> constraints;
25410
+ //! List of bound constraints on the table
25411
+ vector<unique_ptr<BoundConstraint>> bound_constraints;
25412
+ //! Bound default values
25413
+ vector<unique_ptr<Expression>> bound_defaults;
25414
+ //! Dependents of the table (in e.g. default values)
25415
+ unordered_set<CatalogEntry *> dependencies;
25416
+ //! The existing table data on disk (if any)
25417
+ unique_ptr<PersistentTableData> data;
25418
+ //! CREATE TABLE from QUERY
25419
+ unique_ptr<LogicalOperator> query;
25420
+ //! Indexes created by this table <Block_ID, Offset>
25421
+ vector<BlockPointer> indexes;
23737
25422
 
23738
- inline bool IsSwizzled() const {
23739
- return !unswizzled;
23740
- }
25423
+ //! Serializes a BoundCreateTableInfo to a stand-alone binary blob
25424
+ void Serialize(Serializer &serializer) const;
25425
+ //! Deserializes a blob back into a BoundCreateTableInfo
25426
+ static unique_ptr<BoundCreateTableInfo> Deserialize(Deserializer &source, PlanDeserializationState &state);
23741
25427
 
23742
- inline void SetSwizzling(const char *unswizzler) {
23743
- unswizzled = unswizzler;
25428
+ CreateTableInfo &Base() {
25429
+ D_ASSERT(base);
25430
+ return (CreateTableInfo &)*base;
23744
25431
  }
23745
-
23746
- private:
23747
- static BufferHandle Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileBuffer> buffer = nullptr);
23748
- unique_ptr<FileBuffer> UnloadAndTakeBlock();
23749
- void Unload();
23750
- bool CanUnload();
23751
-
23752
- //! The block-level lock
23753
- mutex lock;
23754
- //! Whether or not the block is loaded/unloaded
23755
- BlockState state;
23756
- //! Amount of concurrent readers
23757
- atomic<int32_t> readers;
23758
- //! The block id of the block
23759
- const block_id_t block_id;
23760
- //! Pointer to loaded data (if any)
23761
- unique_ptr<FileBuffer> buffer;
23762
- //! Internal eviction timestamp
23763
- atomic<idx_t> eviction_timestamp;
23764
- //! Whether or not the buffer can be destroyed (only used for temporary buffers)
23765
- const bool can_destroy;
23766
- //! The memory usage of the block
23767
- idx_t memory_usage;
23768
- //! Does the block contain any memory pointers?
23769
- const char *unswizzled;
23770
25432
  };
23771
25433
 
23772
25434
  } // namespace duckdb
23773
-
23774
-
23775
25435
  //===----------------------------------------------------------------------===//
23776
25436
  // DuckDB
23777
25437
  //
23778
- // duckdb/storage/buffer/managed_buffer.hpp
25438
+ // duckdb/parser/constraints/not_null_constraint.hpp
23779
25439
  //
23780
25440
  //
23781
25441
  //===----------------------------------------------------------------------===//
@@ -23784,150 +25444,85 @@ private:
23784
25444
 
23785
25445
 
23786
25446
 
25447
+ namespace duckdb {
23787
25448
 
25449
+ class NotNullConstraint : public Constraint {
25450
+ public:
25451
+ DUCKDB_API explicit NotNullConstraint(column_t index);
25452
+ DUCKDB_API ~NotNullConstraint() override;
23788
25453
 
23789
- namespace duckdb {
23790
- class DatabaseInstance;
25454
+ //! Column index this constraint pertains to
25455
+ column_t index;
23791
25456
 
23792
- //! Managed buffer is an arbitrarily-sized buffer that is at least of size >= BLOCK_SIZE
23793
- class ManagedBuffer : public FileBuffer {
23794
25457
  public:
23795
- ManagedBuffer(DatabaseInstance &db, idx_t size, bool can_destroy, block_id_t id);
23796
- ManagedBuffer(DatabaseInstance &db, FileBuffer &source, bool can_destroy, block_id_t id);
25458
+ DUCKDB_API string ToString() const override;
23797
25459
 
23798
- DatabaseInstance &db;
23799
- //! Whether or not the managed buffer can be freely destroyed when unpinned.
23800
- //! - If can_destroy is true, the buffer can be destroyed when unpinned and hence be unrecoverable. After being
23801
- //! destroyed, Pin() will return false.
23802
- //! - If can_destroy is false, the buffer will instead be written to a temporary file on disk when unloaded from
23803
- //! memory, and read back into memory when Pin() is called.
23804
- bool can_destroy;
23805
- //! The internal id of the buffer
23806
- block_id_t id;
25460
+ DUCKDB_API unique_ptr<Constraint> Copy() const override;
25461
+
25462
+ //! Serialize to a stand-alone binary blob
25463
+ DUCKDB_API void Serialize(FieldWriter &writer) const override;
25464
+ //! Deserializes a NotNullConstraint
25465
+ DUCKDB_API static unique_ptr<Constraint> Deserialize(FieldReader &source);
23807
25466
  };
23808
25467
 
23809
25468
  } // namespace duckdb
25469
+ //===----------------------------------------------------------------------===//
25470
+ // DuckDB
25471
+ //
25472
+ // duckdb/storage/data_table.hpp
25473
+ //
25474
+ //
25475
+ //===----------------------------------------------------------------------===//
23810
25476
 
23811
25477
 
23812
25478
 
23813
- namespace duckdb {
23814
- class DatabaseInstance;
23815
- class TemporaryDirectoryHandle;
23816
- struct EvictionQueue;
23817
25479
 
23818
- //! The buffer manager is in charge of handling memory management for the database. It hands out memory buffers that can
23819
- //! be used by the database internally.
23820
- class BufferManager {
23821
- friend class BufferHandle;
23822
- friend class BlockHandle;
23823
25480
 
23824
- public:
23825
- BufferManager(DatabaseInstance &db, string temp_directory, idx_t maximum_memory);
23826
- ~BufferManager();
23827
25481
 
23828
- //! Register a block with the given block id in the base file
23829
- shared_ptr<BlockHandle> RegisterBlock(block_id_t block_id);
23830
25482
 
23831
- //! Register an in-memory buffer of arbitrary size, as long as it is >= BLOCK_SIZE. can_destroy signifies whether or
23832
- //! not the buffer can be destroyed when unpinned, or whether or not it needs to be written to a temporary file so
23833
- //! it can be reloaded. The resulting buffer will already be allocated, but needs to be pinned in order to be used.
23834
- shared_ptr<BlockHandle> RegisterMemory(idx_t block_size, bool can_destroy);
23835
25483
 
23836
- //! Convert an existing in-memory buffer into a persistent disk-backed block
23837
- shared_ptr<BlockHandle> ConvertToPersistent(BlockManager &block_manager, block_id_t block_id,
23838
- shared_ptr<BlockHandle> old_block);
23839
25484
 
23840
- //! Allocate an in-memory buffer with a single pin.
23841
- //! The allocated memory is released when the buffer handle is destroyed.
23842
- DUCKDB_API BufferHandle Allocate(idx_t block_size);
23843
25485
 
23844
- //! Reallocate an in-memory buffer that is pinned.
23845
- void ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size);
25486
+ //===----------------------------------------------------------------------===//
25487
+ // DuckDB
25488
+ //
25489
+ // duckdb/storage/statistics/column_statistics.hpp
25490
+ //
25491
+ //
25492
+ //===----------------------------------------------------------------------===//
23846
25493
 
23847
- BufferHandle Pin(shared_ptr<BlockHandle> &handle);
23848
- void Unpin(shared_ptr<BlockHandle> &handle);
23849
25494
 
23850
- void UnregisterBlock(block_id_t block_id, bool can_destroy);
23851
25495
 
23852
- //! Set a new memory limit to the buffer manager, throws an exception if the new limit is too low and not enough
23853
- //! blocks can be evicted
23854
- void SetLimit(idx_t limit = (idx_t)-1);
23855
25496
 
23856
- static BufferManager &GetBufferManager(ClientContext &context);
23857
- DUCKDB_API static BufferManager &GetBufferManager(DatabaseInstance &db);
23858
25497
 
23859
- idx_t GetUsedMemory() {
23860
- return current_memory;
23861
- }
23862
- idx_t GetMaxMemory() {
23863
- return maximum_memory;
23864
- }
25498
+ namespace duckdb {
23865
25499
 
23866
- const string &GetTemporaryDirectory() {
23867
- return temp_directory;
23868
- }
25500
+ class ColumnStatistics {
25501
+ public:
25502
+ explicit ColumnStatistics(unique_ptr<BaseStatistics> stats_p);
23869
25503
 
23870
- void SetTemporaryDirectory(string new_dir);
25504
+ unique_ptr<BaseStatistics> stats;
23871
25505
 
23872
- DUCKDB_API Allocator &GetBufferAllocator();
25506
+ public:
25507
+ static shared_ptr<ColumnStatistics> CreateEmptyStats(const LogicalType &type);
25508
+ };
23873
25509
 
23874
- private:
23875
- //! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible
23876
- //! (i.e. not enough blocks could be evicted)
23877
- //! If the "buffer" argument is specified AND the system can find a buffer to re-use for the given allocation size
23878
- //! "buffer" will be made to point to the re-usable memory. Note that this is not guaranteed.
23879
- bool EvictBlocks(idx_t extra_memory, idx_t memory_limit, unique_ptr<FileBuffer> *buffer = nullptr);
25510
+ } // namespace duckdb
23880
25511
 
23881
- //! Garbage collect eviction queue
23882
- void PurgeQueue();
25512
+ //===----------------------------------------------------------------------===//
25513
+ // DuckDB
25514
+ //
25515
+ // duckdb/storage/table/column_segment.hpp
25516
+ //
25517
+ //
25518
+ //===----------------------------------------------------------------------===//
23883
25519
 
23884
- //! Write a temporary buffer to disk
23885
- void WriteTemporaryBuffer(ManagedBuffer &buffer);
23886
- //! Read a temporary buffer from disk
23887
- unique_ptr<FileBuffer> ReadTemporaryBuffer(block_id_t id, unique_ptr<FileBuffer> buffer = nullptr);
23888
- //! Get the path of the temporary buffer
23889
- string GetTemporaryPath(block_id_t id);
23890
25520
 
23891
- void DeleteTemporaryFile(block_id_t id);
23892
25521
 
23893
- void RequireTemporaryDirectory();
23894
25522
 
23895
- void AddToEvictionQueue(shared_ptr<BlockHandle> &handle);
23896
25523
 
23897
- string InMemoryWarning();
23898
25524
 
23899
- static data_ptr_t BufferAllocatorAllocate(PrivateAllocatorData *private_data, idx_t size);
23900
- static void BufferAllocatorFree(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size);
23901
- static data_ptr_t BufferAllocatorRealloc(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t old_size,
23902
- idx_t size);
23903
25525
 
23904
- private:
23905
- //! The database instance
23906
- DatabaseInstance &db;
23907
- //! The lock for changing the memory limit
23908
- mutex limit_lock;
23909
- //! The current amount of memory that is occupied by the buffer manager (in bytes)
23910
- atomic<idx_t> current_memory;
23911
- //! The maximum amount of memory that the buffer manager can keep (in bytes)
23912
- atomic<idx_t> maximum_memory;
23913
- //! The directory name where temporary files are stored
23914
- string temp_directory;
23915
- //! Lock for creating the temp handle
23916
- mutex temp_handle_lock;
23917
- //! Handle for the temporary directory
23918
- unique_ptr<TemporaryDirectoryHandle> temp_directory_handle;
23919
- //! The lock for the set of blocks
23920
- mutex blocks_lock;
23921
- //! A mapping of block id -> BlockHandle
23922
- unordered_map<block_id_t, weak_ptr<BlockHandle>> blocks;
23923
- //! Eviction queue
23924
- unique_ptr<EvictionQueue> queue;
23925
- //! The temporary id used for managed buffers
23926
- atomic<block_id_t> temporary_id;
23927
- //! Allocator associated with the buffer manager, that passes all allocations through this buffer manager
23928
- Allocator buffer_allocator;
23929
- };
23930
- } // namespace duckdb
23931
25526
 
23932
25527
 
23933
25528
 
@@ -24369,9 +25964,6 @@ public:
24369
25964
  void UpdateColumn(TableCatalogEntry &table, ClientContext &context, Vector &row_ids,
24370
25965
  const vector<column_t> &column_path, DataChunk &updates);
24371
25966
 
24372
- //! Add an index to the DataTable
24373
- void AddIndex(unique_ptr<Index> index, const vector<unique_ptr<Expression>> &expressions);
24374
-
24375
25967
  //! Begin appending structs to this table, obtaining necessary locks, etc
24376
25968
  void InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count);
24377
25969
  //! Append a chunk to the table using the AppendState obtained from BeginAppend
@@ -24400,6 +25992,9 @@ public:
24400
25992
  void SetAsRoot() {
24401
25993
  this->is_root = true;
24402
25994
  }
25995
+ bool IsRoot() const { // read-only accessor for the flag set by SetAsRoot(); const so it can be called on const tables
25996
+ return this->is_root;
25997
+ }
24403
25998
 
24404
25999
  //! Get statistics of a physical column within the table
24405
26000
  unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id);
@@ -24419,6 +26014,11 @@ public:
24419
26014
  vector<vector<Value>> GetStorageInfo();
24420
26015
  static bool IsForeignKeyIndex(const vector<idx_t> &fk_keys, Index &index, ForeignKeyType fk_type);
24421
26016
 
26017
+ //! Initializes a special scan that is used to create an index on the table, it keeps locks on the table
26018
+ void InitializeCreateIndexScan(CreateIndexScanState &state, const vector<column_t> &column_ids);
26019
+ //! Scans the next chunk for the CREATE INDEX operator
26020
+ bool CreateIndexScan(TableScanState &state, DataChunk &result, TableScanType type);
26021
+
24422
26022
  private:
24423
26023
  //! Verify the new added constraints against current persistent&local data
24424
26024
  void VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint);
@@ -24436,10 +26036,6 @@ private:
24436
26036
  idx_t max_row);
24437
26037
  bool ScanBaseTable(Transaction &transaction, DataChunk &result, TableScanState &state);
24438
26038
 
24439
- //! The CreateIndexScan is a special scan that is used to create an index on the table, it keeps locks on the table
24440
- void InitializeCreateIndexScan(CreateIndexScanState &state, const vector<column_t> &column_ids);
24441
- bool ScanCreateIndex(CreateIndexScanState &state, DataChunk &result, TableScanType type);
24442
-
24443
26039
  private:
24444
26040
  //! Lock for appending entries to the table
24445
26041
  mutex append_lock;
@@ -27884,6 +29480,53 @@ public:
27884
29480
 
27885
29481
 
27886
29482
 
29483
+ //===----------------------------------------------------------------------===//
29484
+ // DuckDB
29485
+ //
29486
+ // duckdb/parser/parsed_data/alter_function_info.hpp
29487
+ //
29488
+ //
29489
+ //===----------------------------------------------------------------------===//
29490
+
29491
+
29492
+
29493
+
29494
+
29495
+
29496
+
29497
+ namespace duckdb {
29498
+
29499
+ //===--------------------------------------------------------------------===//
29500
+ // Alter Function
29501
+ //===--------------------------------------------------------------------===//
29502
+ enum class AlterFunctionType : uint8_t { INVALID = 0, ADD_FUNCTION_OVERLOADS = 1 }; // kind of function alteration; stored in AlterFunctionInfo::alter_function_type
29503
+
29504
+ struct AlterFunctionInfo : public AlterInfo { // Base AlterInfo for alterations of functions; the concrete kind is in alter_function_type
29505
+ AlterFunctionInfo(AlterFunctionType type, string schema, string name, bool if_exists);
29506
+ ~AlterFunctionInfo() override; // 'override' already implies virtual; matches ~AddFunctionOverloadInfo()
29507
+
29508
+ AlterFunctionType alter_function_type; // which alteration this info describes (see AlterFunctionType)
29509
+
29510
+ public:
29511
+ CatalogType GetCatalogType() const override;
29512
+ void Serialize(FieldWriter &writer) const override; // writes this info through the given FieldWriter
29513
+ static unique_ptr<AlterInfo> Deserialize(FieldReader &reader); // reads an AlterInfo back from the given FieldReader
29514
+ };
29515
+
29516
+ //===--------------------------------------------------------------------===//
29517
+ // AddFunctionOverloadInfo
29518
+ //===--------------------------------------------------------------------===//
29519
+ struct AddFunctionOverloadInfo : public AlterFunctionInfo { // AlterFunctionInfo that carries a set of scalar function overloads
29520
+ AddFunctionOverloadInfo(string schema, string name, bool if_exists, ScalarFunctionSet new_overloads);
29521
+ ~AddFunctionOverloadInfo() override;
29522
+
29523
+ ScalarFunctionSet new_overloads; // the overloads passed to the constructor; presumably the ones to add to the existing function (confirm in the catalog code)
29524
+
29525
+ public:
29526
+ unique_ptr<AlterInfo> Copy() const override; // returns a copy of this info as an owning pointer
29527
+ };
29528
+
29529
+ } // namespace duckdb
27887
29530
  //===----------------------------------------------------------------------===//
27888
29531
  // DuckDB
27889
29532
  //
@@ -28200,6 +29843,7 @@ public:
28200
29843
 
28201
29844
 
28202
29845
 
29846
+
28203
29847
  namespace duckdb {
28204
29848
 
28205
29849
  struct CreateIndexInfo : public CreateInfo {
@@ -28218,7 +29862,12 @@ struct CreateIndexInfo : public CreateInfo {
28218
29862
  vector<unique_ptr<ParsedExpression>> expressions;
28219
29863
  vector<unique_ptr<ParsedExpression>> parsed_expressions;
28220
29864
 
28221
- vector<idx_t> column_ids;
29865
+ //! Types used for the CREATE INDEX scan
29866
+ vector<LogicalType> scan_types;
29867
+ //! The names of the columns, used for the CREATE INDEX scan
29868
+ vector<string> names;
29869
+ //! Column IDs needed for index creation
29870
+ vector<column_t> column_ids;
28222
29871
 
28223
29872
  protected:
28224
29873
  void SerializeInternal(Serializer &serializer) const override;
@@ -28453,14 +30102,16 @@ struct ShowSelectInfo : public ParseInfo {
28453
30102
  namespace duckdb {
28454
30103
 
28455
30104
  struct CreateTypeInfo : public CreateInfo {
28456
-
28457
30105
  CreateTypeInfo() : CreateInfo(CatalogType::TYPE_ENTRY) {
28458
30106
  }
30107
+ CreateTypeInfo(string name_p, LogicalType type_p) // takes name and type by value and moves them into the members
30108
+ : CreateInfo(CatalogType::TYPE_ENTRY), name(move(name_p)), type(move(type_p)) {
30109
+ }
28459
30110
 
28460
30111
  //! Name of the Type
28461
30112
  string name;
28462
30113
  //! Logical Type
28463
- LogicalType type; // Shouldn't this be named `logical_type`? (shadows a parent member `type`)
30114
+ LogicalType type;
28464
30115
 
28465
30116
  public:
28466
30117
  unique_ptr<CreateInfo> Copy() const override {