duckdb 0.3.5-dev411.0 → 0.3.5-dev457.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as it appears in its respective public registry.
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "43bb9d9ea"
15
- #define DUCKDB_VERSION "v0.3.5-dev411"
14
+ #define DUCKDB_SOURCE_ID "e9ab0d515"
15
+ #define DUCKDB_VERSION "v0.3.5-dev457"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -3978,6 +3978,9 @@ struct SequenceVector {
3978
3978
  extern "C" {
3979
3979
  #endif
3980
3980
 
3981
+ #ifndef ARROW_C_DATA_INTERFACE
3982
+ #define ARROW_C_DATA_INTERFACE
3983
+
3981
3984
  #define ARROW_FLAG_DICTIONARY_ORDERED 1
3982
3985
  #define ARROW_FLAG_NULLABLE 2
3983
3986
  #define ARROW_FLAG_MAP_KEYS_SORTED 4
@@ -4014,7 +4017,10 @@ struct ArrowArray {
4014
4017
  // Opaque producer-specific data
4015
4018
  void *private_data;
4016
4019
  };
4020
+ #endif
4017
4021
 
4022
+ #ifndef ARROW_C_STREAM_INTERFACE
4023
+ #define ARROW_C_STREAM_INTERFACE
4018
4024
  // EXPERIMENTAL
4019
4025
  struct ArrowArrayStream {
4020
4026
  // Callback to get the stream type
@@ -4039,6 +4045,7 @@ struct ArrowArrayStream {
4039
4045
  // Opaque producer-specific data
4040
4046
  void *private_data;
4041
4047
  };
4048
+ #endif
4042
4049
 
4043
4050
  #ifdef __cplusplus
4044
4051
  }
@@ -7161,32 +7168,46 @@ class FieldWriter;
7161
7168
  class FieldReader;
7162
7169
  class Vector;
7163
7170
  class ValidityStatistics;
7171
+ class DistinctStatistics;
7172
+ struct VectorData;
7173
+
7174
+ enum StatisticsType { LOCAL_STATS = 0, GLOBAL_STATS = 1 };
7164
7175
 
7165
7176
  class BaseStatistics {
7166
7177
  public:
7167
- explicit BaseStatistics(LogicalType type);
7178
+ BaseStatistics(LogicalType type, StatisticsType stats_type);
7168
7179
  virtual ~BaseStatistics();
7169
7180
 
7170
7181
  //! The type of the logical segment
7171
7182
  LogicalType type;
7172
7183
  //! The validity stats of the column (if any)
7173
7184
  unique_ptr<BaseStatistics> validity_stats;
7185
+ //! The approximate count distinct stats of the column (if any)
7186
+ unique_ptr<BaseStatistics> distinct_stats;
7187
+ //! Whether these are 'global' stats, i.e., over a whole table, or just over a segment
7188
+ //! Some statistics are more expensive to keep, therefore we only keep them globally
7189
+ StatisticsType stats_type;
7174
7190
 
7175
7191
  public:
7192
+ static unique_ptr<BaseStatistics> CreateEmpty(LogicalType type, StatisticsType stats_type);
7193
+
7176
7194
  bool CanHaveNull() const;
7177
7195
  bool CanHaveNoNull() const;
7178
7196
 
7197
+ void UpdateDistinctStatistics(Vector &v, idx_t count);
7198
+
7179
7199
  virtual bool IsConstant() const {
7180
7200
  return false;
7181
7201
  }
7182
7202
 
7183
- static unique_ptr<BaseStatistics> CreateEmpty(LogicalType type);
7184
-
7185
7203
  virtual void Merge(const BaseStatistics &other);
7186
7204
 
7187
7205
  virtual unique_ptr<BaseStatistics> Copy() const;
7188
- void Serialize(Serializer &serializer) const;
7206
+ void CopyBase(const BaseStatistics &orig);
7207
+
7208
+ virtual void Serialize(Serializer &serializer) const;
7189
7209
  virtual void Serialize(FieldWriter &writer) const;
7210
+
7190
7211
  static unique_ptr<BaseStatistics> Deserialize(Deserializer &source, LogicalType type);
7191
7212
 
7192
7213
  //! Verify that a vector does not violate the statistics
@@ -7194,6 +7215,9 @@ public:
7194
7215
  void Verify(Vector &vector, idx_t count) const;
7195
7216
 
7196
7217
  virtual string ToString() const;
7218
+
7219
+ protected:
7220
+ void InitializeBase();
7197
7221
  };
7198
7222
 
7199
7223
  } // namespace duckdb
@@ -21653,7 +21677,7 @@ class Vector;
21653
21677
 
21654
21678
  class ValidityStatistics : public BaseStatistics {
21655
21679
  public:
21656
- ValidityStatistics(bool has_null = false, bool has_no_null = true);
21680
+ explicit ValidityStatistics(bool has_null = false, bool has_no_null = true);
21657
21681
 
21658
21682
  //! Whether or not the segment can contain NULL values
21659
21683
  bool has_null;
@@ -21666,8 +21690,10 @@ public:
21666
21690
  bool IsConstant() const override;
21667
21691
 
21668
21692
  unique_ptr<BaseStatistics> Copy() const override;
21693
+
21669
21694
  void Serialize(FieldWriter &writer) const override;
21670
- static unique_ptr<BaseStatistics> Deserialize(FieldReader &reader);
21695
+ static unique_ptr<ValidityStatistics> Deserialize(FieldReader &reader);
21696
+
21671
21697
  void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const override;
21672
21698
 
21673
21699
  static unique_ptr<BaseStatistics> Combine(const unique_ptr<BaseStatistics> &lstats,
@@ -21686,7 +21712,7 @@ public:
21686
21712
  constexpr static uint32_t MAX_STRING_MINMAX_SIZE = 8;
21687
21713
 
21688
21714
  public:
21689
- explicit StringStatistics(LogicalType type);
21715
+ explicit StringStatistics(LogicalType type, StatisticsType stats_type);
21690
21716
 
21691
21717
  //! The minimum value of the segment, potentially truncated
21692
21718
  data_t min[MAX_STRING_MINMAX_SIZE];
@@ -21780,8 +21806,8 @@ namespace duckdb {
21780
21806
 
21781
21807
  class NumericStatistics : public BaseStatistics {
21782
21808
  public:
21783
- explicit NumericStatistics(LogicalType type);
21784
- NumericStatistics(LogicalType type, Value min, Value max);
21809
+ explicit NumericStatistics(LogicalType type, StatisticsType stats_type);
21810
+ NumericStatistics(LogicalType type, Value min, Value max, StatisticsType stats_type);
21785
21811
 
21786
21812
  //! The minimum value of the segment
21787
21813
  Value min;