duckdb 0.4.1-dev696.0 → 0.4.1-dev723.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "ef9aeaa61"
15
- #define DUCKDB_VERSION "v0.4.1-dev696"
14
+ #define DUCKDB_SOURCE_ID "2c623978d"
15
+ #define DUCKDB_VERSION "v0.4.1-dev723"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -3892,7 +3892,7 @@ private:
3892
3892
 
3893
3893
  namespace duckdb {
3894
3894
 
3895
- struct VectorData {
3895
+ struct UnifiedVectorFormat {
3896
3896
  const SelectionVector *sel;
3897
3897
  data_ptr_t data;
3898
3898
  ValidityMask validity;
@@ -3986,10 +3986,15 @@ public:
3986
3986
  DUCKDB_API void Print();
3987
3987
 
3988
3988
  //! Flatten the vector, removing any compression and turning it into a FLAT_VECTOR
3989
- DUCKDB_API void Normalify(idx_t count);
3990
- DUCKDB_API void Normalify(const SelectionVector &sel, idx_t count);
3991
- //! Obtains a selection vector and data pointer through which the data of this vector can be accessed
3992
- DUCKDB_API void Orrify(idx_t count, VectorData &data);
3989
+ DUCKDB_API void Flatten(idx_t count);
3990
+ DUCKDB_API void Flatten(const SelectionVector &sel, idx_t count);
3991
+ //! Creates a UnifiedVectorFormat of a vector
3992
+ //! The UnifiedVectorFormat allows efficient reading of vectors regardless of their vector type
3993
+ //! It contains (1) a data pointer, (2) a validity mask, and (3) a selection vector
3994
+ //! Access to the individual vector elements can be performed through data_pointer[sel_idx[i]]/validity[sel_idx[i]]
3995
+ //! The most common vector types (flat, constant & dictionary) can be converted to the canonical format "for free"
3996
+ //! ToUnifiedFormat was originally called Orrify, as a tribute to Orri Erling who came up with it
3997
+ DUCKDB_API void ToUnifiedFormat(idx_t count, UnifiedVectorFormat &data);
3993
3998
 
3994
3999
  //! Turn the vector into a sequence vector
3995
4000
  DUCKDB_API void Sequence(int64_t start, int64_t increment);
@@ -4505,9 +4510,9 @@ public:
4505
4510
  DUCKDB_API void Fuse(DataChunk &other);
4506
4511
 
4507
4512
  //! Turn all the vectors from the chunk into flat vectors
4508
- DUCKDB_API void Normalify();
4513
+ DUCKDB_API void Flatten();
4509
4514
 
4510
- DUCKDB_API unique_ptr<VectorData[]> Orrify();
4515
+ DUCKDB_API unique_ptr<UnifiedVectorFormat[]> ToUnifiedFormat();
4511
4516
 
4512
4517
  DUCKDB_API void Slice(const SelectionVector &sel_vector, idx_t count);
4513
4518
  DUCKDB_API void Slice(DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset = 0);
@@ -4919,10 +4924,10 @@ struct BinaryExecutor {
4919
4924
 
4920
4925
  template <class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE, class OPWRAPPER, class OP, class FUNC>
4921
4926
  static void ExecuteGeneric(Vector &left, Vector &right, Vector &result, idx_t count, FUNC fun) {
4922
- VectorData ldata, rdata;
4927
+ UnifiedVectorFormat ldata, rdata;
4923
4928
 
4924
- left.Orrify(count, ldata);
4925
- right.Orrify(count, rdata);
4929
+ left.ToUnifiedFormat(count, ldata);
4930
+ right.ToUnifiedFormat(count, rdata);
4926
4931
 
4927
4932
  result.SetVectorType(VectorType::FLAT_VECTOR);
4928
4933
  auto result_data = FlatVector::GetData<RESULT_TYPE>(result);
@@ -5178,10 +5183,10 @@ public:
5178
5183
  template <class LEFT_TYPE, class RIGHT_TYPE, class OP>
5179
5184
  static idx_t SelectGeneric(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
5180
5185
  SelectionVector *true_sel, SelectionVector *false_sel) {
5181
- VectorData ldata, rdata;
5186
+ UnifiedVectorFormat ldata, rdata;
5182
5187
 
5183
- left.Orrify(count, ldata);
5184
- right.Orrify(count, rdata);
5188
+ left.ToUnifiedFormat(count, ldata);
5189
+ right.ToUnifiedFormat(count, rdata);
5185
5190
 
5186
5191
  return SelectGenericLoopSwitch<LEFT_TYPE, RIGHT_TYPE, OP>((LEFT_TYPE *)ldata.data, (RIGHT_TYPE *)rdata.data,
5187
5192
  ldata.sel, rdata.sel, sel, count, ldata.validity,
@@ -5297,10 +5302,10 @@ public:
5297
5302
  } else {
5298
5303
  result.SetVectorType(VectorType::FLAT_VECTOR);
5299
5304
 
5300
- VectorData adata, bdata, cdata;
5301
- a.Orrify(count, adata);
5302
- b.Orrify(count, bdata);
5303
- c.Orrify(count, cdata);
5305
+ UnifiedVectorFormat adata, bdata, cdata;
5306
+ a.ToUnifiedFormat(count, adata);
5307
+ b.ToUnifiedFormat(count, bdata);
5308
+ c.ToUnifiedFormat(count, cdata);
5304
5309
 
5305
5310
  ExecuteLoop<A_TYPE, B_TYPE, C_TYPE, RESULT_TYPE, OPWRAPPER>(
5306
5311
  (A_TYPE *)adata.data, (B_TYPE *)bdata.data, (C_TYPE *)cdata.data,
@@ -5355,9 +5360,9 @@ private:
5355
5360
  }
5356
5361
 
5357
5362
  template <class A_TYPE, class B_TYPE, class C_TYPE, class OP, bool NO_NULL>
5358
- static inline idx_t SelectLoopSelSwitch(VectorData &adata, VectorData &bdata, VectorData &cdata,
5359
- const SelectionVector *sel, idx_t count, SelectionVector *true_sel,
5360
- SelectionVector *false_sel) {
5363
+ static inline idx_t SelectLoopSelSwitch(UnifiedVectorFormat &adata, UnifiedVectorFormat &bdata,
5364
+ UnifiedVectorFormat &cdata, const SelectionVector *sel, idx_t count,
5365
+ SelectionVector *true_sel, SelectionVector *false_sel) {
5361
5366
  if (true_sel && false_sel) {
5362
5367
  return SelectLoop<A_TYPE, B_TYPE, C_TYPE, OP, NO_NULL, true, true>(
5363
5368
  (A_TYPE *)adata.data, (B_TYPE *)bdata.data, (C_TYPE *)cdata.data, sel, count, *adata.sel, *bdata.sel,
@@ -5375,9 +5380,9 @@ private:
5375
5380
  }
5376
5381
 
5377
5382
  template <class A_TYPE, class B_TYPE, class C_TYPE, class OP>
5378
- static inline idx_t SelectLoopSwitch(VectorData &adata, VectorData &bdata, VectorData &cdata,
5379
- const SelectionVector *sel, idx_t count, SelectionVector *true_sel,
5380
- SelectionVector *false_sel) {
5383
+ static inline idx_t SelectLoopSwitch(UnifiedVectorFormat &adata, UnifiedVectorFormat &bdata,
5384
+ UnifiedVectorFormat &cdata, const SelectionVector *sel, idx_t count,
5385
+ SelectionVector *true_sel, SelectionVector *false_sel) {
5381
5386
  if (!adata.validity.AllValid() || !bdata.validity.AllValid() || !cdata.validity.AllValid()) {
5382
5387
  return SelectLoopSelSwitch<A_TYPE, B_TYPE, C_TYPE, OP, false>(adata, bdata, cdata, sel, count, true_sel,
5383
5388
  false_sel);
@@ -5394,10 +5399,10 @@ public:
5394
5399
  if (!sel) {
5395
5400
  sel = FlatVector::IncrementalSelectionVector();
5396
5401
  }
5397
- VectorData adata, bdata, cdata;
5398
- a.Orrify(count, adata);
5399
- b.Orrify(count, bdata);
5400
- c.Orrify(count, cdata);
5402
+ UnifiedVectorFormat adata, bdata, cdata;
5403
+ a.ToUnifiedFormat(count, adata);
5404
+ b.ToUnifiedFormat(count, bdata);
5405
+ c.ToUnifiedFormat(count, cdata);
5401
5406
 
5402
5407
  return SelectLoopSwitch<A_TYPE, B_TYPE, C_TYPE, OP>(adata, bdata, cdata, sel, count, true_sel, false_sel);
5403
5408
  }
@@ -5577,8 +5582,8 @@ private:
5577
5582
  break;
5578
5583
  }
5579
5584
  default: {
5580
- VectorData vdata;
5581
- input.Orrify(count, vdata);
5585
+ UnifiedVectorFormat vdata;
5586
+ input.ToUnifiedFormat(count, vdata);
5582
5587
 
5583
5588
  result.SetVectorType(VectorType::FLAT_VECTOR);
5584
5589
  auto result_data = FlatVector::GetData<RESULT_TYPE>(result);
@@ -7617,7 +7622,7 @@ class FieldReader;
7617
7622
  class Vector;
7618
7623
  class ValidityStatistics;
7619
7624
  class DistinctStatistics;
7620
- struct VectorData;
7625
+ struct UnifiedVectorFormat;
7621
7626
 
7622
7627
  enum StatisticsType { LOCAL_STATS = 0, GLOBAL_STATS = 1 };
7623
7628
 
@@ -9249,8 +9254,8 @@ public:
9249
9254
  auto sdata = FlatVector::GetData<STATE_TYPE *>(states);
9250
9255
  NullaryFlatLoop<STATE_TYPE, OP>(sdata, aggr_input_data, count);
9251
9256
  } else {
9252
- VectorData sdata;
9253
- states.Orrify(count, sdata);
9257
+ UnifiedVectorFormat sdata;
9258
+ states.ToUnifiedFormat(count, sdata);
9254
9259
  NullaryScatterLoop<STATE_TYPE, OP>((STATE_TYPE **)sdata.data, aggr_input_data, *sdata.sel, count);
9255
9260
  }
9256
9261
  }
@@ -9280,9 +9285,9 @@ public:
9280
9285
  UnaryFlatLoop<STATE_TYPE, INPUT_TYPE, OP>(idata, aggr_input_data, sdata, FlatVector::Validity(input),
9281
9286
  count);
9282
9287
  } else {
9283
- VectorData idata, sdata;
9284
- input.Orrify(count, idata);
9285
- states.Orrify(count, sdata);
9288
+ UnifiedVectorFormat idata, sdata;
9289
+ input.ToUnifiedFormat(count, idata);
9290
+ states.ToUnifiedFormat(count, sdata);
9286
9291
  UnaryScatterLoop<STATE_TYPE, INPUT_TYPE, OP>((INPUT_TYPE *)idata.data, aggr_input_data,
9287
9292
  (STATE_TYPE **)sdata.data, *idata.sel, *sdata.sel,
9288
9293
  idata.validity, count);
@@ -9308,8 +9313,8 @@ public:
9308
9313
  break;
9309
9314
  }
9310
9315
  default: {
9311
- VectorData idata;
9312
- input.Orrify(count, idata);
9316
+ UnifiedVectorFormat idata;
9317
+ input.ToUnifiedFormat(count, idata);
9313
9318
  UnaryUpdateLoop<STATE_TYPE, INPUT_TYPE, OP>((INPUT_TYPE *)idata.data, aggr_input_data, (STATE_TYPE *)state,
9314
9319
  count, idata.validity, *idata.sel);
9315
9320
  break;
@@ -9319,11 +9324,11 @@ public:
9319
9324
 
9320
9325
  template <class STATE_TYPE, class A_TYPE, class B_TYPE, class OP>
9321
9326
  static void BinaryScatter(AggregateInputData &aggr_input_data, Vector &a, Vector &b, Vector &states, idx_t count) {
9322
- VectorData adata, bdata, sdata;
9327
+ UnifiedVectorFormat adata, bdata, sdata;
9323
9328
 
9324
- a.Orrify(count, adata);
9325
- b.Orrify(count, bdata);
9326
- states.Orrify(count, sdata);
9329
+ a.ToUnifiedFormat(count, adata);
9330
+ b.ToUnifiedFormat(count, bdata);
9331
+ states.ToUnifiedFormat(count, sdata);
9327
9332
 
9328
9333
  BinaryScatterLoop<STATE_TYPE, A_TYPE, B_TYPE, OP>((A_TYPE *)adata.data, aggr_input_data, (B_TYPE *)bdata.data,
9329
9334
  (STATE_TYPE **)sdata.data, count, *adata.sel, *bdata.sel,
@@ -9332,10 +9337,10 @@ public:
9332
9337
 
9333
9338
  template <class STATE_TYPE, class A_TYPE, class B_TYPE, class OP>
9334
9339
  static void BinaryUpdate(AggregateInputData &aggr_input_data, Vector &a, Vector &b, data_ptr_t state, idx_t count) {
9335
- VectorData adata, bdata;
9340
+ UnifiedVectorFormat adata, bdata;
9336
9341
 
9337
- a.Orrify(count, adata);
9338
- b.Orrify(count, bdata);
9342
+ a.ToUnifiedFormat(count, adata);
9343
+ b.ToUnifiedFormat(count, bdata);
9339
9344
 
9340
9345
  BinaryUpdateLoop<STATE_TYPE, A_TYPE, B_TYPE, OP>((A_TYPE *)adata.data, aggr_input_data, (B_TYPE *)bdata.data,
9341
9346
  (STATE_TYPE *)state, count, *adata.sel, *bdata.sel,
@@ -21769,8 +21774,8 @@ typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &stat
21769
21774
  // Append (optional)
21770
21775
  //===--------------------------------------------------------------------===//
21771
21776
  typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
21772
- typedef idx_t (*compression_append_t)(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, idx_t offset,
21773
- idx_t count);
21777
+ typedef idx_t (*compression_append_t)(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data,
21778
+ idx_t offset, idx_t count);
21774
21779
  typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
21775
21780
  typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
21776
21781
 
@@ -21909,7 +21914,7 @@ public:
21909
21914
  //! Initialize an append of this segment. Appends are only supported on transient segments.
21910
21915
  void InitializeAppend(ColumnAppendState &state);
21911
21916
  //! Appends a (part of) vector to the segment, returns the amount of entries successfully appended
21912
- idx_t Append(ColumnAppendState &state, VectorData &data, idx_t offset, idx_t count);
21917
+ idx_t Append(ColumnAppendState &state, UnifiedVectorFormat &data, idx_t offset, idx_t count);
21913
21918
  //! Finalize the segment for appending - no more appends can follow on this segment
21914
21919
  //! The segment should be compacted as much as possible
21915
21920
  //! Returns the number of bytes occupied within the segment