duckdb 0.4.1-dev696.0 → 0.4.1-dev723.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/connection.cpp +1 -1
- package/src/data_chunk.cpp +2 -2
- package/src/duckdb.cpp +473 -463
- package/src/duckdb.hpp +54 -49
- package/src/parquet-amalgamation.cpp +17822 -17822
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "2c623978d"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev723"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -3892,7 +3892,7 @@ private:
|
|
|
3892
3892
|
|
|
3893
3893
|
namespace duckdb {
|
|
3894
3894
|
|
|
3895
|
-
struct
|
|
3895
|
+
struct UnifiedVectorFormat {
|
|
3896
3896
|
const SelectionVector *sel;
|
|
3897
3897
|
data_ptr_t data;
|
|
3898
3898
|
ValidityMask validity;
|
|
@@ -3986,10 +3986,15 @@ public:
|
|
|
3986
3986
|
DUCKDB_API void Print();
|
|
3987
3987
|
|
|
3988
3988
|
//! Flatten the vector, removing any compression and turning it into a FLAT_VECTOR
|
|
3989
|
-
DUCKDB_API void
|
|
3990
|
-
DUCKDB_API void
|
|
3991
|
-
//!
|
|
3992
|
-
|
|
3989
|
+
DUCKDB_API void Flatten(idx_t count);
|
|
3990
|
+
DUCKDB_API void Flatten(const SelectionVector &sel, idx_t count);
|
|
3991
|
+
//! Creates a UnifiedVectorFormat of a vector
|
|
3992
|
+
//! The UnifiedVectorFormat allows efficient reading of vectors regardless of their vector type
|
|
3993
|
+
//! It contains (1) a data pointer, (2) a validity mask, and (3) a selection vector
|
|
3994
|
+
//! Access to the individual vector elements can be performed through data_pointer[sel_idx[i]]/validity[sel_idx[i]]
|
|
3995
|
+
//! The most common vector types (flat, constant & dictionary) can be converted to the canonical format "for free"
|
|
3996
|
+
//! ToUnifiedFormat was originally called Orrify, as a tribute to Orri Erling who came up with it
|
|
3997
|
+
DUCKDB_API void ToUnifiedFormat(idx_t count, UnifiedVectorFormat &data);
|
|
3993
3998
|
|
|
3994
3999
|
//! Turn the vector into a sequence vector
|
|
3995
4000
|
DUCKDB_API void Sequence(int64_t start, int64_t increment);
|
|
@@ -4505,9 +4510,9 @@ public:
|
|
|
4505
4510
|
DUCKDB_API void Fuse(DataChunk &other);
|
|
4506
4511
|
|
|
4507
4512
|
//! Turn all the vectors from the chunk into flat vectors
|
|
4508
|
-
DUCKDB_API void
|
|
4513
|
+
DUCKDB_API void Flatten();
|
|
4509
4514
|
|
|
4510
|
-
DUCKDB_API unique_ptr<
|
|
4515
|
+
DUCKDB_API unique_ptr<UnifiedVectorFormat[]> ToUnifiedFormat();
|
|
4511
4516
|
|
|
4512
4517
|
DUCKDB_API void Slice(const SelectionVector &sel_vector, idx_t count);
|
|
4513
4518
|
DUCKDB_API void Slice(DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset = 0);
|
|
@@ -4919,10 +4924,10 @@ struct BinaryExecutor {
|
|
|
4919
4924
|
|
|
4920
4925
|
template <class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE, class OPWRAPPER, class OP, class FUNC>
|
|
4921
4926
|
static void ExecuteGeneric(Vector &left, Vector &right, Vector &result, idx_t count, FUNC fun) {
|
|
4922
|
-
|
|
4927
|
+
UnifiedVectorFormat ldata, rdata;
|
|
4923
4928
|
|
|
4924
|
-
left.
|
|
4925
|
-
right.
|
|
4929
|
+
left.ToUnifiedFormat(count, ldata);
|
|
4930
|
+
right.ToUnifiedFormat(count, rdata);
|
|
4926
4931
|
|
|
4927
4932
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
4928
4933
|
auto result_data = FlatVector::GetData<RESULT_TYPE>(result);
|
|
@@ -5178,10 +5183,10 @@ public:
|
|
|
5178
5183
|
template <class LEFT_TYPE, class RIGHT_TYPE, class OP>
|
|
5179
5184
|
static idx_t SelectGeneric(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
|
|
5180
5185
|
SelectionVector *true_sel, SelectionVector *false_sel) {
|
|
5181
|
-
|
|
5186
|
+
UnifiedVectorFormat ldata, rdata;
|
|
5182
5187
|
|
|
5183
|
-
left.
|
|
5184
|
-
right.
|
|
5188
|
+
left.ToUnifiedFormat(count, ldata);
|
|
5189
|
+
right.ToUnifiedFormat(count, rdata);
|
|
5185
5190
|
|
|
5186
5191
|
return SelectGenericLoopSwitch<LEFT_TYPE, RIGHT_TYPE, OP>((LEFT_TYPE *)ldata.data, (RIGHT_TYPE *)rdata.data,
|
|
5187
5192
|
ldata.sel, rdata.sel, sel, count, ldata.validity,
|
|
@@ -5297,10 +5302,10 @@ public:
|
|
|
5297
5302
|
} else {
|
|
5298
5303
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
5299
5304
|
|
|
5300
|
-
|
|
5301
|
-
a.
|
|
5302
|
-
b.
|
|
5303
|
-
c.
|
|
5305
|
+
UnifiedVectorFormat adata, bdata, cdata;
|
|
5306
|
+
a.ToUnifiedFormat(count, adata);
|
|
5307
|
+
b.ToUnifiedFormat(count, bdata);
|
|
5308
|
+
c.ToUnifiedFormat(count, cdata);
|
|
5304
5309
|
|
|
5305
5310
|
ExecuteLoop<A_TYPE, B_TYPE, C_TYPE, RESULT_TYPE, OPWRAPPER>(
|
|
5306
5311
|
(A_TYPE *)adata.data, (B_TYPE *)bdata.data, (C_TYPE *)cdata.data,
|
|
@@ -5355,9 +5360,9 @@ private:
|
|
|
5355
5360
|
}
|
|
5356
5361
|
|
|
5357
5362
|
template <class A_TYPE, class B_TYPE, class C_TYPE, class OP, bool NO_NULL>
|
|
5358
|
-
static inline idx_t SelectLoopSelSwitch(
|
|
5359
|
-
const SelectionVector *sel, idx_t count,
|
|
5360
|
-
SelectionVector *false_sel) {
|
|
5363
|
+
static inline idx_t SelectLoopSelSwitch(UnifiedVectorFormat &adata, UnifiedVectorFormat &bdata,
|
|
5364
|
+
UnifiedVectorFormat &cdata, const SelectionVector *sel, idx_t count,
|
|
5365
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
|
5361
5366
|
if (true_sel && false_sel) {
|
|
5362
5367
|
return SelectLoop<A_TYPE, B_TYPE, C_TYPE, OP, NO_NULL, true, true>(
|
|
5363
5368
|
(A_TYPE *)adata.data, (B_TYPE *)bdata.data, (C_TYPE *)cdata.data, sel, count, *adata.sel, *bdata.sel,
|
|
@@ -5375,9 +5380,9 @@ private:
|
|
|
5375
5380
|
}
|
|
5376
5381
|
|
|
5377
5382
|
template <class A_TYPE, class B_TYPE, class C_TYPE, class OP>
|
|
5378
|
-
static inline idx_t SelectLoopSwitch(
|
|
5379
|
-
const SelectionVector *sel, idx_t count,
|
|
5380
|
-
SelectionVector *false_sel) {
|
|
5383
|
+
static inline idx_t SelectLoopSwitch(UnifiedVectorFormat &adata, UnifiedVectorFormat &bdata,
|
|
5384
|
+
UnifiedVectorFormat &cdata, const SelectionVector *sel, idx_t count,
|
|
5385
|
+
SelectionVector *true_sel, SelectionVector *false_sel) {
|
|
5381
5386
|
if (!adata.validity.AllValid() || !bdata.validity.AllValid() || !cdata.validity.AllValid()) {
|
|
5382
5387
|
return SelectLoopSelSwitch<A_TYPE, B_TYPE, C_TYPE, OP, false>(adata, bdata, cdata, sel, count, true_sel,
|
|
5383
5388
|
false_sel);
|
|
@@ -5394,10 +5399,10 @@ public:
|
|
|
5394
5399
|
if (!sel) {
|
|
5395
5400
|
sel = FlatVector::IncrementalSelectionVector();
|
|
5396
5401
|
}
|
|
5397
|
-
|
|
5398
|
-
a.
|
|
5399
|
-
b.
|
|
5400
|
-
c.
|
|
5402
|
+
UnifiedVectorFormat adata, bdata, cdata;
|
|
5403
|
+
a.ToUnifiedFormat(count, adata);
|
|
5404
|
+
b.ToUnifiedFormat(count, bdata);
|
|
5405
|
+
c.ToUnifiedFormat(count, cdata);
|
|
5401
5406
|
|
|
5402
5407
|
return SelectLoopSwitch<A_TYPE, B_TYPE, C_TYPE, OP>(adata, bdata, cdata, sel, count, true_sel, false_sel);
|
|
5403
5408
|
}
|
|
@@ -5577,8 +5582,8 @@ private:
|
|
|
5577
5582
|
break;
|
|
5578
5583
|
}
|
|
5579
5584
|
default: {
|
|
5580
|
-
|
|
5581
|
-
input.
|
|
5585
|
+
UnifiedVectorFormat vdata;
|
|
5586
|
+
input.ToUnifiedFormat(count, vdata);
|
|
5582
5587
|
|
|
5583
5588
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
5584
5589
|
auto result_data = FlatVector::GetData<RESULT_TYPE>(result);
|
|
@@ -7617,7 +7622,7 @@ class FieldReader;
|
|
|
7617
7622
|
class Vector;
|
|
7618
7623
|
class ValidityStatistics;
|
|
7619
7624
|
class DistinctStatistics;
|
|
7620
|
-
struct
|
|
7625
|
+
struct UnifiedVectorFormat;
|
|
7621
7626
|
|
|
7622
7627
|
enum StatisticsType { LOCAL_STATS = 0, GLOBAL_STATS = 1 };
|
|
7623
7628
|
|
|
@@ -9249,8 +9254,8 @@ public:
|
|
|
9249
9254
|
auto sdata = FlatVector::GetData<STATE_TYPE *>(states);
|
|
9250
9255
|
NullaryFlatLoop<STATE_TYPE, OP>(sdata, aggr_input_data, count);
|
|
9251
9256
|
} else {
|
|
9252
|
-
|
|
9253
|
-
states.
|
|
9257
|
+
UnifiedVectorFormat sdata;
|
|
9258
|
+
states.ToUnifiedFormat(count, sdata);
|
|
9254
9259
|
NullaryScatterLoop<STATE_TYPE, OP>((STATE_TYPE **)sdata.data, aggr_input_data, *sdata.sel, count);
|
|
9255
9260
|
}
|
|
9256
9261
|
}
|
|
@@ -9280,9 +9285,9 @@ public:
|
|
|
9280
9285
|
UnaryFlatLoop<STATE_TYPE, INPUT_TYPE, OP>(idata, aggr_input_data, sdata, FlatVector::Validity(input),
|
|
9281
9286
|
count);
|
|
9282
9287
|
} else {
|
|
9283
|
-
|
|
9284
|
-
input.
|
|
9285
|
-
states.
|
|
9288
|
+
UnifiedVectorFormat idata, sdata;
|
|
9289
|
+
input.ToUnifiedFormat(count, idata);
|
|
9290
|
+
states.ToUnifiedFormat(count, sdata);
|
|
9286
9291
|
UnaryScatterLoop<STATE_TYPE, INPUT_TYPE, OP>((INPUT_TYPE *)idata.data, aggr_input_data,
|
|
9287
9292
|
(STATE_TYPE **)sdata.data, *idata.sel, *sdata.sel,
|
|
9288
9293
|
idata.validity, count);
|
|
@@ -9308,8 +9313,8 @@ public:
|
|
|
9308
9313
|
break;
|
|
9309
9314
|
}
|
|
9310
9315
|
default: {
|
|
9311
|
-
|
|
9312
|
-
input.
|
|
9316
|
+
UnifiedVectorFormat idata;
|
|
9317
|
+
input.ToUnifiedFormat(count, idata);
|
|
9313
9318
|
UnaryUpdateLoop<STATE_TYPE, INPUT_TYPE, OP>((INPUT_TYPE *)idata.data, aggr_input_data, (STATE_TYPE *)state,
|
|
9314
9319
|
count, idata.validity, *idata.sel);
|
|
9315
9320
|
break;
|
|
@@ -9319,11 +9324,11 @@ public:
|
|
|
9319
9324
|
|
|
9320
9325
|
template <class STATE_TYPE, class A_TYPE, class B_TYPE, class OP>
|
|
9321
9326
|
static void BinaryScatter(AggregateInputData &aggr_input_data, Vector &a, Vector &b, Vector &states, idx_t count) {
|
|
9322
|
-
|
|
9327
|
+
UnifiedVectorFormat adata, bdata, sdata;
|
|
9323
9328
|
|
|
9324
|
-
a.
|
|
9325
|
-
b.
|
|
9326
|
-
states.
|
|
9329
|
+
a.ToUnifiedFormat(count, adata);
|
|
9330
|
+
b.ToUnifiedFormat(count, bdata);
|
|
9331
|
+
states.ToUnifiedFormat(count, sdata);
|
|
9327
9332
|
|
|
9328
9333
|
BinaryScatterLoop<STATE_TYPE, A_TYPE, B_TYPE, OP>((A_TYPE *)adata.data, aggr_input_data, (B_TYPE *)bdata.data,
|
|
9329
9334
|
(STATE_TYPE **)sdata.data, count, *adata.sel, *bdata.sel,
|
|
@@ -9332,10 +9337,10 @@ public:
|
|
|
9332
9337
|
|
|
9333
9338
|
template <class STATE_TYPE, class A_TYPE, class B_TYPE, class OP>
|
|
9334
9339
|
static void BinaryUpdate(AggregateInputData &aggr_input_data, Vector &a, Vector &b, data_ptr_t state, idx_t count) {
|
|
9335
|
-
|
|
9340
|
+
UnifiedVectorFormat adata, bdata;
|
|
9336
9341
|
|
|
9337
|
-
a.
|
|
9338
|
-
b.
|
|
9342
|
+
a.ToUnifiedFormat(count, adata);
|
|
9343
|
+
b.ToUnifiedFormat(count, bdata);
|
|
9339
9344
|
|
|
9340
9345
|
BinaryUpdateLoop<STATE_TYPE, A_TYPE, B_TYPE, OP>((A_TYPE *)adata.data, aggr_input_data, (B_TYPE *)bdata.data,
|
|
9341
9346
|
(STATE_TYPE *)state, count, *adata.sel, *bdata.sel,
|
|
@@ -21769,8 +21774,8 @@ typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &stat
|
|
|
21769
21774
|
// Append (optional)
|
|
21770
21775
|
//===--------------------------------------------------------------------===//
|
|
21771
21776
|
typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
|
|
21772
|
-
typedef idx_t (*compression_append_t)(ColumnSegment &segment, SegmentStatistics &stats,
|
|
21773
|
-
idx_t count);
|
|
21777
|
+
typedef idx_t (*compression_append_t)(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data,
|
|
21778
|
+
idx_t offset, idx_t count);
|
|
21774
21779
|
typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
|
|
21775
21780
|
typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
|
|
21776
21781
|
|
|
@@ -21909,7 +21914,7 @@ public:
|
|
|
21909
21914
|
//! Initialize an append of this segment. Appends are only supported on transient segments.
|
|
21910
21915
|
void InitializeAppend(ColumnAppendState &state);
|
|
21911
21916
|
//! Appends a (part of) vector to the segment, returns the amount of entries successfully appended
|
|
21912
|
-
idx_t Append(ColumnAppendState &state,
|
|
21917
|
+
idx_t Append(ColumnAppendState &state, UnifiedVectorFormat &data, idx_t offset, idx_t count);
|
|
21913
21918
|
//! Finalize the segment for appending - no more appends can follow on this segment
|
|
21914
21919
|
//! The segment should be compacted as much as possible
|
|
21915
21920
|
//! Returns the number of bytes occupied within the segment
|