duckdb 0.5.2-dev1131.0 → 0.5.2-dev1144.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +106 -134
- package/src/duckdb.hpp +9 -8
- package/src/parquet-amalgamation.cpp +28319 -28319
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -47220,7 +47220,7 @@ void ValidityMask::Resize(idx_t old_size, idx_t new_size) {
|
|
|
47220
47220
|
}
|
|
47221
47221
|
}
|
|
47222
47222
|
|
|
47223
|
-
void ValidityMask::Slice(const ValidityMask &other, idx_t offset) {
|
|
47223
|
+
void ValidityMask::Slice(const ValidityMask &other, idx_t offset, idx_t end) {
|
|
47224
47224
|
if (other.AllValid()) {
|
|
47225
47225
|
validity_mask = nullptr;
|
|
47226
47226
|
validity_data.reset();
|
|
@@ -47230,11 +47230,11 @@ void ValidityMask::Slice(const ValidityMask &other, idx_t offset) {
|
|
|
47230
47230
|
Initialize(other);
|
|
47231
47231
|
return;
|
|
47232
47232
|
}
|
|
47233
|
-
ValidityMask new_mask(
|
|
47233
|
+
ValidityMask new_mask(end - offset);
|
|
47234
47234
|
|
|
47235
47235
|
// FIXME THIS NEEDS FIXING!
|
|
47236
47236
|
#if 1
|
|
47237
|
-
for (idx_t i = offset; i <
|
|
47237
|
+
for (idx_t i = offset; i < end; i++) {
|
|
47238
47238
|
new_mask.Set(i - offset, other.RowIsValid(i));
|
|
47239
47239
|
}
|
|
47240
47240
|
Initialize(new_mask);
|
|
@@ -49366,8 +49366,8 @@ Vector::Vector(Vector &other, const SelectionVector &sel, idx_t count) : type(ot
|
|
|
49366
49366
|
Slice(other, sel, count);
|
|
49367
49367
|
}
|
|
49368
49368
|
|
|
49369
|
-
Vector::Vector(Vector &other, idx_t offset) : type(other.type) {
|
|
49370
|
-
Slice(other, offset);
|
|
49369
|
+
Vector::Vector(Vector &other, idx_t offset, idx_t end) : type(other.type) {
|
|
49370
|
+
Slice(other, offset, end);
|
|
49371
49371
|
}
|
|
49372
49372
|
|
|
49373
49373
|
Vector::Vector(const Value &value) : type(value.type()) {
|
|
@@ -49431,7 +49431,7 @@ void Vector::ResetFromCache(const VectorCache &cache) {
|
|
|
49431
49431
|
cache.ResetFromCache(*this);
|
|
49432
49432
|
}
|
|
49433
49433
|
|
|
49434
|
-
void Vector::Slice(Vector &other, idx_t offset) {
|
|
49434
|
+
void Vector::Slice(Vector &other, idx_t offset, idx_t end) {
|
|
49435
49435
|
if (other.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
|
49436
49436
|
Reference(other);
|
|
49437
49437
|
return;
|
|
@@ -49445,10 +49445,10 @@ void Vector::Slice(Vector &other, idx_t offset) {
|
|
|
49445
49445
|
auto &other_entries = StructVector::GetEntries(other);
|
|
49446
49446
|
D_ASSERT(entries.size() == other_entries.size());
|
|
49447
49447
|
for (idx_t i = 0; i < entries.size(); i++) {
|
|
49448
|
-
entries[i]->Slice(*other_entries[i], offset);
|
|
49448
|
+
entries[i]->Slice(*other_entries[i], offset, end);
|
|
49449
49449
|
}
|
|
49450
49450
|
if (offset > 0) {
|
|
49451
|
-
new_vector.validity.Slice(other.validity, offset);
|
|
49451
|
+
new_vector.validity.Slice(other.validity, offset, end);
|
|
49452
49452
|
} else {
|
|
49453
49453
|
new_vector.validity = other.validity;
|
|
49454
49454
|
}
|
|
@@ -49457,7 +49457,7 @@ void Vector::Slice(Vector &other, idx_t offset) {
|
|
|
49457
49457
|
Reference(other);
|
|
49458
49458
|
if (offset > 0) {
|
|
49459
49459
|
data = data + GetTypeIdSize(internal_type) * offset;
|
|
49460
|
-
validity.Slice(other.validity, offset);
|
|
49460
|
+
validity.Slice(other.validity, offset, end);
|
|
49461
49461
|
}
|
|
49462
49462
|
}
|
|
49463
49463
|
}
|
|
@@ -64892,7 +64892,7 @@ public:
|
|
|
64892
64892
|
using FrameBounds = std::pair<idx_t, idx_t>;
|
|
64893
64893
|
|
|
64894
64894
|
WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, const LogicalType &result_type,
|
|
64895
|
-
|
|
64895
|
+
DataChunk *input, const ValidityMask &filter_mask, WindowAggregationMode mode);
|
|
64896
64896
|
~WindowSegmentTree();
|
|
64897
64897
|
|
|
64898
64898
|
//! First row contains the result.
|
|
@@ -64931,8 +64931,6 @@ private:
|
|
|
64931
64931
|
Vector statep;
|
|
64932
64932
|
//! The frame boundaries, used for the window functions
|
|
64933
64933
|
FrameBounds frame;
|
|
64934
|
-
//! The active data in the inputs. Used for the window functions
|
|
64935
|
-
FrameBounds active;
|
|
64936
64934
|
//! Reused result state container for the window functions
|
|
64937
64935
|
Vector statev;
|
|
64938
64936
|
|
|
@@ -64945,7 +64943,7 @@ private:
|
|
|
64945
64943
|
idx_t internal_nodes;
|
|
64946
64944
|
|
|
64947
64945
|
//! The (sorted) input chunk collection on which the tree is built
|
|
64948
|
-
|
|
64946
|
+
DataChunk *input_ref;
|
|
64949
64947
|
|
|
64950
64948
|
//! The filtered rows in input_ref.
|
|
64951
64949
|
const ValidityMask &filter_mask;
|
|
@@ -65785,21 +65783,23 @@ static bool WindowNeedsRank(BoundWindowExpression *wexpr) {
|
|
|
65785
65783
|
}
|
|
65786
65784
|
|
|
65787
65785
|
template <typename T>
|
|
65788
|
-
static T GetCell(
|
|
65789
|
-
D_ASSERT(
|
|
65790
|
-
auto &chunk = collection.GetChunkForRow(index);
|
|
65786
|
+
static T GetCell(DataChunk &chunk, idx_t column, idx_t index) {
|
|
65787
|
+
D_ASSERT(chunk.ColumnCount() > column);
|
|
65791
65788
|
auto &source = chunk.data[column];
|
|
65792
|
-
const auto source_offset = index % STANDARD_VECTOR_SIZE;
|
|
65793
65789
|
const auto data = FlatVector::GetData<T>(source);
|
|
65794
|
-
return data[
|
|
65790
|
+
return data[index];
|
|
65795
65791
|
}
|
|
65796
65792
|
|
|
65797
|
-
static bool CellIsNull(
|
|
65798
|
-
D_ASSERT(
|
|
65799
|
-
auto &chunk = collection.GetChunkForRow(index);
|
|
65793
|
+
static bool CellIsNull(DataChunk &chunk, idx_t column, idx_t index) {
|
|
65794
|
+
D_ASSERT(chunk.ColumnCount() > column);
|
|
65800
65795
|
auto &source = chunk.data[column];
|
|
65801
|
-
|
|
65802
|
-
|
|
65796
|
+
return FlatVector::IsNull(source, index);
|
|
65797
|
+
}
|
|
65798
|
+
|
|
65799
|
+
static void CopyCell(DataChunk &chunk, idx_t column, idx_t index, Vector &target, idx_t target_offset) {
|
|
65800
|
+
D_ASSERT(chunk.ColumnCount() > column);
|
|
65801
|
+
auto &source = chunk.data[column];
|
|
65802
|
+
VectorOperations::Copy(source, target, index + 1, index, target_offset);
|
|
65803
65803
|
}
|
|
65804
65804
|
|
|
65805
65805
|
template <typename T>
|
|
@@ -66098,7 +66098,7 @@ struct WindowExecutor {
|
|
|
66098
66098
|
uint64_t rank = 1;
|
|
66099
66099
|
|
|
66100
66100
|
// Expression collections
|
|
66101
|
-
|
|
66101
|
+
DataChunk payload_collection;
|
|
66102
66102
|
ExpressionExecutor payload_executor;
|
|
66103
66103
|
DataChunk payload_chunk;
|
|
66104
66104
|
|
|
@@ -66127,10 +66127,9 @@ struct WindowExecutor {
|
|
|
66127
66127
|
};
|
|
66128
66128
|
|
|
66129
66129
|
WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocator, const idx_t count)
|
|
66130
|
-
: wexpr(wexpr), bounds(wexpr, count), payload_collection(
|
|
66131
|
-
|
|
66132
|
-
|
|
66133
|
-
boundary_end(wexpr->end_expr.get(), allocator),
|
|
66130
|
+
: wexpr(wexpr), bounds(wexpr, count), payload_collection(), payload_executor(allocator), filter_executor(allocator),
|
|
66131
|
+
leadlag_offset(wexpr->offset_expr.get(), allocator), leadlag_default(wexpr->default_expr.get(), allocator),
|
|
66132
|
+
boundary_start(wexpr->start_expr.get(), allocator), boundary_end(wexpr->end_expr.get(), allocator),
|
|
66134
66133
|
range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr->orders[0].expression.get() : nullptr,
|
|
66135
66134
|
allocator, count)
|
|
66136
66135
|
|
|
@@ -66154,6 +66153,11 @@ WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocato
|
|
|
66154
66153
|
exprs.push_back(child.get());
|
|
66155
66154
|
}
|
|
66156
66155
|
PrepareInputExpressions(exprs.data(), exprs.size(), payload_executor, payload_chunk);
|
|
66156
|
+
|
|
66157
|
+
auto types = payload_chunk.GetTypes();
|
|
66158
|
+
if (!types.empty()) {
|
|
66159
|
+
payload_collection.Initialize(allocator, types);
|
|
66160
|
+
}
|
|
66157
66161
|
}
|
|
66158
66162
|
|
|
66159
66163
|
void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
|
|
@@ -66182,7 +66186,7 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const i
|
|
|
66182
66186
|
payload_chunk.Reset();
|
|
66183
66187
|
payload_executor.Execute(input_chunk, payload_chunk);
|
|
66184
66188
|
payload_chunk.Verify();
|
|
66185
|
-
payload_collection.Append(payload_chunk);
|
|
66189
|
+
payload_collection.Append(payload_chunk, true);
|
|
66186
66190
|
|
|
66187
66191
|
// process payload chunks while they are still piping hot
|
|
66188
66192
|
if (check_nulls) {
|
|
@@ -66194,11 +66198,18 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const i
|
|
|
66194
66198
|
ignore_nulls.Initialize(total_count);
|
|
66195
66199
|
}
|
|
66196
66200
|
// Write to the current position
|
|
66197
|
-
|
|
66198
|
-
|
|
66199
|
-
|
|
66200
|
-
|
|
66201
|
-
|
|
66201
|
+
if (input_idx % ValidityMask::BITS_PER_VALUE == 0) {
|
|
66202
|
+
// If we are at the edge of an output entry, just copy the entries
|
|
66203
|
+
auto dst = ignore_nulls.GetData() + ignore_nulls.EntryCount(input_idx);
|
|
66204
|
+
auto src = vdata.validity.GetData();
|
|
66205
|
+
for (auto entry_count = vdata.validity.EntryCount(count); entry_count-- > 0;) {
|
|
66206
|
+
*dst++ = *src++;
|
|
66207
|
+
}
|
|
66208
|
+
} else {
|
|
66209
|
+
// If not, we have ragged data and need to copy one bit at a time.
|
|
66210
|
+
for (idx_t i = 0; i < count; ++i) {
|
|
66211
|
+
ignore_nulls.Set(input_idx + i, vdata.validity.RowIsValid(i));
|
|
66212
|
+
}
|
|
66202
66213
|
}
|
|
66203
66214
|
}
|
|
66204
66215
|
}
|
|
@@ -66354,7 +66365,7 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
|
|
|
66354
66365
|
// else offset is zero, so don't move.
|
|
66355
66366
|
|
|
66356
66367
|
if (!delta) {
|
|
66357
|
-
|
|
66368
|
+
CopyCell(payload_collection, 0, val_idx, result, output_offset);
|
|
66358
66369
|
} else if (wexpr->default_expr) {
|
|
66359
66370
|
leadlag_default.CopyCell(result, output_offset);
|
|
66360
66371
|
} else {
|
|
@@ -66365,13 +66376,13 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
|
|
|
66365
66376
|
case ExpressionType::WINDOW_FIRST_VALUE: {
|
|
66366
66377
|
idx_t n = 1;
|
|
66367
66378
|
const auto first_idx = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
|
|
66368
|
-
|
|
66379
|
+
CopyCell(payload_collection, 0, first_idx, result, output_offset);
|
|
66369
66380
|
break;
|
|
66370
66381
|
}
|
|
66371
66382
|
case ExpressionType::WINDOW_LAST_VALUE: {
|
|
66372
66383
|
idx_t n = 1;
|
|
66373
|
-
|
|
66374
|
-
|
|
66384
|
+
CopyCell(payload_collection, 0, FindPrevStart(ignore_nulls, bounds.window_start, bounds.window_end, n),
|
|
66385
|
+
result, output_offset);
|
|
66375
66386
|
break;
|
|
66376
66387
|
}
|
|
66377
66388
|
case ExpressionType::WINDOW_NTH_VALUE: {
|
|
@@ -66388,7 +66399,7 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
|
|
|
66388
66399
|
auto n = idx_t(n_param);
|
|
66389
66400
|
const auto nth_index = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
|
|
66390
66401
|
if (!n) {
|
|
66391
|
-
|
|
66402
|
+
CopyCell(payload_collection, 0, nth_index, result, output_offset);
|
|
66392
66403
|
} else {
|
|
66393
66404
|
FlatVector::SetNull(result, output_offset, true);
|
|
66394
66405
|
}
|
|
@@ -86704,27 +86715,26 @@ void BaseReservoirSampling::ReplaceElement() {
|
|
|
86704
86715
|
namespace duckdb {
|
|
86705
86716
|
|
|
86706
86717
|
WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info,
|
|
86707
|
-
const LogicalType &result_type_p,
|
|
86718
|
+
const LogicalType &result_type_p, DataChunk *input,
|
|
86708
86719
|
const ValidityMask &filter_mask_p, WindowAggregationMode mode_p)
|
|
86709
86720
|
: aggregate(aggregate), bind_info(bind_info), result_type(result_type_p), state(aggregate.state_size()),
|
|
86710
|
-
statep(Value::POINTER((idx_t)state.data())), frame(0, 0),
|
|
86711
|
-
|
|
86712
|
-
|
|
86713
|
-
#if STANDARD_VECTOR_SIZE < 512
|
|
86714
|
-
throw NotImplementedException("Window functions are not supported for vector sizes < 512");
|
|
86715
|
-
#endif
|
|
86716
|
-
statep.Flatten(STANDARD_VECTOR_SIZE);
|
|
86721
|
+
statep(Value::POINTER((idx_t)state.data())), frame(0, 0), statev(Value::POINTER((idx_t)state.data())),
|
|
86722
|
+
internal_nodes(0), input_ref(input), filter_mask(filter_mask_p), mode(mode_p) {
|
|
86723
|
+
statep.Flatten(input->size());
|
|
86717
86724
|
statev.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
|
|
86718
86725
|
|
|
86719
86726
|
if (input_ref && input_ref->ColumnCount() > 0) {
|
|
86720
|
-
filter_sel.Initialize(
|
|
86721
|
-
inputs.Initialize(Allocator::DefaultAllocator(), input_ref->
|
|
86727
|
+
filter_sel.Initialize(input->size());
|
|
86728
|
+
inputs.Initialize(Allocator::DefaultAllocator(), input_ref->GetTypes());
|
|
86722
86729
|
// if we have a frame-by-frame method, share the single state
|
|
86723
86730
|
if (aggregate.window && UseWindowAPI()) {
|
|
86724
86731
|
AggregateInit();
|
|
86725
|
-
inputs.Reference(input_ref
|
|
86726
|
-
} else
|
|
86727
|
-
|
|
86732
|
+
inputs.Reference(*input_ref);
|
|
86733
|
+
} else {
|
|
86734
|
+
inputs.SetCapacity(*input_ref);
|
|
86735
|
+
if (aggregate.combine && UseCombineAPI()) {
|
|
86736
|
+
ConstructTree();
|
|
86737
|
+
}
|
|
86728
86738
|
}
|
|
86729
86739
|
}
|
|
86730
86740
|
}
|
|
@@ -86769,35 +86779,15 @@ void WindowSegmentTree::AggegateFinal(Vector &result, idx_t rid) {
|
|
|
86769
86779
|
|
|
86770
86780
|
void WindowSegmentTree::ExtractFrame(idx_t begin, idx_t end) {
|
|
86771
86781
|
const auto size = end - begin;
|
|
86772
|
-
if (size >= STANDARD_VECTOR_SIZE) {
|
|
86773
|
-
throw InternalException("Cannot compute window aggregation: bounds are too large");
|
|
86774
|
-
}
|
|
86775
86782
|
|
|
86776
|
-
|
|
86783
|
+
auto &chunk = *input_ref;
|
|
86777
86784
|
const auto input_count = input_ref->ColumnCount();
|
|
86778
|
-
|
|
86779
|
-
|
|
86780
|
-
auto &
|
|
86781
|
-
|
|
86782
|
-
|
|
86783
|
-
|
|
86784
|
-
v.Slice(vec, start_in_vector);
|
|
86785
|
-
v.Verify(size);
|
|
86786
|
-
}
|
|
86787
|
-
} else {
|
|
86788
|
-
inputs.Reset();
|
|
86789
|
-
inputs.SetCardinality(size);
|
|
86790
|
-
|
|
86791
|
-
// we cannot just slice the individual vector!
|
|
86792
|
-
auto &chunk_a = input_ref->GetChunkForRow(begin);
|
|
86793
|
-
auto &chunk_b = input_ref->GetChunkForRow(end);
|
|
86794
|
-
idx_t chunk_a_count = chunk_a.size() - start_in_vector;
|
|
86795
|
-
idx_t chunk_b_count = inputs.size() - chunk_a_count;
|
|
86796
|
-
for (idx_t i = 0; i < input_count; ++i) {
|
|
86797
|
-
auto &v = inputs.data[i];
|
|
86798
|
-
VectorOperations::Copy(chunk_a.data[i], v, chunk_a.size(), start_in_vector, 0);
|
|
86799
|
-
VectorOperations::Copy(chunk_b.data[i], v, chunk_b_count, 0, chunk_a_count);
|
|
86800
|
-
}
|
|
86785
|
+
inputs.SetCardinality(size);
|
|
86786
|
+
for (idx_t i = 0; i < input_count; ++i) {
|
|
86787
|
+
auto &v = inputs.data[i];
|
|
86788
|
+
auto &vec = chunk.data[i];
|
|
86789
|
+
v.Slice(vec, begin, end);
|
|
86790
|
+
v.Verify(size);
|
|
86801
86791
|
}
|
|
86802
86792
|
|
|
86803
86793
|
// Slice to any filtered rows
|
|
@@ -86820,29 +86810,24 @@ void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end)
|
|
|
86820
86810
|
return;
|
|
86821
86811
|
}
|
|
86822
86812
|
|
|
86823
|
-
|
|
86824
|
-
|
|
86825
|
-
}
|
|
86826
|
-
|
|
86827
|
-
Vector s(statep, 0);
|
|
86813
|
+
const auto count = end - begin;
|
|
86814
|
+
Vector s(statep, 0, count);
|
|
86828
86815
|
if (l_idx == 0) {
|
|
86829
86816
|
ExtractFrame(begin, end);
|
|
86830
86817
|
AggregateInputData aggr_input_data(bind_info, Allocator::DefaultAllocator());
|
|
86831
86818
|
aggregate.update(&inputs.data[0], aggr_input_data, input_ref->ColumnCount(), s, inputs.size());
|
|
86832
86819
|
} else {
|
|
86833
|
-
inputs.Reset();
|
|
86834
|
-
inputs.SetCardinality(end - begin);
|
|
86835
86820
|
// find out where the states begin
|
|
86836
86821
|
data_ptr_t begin_ptr = levels_flat_native.get() + state.size() * (begin + levels_flat_start[l_idx - 1]);
|
|
86837
86822
|
// set up a vector of pointers that point towards the set of states
|
|
86838
|
-
Vector v(LogicalType::POINTER);
|
|
86823
|
+
Vector v(LogicalType::POINTER, count);
|
|
86839
86824
|
auto pdata = FlatVector::GetData<data_ptr_t>(v);
|
|
86840
|
-
for (idx_t i = 0; i <
|
|
86825
|
+
for (idx_t i = 0; i < count; i++) {
|
|
86841
86826
|
pdata[i] = begin_ptr + i * state.size();
|
|
86842
86827
|
}
|
|
86843
|
-
v.Verify(
|
|
86828
|
+
v.Verify(count);
|
|
86844
86829
|
AggregateInputData aggr_input_data(bind_info, Allocator::DefaultAllocator());
|
|
86845
|
-
aggregate.combine(v, s, aggr_input_data,
|
|
86830
|
+
aggregate.combine(v, s, aggr_input_data, count);
|
|
86846
86831
|
}
|
|
86847
86832
|
}
|
|
86848
86833
|
|
|
@@ -86852,7 +86837,7 @@ void WindowSegmentTree::ConstructTree() {
|
|
|
86852
86837
|
|
|
86853
86838
|
// compute space required to store internal nodes of segment tree
|
|
86854
86839
|
internal_nodes = 0;
|
|
86855
|
-
idx_t level_nodes = input_ref->
|
|
86840
|
+
idx_t level_nodes = input_ref->size();
|
|
86856
86841
|
do {
|
|
86857
86842
|
level_nodes = (level_nodes + (TREE_FANOUT - 1)) / TREE_FANOUT;
|
|
86858
86843
|
internal_nodes += level_nodes;
|
|
@@ -86865,7 +86850,7 @@ void WindowSegmentTree::ConstructTree() {
|
|
|
86865
86850
|
// level 0 is data itself
|
|
86866
86851
|
idx_t level_size;
|
|
86867
86852
|
// iterate over the levels of the segment tree
|
|
86868
|
-
while ((level_size = (level_current == 0 ? input_ref->
|
|
86853
|
+
while ((level_size = (level_current == 0 ? input_ref->size()
|
|
86869
86854
|
: levels_flat_offset - levels_flat_start[level_current - 1])) > 1) {
|
|
86870
86855
|
for (idx_t pos = 0; pos < level_size; pos += TREE_FANOUT) {
|
|
86871
86856
|
// compute the aggregate for this entry in the segment tree
|
|
@@ -86914,39 +86899,9 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
|
|
|
86914
86899
|
frame = FrameBounds(begin, end);
|
|
86915
86900
|
|
|
86916
86901
|
// Extract the range
|
|
86917
|
-
auto &coll = *input_ref;
|
|
86918
|
-
const auto prev_active = active;
|
|
86919
|
-
const FrameBounds combined(MinValue(frame.first, prev.first), MaxValue(frame.second, prev.second));
|
|
86920
|
-
|
|
86921
|
-
// The chunk bounds are the range that includes the begin and end - 1
|
|
86922
|
-
const FrameBounds prev_chunks(coll.LocateChunk(prev_active.first), coll.LocateChunk(prev_active.second - 1));
|
|
86923
|
-
const FrameBounds active_chunks(coll.LocateChunk(combined.first), coll.LocateChunk(combined.second - 1));
|
|
86924
|
-
|
|
86925
|
-
// Extract the range
|
|
86926
|
-
if (active_chunks.first == active_chunks.second) {
|
|
86927
|
-
// If all the data is in a single chunk, then just reference it
|
|
86928
|
-
if (prev_chunks != active_chunks || (!prev.first && !prev.second)) {
|
|
86929
|
-
inputs.Reference(coll.GetChunk(active_chunks.first));
|
|
86930
|
-
}
|
|
86931
|
-
} else if (active_chunks.first == prev_chunks.first && prev_chunks.first != prev_chunks.second) {
|
|
86932
|
-
// If the start chunk did not change, and we are not just a reference, then extend if necessary
|
|
86933
|
-
for (auto chunk_idx = prev_chunks.second + 1; chunk_idx <= active_chunks.second; ++chunk_idx) {
|
|
86934
|
-
inputs.Append(coll.GetChunk(chunk_idx), true);
|
|
86935
|
-
}
|
|
86936
|
-
} else {
|
|
86937
|
-
// If the first chunk changed, start over
|
|
86938
|
-
inputs.Reset();
|
|
86939
|
-
for (auto chunk_idx = active_chunks.first; chunk_idx <= active_chunks.second; ++chunk_idx) {
|
|
86940
|
-
inputs.Append(coll.GetChunk(chunk_idx), true);
|
|
86941
|
-
}
|
|
86942
|
-
}
|
|
86943
|
-
|
|
86944
|
-
active = FrameBounds(active_chunks.first * STANDARD_VECTOR_SIZE,
|
|
86945
|
-
MinValue((active_chunks.second + 1) * STANDARD_VECTOR_SIZE, coll.Count()));
|
|
86946
|
-
|
|
86947
86902
|
AggregateInputData aggr_input_data(bind_info, Allocator::DefaultAllocator());
|
|
86948
|
-
aggregate.window(
|
|
86949
|
-
prev, result, rid,
|
|
86903
|
+
aggregate.window(input_ref->data.data(), filter_mask, aggr_input_data, inputs.ColumnCount(), state.data(),
|
|
86904
|
+
frame, prev, result, rid, 0);
|
|
86950
86905
|
return;
|
|
86951
86906
|
}
|
|
86952
86907
|
|
|
@@ -87856,13 +87811,27 @@ void BuiltinFunctions::RegisterAlgebraicAggregates() {
|
|
|
87856
87811
|
namespace duckdb {
|
|
87857
87812
|
|
|
87858
87813
|
struct ApproxDistinctCountState {
|
|
87814
|
+
ApproxDistinctCountState() : log(nullptr) {
|
|
87815
|
+
}
|
|
87816
|
+
~ApproxDistinctCountState() {
|
|
87817
|
+
if (log) {
|
|
87818
|
+
delete log;
|
|
87819
|
+
}
|
|
87820
|
+
}
|
|
87821
|
+
void Resize(idx_t count) {
|
|
87822
|
+
indices.resize(count);
|
|
87823
|
+
counts.resize(count);
|
|
87824
|
+
}
|
|
87825
|
+
|
|
87859
87826
|
HyperLogLog *log;
|
|
87827
|
+
vector<uint64_t> indices;
|
|
87828
|
+
vector<uint8_t> counts;
|
|
87860
87829
|
};
|
|
87861
87830
|
|
|
87862
87831
|
struct ApproxCountDistinctFunction {
|
|
87863
87832
|
template <class STATE>
|
|
87864
87833
|
static void Initialize(STATE *state) {
|
|
87865
|
-
state
|
|
87834
|
+
new (state) STATE;
|
|
87866
87835
|
}
|
|
87867
87836
|
|
|
87868
87837
|
template <class STATE, class OP>
|
|
@@ -87894,9 +87863,7 @@ struct ApproxCountDistinctFunction {
|
|
|
87894
87863
|
}
|
|
87895
87864
|
template <class STATE>
|
|
87896
87865
|
static void Destroy(STATE *state) {
|
|
87897
|
-
|
|
87898
|
-
delete state->log;
|
|
87899
|
-
}
|
|
87866
|
+
state->~STATE();
|
|
87900
87867
|
}
|
|
87901
87868
|
};
|
|
87902
87869
|
|
|
@@ -87912,8 +87879,9 @@ static void ApproxCountDistinctSimpleUpdateFunction(Vector inputs[], AggregateIn
|
|
|
87912
87879
|
UnifiedVectorFormat vdata;
|
|
87913
87880
|
inputs[0].ToUnifiedFormat(count, vdata);
|
|
87914
87881
|
|
|
87915
|
-
|
|
87916
|
-
|
|
87882
|
+
agg_state->Resize(count);
|
|
87883
|
+
auto indices = agg_state->indices.data();
|
|
87884
|
+
auto counts = agg_state->counts.data();
|
|
87917
87885
|
|
|
87918
87886
|
HyperLogLog::ProcessEntries(vdata, inputs[0].GetType(), indices, counts, count);
|
|
87919
87887
|
agg_state->log->AddToLog(vdata, count, indices, counts);
|
|
@@ -87927,19 +87895,23 @@ static void ApproxCountDistinctUpdateFunction(Vector inputs[], AggregateInputDat
|
|
|
87927
87895
|
state_vector.ToUnifiedFormat(count, sdata);
|
|
87928
87896
|
auto states = (ApproxDistinctCountState **)sdata.data;
|
|
87929
87897
|
|
|
87898
|
+
uint64_t *indices;
|
|
87899
|
+
uint8_t *counts;
|
|
87930
87900
|
for (idx_t i = 0; i < count; i++) {
|
|
87931
87901
|
auto agg_state = states[sdata.sel->get_index(i)];
|
|
87932
87902
|
if (!agg_state->log) {
|
|
87933
87903
|
agg_state->log = new HyperLogLog();
|
|
87934
87904
|
}
|
|
87905
|
+
if (i == 0) {
|
|
87906
|
+
agg_state->Resize(count);
|
|
87907
|
+
indices = agg_state->indices.data();
|
|
87908
|
+
counts = agg_state->counts.data();
|
|
87909
|
+
}
|
|
87935
87910
|
}
|
|
87936
87911
|
|
|
87937
87912
|
UnifiedVectorFormat vdata;
|
|
87938
87913
|
inputs[0].ToUnifiedFormat(count, vdata);
|
|
87939
87914
|
|
|
87940
|
-
uint64_t indices[STANDARD_VECTOR_SIZE];
|
|
87941
|
-
uint8_t counts[STANDARD_VECTOR_SIZE];
|
|
87942
|
-
|
|
87943
87915
|
HyperLogLog::ProcessEntries(vdata, inputs[0].GetType(), indices, counts, count);
|
|
87944
87916
|
HyperLogLog::AddToLogs(vdata, count, indices, counts, (HyperLogLog ***)states, sdata.sel);
|
|
87945
87917
|
}
|
|
@@ -210859,7 +210831,7 @@ void RowGroup::Update(TransactionData transaction, DataChunk &update_chunk, row_
|
|
|
210859
210831
|
D_ASSERT(column != COLUMN_IDENTIFIER_ROW_ID);
|
|
210860
210832
|
D_ASSERT(columns[column]->type.id() == update_chunk.data[i].GetType().id());
|
|
210861
210833
|
if (offset > 0) {
|
|
210862
|
-
Vector sliced_vector(update_chunk.data[i], offset);
|
|
210834
|
+
Vector sliced_vector(update_chunk.data[i], offset, offset + count);
|
|
210863
210835
|
sliced_vector.Flatten(count);
|
|
210864
210836
|
columns[column]->Update(transaction, column, sliced_vector, ids + offset, count);
|
|
210865
210837
|
} else {
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "0e999f15d"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev1144"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -2857,7 +2857,7 @@ public:
|
|
|
2857
2857
|
public:
|
|
2858
2858
|
DUCKDB_API void Resize(idx_t old_size, idx_t new_size);
|
|
2859
2859
|
|
|
2860
|
-
DUCKDB_API void Slice(const ValidityMask &other, idx_t offset);
|
|
2860
|
+
DUCKDB_API void Slice(const ValidityMask &other, idx_t offset, idx_t end);
|
|
2861
2861
|
DUCKDB_API void Combine(const ValidityMask &other, idx_t count);
|
|
2862
2862
|
DUCKDB_API string ToString(idx_t count) const;
|
|
2863
2863
|
};
|
|
@@ -4188,11 +4188,11 @@ class Vector {
|
|
|
4188
4188
|
|
|
4189
4189
|
public:
|
|
4190
4190
|
//! Create a vector that references the other vector
|
|
4191
|
-
DUCKDB_API
|
|
4191
|
+
DUCKDB_API Vector(Vector &other);
|
|
4192
4192
|
//! Create a vector that slices another vector
|
|
4193
4193
|
DUCKDB_API explicit Vector(Vector &other, const SelectionVector &sel, idx_t count);
|
|
4194
|
-
//! Create a vector that slices another vector
|
|
4195
|
-
DUCKDB_API explicit Vector(Vector &other, idx_t offset);
|
|
4194
|
+
//! Create a vector that slices another vector between a pair of offsets
|
|
4195
|
+
DUCKDB_API explicit Vector(Vector &other, idx_t offset, idx_t end);
|
|
4196
4196
|
//! Create a vector of size one holding the passed on value
|
|
4197
4197
|
DUCKDB_API explicit Vector(const Value &value);
|
|
4198
4198
|
//! Create a vector of size tuple_count (non-standard)
|
|
@@ -4233,7 +4233,7 @@ public:
|
|
|
4233
4233
|
DUCKDB_API void ResetFromCache(const VectorCache &cache);
|
|
4234
4234
|
|
|
4235
4235
|
//! Creates a reference to a slice of the other vector
|
|
4236
|
-
DUCKDB_API void Slice(Vector &other, idx_t offset);
|
|
4236
|
+
DUCKDB_API void Slice(Vector &other, idx_t offset, idx_t end);
|
|
4237
4237
|
//! Creates a reference to a slice of the other vector
|
|
4238
4238
|
DUCKDB_API void Slice(Vector &other, const SelectionVector &sel, idx_t count);
|
|
4239
4239
|
//! Turns the vector into a dictionary vector with the specified dictionary
|
|
@@ -4340,7 +4340,8 @@ protected:
|
|
|
4340
4340
|
//! The DictionaryBuffer holds a selection vector
|
|
4341
4341
|
class VectorChildBuffer : public VectorBuffer {
|
|
4342
4342
|
public:
|
|
4343
|
-
VectorChildBuffer(Vector vector)
|
|
4343
|
+
explicit VectorChildBuffer(Vector vector)
|
|
4344
|
+
: VectorBuffer(VectorBufferType::VECTOR_CHILD_BUFFER), data(move(vector)) {
|
|
4344
4345
|
}
|
|
4345
4346
|
|
|
4346
4347
|
public:
|