duckdb 0.5.2-dev1131.0 → 0.5.2-dev1144.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1131.0",
5
+ "version": "0.5.2-dev1144.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -47220,7 +47220,7 @@ void ValidityMask::Resize(idx_t old_size, idx_t new_size) {
47220
47220
  }
47221
47221
  }
47222
47222
 
47223
- void ValidityMask::Slice(const ValidityMask &other, idx_t offset) {
47223
+ void ValidityMask::Slice(const ValidityMask &other, idx_t offset, idx_t end) {
47224
47224
  if (other.AllValid()) {
47225
47225
  validity_mask = nullptr;
47226
47226
  validity_data.reset();
@@ -47230,11 +47230,11 @@ void ValidityMask::Slice(const ValidityMask &other, idx_t offset) {
47230
47230
  Initialize(other);
47231
47231
  return;
47232
47232
  }
47233
- ValidityMask new_mask(STANDARD_VECTOR_SIZE);
47233
+ ValidityMask new_mask(end - offset);
47234
47234
 
47235
47235
  // FIXME THIS NEEDS FIXING!
47236
47236
  #if 1
47237
- for (idx_t i = offset; i < STANDARD_VECTOR_SIZE; i++) {
47237
+ for (idx_t i = offset; i < end; i++) {
47238
47238
  new_mask.Set(i - offset, other.RowIsValid(i));
47239
47239
  }
47240
47240
  Initialize(new_mask);
@@ -49366,8 +49366,8 @@ Vector::Vector(Vector &other, const SelectionVector &sel, idx_t count) : type(ot
49366
49366
  Slice(other, sel, count);
49367
49367
  }
49368
49368
 
49369
- Vector::Vector(Vector &other, idx_t offset) : type(other.type) {
49370
- Slice(other, offset);
49369
+ Vector::Vector(Vector &other, idx_t offset, idx_t end) : type(other.type) {
49370
+ Slice(other, offset, end);
49371
49371
  }
49372
49372
 
49373
49373
  Vector::Vector(const Value &value) : type(value.type()) {
@@ -49431,7 +49431,7 @@ void Vector::ResetFromCache(const VectorCache &cache) {
49431
49431
  cache.ResetFromCache(*this);
49432
49432
  }
49433
49433
 
49434
- void Vector::Slice(Vector &other, idx_t offset) {
49434
+ void Vector::Slice(Vector &other, idx_t offset, idx_t end) {
49435
49435
  if (other.GetVectorType() == VectorType::CONSTANT_VECTOR) {
49436
49436
  Reference(other);
49437
49437
  return;
@@ -49445,10 +49445,10 @@ void Vector::Slice(Vector &other, idx_t offset) {
49445
49445
  auto &other_entries = StructVector::GetEntries(other);
49446
49446
  D_ASSERT(entries.size() == other_entries.size());
49447
49447
  for (idx_t i = 0; i < entries.size(); i++) {
49448
- entries[i]->Slice(*other_entries[i], offset);
49448
+ entries[i]->Slice(*other_entries[i], offset, end);
49449
49449
  }
49450
49450
  if (offset > 0) {
49451
- new_vector.validity.Slice(other.validity, offset);
49451
+ new_vector.validity.Slice(other.validity, offset, end);
49452
49452
  } else {
49453
49453
  new_vector.validity = other.validity;
49454
49454
  }
@@ -49457,7 +49457,7 @@ void Vector::Slice(Vector &other, idx_t offset) {
49457
49457
  Reference(other);
49458
49458
  if (offset > 0) {
49459
49459
  data = data + GetTypeIdSize(internal_type) * offset;
49460
- validity.Slice(other.validity, offset);
49460
+ validity.Slice(other.validity, offset, end);
49461
49461
  }
49462
49462
  }
49463
49463
  }
@@ -64892,7 +64892,7 @@ public:
64892
64892
  using FrameBounds = std::pair<idx_t, idx_t>;
64893
64893
 
64894
64894
  WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info, const LogicalType &result_type,
64895
- ChunkCollection *input, const ValidityMask &filter_mask, WindowAggregationMode mode);
64895
+ DataChunk *input, const ValidityMask &filter_mask, WindowAggregationMode mode);
64896
64896
  ~WindowSegmentTree();
64897
64897
 
64898
64898
  //! First row contains the result.
@@ -64931,8 +64931,6 @@ private:
64931
64931
  Vector statep;
64932
64932
  //! The frame boundaries, used for the window functions
64933
64933
  FrameBounds frame;
64934
- //! The active data in the inputs. Used for the window functions
64935
- FrameBounds active;
64936
64934
  //! Reused result state container for the window functions
64937
64935
  Vector statev;
64938
64936
 
@@ -64945,7 +64943,7 @@ private:
64945
64943
  idx_t internal_nodes;
64946
64944
 
64947
64945
  //! The (sorted) input chunk collection on which the tree is built
64948
- ChunkCollection *input_ref;
64946
+ DataChunk *input_ref;
64949
64947
 
64950
64948
  //! The filtered rows in input_ref.
64951
64949
  const ValidityMask &filter_mask;
@@ -65785,21 +65783,23 @@ static bool WindowNeedsRank(BoundWindowExpression *wexpr) {
65785
65783
  }
65786
65784
 
65787
65785
  template <typename T>
65788
- static T GetCell(ChunkCollection &collection, idx_t column, idx_t index) {
65789
- D_ASSERT(collection.ColumnCount() > column);
65790
- auto &chunk = collection.GetChunkForRow(index);
65786
+ static T GetCell(DataChunk &chunk, idx_t column, idx_t index) {
65787
+ D_ASSERT(chunk.ColumnCount() > column);
65791
65788
  auto &source = chunk.data[column];
65792
- const auto source_offset = index % STANDARD_VECTOR_SIZE;
65793
65789
  const auto data = FlatVector::GetData<T>(source);
65794
- return data[source_offset];
65790
+ return data[index];
65795
65791
  }
65796
65792
 
65797
- static bool CellIsNull(ChunkCollection &collection, idx_t column, idx_t index) {
65798
- D_ASSERT(collection.ColumnCount() > column);
65799
- auto &chunk = collection.GetChunkForRow(index);
65793
+ static bool CellIsNull(DataChunk &chunk, idx_t column, idx_t index) {
65794
+ D_ASSERT(chunk.ColumnCount() > column);
65800
65795
  auto &source = chunk.data[column];
65801
- const auto source_offset = index % STANDARD_VECTOR_SIZE;
65802
- return FlatVector::IsNull(source, source_offset);
65796
+ return FlatVector::IsNull(source, index);
65797
+ }
65798
+
65799
+ static void CopyCell(DataChunk &chunk, idx_t column, idx_t index, Vector &target, idx_t target_offset) {
65800
+ D_ASSERT(chunk.ColumnCount() > column);
65801
+ auto &source = chunk.data[column];
65802
+ VectorOperations::Copy(source, target, index + 1, index, target_offset);
65803
65803
  }
65804
65804
 
65805
65805
  template <typename T>
@@ -66098,7 +66098,7 @@ struct WindowExecutor {
66098
66098
  uint64_t rank = 1;
66099
66099
 
66100
66100
  // Expression collections
66101
- ChunkCollection payload_collection;
66101
+ DataChunk payload_collection;
66102
66102
  ExpressionExecutor payload_executor;
66103
66103
  DataChunk payload_chunk;
66104
66104
 
@@ -66127,10 +66127,9 @@ struct WindowExecutor {
66127
66127
  };
66128
66128
 
66129
66129
  WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocator, const idx_t count)
66130
- : wexpr(wexpr), bounds(wexpr, count), payload_collection(allocator), payload_executor(allocator),
66131
- filter_executor(allocator), leadlag_offset(wexpr->offset_expr.get(), allocator),
66132
- leadlag_default(wexpr->default_expr.get(), allocator), boundary_start(wexpr->start_expr.get(), allocator),
66133
- boundary_end(wexpr->end_expr.get(), allocator),
66130
+ : wexpr(wexpr), bounds(wexpr, count), payload_collection(), payload_executor(allocator), filter_executor(allocator),
66131
+ leadlag_offset(wexpr->offset_expr.get(), allocator), leadlag_default(wexpr->default_expr.get(), allocator),
66132
+ boundary_start(wexpr->start_expr.get(), allocator), boundary_end(wexpr->end_expr.get(), allocator),
66134
66133
  range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr->orders[0].expression.get() : nullptr,
66135
66134
  allocator, count)
66136
66135
 
@@ -66154,6 +66153,11 @@ WindowExecutor::WindowExecutor(BoundWindowExpression *wexpr, Allocator &allocato
66154
66153
  exprs.push_back(child.get());
66155
66154
  }
66156
66155
  PrepareInputExpressions(exprs.data(), exprs.size(), payload_executor, payload_chunk);
66156
+
66157
+ auto types = payload_chunk.GetTypes();
66158
+ if (!types.empty()) {
66159
+ payload_collection.Initialize(allocator, types);
66160
+ }
66157
66161
  }
66158
66162
 
66159
66163
  void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
@@ -66182,7 +66186,7 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const i
66182
66186
  payload_chunk.Reset();
66183
66187
  payload_executor.Execute(input_chunk, payload_chunk);
66184
66188
  payload_chunk.Verify();
66185
- payload_collection.Append(payload_chunk);
66189
+ payload_collection.Append(payload_chunk, true);
66186
66190
 
66187
66191
  // process payload chunks while they are still piping hot
66188
66192
  if (check_nulls) {
@@ -66194,11 +66198,18 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const i
66194
66198
  ignore_nulls.Initialize(total_count);
66195
66199
  }
66196
66200
  // Write to the current position
66197
- // Chunks in a collection are full, so we don't have to worry about raggedness
66198
- auto dst = ignore_nulls.GetData() + ignore_nulls.EntryCount(input_idx);
66199
- auto src = vdata.validity.GetData();
66200
- for (auto entry_count = vdata.validity.EntryCount(count); entry_count-- > 0;) {
66201
- *dst++ = *src++;
66201
+ if (input_idx % ValidityMask::BITS_PER_VALUE == 0) {
66202
+ // If we are at the edge of an output entry, just copy the entries
66203
+ auto dst = ignore_nulls.GetData() + ignore_nulls.EntryCount(input_idx);
66204
+ auto src = vdata.validity.GetData();
66205
+ for (auto entry_count = vdata.validity.EntryCount(count); entry_count-- > 0;) {
66206
+ *dst++ = *src++;
66207
+ }
66208
+ } else {
66209
+ // If not, we have ragged data and need to copy one bit at a time.
66210
+ for (idx_t i = 0; i < count; ++i) {
66211
+ ignore_nulls.Set(input_idx + i, vdata.validity.RowIsValid(i));
66212
+ }
66202
66213
  }
66203
66214
  }
66204
66215
  }
@@ -66354,7 +66365,7 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
66354
66365
  // else offset is zero, so don't move.
66355
66366
 
66356
66367
  if (!delta) {
66357
- payload_collection.CopyCell(0, val_idx, result, output_offset);
66368
+ CopyCell(payload_collection, 0, val_idx, result, output_offset);
66358
66369
  } else if (wexpr->default_expr) {
66359
66370
  leadlag_default.CopyCell(result, output_offset);
66360
66371
  } else {
@@ -66365,13 +66376,13 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
66365
66376
  case ExpressionType::WINDOW_FIRST_VALUE: {
66366
66377
  idx_t n = 1;
66367
66378
  const auto first_idx = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
66368
- payload_collection.CopyCell(0, first_idx, result, output_offset);
66379
+ CopyCell(payload_collection, 0, first_idx, result, output_offset);
66369
66380
  break;
66370
66381
  }
66371
66382
  case ExpressionType::WINDOW_LAST_VALUE: {
66372
66383
  idx_t n = 1;
66373
- payload_collection.CopyCell(0, FindPrevStart(ignore_nulls, bounds.window_start, bounds.window_end, n),
66374
- result, output_offset);
66384
+ CopyCell(payload_collection, 0, FindPrevStart(ignore_nulls, bounds.window_start, bounds.window_end, n),
66385
+ result, output_offset);
66375
66386
  break;
66376
66387
  }
66377
66388
  case ExpressionType::WINDOW_NTH_VALUE: {
@@ -66388,7 +66399,7 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
66388
66399
  auto n = idx_t(n_param);
66389
66400
  const auto nth_index = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
66390
66401
  if (!n) {
66391
- payload_collection.CopyCell(0, nth_index, result, output_offset);
66402
+ CopyCell(payload_collection, 0, nth_index, result, output_offset);
66392
66403
  } else {
66393
66404
  FlatVector::SetNull(result, output_offset, true);
66394
66405
  }
@@ -86704,27 +86715,26 @@ void BaseReservoirSampling::ReplaceElement() {
86704
86715
  namespace duckdb {
86705
86716
 
86706
86717
  WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData *bind_info,
86707
- const LogicalType &result_type_p, ChunkCollection *input,
86718
+ const LogicalType &result_type_p, DataChunk *input,
86708
86719
  const ValidityMask &filter_mask_p, WindowAggregationMode mode_p)
86709
86720
  : aggregate(aggregate), bind_info(bind_info), result_type(result_type_p), state(aggregate.state_size()),
86710
- statep(Value::POINTER((idx_t)state.data())), frame(0, 0), active(0, 1),
86711
- statev(Value::POINTER((idx_t)state.data())), internal_nodes(0), input_ref(input), filter_mask(filter_mask_p),
86712
- mode(mode_p) {
86713
- #if STANDARD_VECTOR_SIZE < 512
86714
- throw NotImplementedException("Window functions are not supported for vector sizes < 512");
86715
- #endif
86716
- statep.Flatten(STANDARD_VECTOR_SIZE);
86721
+ statep(Value::POINTER((idx_t)state.data())), frame(0, 0), statev(Value::POINTER((idx_t)state.data())),
86722
+ internal_nodes(0), input_ref(input), filter_mask(filter_mask_p), mode(mode_p) {
86723
+ statep.Flatten(input->size());
86717
86724
  statev.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
86718
86725
 
86719
86726
  if (input_ref && input_ref->ColumnCount() > 0) {
86720
- filter_sel.Initialize(STANDARD_VECTOR_SIZE);
86721
- inputs.Initialize(Allocator::DefaultAllocator(), input_ref->Types());
86727
+ filter_sel.Initialize(input->size());
86728
+ inputs.Initialize(Allocator::DefaultAllocator(), input_ref->GetTypes());
86722
86729
  // if we have a frame-by-frame method, share the single state
86723
86730
  if (aggregate.window && UseWindowAPI()) {
86724
86731
  AggregateInit();
86725
- inputs.Reference(input_ref->GetChunk(0));
86726
- } else if (aggregate.combine && UseCombineAPI()) {
86727
- ConstructTree();
86732
+ inputs.Reference(*input_ref);
86733
+ } else {
86734
+ inputs.SetCapacity(*input_ref);
86735
+ if (aggregate.combine && UseCombineAPI()) {
86736
+ ConstructTree();
86737
+ }
86728
86738
  }
86729
86739
  }
86730
86740
  }
@@ -86769,35 +86779,15 @@ void WindowSegmentTree::AggegateFinal(Vector &result, idx_t rid) {
86769
86779
 
86770
86780
  void WindowSegmentTree::ExtractFrame(idx_t begin, idx_t end) {
86771
86781
  const auto size = end - begin;
86772
- if (size >= STANDARD_VECTOR_SIZE) {
86773
- throw InternalException("Cannot compute window aggregation: bounds are too large");
86774
- }
86775
86782
 
86776
- const idx_t start_in_vector = begin % STANDARD_VECTOR_SIZE;
86783
+ auto &chunk = *input_ref;
86777
86784
  const auto input_count = input_ref->ColumnCount();
86778
- if (start_in_vector + size <= STANDARD_VECTOR_SIZE) {
86779
- inputs.SetCardinality(size);
86780
- auto &chunk = input_ref->GetChunkForRow(begin);
86781
- for (idx_t i = 0; i < input_count; ++i) {
86782
- auto &v = inputs.data[i];
86783
- auto &vec = chunk.data[i];
86784
- v.Slice(vec, start_in_vector);
86785
- v.Verify(size);
86786
- }
86787
- } else {
86788
- inputs.Reset();
86789
- inputs.SetCardinality(size);
86790
-
86791
- // we cannot just slice the individual vector!
86792
- auto &chunk_a = input_ref->GetChunkForRow(begin);
86793
- auto &chunk_b = input_ref->GetChunkForRow(end);
86794
- idx_t chunk_a_count = chunk_a.size() - start_in_vector;
86795
- idx_t chunk_b_count = inputs.size() - chunk_a_count;
86796
- for (idx_t i = 0; i < input_count; ++i) {
86797
- auto &v = inputs.data[i];
86798
- VectorOperations::Copy(chunk_a.data[i], v, chunk_a.size(), start_in_vector, 0);
86799
- VectorOperations::Copy(chunk_b.data[i], v, chunk_b_count, 0, chunk_a_count);
86800
- }
86785
+ inputs.SetCardinality(size);
86786
+ for (idx_t i = 0; i < input_count; ++i) {
86787
+ auto &v = inputs.data[i];
86788
+ auto &vec = chunk.data[i];
86789
+ v.Slice(vec, begin, end);
86790
+ v.Verify(size);
86801
86791
  }
86802
86792
 
86803
86793
  // Slice to any filtered rows
@@ -86820,29 +86810,24 @@ void WindowSegmentTree::WindowSegmentValue(idx_t l_idx, idx_t begin, idx_t end)
86820
86810
  return;
86821
86811
  }
86822
86812
 
86823
- if (end - begin >= STANDARD_VECTOR_SIZE) {
86824
- throw InternalException("Cannot compute window aggregation: bounds are too large");
86825
- }
86826
-
86827
- Vector s(statep, 0);
86813
+ const auto count = end - begin;
86814
+ Vector s(statep, 0, count);
86828
86815
  if (l_idx == 0) {
86829
86816
  ExtractFrame(begin, end);
86830
86817
  AggregateInputData aggr_input_data(bind_info, Allocator::DefaultAllocator());
86831
86818
  aggregate.update(&inputs.data[0], aggr_input_data, input_ref->ColumnCount(), s, inputs.size());
86832
86819
  } else {
86833
- inputs.Reset();
86834
- inputs.SetCardinality(end - begin);
86835
86820
  // find out where the states begin
86836
86821
  data_ptr_t begin_ptr = levels_flat_native.get() + state.size() * (begin + levels_flat_start[l_idx - 1]);
86837
86822
  // set up a vector of pointers that point towards the set of states
86838
- Vector v(LogicalType::POINTER);
86823
+ Vector v(LogicalType::POINTER, count);
86839
86824
  auto pdata = FlatVector::GetData<data_ptr_t>(v);
86840
- for (idx_t i = 0; i < inputs.size(); i++) {
86825
+ for (idx_t i = 0; i < count; i++) {
86841
86826
  pdata[i] = begin_ptr + i * state.size();
86842
86827
  }
86843
- v.Verify(inputs.size());
86828
+ v.Verify(count);
86844
86829
  AggregateInputData aggr_input_data(bind_info, Allocator::DefaultAllocator());
86845
- aggregate.combine(v, s, aggr_input_data, inputs.size());
86830
+ aggregate.combine(v, s, aggr_input_data, count);
86846
86831
  }
86847
86832
  }
86848
86833
 
@@ -86852,7 +86837,7 @@ void WindowSegmentTree::ConstructTree() {
86852
86837
 
86853
86838
  // compute space required to store internal nodes of segment tree
86854
86839
  internal_nodes = 0;
86855
- idx_t level_nodes = input_ref->Count();
86840
+ idx_t level_nodes = input_ref->size();
86856
86841
  do {
86857
86842
  level_nodes = (level_nodes + (TREE_FANOUT - 1)) / TREE_FANOUT;
86858
86843
  internal_nodes += level_nodes;
@@ -86865,7 +86850,7 @@ void WindowSegmentTree::ConstructTree() {
86865
86850
  // level 0 is data itself
86866
86851
  idx_t level_size;
86867
86852
  // iterate over the levels of the segment tree
86868
- while ((level_size = (level_current == 0 ? input_ref->Count()
86853
+ while ((level_size = (level_current == 0 ? input_ref->size()
86869
86854
  : levels_flat_offset - levels_flat_start[level_current - 1])) > 1) {
86870
86855
  for (idx_t pos = 0; pos < level_size; pos += TREE_FANOUT) {
86871
86856
  // compute the aggregate for this entry in the segment tree
@@ -86914,39 +86899,9 @@ void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t en
86914
86899
  frame = FrameBounds(begin, end);
86915
86900
 
86916
86901
  // Extract the range
86917
- auto &coll = *input_ref;
86918
- const auto prev_active = active;
86919
- const FrameBounds combined(MinValue(frame.first, prev.first), MaxValue(frame.second, prev.second));
86920
-
86921
- // The chunk bounds are the range that includes the begin and end - 1
86922
- const FrameBounds prev_chunks(coll.LocateChunk(prev_active.first), coll.LocateChunk(prev_active.second - 1));
86923
- const FrameBounds active_chunks(coll.LocateChunk(combined.first), coll.LocateChunk(combined.second - 1));
86924
-
86925
- // Extract the range
86926
- if (active_chunks.first == active_chunks.second) {
86927
- // If all the data is in a single chunk, then just reference it
86928
- if (prev_chunks != active_chunks || (!prev.first && !prev.second)) {
86929
- inputs.Reference(coll.GetChunk(active_chunks.first));
86930
- }
86931
- } else if (active_chunks.first == prev_chunks.first && prev_chunks.first != prev_chunks.second) {
86932
- // If the start chunk did not change, and we are not just a reference, then extend if necessary
86933
- for (auto chunk_idx = prev_chunks.second + 1; chunk_idx <= active_chunks.second; ++chunk_idx) {
86934
- inputs.Append(coll.GetChunk(chunk_idx), true);
86935
- }
86936
- } else {
86937
- // If the first chunk changed, start over
86938
- inputs.Reset();
86939
- for (auto chunk_idx = active_chunks.first; chunk_idx <= active_chunks.second; ++chunk_idx) {
86940
- inputs.Append(coll.GetChunk(chunk_idx), true);
86941
- }
86942
- }
86943
-
86944
- active = FrameBounds(active_chunks.first * STANDARD_VECTOR_SIZE,
86945
- MinValue((active_chunks.second + 1) * STANDARD_VECTOR_SIZE, coll.Count()));
86946
-
86947
86902
  AggregateInputData aggr_input_data(bind_info, Allocator::DefaultAllocator());
86948
- aggregate.window(inputs.data.data(), filter_mask, aggr_input_data, inputs.ColumnCount(), state.data(), frame,
86949
- prev, result, rid, active.first);
86903
+ aggregate.window(input_ref->data.data(), filter_mask, aggr_input_data, inputs.ColumnCount(), state.data(),
86904
+ frame, prev, result, rid, 0);
86950
86905
  return;
86951
86906
  }
86952
86907
 
@@ -87856,13 +87811,27 @@ void BuiltinFunctions::RegisterAlgebraicAggregates() {
87856
87811
  namespace duckdb {
87857
87812
 
87858
87813
  struct ApproxDistinctCountState {
87814
+ ApproxDistinctCountState() : log(nullptr) {
87815
+ }
87816
+ ~ApproxDistinctCountState() {
87817
+ if (log) {
87818
+ delete log;
87819
+ }
87820
+ }
87821
+ void Resize(idx_t count) {
87822
+ indices.resize(count);
87823
+ counts.resize(count);
87824
+ }
87825
+
87859
87826
  HyperLogLog *log;
87827
+ vector<uint64_t> indices;
87828
+ vector<uint8_t> counts;
87860
87829
  };
87861
87830
 
87862
87831
  struct ApproxCountDistinctFunction {
87863
87832
  template <class STATE>
87864
87833
  static void Initialize(STATE *state) {
87865
- state->log = nullptr;
87834
+ new (state) STATE;
87866
87835
  }
87867
87836
 
87868
87837
  template <class STATE, class OP>
@@ -87894,9 +87863,7 @@ struct ApproxCountDistinctFunction {
87894
87863
  }
87895
87864
  template <class STATE>
87896
87865
  static void Destroy(STATE *state) {
87897
- if (state->log) {
87898
- delete state->log;
87899
- }
87866
+ state->~STATE();
87900
87867
  }
87901
87868
  };
87902
87869
 
@@ -87912,8 +87879,9 @@ static void ApproxCountDistinctSimpleUpdateFunction(Vector inputs[], AggregateIn
87912
87879
  UnifiedVectorFormat vdata;
87913
87880
  inputs[0].ToUnifiedFormat(count, vdata);
87914
87881
 
87915
- uint64_t indices[STANDARD_VECTOR_SIZE];
87916
- uint8_t counts[STANDARD_VECTOR_SIZE];
87882
+ agg_state->Resize(count);
87883
+ auto indices = agg_state->indices.data();
87884
+ auto counts = agg_state->counts.data();
87917
87885
 
87918
87886
  HyperLogLog::ProcessEntries(vdata, inputs[0].GetType(), indices, counts, count);
87919
87887
  agg_state->log->AddToLog(vdata, count, indices, counts);
@@ -87927,19 +87895,23 @@ static void ApproxCountDistinctUpdateFunction(Vector inputs[], AggregateInputDat
87927
87895
  state_vector.ToUnifiedFormat(count, sdata);
87928
87896
  auto states = (ApproxDistinctCountState **)sdata.data;
87929
87897
 
87898
+ uint64_t *indices;
87899
+ uint8_t *counts;
87930
87900
  for (idx_t i = 0; i < count; i++) {
87931
87901
  auto agg_state = states[sdata.sel->get_index(i)];
87932
87902
  if (!agg_state->log) {
87933
87903
  agg_state->log = new HyperLogLog();
87934
87904
  }
87905
+ if (i == 0) {
87906
+ agg_state->Resize(count);
87907
+ indices = agg_state->indices.data();
87908
+ counts = agg_state->counts.data();
87909
+ }
87935
87910
  }
87936
87911
 
87937
87912
  UnifiedVectorFormat vdata;
87938
87913
  inputs[0].ToUnifiedFormat(count, vdata);
87939
87914
 
87940
- uint64_t indices[STANDARD_VECTOR_SIZE];
87941
- uint8_t counts[STANDARD_VECTOR_SIZE];
87942
-
87943
87915
  HyperLogLog::ProcessEntries(vdata, inputs[0].GetType(), indices, counts, count);
87944
87916
  HyperLogLog::AddToLogs(vdata, count, indices, counts, (HyperLogLog ***)states, sdata.sel);
87945
87917
  }
@@ -210859,7 +210831,7 @@ void RowGroup::Update(TransactionData transaction, DataChunk &update_chunk, row_
210859
210831
  D_ASSERT(column != COLUMN_IDENTIFIER_ROW_ID);
210860
210832
  D_ASSERT(columns[column]->type.id() == update_chunk.data[i].GetType().id());
210861
210833
  if (offset > 0) {
210862
- Vector sliced_vector(update_chunk.data[i], offset);
210834
+ Vector sliced_vector(update_chunk.data[i], offset, offset + count);
210863
210835
  sliced_vector.Flatten(count);
210864
210836
  columns[column]->Update(transaction, column, sliced_vector, ids + offset, count);
210865
210837
  } else {
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "8ecd292ba"
15
- #define DUCKDB_VERSION "v0.5.2-dev1131"
14
+ #define DUCKDB_SOURCE_ID "0e999f15d"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1144"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -2857,7 +2857,7 @@ public:
2857
2857
  public:
2858
2858
  DUCKDB_API void Resize(idx_t old_size, idx_t new_size);
2859
2859
 
2860
- DUCKDB_API void Slice(const ValidityMask &other, idx_t offset);
2860
+ DUCKDB_API void Slice(const ValidityMask &other, idx_t offset, idx_t end);
2861
2861
  DUCKDB_API void Combine(const ValidityMask &other, idx_t count);
2862
2862
  DUCKDB_API string ToString(idx_t count) const;
2863
2863
  };
@@ -4188,11 +4188,11 @@ class Vector {
4188
4188
 
4189
4189
  public:
4190
4190
  //! Create a vector that references the other vector
4191
- DUCKDB_API explicit Vector(Vector &other);
4191
+ DUCKDB_API Vector(Vector &other);
4192
4192
  //! Create a vector that slices another vector
4193
4193
  DUCKDB_API explicit Vector(Vector &other, const SelectionVector &sel, idx_t count);
4194
- //! Create a vector that slices another vector starting from a specific offset
4195
- DUCKDB_API explicit Vector(Vector &other, idx_t offset);
4194
+ //! Create a vector that slices another vector between a pair of offsets
4195
+ DUCKDB_API explicit Vector(Vector &other, idx_t offset, idx_t end);
4196
4196
  //! Create a vector of size one holding the passed on value
4197
4197
  DUCKDB_API explicit Vector(const Value &value);
4198
4198
  //! Create a vector of size tuple_count (non-standard)
@@ -4233,7 +4233,7 @@ public:
4233
4233
  DUCKDB_API void ResetFromCache(const VectorCache &cache);
4234
4234
 
4235
4235
  //! Creates a reference to a slice of the other vector
4236
- DUCKDB_API void Slice(Vector &other, idx_t offset);
4236
+ DUCKDB_API void Slice(Vector &other, idx_t offset, idx_t end);
4237
4237
  //! Creates a reference to a slice of the other vector
4238
4238
  DUCKDB_API void Slice(Vector &other, const SelectionVector &sel, idx_t count);
4239
4239
  //! Turns the vector into a dictionary vector with the specified dictionary
@@ -4340,7 +4340,8 @@ protected:
4340
4340
  //! The DictionaryBuffer holds a selection vector
4341
4341
  class VectorChildBuffer : public VectorBuffer {
4342
4342
  public:
4343
- VectorChildBuffer(Vector vector) : VectorBuffer(VectorBufferType::VECTOR_CHILD_BUFFER), data(move(vector)) {
4343
+ explicit VectorChildBuffer(Vector vector)
4344
+ : VectorBuffer(VectorBufferType::VECTOR_CHILD_BUFFER), data(move(vector)) {
4344
4345
  }
4345
4346
 
4346
4347
  public: