duckdb 0.4.1-dev1777.0 → 0.4.1-dev1784.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.4.1-dev1777.0",
4
+ "version": "0.4.1-dev1784.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -31367,6 +31367,9 @@ struct RowOperations {
31367
31367
  //! Swizzles the base pointer of each row to offset within heap block
31368
31368
  static void SwizzleHeapPointer(const RowLayout &layout, data_ptr_t row_ptr, const data_ptr_t heap_base_ptr,
31369
31369
  const idx_t count, const idx_t base_offset = 0);
31370
+ //! Unswizzles the base offset of each row within the heap block back to a pointer
31371
+ static void UnswizzleHeapPointer(const RowLayout &layout, const data_ptr_t base_row_ptr,
31372
+ const data_ptr_t base_heap_ptr, const idx_t count);
31370
31373
  //! Unswizzles all offsets back to pointers
31371
31374
  static void UnswizzlePointers(const RowLayout &layout, const data_ptr_t base_row_ptr,
31372
31375
  const data_ptr_t base_heap_ptr, const idx_t count);
@@ -31693,6 +31696,16 @@ void RowOperations::SwizzleHeapPointer(const RowLayout &layout, data_ptr_t row_p
31693
31696
  }
31694
31697
  }
31695
31698
 
31699
+ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr_t base_row_ptr,
31700
+ const data_ptr_t base_heap_ptr, const idx_t count) {
31701
+ const auto row_width = layout.GetRowWidth();
31702
+ data_ptr_t heap_ptr_ptr = base_row_ptr + layout.GetHeapPointerOffset();
31703
+ for (idx_t i = 0; i < count; i++) {
31704
+ Store<data_ptr_t>(base_heap_ptr + Load<idx_t>(heap_ptr_ptr), heap_ptr_ptr);
31705
+ heap_ptr_ptr += row_width;
31706
+ }
31707
+ }
31708
+
31696
31709
  void RowOperations::UnswizzlePointers(const RowLayout &layout, const data_ptr_t base_row_ptr,
31697
31710
  const data_ptr_t base_heap_ptr, const idx_t count) {
31698
31711
  const idx_t row_width = layout.GetRowWidth();
@@ -31861,6 +31874,8 @@ public:
31861
31874
  vector<RowDataBlock> blocks;
31862
31875
  //! The blocks that this collection currently has pinned
31863
31876
  vector<BufferHandle> pinned_blocks;
31877
+ //! Whether the blocks should stay pinned (necessary for e.g. a heap)
31878
+ const bool keep_pinned;
31864
31879
 
31865
31880
  public:
31866
31881
  idx_t AppendToBlock(RowDataBlock &block, BufferHandle &handle, vector<BlockAppendEntry> &append_entries,
@@ -31891,9 +31906,6 @@ public:
31891
31906
 
31892
31907
  private:
31893
31908
  mutex rdc_lock;
31894
-
31895
- //! Whether the blocks should stay pinned (necessary for e.g. a heap)
31896
- const bool keep_pinned;
31897
31909
  };
31898
31910
 
31899
31911
  } // namespace duckdb
@@ -34638,14 +34650,17 @@ private:
34638
34650
  SBScanState read_state;
34639
34651
  //! The total count of sorted_data
34640
34652
  const idx_t total_count;
34641
- //! The global sort state
34642
- GlobalSortState &global_sort_state;
34643
34653
  //! Addresses used to gather from the sorted data
34644
34654
  Vector addresses = Vector(LogicalType::POINTER);
34645
34655
  //! The number of rows scanned so far
34646
34656
  idx_t total_scanned;
34647
34657
  //! Whether to flush the blocks after scanning
34648
34658
  const bool flush;
34659
+ //! Whether we are unswizzling the blocks
34660
+ const bool unswizzling;
34661
+
34662
+ //! Checks that the newest block is valid
34663
+ void ValidateUnscannedBlock() const;
34649
34664
  };
34650
34665
 
34651
34666
  struct SBIterator {
@@ -36453,6 +36468,7 @@ void LocalSortState::SinkChunk(DataChunk &sort, DataChunk &payload) {
36453
36468
  auto blob_data = blob_chunk.ToUnifiedFormat();
36454
36469
  RowOperations::Scatter(blob_chunk, blob_data.get(), sort_layout->blob_layout, addresses, *blob_sorting_heap,
36455
36470
  sel_ptr, blob_chunk.size());
36471
+ D_ASSERT(blob_sorting_heap->keep_pinned);
36456
36472
  }
36457
36473
 
36458
36474
  // Finally, serialize payload data
@@ -36460,6 +36476,7 @@ void LocalSortState::SinkChunk(DataChunk &sort, DataChunk &payload) {
36460
36476
  auto input_data = payload.ToUnifiedFormat();
36461
36477
  RowOperations::Scatter(payload, input_data.get(), *payload_layout, addresses, *payload_heap, sel_ptr,
36462
36478
  payload.size());
36479
+ D_ASSERT(payload_heap->keep_pinned);
36463
36480
  }
36464
36481
 
36465
36482
  idx_t LocalSortState::SizeInBytes() const {
@@ -36548,6 +36565,7 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
36548
36565
  ordered_data_ptr += row_width;
36549
36566
  sorting_ptr += sorting_entry_size;
36550
36567
  }
36568
+ ordered_data_block.block->SetSwizzling(sd.layout.AllConstant() ? nullptr : "LocalSortState::ReOrder.ordered_data");
36551
36569
  // Replace the unordered data block with the re-ordered data block
36552
36570
  sd.data_blocks.clear();
36553
36571
  sd.data_blocks.push_back(move(ordered_data_block));
@@ -36555,6 +36573,7 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
36555
36573
  if (!sd.layout.AllConstant() && reorder_heap) {
36556
36574
  // Swizzle the column pointers to offsets
36557
36575
  RowOperations::SwizzleColumns(sd.layout, ordered_data_handle.Ptr(), count);
36576
+ sd.data_blocks.back().block->SetSwizzling(nullptr);
36558
36577
  // Create a single heap block to store the ordered heap
36559
36578
  idx_t total_byte_offset = std::accumulate(heap.blocks.begin(), heap.blocks.end(), 0,
36560
36579
  [](idx_t a, const RowDataBlock &b) { return a + b.byte_offset; });
@@ -36785,9 +36804,11 @@ void SortedData::Unswizzle() {
36785
36804
  for (idx_t i = 0; i < data_blocks.size(); i++) {
36786
36805
  auto &data_block = data_blocks[i];
36787
36806
  auto &heap_block = heap_blocks[i];
36807
+ D_ASSERT(data_block.block->IsSwizzled());
36788
36808
  auto data_handle_p = buffer_manager.Pin(data_block.block);
36789
36809
  auto heap_handle_p = buffer_manager.Pin(heap_block.block);
36790
36810
  RowOperations::UnswizzlePointers(layout, data_handle_p.Ptr(), heap_handle_p.Ptr(), data_block.count);
36811
+ data_block.block->SetSwizzling("SortedData::Unswizzle");
36791
36812
  state.heap_blocks.push_back(move(heap_block));
36792
36813
  state.pinned_blocks.push_back(move(heap_handle_p));
36793
36814
  }
@@ -37003,7 +37024,9 @@ void SBScanState::SetIndices(idx_t block_idx_to, idx_t entry_idx_to) {
37003
37024
 
37004
37025
  PayloadScanner::PayloadScanner(SortedData &sorted_data, GlobalSortState &global_sort_state, bool flush_p)
37005
37026
  : sorted_data(sorted_data), read_state(global_sort_state.buffer_manager, global_sort_state),
37006
- total_count(sorted_data.Count()), global_sort_state(global_sort_state), total_scanned(0), flush(flush_p) {
37027
+ total_count(sorted_data.Count()), total_scanned(0), flush(flush_p),
37028
+ unswizzling(!sorted_data.layout.AllConstant() && global_sort_state.external) {
37029
+ ValidateUnscannedBlock();
37007
37030
  }
37008
37031
 
37009
37032
  PayloadScanner::PayloadScanner(GlobalSortState &global_sort_state, bool flush_p)
@@ -37013,9 +37036,16 @@ PayloadScanner::PayloadScanner(GlobalSortState &global_sort_state, bool flush_p)
37013
37036
  PayloadScanner::PayloadScanner(GlobalSortState &global_sort_state, idx_t block_idx)
37014
37037
  : sorted_data(*global_sort_state.sorted_blocks[0]->payload_data),
37015
37038
  read_state(global_sort_state.buffer_manager, global_sort_state),
37016
- total_count(sorted_data.data_blocks[block_idx].count), global_sort_state(global_sort_state), total_scanned(0),
37017
- flush(false) {
37039
+ total_count(sorted_data.data_blocks[block_idx].count), total_scanned(0), flush(false),
37040
+ unswizzling(!sorted_data.layout.AllConstant() && global_sort_state.external) {
37018
37041
  read_state.SetIndices(block_idx, 0);
37042
+ ValidateUnscannedBlock();
37043
+ }
37044
+
37045
+ void PayloadScanner::ValidateUnscannedBlock() const {
37046
+ if (unswizzling && read_state.block_idx < sorted_data.data_blocks.size()) {
37047
+ D_ASSERT(sorted_data.data_blocks[read_state.block_idx].block->IsSwizzled());
37048
+ }
37019
37049
  }
37020
37050
 
37021
37051
  void PayloadScanner::Scan(DataChunk &chunk) {
@@ -37028,6 +37058,9 @@ void PayloadScanner::Scan(DataChunk &chunk) {
37028
37058
  if (flush) {
37029
37059
  for (idx_t i = 0; i < read_state.block_idx; i++) {
37030
37060
  sorted_data.data_blocks[i].block = nullptr;
37061
+ if (unswizzling) {
37062
+ sorted_data.heap_blocks[i].block = nullptr;
37063
+ }
37031
37064
  }
37032
37065
  }
37033
37066
  const idx_t &row_width = sorted_data.layout.GetRowWidth();
@@ -37046,14 +37079,16 @@ void PayloadScanner::Scan(DataChunk &chunk) {
37046
37079
  row_ptr += row_width;
37047
37080
  }
37048
37081
  // Unswizzle the offsets back to pointers (if needed)
37049
- if (!sorted_data.layout.AllConstant() && global_sort_state.external) {
37082
+ if (unswizzling) {
37050
37083
  RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle.Ptr(), next);
37084
+ sorted_data.data_blocks[read_state.block_idx].block->SetSwizzling("PayloadScanner::Scan");
37051
37085
  }
37052
37086
  // Update state indices
37053
37087
  read_state.entry_idx += next;
37054
37088
  if (read_state.entry_idx == data_block.count) {
37055
37089
  read_state.block_idx++;
37056
37090
  read_state.entry_idx = 0;
37091
+ ValidateUnscannedBlock();
37057
37092
  }
37058
37093
  scanned += next;
37059
37094
  }
@@ -44492,6 +44527,7 @@ namespace duckdb {
44492
44527
 
44493
44528
  class BufferHandle;
44494
44529
  class RowDataCollection;
44530
+ struct RowDataBlock;
44495
44531
  class DataChunk;
44496
44532
 
44497
44533
  //! Used to scan the data into DataChunks after sorting
@@ -44513,6 +44549,9 @@ public:
44513
44549
 
44514
44550
  BufferHandle data_handle;
44515
44551
  BufferHandle heap_handle;
44552
+
44553
+ // We must pin ALL blocks we are going to gather from
44554
+ vector<BufferHandle> pinned_blocks;
44516
44555
  };
44517
44556
 
44518
44557
  //! Ensure that heap blocks correspond to row blocks
@@ -44543,6 +44582,8 @@ public:
44543
44582
  //! we need to re-swizzle.
44544
44583
  void ReSwizzle();
44545
44584
 
44585
+ void SwizzleBlock(RowDataBlock &data_block, RowDataBlock &heap_block);
44586
+
44546
44587
  //! Scans the next data chunk from the sorted data
44547
44588
  void Scan(DataChunk &chunk);
44548
44589
 
@@ -44565,6 +44606,11 @@ private:
44565
44606
  const bool external;
44566
44607
  //! Whether to flush the blocks after scanning
44567
44608
  const bool flush;
44609
+ //! Whether we are unswizzling the blocks
44610
+ const bool unswizzling;
44611
+
44612
+ //! Checks that the newest block is valid
44613
+ void ValidateUnscannedBlock() const;
44568
44614
  };
44569
44615
 
44570
44616
  } // namespace duckdb
@@ -44584,21 +44630,19 @@ void RowDataCollectionScanner::AlignHeapBlocks(RowDataCollection &swizzled_block
44584
44630
  return;
44585
44631
  }
44586
44632
 
44587
- // The main data blocks can just be moved
44588
- swizzled_block_collection.Merge(block_collection);
44589
- block_collection.Clear();
44590
-
44591
44633
  if (layout.AllConstant()) {
44592
- // No heap blocks!
44634
+ // No heap blocks! Just merge fixed-size data
44635
+ swizzled_block_collection.Merge(block_collection);
44593
44636
  return;
44594
44637
  }
44595
44638
 
44596
44639
  // We create one heap block per data block and swizzle the pointers
44597
- auto &buffer_manager = swizzled_block_collection.buffer_manager;
44640
+ D_ASSERT(string_heap.keep_pinned == swizzled_string_heap.keep_pinned);
44641
+ auto &buffer_manager = block_collection.buffer_manager;
44598
44642
  auto &heap_blocks = string_heap.blocks;
44599
44643
  idx_t heap_block_idx = 0;
44600
44644
  idx_t heap_block_remaining = heap_blocks[heap_block_idx].count;
44601
- for (auto &data_block : swizzled_block_collection.blocks) {
44645
+ for (auto &data_block : block_collection.blocks) {
44602
44646
  if (heap_block_remaining == 0) {
44603
44647
  heap_block_remaining = heap_blocks[++heap_block_idx].count;
44604
44648
  }
@@ -44606,27 +44650,44 @@ void RowDataCollectionScanner::AlignHeapBlocks(RowDataCollection &swizzled_block
44606
44650
  // Pin the data block and swizzle the pointers within the rows
44607
44651
  auto data_handle = buffer_manager.Pin(data_block.block);
44608
44652
  auto data_ptr = data_handle.Ptr();
44609
- RowOperations::SwizzleColumns(layout, data_ptr, data_block.count);
44653
+ if (!string_heap.keep_pinned) {
44654
+ D_ASSERT(!data_block.block->IsSwizzled());
44655
+ RowOperations::SwizzleColumns(layout, data_ptr, data_block.count);
44656
+ data_block.block->SetSwizzling(nullptr);
44657
+ }
44658
+ // At this point the data block is pinned and the heap pointer is valid
44659
+ // so we can copy heap data as needed
44610
44660
 
44611
44661
  // We want to copy as little of the heap data as possible, check how the data and heap blocks line up
44612
44662
  if (heap_block_remaining >= data_block.count) {
44613
44663
  // Easy: current heap block contains all strings for this data block, just copy (reference) the block
44614
44664
  swizzled_string_heap.blocks.emplace_back(RowDataBlock(heap_blocks[heap_block_idx]));
44615
- swizzled_string_heap.blocks.back().count = 0;
44616
-
44617
- // Swizzle the heap pointer
44618
- auto heap_handle = buffer_manager.Pin(swizzled_string_heap.blocks.back().block);
44619
- auto heap_ptr = Load<data_ptr_t>(data_ptr + layout.GetHeapPointerOffset());
44620
- auto heap_offset = heap_ptr - heap_handle.Ptr();
44621
- RowOperations::SwizzleHeapPointer(layout, data_ptr, heap_ptr, data_block.count, heap_offset);
44665
+ swizzled_string_heap.blocks.back().count = data_block.count;
44666
+
44667
+ // Swizzle the heap pointer if we are not pinning the heap
44668
+ auto &heap_block = swizzled_string_heap.blocks.back().block;
44669
+ auto heap_handle = buffer_manager.Pin(heap_block);
44670
+ if (!swizzled_string_heap.keep_pinned) {
44671
+ auto heap_ptr = Load<data_ptr_t>(data_ptr + layout.GetHeapPointerOffset());
44672
+ auto heap_offset = heap_ptr - heap_handle.Ptr();
44673
+ RowOperations::SwizzleHeapPointer(layout, data_ptr, heap_ptr, data_block.count, heap_offset);
44674
+ } else {
44675
+ swizzled_string_heap.pinned_blocks.emplace_back(move(heap_handle));
44676
+ }
44622
44677
 
44623
44678
  // Update counter
44624
44679
  heap_block_remaining -= data_block.count;
44625
44680
  } else {
44626
44681
  // Strings for this data block are spread over the current heap block and the next (and possibly more)
44682
+ if (string_heap.keep_pinned) {
44683
+ // The heap is changing underneath the data block,
44684
+ // so swizzle the string pointers to make them portable.
44685
+ RowOperations::SwizzleColumns(layout, data_ptr, data_block.count);
44686
+ }
44627
44687
  idx_t data_block_remaining = data_block.count;
44628
44688
  vector<std::pair<data_ptr_t, idx_t>> ptrs_and_sizes;
44629
44689
  idx_t total_size = 0;
44690
+ const auto base_row_ptr = data_ptr;
44630
44691
  while (data_block_remaining > 0) {
44631
44692
  if (heap_block_remaining == 0) {
44632
44693
  heap_block_remaining = heap_blocks[++heap_block_idx].count;
@@ -44656,12 +44717,21 @@ void RowDataCollectionScanner::AlignHeapBlocks(RowDataCollection &swizzled_block
44656
44717
  RowDataBlock(buffer_manager, MaxValue<idx_t>(total_size, (idx_t)Storage::BLOCK_SIZE), 1));
44657
44718
  auto new_heap_handle = buffer_manager.Pin(swizzled_string_heap.blocks.back().block);
44658
44719
  auto new_heap_ptr = new_heap_handle.Ptr();
44720
+ if (swizzled_string_heap.keep_pinned) {
44721
+ // Since the heap blocks are pinned, we can unswizzle the data again.
44722
+ swizzled_string_heap.pinned_blocks.emplace_back(move(new_heap_handle));
44723
+ RowOperations::UnswizzlePointers(layout, base_row_ptr, new_heap_ptr, data_block.count);
44724
+ RowOperations::UnswizzleHeapPointer(layout, base_row_ptr, new_heap_ptr, data_block.count);
44725
+ }
44659
44726
  for (auto &ptr_and_size : ptrs_and_sizes) {
44660
44727
  memcpy(new_heap_ptr, ptr_and_size.first, ptr_and_size.second);
44661
44728
  new_heap_ptr += ptr_and_size.second;
44662
44729
  }
44663
44730
  }
44664
44731
  }
44732
+
44733
+ // We're done with variable-sized data, now just merge the fixed-size data
44734
+ swizzled_block_collection.Merge(block_collection);
44665
44735
  D_ASSERT(swizzled_block_collection.blocks.size() == swizzled_string_heap.blocks.size());
44666
44736
 
44667
44737
  // Update counts and cleanup
@@ -44691,11 +44761,28 @@ void RowDataCollectionScanner::ScanState::PinData() {
44691
44761
  RowDataCollectionScanner::RowDataCollectionScanner(RowDataCollection &rows_p, RowDataCollection &heap_p,
44692
44762
  const RowLayout &layout_p, bool external_p, bool flush_p)
44693
44763
  : rows(rows_p), heap(heap_p), layout(layout_p), read_state(*this), total_count(rows.count), total_scanned(0),
44694
- external(external_p), flush(flush_p) {
44764
+ external(external_p), flush(flush_p), unswizzling(!layout.AllConstant() && external && !heap.keep_pinned) {
44695
44765
 
44696
- if (!layout.AllConstant() && external) {
44766
+ if (unswizzling) {
44697
44767
  D_ASSERT(rows.blocks.size() == heap.blocks.size());
44698
44768
  }
44769
+
44770
+ ValidateUnscannedBlock();
44771
+ }
44772
+
44773
+ void RowDataCollectionScanner::SwizzleBlock(RowDataBlock &data_block, RowDataBlock &heap_block) {
44774
+ // Pin the data block and swizzle the pointers within the rows
44775
+ D_ASSERT(!data_block.block->IsSwizzled());
44776
+ auto data_handle = rows.buffer_manager.Pin(data_block.block);
44777
+ auto data_ptr = data_handle.Ptr();
44778
+ RowOperations::SwizzleColumns(layout, data_ptr, data_block.count);
44779
+ data_block.block->SetSwizzling(nullptr);
44780
+
44781
+ // Swizzle the heap pointers
44782
+ auto heap_handle = heap.buffer_manager.Pin(heap_block.block);
44783
+ auto heap_ptr = Load<data_ptr_t>(data_ptr + layout.GetHeapPointerOffset());
44784
+ auto heap_offset = heap_ptr - heap_handle.Ptr();
44785
+ RowOperations::SwizzleHeapPointer(layout, data_ptr, heap_ptr, data_block.count, heap_offset);
44699
44786
  }
44700
44787
 
44701
44788
  void RowDataCollectionScanner::ReSwizzle() {
@@ -44703,27 +44790,23 @@ void RowDataCollectionScanner::ReSwizzle() {
44703
44790
  return;
44704
44791
  }
44705
44792
 
44706
- if (layout.AllConstant() || !external) {
44707
- // No heap blocks!
44793
+ if (!unswizzling) {
44794
+ // No swizzled blocks!
44708
44795
  return;
44709
44796
  }
44710
44797
 
44711
44798
  D_ASSERT(rows.blocks.size() == heap.blocks.size());
44799
+ for (idx_t i = 0; i < rows.blocks.size(); ++i) {
44800
+ auto &data_block = rows.blocks[i];
44801
+ if (data_block.block && !data_block.block->IsSwizzled()) {
44802
+ SwizzleBlock(data_block, heap.blocks[i]);
44803
+ }
44804
+ }
44805
+ }
44712
44806
 
44713
- // We create one heap block per data block and swizzle the pointers
44714
- idx_t heap_block_idx = 0;
44715
- for (auto &data_block : rows.blocks) {
44716
- // Pin the data block and swizzle the pointers within the rows
44717
- auto data_handle = rows.buffer_manager.Pin(data_block.block);
44718
- auto data_ptr = data_handle.Ptr();
44719
- RowOperations::SwizzleColumns(layout, data_ptr, data_block.count);
44720
-
44721
- // Swizzle the heap pointers
44722
- auto &heap_block = heap.blocks[heap_block_idx++];
44723
- auto heap_handle = heap.buffer_manager.Pin(heap_block.block);
44724
- auto heap_ptr = Load<data_ptr_t>(data_ptr + layout.GetHeapPointerOffset());
44725
- auto heap_offset = heap_ptr - heap_handle.Ptr();
44726
- RowOperations::SwizzleHeapPointer(layout, data_ptr, heap_ptr, data_block.count, heap_offset);
44807
+ void RowDataCollectionScanner::ValidateUnscannedBlock() const {
44808
+ if (unswizzling && read_state.block_idx < rows.blocks.size()) {
44809
+ D_ASSERT(rows.blocks[read_state.block_idx].block->IsSwizzled());
44727
44810
  }
44728
44811
  }
44729
44812
 
@@ -44733,19 +44816,13 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
44733
44816
  chunk.SetCardinality(count);
44734
44817
  return;
44735
44818
  }
44736
- // Eagerly delete references to blocks that we've passed
44737
- if (flush) {
44738
- for (idx_t i = 0; i < read_state.block_idx; ++i) {
44739
- rows.blocks[i].block = nullptr;
44740
- if (!layout.AllConstant() && external) {
44741
- heap.blocks[i].block = nullptr;
44742
- }
44743
- }
44744
- }
44745
44819
  const idx_t &row_width = layout.GetRowWidth();
44746
44820
  // Set up a batch of pointers to scan data from
44747
44821
  idx_t scanned = 0;
44748
44822
  auto data_pointers = FlatVector::GetData<data_ptr_t>(addresses);
44823
+
44824
+ // We must pin ALL blocks we are going to gather from
44825
+ vector<BufferHandle> pinned_blocks;
44749
44826
  while (scanned < count) {
44750
44827
  read_state.PinData();
44751
44828
  auto &data_block = rows.blocks[read_state.block_idx];
@@ -44758,14 +44835,22 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
44758
44835
  row_ptr += row_width;
44759
44836
  }
44760
44837
  // Unswizzle the offsets back to pointers (if needed)
44761
- if (!layout.AllConstant() && external) {
44838
+ if (unswizzling) {
44762
44839
  RowOperations::UnswizzlePointers(layout, data_ptr, read_state.heap_handle.Ptr(), next);
44840
+ rows.blocks[read_state.block_idx].block->SetSwizzling("RowDataCollectionScanner::Scan");
44763
44841
  }
44764
44842
  // Update state indices
44765
44843
  read_state.entry_idx += next;
44766
44844
  if (read_state.entry_idx == data_block.count) {
44845
+ // Pin completed blocks so we don't lose them
44846
+ pinned_blocks.emplace_back(rows.buffer_manager.Pin(data_block.block));
44847
+ if (unswizzling) {
44848
+ auto &heap_block = heap.blocks[read_state.block_idx];
44849
+ pinned_blocks.emplace_back(heap.buffer_manager.Pin(heap_block.block));
44850
+ }
44767
44851
  read_state.block_idx++;
44768
44852
  read_state.entry_idx = 0;
44853
+ ValidateUnscannedBlock();
44769
44854
  }
44770
44855
  scanned += next;
44771
44856
  }
@@ -44779,6 +44864,27 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
44779
44864
  chunk.SetCardinality(count);
44780
44865
  chunk.Verify();
44781
44866
  total_scanned += scanned;
44867
+
44868
+ // Switch to a new set of pinned blocks
44869
+ read_state.pinned_blocks.swap(pinned_blocks);
44870
+
44871
+ if (flush) {
44872
+ // Release blocks we have passed.
44873
+ for (idx_t i = 0; i < read_state.block_idx; ++i) {
44874
+ rows.blocks[i].block = nullptr;
44875
+ if (unswizzling) {
44876
+ heap.blocks[i].block = nullptr;
44877
+ }
44878
+ }
44879
+ } else if (unswizzling) {
44880
+ // Reswizzle blocks we have passed so they can be flushed safely.
44881
+ for (idx_t i = 0; i < read_state.block_idx; ++i) {
44882
+ auto &data_block = rows.blocks[i];
44883
+ if (data_block.block && !data_block.block->IsSwizzled()) {
44884
+ SwizzleBlock(data_block, heap.blocks[i]);
44885
+ }
44886
+ }
44887
+ }
44782
44888
  }
44783
44889
 
44784
44890
  } // namespace duckdb
@@ -63120,10 +63226,10 @@ void WindowLocalSinkState::Group(WindowGlobalSinkState &gstate) {
63120
63226
  }
63121
63227
 
63122
63228
  auto &payload_data = *ungrouped->local_sort->payload_data;
63123
- auto rows = payload_data.CloneEmpty();
63229
+ auto rows = payload_data.CloneEmpty(payload_data.keep_pinned);
63124
63230
 
63125
63231
  auto &payload_heap = *ungrouped->local_sort->payload_heap;
63126
- auto heap = payload_heap.CloneEmpty();
63232
+ auto heap = payload_heap.CloneEmpty(payload_heap.keep_pinned);
63127
63233
 
63128
63234
  RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, payload_data, payload_heap, payload_layout);
63129
63235
  RowDataCollectionScanner scanner(*rows, *heap, payload_layout, true);
@@ -63146,7 +63252,7 @@ void WindowLocalSinkState::Sink(DataChunk &input_chunk, WindowGlobalSinkState &g
63146
63252
 
63147
63253
  // OVER()
63148
63254
  if (over_chunk.ColumnCount() == 0) {
63149
- // No sorts, so build row chunks
63255
+ // No sorts, so build paged row chunks
63150
63256
  if (!rows) {
63151
63257
  const auto entry_size = payload_layout.GetRowWidth();
63152
63258
  const auto capacity = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, (Storage::BLOCK_SIZE / entry_size) + 1);
@@ -63157,16 +63263,17 @@ void WindowLocalSinkState::Sink(DataChunk &input_chunk, WindowGlobalSinkState &g
63157
63263
  const auto row_sel = FlatVector::IncrementalSelectionVector();
63158
63264
  Vector addresses(LogicalType::POINTER);
63159
63265
  auto key_locations = FlatVector::GetData<data_ptr_t>(addresses);
63266
+ const auto prev_rows_blocks = rows->blocks.size();
63160
63267
  auto handles = rows->Build(row_count, key_locations, nullptr, row_sel);
63161
- vector<UnifiedVectorFormat> input_data;
63162
- input_data.reserve(input_chunk.ColumnCount());
63163
- for (idx_t i = 0; i < input_chunk.ColumnCount(); i++) {
63164
- UnifiedVectorFormat pdata;
63165
- input_chunk.data[i].ToUnifiedFormat(row_count, pdata);
63166
- input_data.emplace_back(move(pdata));
63167
- }
63168
- RowOperations::Scatter(input_chunk, input_data.data(), payload_layout, addresses, *strings, *row_sel,
63169
- row_count);
63268
+ auto input_data = input_chunk.ToUnifiedFormat();
63269
+ RowOperations::Scatter(input_chunk, input_data.get(), payload_layout, addresses, *strings, *row_sel, row_count);
63270
+ // Mark that row blocks contain pointers (heap blocks are pinned)
63271
+ if (!payload_layout.AllConstant()) {
63272
+ D_ASSERT(strings->keep_pinned);
63273
+ for (size_t i = prev_rows_blocks; i < rows->blocks.size(); ++i) {
63274
+ rows->blocks[i].block->SetSwizzling("WindowLocalSinkState::Sink");
63275
+ }
63276
+ }
63170
63277
  return;
63171
63278
  }
63172
63279
 
@@ -64433,8 +64540,8 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
64433
64540
  partition_mask.SetValidUnsafe(0);
64434
64541
  order_mask.SetValidUnsafe(0);
64435
64542
  // No partition - align the heap blocks with the row blocks
64436
- rows = gstate.rows->CloneEmpty();
64437
- heap = gstate.strings->CloneEmpty();
64543
+ rows = gstate.rows->CloneEmpty(gstate.rows->keep_pinned);
64544
+ heap = gstate.strings->CloneEmpty(gstate.strings->keep_pinned);
64438
64545
  RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gstate.rows, *gstate.strings, layout);
64439
64546
  external = true;
64440
64547
  } else if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
@@ -71726,7 +71833,7 @@ PhysicalRangeJoin::GlobalSortedTable::GlobalSortedTable(ClientContext &context,
71726
71833
  memory_per_thread(0) {
71727
71834
  D_ASSERT(orders.size() == 1);
71728
71835
 
71729
- // Set external (can be force with the PRAGMA)
71836
+ // Set external (can be forced with the PRAGMA)
71730
71837
  auto &config = ClientConfig::GetConfig(context);
71731
71838
  global_sort_state.external = config.force_external;
71732
71839
  memory_per_thread = PhysicalRangeJoin::GetMaxThreadMemory(context);
@@ -71973,6 +72080,7 @@ void PhysicalRangeJoin::SliceSortedPayload(DataChunk &payload, GlobalSortState &
71973
72080
  if (!sorted_data.layout.AllConstant() && state.external) {
71974
72081
  RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle.Ptr(),
71975
72082
  addr_count);
72083
+ sorted_data.data_blocks[read_state.block_idx].block->SetSwizzling("PhysicalRangeJoin::SliceSortedPayload");
71976
72084
  }
71977
72085
 
71978
72086
  // Deserialize the payload data
@@ -185207,7 +185315,8 @@ struct BufferAllocatorData : PrivateAllocatorData {
185207
185315
  };
185208
185316
 
185209
185317
  BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p)
185210
- : db(db), readers(0), block_id(block_id_p), buffer(nullptr), eviction_timestamp(0), can_destroy(false) {
185318
+ : db(db), readers(0), block_id(block_id_p), buffer(nullptr), eviction_timestamp(0), can_destroy(false),
185319
+ unswizzled(nullptr) {
185211
185320
  eviction_timestamp = 0;
185212
185321
  state = BlockState::BLOCK_UNLOADED;
185213
185322
  memory_usage = Storage::BLOCK_ALLOC_SIZE;
@@ -185215,7 +185324,7 @@ BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p)
185215
185324
 
185216
185325
  BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p, unique_ptr<FileBuffer> buffer_p,
185217
185326
  bool can_destroy_p, idx_t block_size)
185218
- : db(db), readers(0), block_id(block_id_p), eviction_timestamp(0), can_destroy(can_destroy_p) {
185327
+ : db(db), readers(0), block_id(block_id_p), eviction_timestamp(0), can_destroy(can_destroy_p), unswizzled(nullptr) {
185219
185328
  D_ASSERT(block_size >= Storage::BLOCK_SIZE);
185220
185329
  buffer = move(buffer_p);
185221
185330
  state = BlockState::BLOCK_LOADED;
@@ -185224,6 +185333,8 @@ BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p, unique_ptr
185224
185333
 
185225
185334
  BlockHandle::~BlockHandle() {
185226
185335
  auto &buffer_manager = BufferManager::GetBufferManager(db);
185336
+ // the block is being destroyed, so any unswizzled pointers are just binary junk now.
185337
+ unswizzled = nullptr;
185227
185338
  // no references remain to this block: erase
185228
185339
  if (state == BlockState::BLOCK_LOADED) {
185229
185340
  // the block is still loaded in memory: erase it
@@ -185301,6 +185412,7 @@ unique_ptr<FileBuffer> BlockHandle::UnloadAndTakeBlock() {
185301
185412
  // already unloaded: nothing to do
185302
185413
  return nullptr;
185303
185414
  }
185415
+ D_ASSERT(!unswizzled);
185304
185416
  D_ASSERT(CanUnload());
185305
185417
  D_ASSERT(memory_usage >= Storage::BLOCK_ALLOC_SIZE);
185306
185418
 
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "c0a4ab96c"
15
- #define DUCKDB_VERSION "v0.4.1-dev1777"
14
+ #define DUCKDB_SOURCE_ID "005bf35e9"
15
+ #define DUCKDB_VERSION "v0.4.1-dev1784"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -22829,6 +22829,14 @@ public:
22829
22829
  return readers;
22830
22830
  }
22831
22831
 
22832
+ inline bool IsSwizzled() const {
22833
+ return !unswizzled;
22834
+ }
22835
+
22836
+ inline void SetSwizzling(const char *unswizzler) {
22837
+ unswizzled = unswizzler;
22838
+ }
22839
+
22832
22840
  private:
22833
22841
  static BufferHandle Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileBuffer> buffer = nullptr);
22834
22842
  unique_ptr<FileBuffer> UnloadAndTakeBlock();
@@ -22851,6 +22859,8 @@ private:
22851
22859
  const bool can_destroy;
22852
22860
  //! The memory usage of the block
22853
22861
  idx_t memory_usage;
22862
+ //! Name of the code that unswizzled the block (it then contains raw memory pointers); nullptr while the block is swizzled
22863
+ const char *unswizzled;
22854
22864
  };
22855
22865
 
22856
22866
  } // namespace duckdb