duckdb 0.4.1-dev1777.0 → 0.4.1-dev1784.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +181 -69
- package/src/duckdb.hpp +12 -2
- package/src/parquet-amalgamation.cpp +37451 -37451
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -31367,6 +31367,9 @@ struct RowOperations {
|
|
|
31367
31367
|
//! Swizzles the base pointer of each row to offset within heap block
|
|
31368
31368
|
static void SwizzleHeapPointer(const RowLayout &layout, data_ptr_t row_ptr, const data_ptr_t heap_base_ptr,
|
|
31369
31369
|
const idx_t count, const idx_t base_offset = 0);
|
|
31370
|
+
//! Unswizzles the heap offset stored in each row back to a pointer into the heap block
|
|
31371
|
+
static void UnswizzleHeapPointer(const RowLayout &layout, const data_ptr_t base_row_ptr,
|
|
31372
|
+
const data_ptr_t base_heap_ptr, const idx_t count);
|
|
31370
31373
|
//! Unswizzles all offsets back to pointers
|
|
31371
31374
|
static void UnswizzlePointers(const RowLayout &layout, const data_ptr_t base_row_ptr,
|
|
31372
31375
|
const data_ptr_t base_heap_ptr, const idx_t count);
|
|
@@ -31693,6 +31696,16 @@ void RowOperations::SwizzleHeapPointer(const RowLayout &layout, data_ptr_t row_p
|
|
|
31693
31696
|
}
|
|
31694
31697
|
}
|
|
31695
31698
|
|
|
31699
|
+
//! Converts the heap offset stored in each of `count` rows (starting at base_row_ptr) back into
//! an absolute pointer by adding it to base_heap_ptr; the rows are modified in place.
void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr_t base_row_ptr,
|
|
31700
|
+
const data_ptr_t base_heap_ptr, const idx_t count) {
|
|
31701
|
+
const auto row_width = layout.GetRowWidth(); // distance between the same field in consecutive rows
|
|
31702
|
+
data_ptr_t heap_ptr_ptr = base_row_ptr + layout.GetHeapPointerOffset(); // heap-pointer field of the first row
|
|
31703
|
+
for (idx_t i = 0; i < count; i++) {
|
|
31704
|
+
Store<data_ptr_t>(base_heap_ptr + Load<idx_t>(heap_ptr_ptr), heap_ptr_ptr); // offset (idx_t) -> pointer, same slot
|
|
31705
|
+
heap_ptr_ptr += row_width; // move to the heap-pointer field of the next row
|
|
31706
|
+
}
|
|
31707
|
+
}
|
|
31708
|
+
|
|
31696
31709
|
void RowOperations::UnswizzlePointers(const RowLayout &layout, const data_ptr_t base_row_ptr,
|
|
31697
31710
|
const data_ptr_t base_heap_ptr, const idx_t count) {
|
|
31698
31711
|
const idx_t row_width = layout.GetRowWidth();
|
|
@@ -31861,6 +31874,8 @@ public:
|
|
|
31861
31874
|
vector<RowDataBlock> blocks;
|
|
31862
31875
|
//! The blocks that this collection currently has pinned
|
|
31863
31876
|
vector<BufferHandle> pinned_blocks;
|
|
31877
|
+
//! Whether the blocks should stay pinned (necessary for e.g. a heap)
|
|
31878
|
+
const bool keep_pinned;
|
|
31864
31879
|
|
|
31865
31880
|
public:
|
|
31866
31881
|
idx_t AppendToBlock(RowDataBlock &block, BufferHandle &handle, vector<BlockAppendEntry> &append_entries,
|
|
@@ -31891,9 +31906,6 @@ public:
|
|
|
31891
31906
|
|
|
31892
31907
|
private:
|
|
31893
31908
|
mutex rdc_lock;
|
|
31894
|
-
|
|
31895
|
-
//! Whether the blocks should stay pinned (necessary for e.g. a heap)
|
|
31896
|
-
const bool keep_pinned;
|
|
31897
31909
|
};
|
|
31898
31910
|
|
|
31899
31911
|
} // namespace duckdb
|
|
@@ -34638,14 +34650,17 @@ private:
|
|
|
34638
34650
|
SBScanState read_state;
|
|
34639
34651
|
//! The total count of sorted_data
|
|
34640
34652
|
const idx_t total_count;
|
|
34641
|
-
//! The global sort state
|
|
34642
|
-
GlobalSortState &global_sort_state;
|
|
34643
34653
|
//! Addresses used to gather from the sorted data
|
|
34644
34654
|
Vector addresses = Vector(LogicalType::POINTER);
|
|
34645
34655
|
//! The number of rows scanned so far
|
|
34646
34656
|
idx_t total_scanned;
|
|
34647
34657
|
//! Whether to flush the blocks after scanning
|
|
34648
34658
|
const bool flush;
|
|
34659
|
+
//! Whether we are unswizzling the blocks
|
|
34660
|
+
const bool unswizzling;
|
|
34661
|
+
|
|
34662
|
+
//! Checks that the newest block is valid
|
|
34663
|
+
void ValidateUnscannedBlock() const;
|
|
34649
34664
|
};
|
|
34650
34665
|
|
|
34651
34666
|
struct SBIterator {
|
|
@@ -36453,6 +36468,7 @@ void LocalSortState::SinkChunk(DataChunk &sort, DataChunk &payload) {
|
|
|
36453
36468
|
auto blob_data = blob_chunk.ToUnifiedFormat();
|
|
36454
36469
|
RowOperations::Scatter(blob_chunk, blob_data.get(), sort_layout->blob_layout, addresses, *blob_sorting_heap,
|
|
36455
36470
|
sel_ptr, blob_chunk.size());
|
|
36471
|
+
D_ASSERT(blob_sorting_heap->keep_pinned);
|
|
36456
36472
|
}
|
|
36457
36473
|
|
|
36458
36474
|
// Finally, serialize payload data
|
|
@@ -36460,6 +36476,7 @@ void LocalSortState::SinkChunk(DataChunk &sort, DataChunk &payload) {
|
|
|
36460
36476
|
auto input_data = payload.ToUnifiedFormat();
|
|
36461
36477
|
RowOperations::Scatter(payload, input_data.get(), *payload_layout, addresses, *payload_heap, sel_ptr,
|
|
36462
36478
|
payload.size());
|
|
36479
|
+
D_ASSERT(payload_heap->keep_pinned);
|
|
36463
36480
|
}
|
|
36464
36481
|
|
|
36465
36482
|
idx_t LocalSortState::SizeInBytes() const {
|
|
@@ -36548,6 +36565,7 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
|
|
|
36548
36565
|
ordered_data_ptr += row_width;
|
|
36549
36566
|
sorting_ptr += sorting_entry_size;
|
|
36550
36567
|
}
|
|
36568
|
+
ordered_data_block.block->SetSwizzling(sd.layout.AllConstant() ? nullptr : "LocalSortState::ReOrder.ordered_data");
|
|
36551
36569
|
// Replace the unordered data block with the re-ordered data block
|
|
36552
36570
|
sd.data_blocks.clear();
|
|
36553
36571
|
sd.data_blocks.push_back(move(ordered_data_block));
|
|
@@ -36555,6 +36573,7 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
|
|
|
36555
36573
|
if (!sd.layout.AllConstant() && reorder_heap) {
|
|
36556
36574
|
// Swizzle the column pointers to offsets
|
|
36557
36575
|
RowOperations::SwizzleColumns(sd.layout, ordered_data_handle.Ptr(), count);
|
|
36576
|
+
sd.data_blocks.back().block->SetSwizzling(nullptr);
|
|
36558
36577
|
// Create a single heap block to store the ordered heap
|
|
36559
36578
|
idx_t total_byte_offset = std::accumulate(heap.blocks.begin(), heap.blocks.end(), 0,
|
|
36560
36579
|
[](idx_t a, const RowDataBlock &b) { return a + b.byte_offset; });
|
|
@@ -36785,9 +36804,11 @@ void SortedData::Unswizzle() {
|
|
|
36785
36804
|
for (idx_t i = 0; i < data_blocks.size(); i++) {
|
|
36786
36805
|
auto &data_block = data_blocks[i];
|
|
36787
36806
|
auto &heap_block = heap_blocks[i];
|
|
36807
|
+
D_ASSERT(data_block.block->IsSwizzled());
|
|
36788
36808
|
auto data_handle_p = buffer_manager.Pin(data_block.block);
|
|
36789
36809
|
auto heap_handle_p = buffer_manager.Pin(heap_block.block);
|
|
36790
36810
|
RowOperations::UnswizzlePointers(layout, data_handle_p.Ptr(), heap_handle_p.Ptr(), data_block.count);
|
|
36811
|
+
data_block.block->SetSwizzling("SortedData::Unswizzle");
|
|
36791
36812
|
state.heap_blocks.push_back(move(heap_block));
|
|
36792
36813
|
state.pinned_blocks.push_back(move(heap_handle_p));
|
|
36793
36814
|
}
|
|
@@ -37003,7 +37024,9 @@ void SBScanState::SetIndices(idx_t block_idx_to, idx_t entry_idx_to) {
|
|
|
37003
37024
|
|
|
37004
37025
|
PayloadScanner::PayloadScanner(SortedData &sorted_data, GlobalSortState &global_sort_state, bool flush_p)
|
|
37005
37026
|
: sorted_data(sorted_data), read_state(global_sort_state.buffer_manager, global_sort_state),
|
|
37006
|
-
total_count(sorted_data.Count()),
|
|
37027
|
+
total_count(sorted_data.Count()), total_scanned(0), flush(flush_p),
|
|
37028
|
+
unswizzling(!sorted_data.layout.AllConstant() && global_sort_state.external) {
|
|
37029
|
+
ValidateUnscannedBlock();
|
|
37007
37030
|
}
|
|
37008
37031
|
|
|
37009
37032
|
PayloadScanner::PayloadScanner(GlobalSortState &global_sort_state, bool flush_p)
|
|
@@ -37013,9 +37036,16 @@ PayloadScanner::PayloadScanner(GlobalSortState &global_sort_state, bool flush_p)
|
|
|
37013
37036
|
PayloadScanner::PayloadScanner(GlobalSortState &global_sort_state, idx_t block_idx)
|
|
37014
37037
|
: sorted_data(*global_sort_state.sorted_blocks[0]->payload_data),
|
|
37015
37038
|
read_state(global_sort_state.buffer_manager, global_sort_state),
|
|
37016
|
-
total_count(sorted_data.data_blocks[block_idx].count),
|
|
37017
|
-
|
|
37039
|
+
total_count(sorted_data.data_blocks[block_idx].count), total_scanned(0), flush(false),
|
|
37040
|
+
unswizzling(!sorted_data.layout.AllConstant() && global_sort_state.external) {
|
|
37018
37041
|
read_state.SetIndices(block_idx, 0);
|
|
37042
|
+
ValidateUnscannedBlock();
|
|
37043
|
+
}
|
|
37044
|
+
|
|
37045
|
+
//! Sanity check: when this scanner unswizzles, asserts that the block at the current read
//! position (if there is one) is still marked as swizzled.
void PayloadScanner::ValidateUnscannedBlock() const {
|
|
37046
|
+
if (unswizzling && read_state.block_idx < sorted_data.data_blocks.size()) { // skip when past the last block
|
|
37047
|
+
D_ASSERT(sorted_data.data_blocks[read_state.block_idx].block->IsSwizzled());
|
|
37048
|
+
}
|
|
37019
37049
|
}
|
|
37020
37050
|
|
|
37021
37051
|
void PayloadScanner::Scan(DataChunk &chunk) {
|
|
@@ -37028,6 +37058,9 @@ void PayloadScanner::Scan(DataChunk &chunk) {
|
|
|
37028
37058
|
if (flush) {
|
|
37029
37059
|
for (idx_t i = 0; i < read_state.block_idx; i++) {
|
|
37030
37060
|
sorted_data.data_blocks[i].block = nullptr;
|
|
37061
|
+
if (unswizzling) {
|
|
37062
|
+
sorted_data.heap_blocks[i].block = nullptr;
|
|
37063
|
+
}
|
|
37031
37064
|
}
|
|
37032
37065
|
}
|
|
37033
37066
|
const idx_t &row_width = sorted_data.layout.GetRowWidth();
|
|
@@ -37046,14 +37079,16 @@ void PayloadScanner::Scan(DataChunk &chunk) {
|
|
|
37046
37079
|
row_ptr += row_width;
|
|
37047
37080
|
}
|
|
37048
37081
|
// Unswizzle the offsets back to pointers (if needed)
|
|
37049
|
-
if (
|
|
37082
|
+
if (unswizzling) {
|
|
37050
37083
|
RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle.Ptr(), next);
|
|
37084
|
+
sorted_data.data_blocks[read_state.block_idx].block->SetSwizzling("PayloadScanner::Scan");
|
|
37051
37085
|
}
|
|
37052
37086
|
// Update state indices
|
|
37053
37087
|
read_state.entry_idx += next;
|
|
37054
37088
|
if (read_state.entry_idx == data_block.count) {
|
|
37055
37089
|
read_state.block_idx++;
|
|
37056
37090
|
read_state.entry_idx = 0;
|
|
37091
|
+
ValidateUnscannedBlock();
|
|
37057
37092
|
}
|
|
37058
37093
|
scanned += next;
|
|
37059
37094
|
}
|
|
@@ -44492,6 +44527,7 @@ namespace duckdb {
|
|
|
44492
44527
|
|
|
44493
44528
|
class BufferHandle;
|
|
44494
44529
|
class RowDataCollection;
|
|
44530
|
+
struct RowDataBlock;
|
|
44495
44531
|
class DataChunk;
|
|
44496
44532
|
|
|
44497
44533
|
//! Used to scan the data into DataChunks after sorting
|
|
@@ -44513,6 +44549,9 @@ public:
|
|
|
44513
44549
|
|
|
44514
44550
|
BufferHandle data_handle;
|
|
44515
44551
|
BufferHandle heap_handle;
|
|
44552
|
+
|
|
44553
|
+
// We must pin ALL blocks we are going to gather from
|
|
44554
|
+
vector<BufferHandle> pinned_blocks;
|
|
44516
44555
|
};
|
|
44517
44556
|
|
|
44518
44557
|
//! Ensure that heap blocks correspond to row blocks
|
|
@@ -44543,6 +44582,8 @@ public:
|
|
|
44543
44582
|
//! we need to re-swizzle.
|
|
44544
44583
|
void ReSwizzle();
|
|
44545
44584
|
|
|
44585
|
+
void SwizzleBlock(RowDataBlock &data_block, RowDataBlock &heap_block);
|
|
44586
|
+
|
|
44546
44587
|
//! Scans the next data chunk from the sorted data
|
|
44547
44588
|
void Scan(DataChunk &chunk);
|
|
44548
44589
|
|
|
@@ -44565,6 +44606,11 @@ private:
|
|
|
44565
44606
|
const bool external;
|
|
44566
44607
|
//! Whether to flush the blocks after scanning
|
|
44567
44608
|
const bool flush;
|
|
44609
|
+
//! Whether we are unswizzling the blocks
|
|
44610
|
+
const bool unswizzling;
|
|
44611
|
+
|
|
44612
|
+
//! Checks that the newest block is valid
|
|
44613
|
+
void ValidateUnscannedBlock() const;
|
|
44568
44614
|
};
|
|
44569
44615
|
|
|
44570
44616
|
} // namespace duckdb
|
|
@@ -44584,21 +44630,19 @@ void RowDataCollectionScanner::AlignHeapBlocks(RowDataCollection &swizzled_block
|
|
|
44584
44630
|
return;
|
|
44585
44631
|
}
|
|
44586
44632
|
|
|
44587
|
-
// The main data blocks can just be moved
|
|
44588
|
-
swizzled_block_collection.Merge(block_collection);
|
|
44589
|
-
block_collection.Clear();
|
|
44590
|
-
|
|
44591
44633
|
if (layout.AllConstant()) {
|
|
44592
|
-
// No heap blocks!
|
|
44634
|
+
// No heap blocks! Just merge fixed-size data
|
|
44635
|
+
swizzled_block_collection.Merge(block_collection);
|
|
44593
44636
|
return;
|
|
44594
44637
|
}
|
|
44595
44638
|
|
|
44596
44639
|
// We create one heap block per data block and swizzle the pointers
|
|
44597
|
-
|
|
44640
|
+
D_ASSERT(string_heap.keep_pinned == swizzled_string_heap.keep_pinned);
|
|
44641
|
+
auto &buffer_manager = block_collection.buffer_manager;
|
|
44598
44642
|
auto &heap_blocks = string_heap.blocks;
|
|
44599
44643
|
idx_t heap_block_idx = 0;
|
|
44600
44644
|
idx_t heap_block_remaining = heap_blocks[heap_block_idx].count;
|
|
44601
|
-
for (auto &data_block :
|
|
44645
|
+
for (auto &data_block : block_collection.blocks) {
|
|
44602
44646
|
if (heap_block_remaining == 0) {
|
|
44603
44647
|
heap_block_remaining = heap_blocks[++heap_block_idx].count;
|
|
44604
44648
|
}
|
|
@@ -44606,27 +44650,44 @@ void RowDataCollectionScanner::AlignHeapBlocks(RowDataCollection &swizzled_block
|
|
|
44606
44650
|
// Pin the data block and swizzle the pointers within the rows
|
|
44607
44651
|
auto data_handle = buffer_manager.Pin(data_block.block);
|
|
44608
44652
|
auto data_ptr = data_handle.Ptr();
|
|
44609
|
-
|
|
44653
|
+
if (!string_heap.keep_pinned) {
|
|
44654
|
+
D_ASSERT(!data_block.block->IsSwizzled());
|
|
44655
|
+
RowOperations::SwizzleColumns(layout, data_ptr, data_block.count);
|
|
44656
|
+
data_block.block->SetSwizzling(nullptr);
|
|
44657
|
+
}
|
|
44658
|
+
// At this point the data block is pinned and the heap pointer is valid
|
|
44659
|
+
// so we can copy heap data as needed
|
|
44610
44660
|
|
|
44611
44661
|
// We want to copy as little of the heap data as possible, check how the data and heap blocks line up
|
|
44612
44662
|
if (heap_block_remaining >= data_block.count) {
|
|
44613
44663
|
// Easy: current heap block contains all strings for this data block, just copy (reference) the block
|
|
44614
44664
|
swizzled_string_heap.blocks.emplace_back(RowDataBlock(heap_blocks[heap_block_idx]));
|
|
44615
|
-
swizzled_string_heap.blocks.back().count =
|
|
44616
|
-
|
|
44617
|
-
// Swizzle the heap pointer
|
|
44618
|
-
auto
|
|
44619
|
-
auto
|
|
44620
|
-
|
|
44621
|
-
|
|
44665
|
+
swizzled_string_heap.blocks.back().count = data_block.count;
|
|
44666
|
+
|
|
44667
|
+
// Swizzle the heap pointer if we are not pinning the heap
|
|
44668
|
+
auto &heap_block = swizzled_string_heap.blocks.back().block;
|
|
44669
|
+
auto heap_handle = buffer_manager.Pin(heap_block);
|
|
44670
|
+
if (!swizzled_string_heap.keep_pinned) {
|
|
44671
|
+
auto heap_ptr = Load<data_ptr_t>(data_ptr + layout.GetHeapPointerOffset());
|
|
44672
|
+
auto heap_offset = heap_ptr - heap_handle.Ptr();
|
|
44673
|
+
RowOperations::SwizzleHeapPointer(layout, data_ptr, heap_ptr, data_block.count, heap_offset);
|
|
44674
|
+
} else {
|
|
44675
|
+
swizzled_string_heap.pinned_blocks.emplace_back(move(heap_handle));
|
|
44676
|
+
}
|
|
44622
44677
|
|
|
44623
44678
|
// Update counter
|
|
44624
44679
|
heap_block_remaining -= data_block.count;
|
|
44625
44680
|
} else {
|
|
44626
44681
|
// Strings for this data block are spread over the current heap block and the next (and possibly more)
|
|
44682
|
+
if (string_heap.keep_pinned) {
|
|
44683
|
+
// The heap is changing underneath the data block,
|
|
44684
|
+
// so swizzle the string pointers to make them portable.
|
|
44685
|
+
RowOperations::SwizzleColumns(layout, data_ptr, data_block.count);
|
|
44686
|
+
}
|
|
44627
44687
|
idx_t data_block_remaining = data_block.count;
|
|
44628
44688
|
vector<std::pair<data_ptr_t, idx_t>> ptrs_and_sizes;
|
|
44629
44689
|
idx_t total_size = 0;
|
|
44690
|
+
const auto base_row_ptr = data_ptr;
|
|
44630
44691
|
while (data_block_remaining > 0) {
|
|
44631
44692
|
if (heap_block_remaining == 0) {
|
|
44632
44693
|
heap_block_remaining = heap_blocks[++heap_block_idx].count;
|
|
@@ -44656,12 +44717,21 @@ void RowDataCollectionScanner::AlignHeapBlocks(RowDataCollection &swizzled_block
|
|
|
44656
44717
|
RowDataBlock(buffer_manager, MaxValue<idx_t>(total_size, (idx_t)Storage::BLOCK_SIZE), 1));
|
|
44657
44718
|
auto new_heap_handle = buffer_manager.Pin(swizzled_string_heap.blocks.back().block);
|
|
44658
44719
|
auto new_heap_ptr = new_heap_handle.Ptr();
|
|
44720
|
+
if (swizzled_string_heap.keep_pinned) {
|
|
44721
|
+
// Since the heap blocks are pinned, we can unswizzle the data again.
|
|
44722
|
+
swizzled_string_heap.pinned_blocks.emplace_back(move(new_heap_handle));
|
|
44723
|
+
RowOperations::UnswizzlePointers(layout, base_row_ptr, new_heap_ptr, data_block.count);
|
|
44724
|
+
RowOperations::UnswizzleHeapPointer(layout, base_row_ptr, new_heap_ptr, data_block.count);
|
|
44725
|
+
}
|
|
44659
44726
|
for (auto &ptr_and_size : ptrs_and_sizes) {
|
|
44660
44727
|
memcpy(new_heap_ptr, ptr_and_size.first, ptr_and_size.second);
|
|
44661
44728
|
new_heap_ptr += ptr_and_size.second;
|
|
44662
44729
|
}
|
|
44663
44730
|
}
|
|
44664
44731
|
}
|
|
44732
|
+
|
|
44733
|
+
// We're done with variable-sized data, now just merge the fixed-size data
|
|
44734
|
+
swizzled_block_collection.Merge(block_collection);
|
|
44665
44735
|
D_ASSERT(swizzled_block_collection.blocks.size() == swizzled_string_heap.blocks.size());
|
|
44666
44736
|
|
|
44667
44737
|
// Update counts and cleanup
|
|
@@ -44691,11 +44761,28 @@ void RowDataCollectionScanner::ScanState::PinData() {
|
|
|
44691
44761
|
RowDataCollectionScanner::RowDataCollectionScanner(RowDataCollection &rows_p, RowDataCollection &heap_p,
|
|
44692
44762
|
const RowLayout &layout_p, bool external_p, bool flush_p)
|
|
44693
44763
|
: rows(rows_p), heap(heap_p), layout(layout_p), read_state(*this), total_count(rows.count), total_scanned(0),
|
|
44694
|
-
external(external_p), flush(flush_p) {
|
|
44764
|
+
external(external_p), flush(flush_p), unswizzling(!layout.AllConstant() && external && !heap.keep_pinned) {
|
|
44695
44765
|
|
|
44696
|
-
if (
|
|
44766
|
+
if (unswizzling) {
|
|
44697
44767
|
D_ASSERT(rows.blocks.size() == heap.blocks.size());
|
|
44698
44768
|
}
|
|
44769
|
+
|
|
44770
|
+
ValidateUnscannedBlock();
|
|
44771
|
+
}
|
|
44772
|
+
|
|
44773
|
+
//! Swizzles one data block in place: the column pointers in its rows are swizzled, the block is
//! marked as swizzled, and the rows' heap pointers are swizzled relative to the start of heap_block.
//! Asserts that the data block is not already marked as swizzled.
void RowDataCollectionScanner::SwizzleBlock(RowDataBlock &data_block, RowDataBlock &heap_block) {
|
|
44774
|
+
// Pin the data block and swizzle the pointers within the rows
|
|
44775
|
+
D_ASSERT(!data_block.block->IsSwizzled());
|
|
44776
|
+
auto data_handle = rows.buffer_manager.Pin(data_block.block);
|
|
44777
|
+
auto data_ptr = data_handle.Ptr();
|
|
44778
|
+
RowOperations::SwizzleColumns(layout, data_ptr, data_block.count);
|
|
44779
|
+
data_block.block->SetSwizzling(nullptr); // nullptr marks the block as swizzled
|
|
44780
|
+
|
|
44781
|
+
// Swizzle the heap pointers
|
|
44782
|
+
auto heap_handle = heap.buffer_manager.Pin(heap_block.block);
|
|
44783
|
+
auto heap_ptr = Load<data_ptr_t>(data_ptr + layout.GetHeapPointerOffset()); // heap pointer stored in the first row
|
|
44784
|
+
auto heap_offset = heap_ptr - heap_handle.Ptr(); // that pointer's offset from the start of the pinned heap block
|
|
44785
|
+
RowOperations::SwizzleHeapPointer(layout, data_ptr, heap_ptr, data_block.count, heap_offset);
|
|
44699
44786
|
}
|
|
44700
44787
|
|
|
44701
44788
|
void RowDataCollectionScanner::ReSwizzle() {
|
|
@@ -44703,27 +44790,23 @@ void RowDataCollectionScanner::ReSwizzle() {
|
|
|
44703
44790
|
return;
|
|
44704
44791
|
}
|
|
44705
44792
|
|
|
44706
|
-
if (
|
|
44707
|
-
// No
|
|
44793
|
+
if (!unswizzling) {
|
|
44794
|
+
// No swizzled blocks!
|
|
44708
44795
|
return;
|
|
44709
44796
|
}
|
|
44710
44797
|
|
|
44711
44798
|
D_ASSERT(rows.blocks.size() == heap.blocks.size());
|
|
44799
|
+
for (idx_t i = 0; i < rows.blocks.size(); ++i) {
|
|
44800
|
+
auto &data_block = rows.blocks[i];
|
|
44801
|
+
if (data_block.block && !data_block.block->IsSwizzled()) {
|
|
44802
|
+
SwizzleBlock(data_block, heap.blocks[i]);
|
|
44803
|
+
}
|
|
44804
|
+
}
|
|
44805
|
+
}
|
|
44712
44806
|
|
|
44713
|
-
|
|
44714
|
-
|
|
44715
|
-
|
|
44716
|
-
// Pin the data block and swizzle the pointers within the rows
|
|
44717
|
-
auto data_handle = rows.buffer_manager.Pin(data_block.block);
|
|
44718
|
-
auto data_ptr = data_handle.Ptr();
|
|
44719
|
-
RowOperations::SwizzleColumns(layout, data_ptr, data_block.count);
|
|
44720
|
-
|
|
44721
|
-
// Swizzle the heap pointers
|
|
44722
|
-
auto &heap_block = heap.blocks[heap_block_idx++];
|
|
44723
|
-
auto heap_handle = heap.buffer_manager.Pin(heap_block.block);
|
|
44724
|
-
auto heap_ptr = Load<data_ptr_t>(data_ptr + layout.GetHeapPointerOffset());
|
|
44725
|
-
auto heap_offset = heap_ptr - heap_handle.Ptr();
|
|
44726
|
-
RowOperations::SwizzleHeapPointer(layout, data_ptr, heap_ptr, data_block.count, heap_offset);
|
|
44807
|
+
//! Sanity check: when this scanner unswizzles, asserts that the row block at the current read
//! position (if there is one) is still marked as swizzled.
void RowDataCollectionScanner::ValidateUnscannedBlock() const {
|
|
44808
|
+
if (unswizzling && read_state.block_idx < rows.blocks.size()) { // skip when past the last block
|
|
44809
|
+
D_ASSERT(rows.blocks[read_state.block_idx].block->IsSwizzled());
|
|
44727
44810
|
}
|
|
44728
44811
|
}
|
|
44729
44812
|
|
|
@@ -44733,19 +44816,13 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
|
|
|
44733
44816
|
chunk.SetCardinality(count);
|
|
44734
44817
|
return;
|
|
44735
44818
|
}
|
|
44736
|
-
// Eagerly delete references to blocks that we've passed
|
|
44737
|
-
if (flush) {
|
|
44738
|
-
for (idx_t i = 0; i < read_state.block_idx; ++i) {
|
|
44739
|
-
rows.blocks[i].block = nullptr;
|
|
44740
|
-
if (!layout.AllConstant() && external) {
|
|
44741
|
-
heap.blocks[i].block = nullptr;
|
|
44742
|
-
}
|
|
44743
|
-
}
|
|
44744
|
-
}
|
|
44745
44819
|
const idx_t &row_width = layout.GetRowWidth();
|
|
44746
44820
|
// Set up a batch of pointers to scan data from
|
|
44747
44821
|
idx_t scanned = 0;
|
|
44748
44822
|
auto data_pointers = FlatVector::GetData<data_ptr_t>(addresses);
|
|
44823
|
+
|
|
44824
|
+
// We must pin ALL blocks we are going to gather from
|
|
44825
|
+
vector<BufferHandle> pinned_blocks;
|
|
44749
44826
|
while (scanned < count) {
|
|
44750
44827
|
read_state.PinData();
|
|
44751
44828
|
auto &data_block = rows.blocks[read_state.block_idx];
|
|
@@ -44758,14 +44835,22 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
|
|
|
44758
44835
|
row_ptr += row_width;
|
|
44759
44836
|
}
|
|
44760
44837
|
// Unswizzle the offsets back to pointers (if needed)
|
|
44761
|
-
if (
|
|
44838
|
+
if (unswizzling) {
|
|
44762
44839
|
RowOperations::UnswizzlePointers(layout, data_ptr, read_state.heap_handle.Ptr(), next);
|
|
44840
|
+
rows.blocks[read_state.block_idx].block->SetSwizzling("RowDataCollectionScanner::Scan");
|
|
44763
44841
|
}
|
|
44764
44842
|
// Update state indices
|
|
44765
44843
|
read_state.entry_idx += next;
|
|
44766
44844
|
if (read_state.entry_idx == data_block.count) {
|
|
44845
|
+
// Pin completed blocks so we don't lose them
|
|
44846
|
+
pinned_blocks.emplace_back(rows.buffer_manager.Pin(data_block.block));
|
|
44847
|
+
if (unswizzling) {
|
|
44848
|
+
auto &heap_block = heap.blocks[read_state.block_idx];
|
|
44849
|
+
pinned_blocks.emplace_back(heap.buffer_manager.Pin(heap_block.block));
|
|
44850
|
+
}
|
|
44767
44851
|
read_state.block_idx++;
|
|
44768
44852
|
read_state.entry_idx = 0;
|
|
44853
|
+
ValidateUnscannedBlock();
|
|
44769
44854
|
}
|
|
44770
44855
|
scanned += next;
|
|
44771
44856
|
}
|
|
@@ -44779,6 +44864,27 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
|
|
|
44779
44864
|
chunk.SetCardinality(count);
|
|
44780
44865
|
chunk.Verify();
|
|
44781
44866
|
total_scanned += scanned;
|
|
44867
|
+
|
|
44868
|
+
// Switch to a new set of pinned blocks
|
|
44869
|
+
read_state.pinned_blocks.swap(pinned_blocks);
|
|
44870
|
+
|
|
44871
|
+
if (flush) {
|
|
44872
|
+
// Release blocks we have passed.
|
|
44873
|
+
for (idx_t i = 0; i < read_state.block_idx; ++i) {
|
|
44874
|
+
rows.blocks[i].block = nullptr;
|
|
44875
|
+
if (unswizzling) {
|
|
44876
|
+
heap.blocks[i].block = nullptr;
|
|
44877
|
+
}
|
|
44878
|
+
}
|
|
44879
|
+
} else if (unswizzling) {
|
|
44880
|
+
// Reswizzle blocks we have passed so they can be flushed safely.
|
|
44881
|
+
for (idx_t i = 0; i < read_state.block_idx; ++i) {
|
|
44882
|
+
auto &data_block = rows.blocks[i];
|
|
44883
|
+
if (data_block.block && !data_block.block->IsSwizzled()) {
|
|
44884
|
+
SwizzleBlock(data_block, heap.blocks[i]);
|
|
44885
|
+
}
|
|
44886
|
+
}
|
|
44887
|
+
}
|
|
44782
44888
|
}
|
|
44783
44889
|
|
|
44784
44890
|
} // namespace duckdb
|
|
@@ -63120,10 +63226,10 @@ void WindowLocalSinkState::Group(WindowGlobalSinkState &gstate) {
|
|
|
63120
63226
|
}
|
|
63121
63227
|
|
|
63122
63228
|
auto &payload_data = *ungrouped->local_sort->payload_data;
|
|
63123
|
-
auto rows = payload_data.CloneEmpty();
|
|
63229
|
+
auto rows = payload_data.CloneEmpty(payload_data.keep_pinned);
|
|
63124
63230
|
|
|
63125
63231
|
auto &payload_heap = *ungrouped->local_sort->payload_heap;
|
|
63126
|
-
auto heap = payload_heap.CloneEmpty();
|
|
63232
|
+
auto heap = payload_heap.CloneEmpty(payload_heap.keep_pinned);
|
|
63127
63233
|
|
|
63128
63234
|
RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, payload_data, payload_heap, payload_layout);
|
|
63129
63235
|
RowDataCollectionScanner scanner(*rows, *heap, payload_layout, true);
|
|
@@ -63146,7 +63252,7 @@ void WindowLocalSinkState::Sink(DataChunk &input_chunk, WindowGlobalSinkState &g
|
|
|
63146
63252
|
|
|
63147
63253
|
// OVER()
|
|
63148
63254
|
if (over_chunk.ColumnCount() == 0) {
|
|
63149
|
-
// No sorts, so build row chunks
|
|
63255
|
+
// No sorts, so build paged row chunks
|
|
63150
63256
|
if (!rows) {
|
|
63151
63257
|
const auto entry_size = payload_layout.GetRowWidth();
|
|
63152
63258
|
const auto capacity = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, (Storage::BLOCK_SIZE / entry_size) + 1);
|
|
@@ -63157,16 +63263,17 @@ void WindowLocalSinkState::Sink(DataChunk &input_chunk, WindowGlobalSinkState &g
|
|
|
63157
63263
|
const auto row_sel = FlatVector::IncrementalSelectionVector();
|
|
63158
63264
|
Vector addresses(LogicalType::POINTER);
|
|
63159
63265
|
auto key_locations = FlatVector::GetData<data_ptr_t>(addresses);
|
|
63266
|
+
const auto prev_rows_blocks = rows->blocks.size();
|
|
63160
63267
|
auto handles = rows->Build(row_count, key_locations, nullptr, row_sel);
|
|
63161
|
-
|
|
63162
|
-
|
|
63163
|
-
|
|
63164
|
-
|
|
63165
|
-
|
|
63166
|
-
|
|
63167
|
-
|
|
63168
|
-
|
|
63169
|
-
|
|
63268
|
+
auto input_data = input_chunk.ToUnifiedFormat();
|
|
63269
|
+
RowOperations::Scatter(input_chunk, input_data.get(), payload_layout, addresses, *strings, *row_sel, row_count);
|
|
63270
|
+
// Mark that row blocks contain pointers (heap blocks are pinned)
|
|
63271
|
+
if (!payload_layout.AllConstant()) {
|
|
63272
|
+
D_ASSERT(strings->keep_pinned);
|
|
63273
|
+
for (size_t i = prev_rows_blocks; i < rows->blocks.size(); ++i) {
|
|
63274
|
+
rows->blocks[i].block->SetSwizzling("WindowLocalSinkState::Sink");
|
|
63275
|
+
}
|
|
63276
|
+
}
|
|
63170
63277
|
return;
|
|
63171
63278
|
}
|
|
63172
63279
|
|
|
@@ -64433,8 +64540,8 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
|
64433
64540
|
partition_mask.SetValidUnsafe(0);
|
|
64434
64541
|
order_mask.SetValidUnsafe(0);
|
|
64435
64542
|
// No partition - align the heap blocks with the row blocks
|
|
64436
|
-
rows = gstate.rows->CloneEmpty();
|
|
64437
|
-
heap = gstate.strings->CloneEmpty();
|
|
64543
|
+
rows = gstate.rows->CloneEmpty(gstate.rows->keep_pinned);
|
|
64544
|
+
heap = gstate.strings->CloneEmpty(gstate.strings->keep_pinned);
|
|
64438
64545
|
RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gstate.rows, *gstate.strings, layout);
|
|
64439
64546
|
external = true;
|
|
64440
64547
|
} else if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
|
|
@@ -71726,7 +71833,7 @@ PhysicalRangeJoin::GlobalSortedTable::GlobalSortedTable(ClientContext &context,
|
|
|
71726
71833
|
memory_per_thread(0) {
|
|
71727
71834
|
D_ASSERT(orders.size() == 1);
|
|
71728
71835
|
|
|
71729
|
-
// Set external (can be
|
|
71836
|
+
// Set external (can be forced with the PRAGMA)
|
|
71730
71837
|
auto &config = ClientConfig::GetConfig(context);
|
|
71731
71838
|
global_sort_state.external = config.force_external;
|
|
71732
71839
|
memory_per_thread = PhysicalRangeJoin::GetMaxThreadMemory(context);
|
|
@@ -71973,6 +72080,7 @@ void PhysicalRangeJoin::SliceSortedPayload(DataChunk &payload, GlobalSortState &
|
|
|
71973
72080
|
if (!sorted_data.layout.AllConstant() && state.external) {
|
|
71974
72081
|
RowOperations::UnswizzlePointers(sorted_data.layout, data_ptr, read_state.payload_heap_handle.Ptr(),
|
|
71975
72082
|
addr_count);
|
|
72083
|
+
sorted_data.data_blocks[read_state.block_idx].block->SetSwizzling("PhysicalRangeJoin::SliceSortedPayload");
|
|
71976
72084
|
}
|
|
71977
72085
|
|
|
71978
72086
|
// Deserialize the payload data
|
|
@@ -185207,7 +185315,8 @@ struct BufferAllocatorData : PrivateAllocatorData {
|
|
|
185207
185315
|
};
|
|
185208
185316
|
|
|
185209
185317
|
BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p)
|
|
185210
|
-
: db(db), readers(0), block_id(block_id_p), buffer(nullptr), eviction_timestamp(0), can_destroy(false)
|
|
185318
|
+
: db(db), readers(0), block_id(block_id_p), buffer(nullptr), eviction_timestamp(0), can_destroy(false),
|
|
185319
|
+
unswizzled(nullptr) {
|
|
185211
185320
|
eviction_timestamp = 0;
|
|
185212
185321
|
state = BlockState::BLOCK_UNLOADED;
|
|
185213
185322
|
memory_usage = Storage::BLOCK_ALLOC_SIZE;
|
|
@@ -185215,7 +185324,7 @@ BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p)
|
|
|
185215
185324
|
|
|
185216
185325
|
BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p, unique_ptr<FileBuffer> buffer_p,
|
|
185217
185326
|
bool can_destroy_p, idx_t block_size)
|
|
185218
|
-
: db(db), readers(0), block_id(block_id_p), eviction_timestamp(0), can_destroy(can_destroy_p) {
|
|
185327
|
+
: db(db), readers(0), block_id(block_id_p), eviction_timestamp(0), can_destroy(can_destroy_p), unswizzled(nullptr) {
|
|
185219
185328
|
D_ASSERT(block_size >= Storage::BLOCK_SIZE);
|
|
185220
185329
|
buffer = move(buffer_p);
|
|
185221
185330
|
state = BlockState::BLOCK_LOADED;
|
|
@@ -185224,6 +185333,8 @@ BlockHandle::BlockHandle(DatabaseInstance &db, block_id_t block_id_p, unique_ptr
|
|
|
185224
185333
|
|
|
185225
185334
|
BlockHandle::~BlockHandle() {
|
|
185226
185335
|
auto &buffer_manager = BufferManager::GetBufferManager(db);
|
|
185336
|
+
// the block is being destroyed, so any unswizzled pointers are just binary junk now.
|
|
185337
|
+
unswizzled = nullptr;
|
|
185227
185338
|
// no references remain to this block: erase
|
|
185228
185339
|
if (state == BlockState::BLOCK_LOADED) {
|
|
185229
185340
|
// the block is still loaded in memory: erase it
|
|
@@ -185301,6 +185412,7 @@ unique_ptr<FileBuffer> BlockHandle::UnloadAndTakeBlock() {
|
|
|
185301
185412
|
// already unloaded: nothing to do
|
|
185302
185413
|
return nullptr;
|
|
185303
185414
|
}
|
|
185415
|
+
D_ASSERT(!unswizzled);
|
|
185304
185416
|
D_ASSERT(CanUnload());
|
|
185305
185417
|
D_ASSERT(memory_usage >= Storage::BLOCK_ALLOC_SIZE);
|
|
185306
185418
|
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "005bf35e9"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev1784"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -22829,6 +22829,14 @@ public:
|
|
|
22829
22829
|
return readers;
|
|
22830
22830
|
}
|
|
22831
22831
|
|
|
22832
|
+
//! Returns true when no unswizzler label is set on this block (i.e. `unswizzled` is nullptr)
inline bool IsSwizzled() const {
|
|
22833
|
+
return !unswizzled;
|
|
22834
|
+
}
|
|
22835
|
+
|
|
22836
|
+
//! Records the swizzling state of the block: pass nullptr to mark it as swizzled, or a string
//! naming the code that unswizzled it to mark it as unswizzled. The pointer is stored as-is (not copied).
inline void SetSwizzling(const char *unswizzler) {
|
|
22837
|
+
unswizzled = unswizzler;
|
|
22838
|
+
}
|
|
22839
|
+
|
|
22832
22840
|
private:
|
|
22833
22841
|
static BufferHandle Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileBuffer> buffer = nullptr);
|
|
22834
22842
|
unique_ptr<FileBuffer> UnloadAndTakeBlock();
|
|
@@ -22851,6 +22859,8 @@ private:
|
|
|
22851
22859
|
const bool can_destroy;
|
|
22852
22860
|
//! The memory usage of the block
|
|
22853
22861
|
idx_t memory_usage;
|
|
22862
|
+
//! Does the block contain any memory pointers? nullptr when the block is swizzled; otherwise a label naming the code that unswizzled it
|
|
22863
|
+
const char *unswizzled;
|
|
22854
22864
|
};
|
|
22855
22865
|
|
|
22856
22866
|
} // namespace duckdb
|