duckdb 0.4.1-dev787.0 → 0.4.1-dev790.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +174 -242
- package/src/duckdb.hpp +2 -2
- package/src/parquet-amalgamation.cpp +29433 -29433
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -33160,6 +33160,8 @@ static inline int FastMemcmp(const void *str1, const void *str2, const size_t si
|
|
|
33160
33160
|
|
|
33161
33161
|
|
|
33162
33162
|
|
|
33163
|
+
|
|
33164
|
+
|
|
33163
33165
|
namespace duckdb {
|
|
33164
33166
|
|
|
33165
33167
|
class BufferManager;
|
|
@@ -33318,6 +33320,84 @@ private:
|
|
|
33318
33320
|
const bool flush;
|
|
33319
33321
|
};
|
|
33320
33322
|
|
|
33323
|
+
struct SBIterator {
|
|
33324
|
+
static int ComparisonValue(ExpressionType comparison);
|
|
33325
|
+
|
|
33326
|
+
SBIterator(GlobalSortState &gss, ExpressionType comparison, idx_t entry_idx_p = 0);
|
|
33327
|
+
|
|
33328
|
+
inline idx_t GetIndex() const {
|
|
33329
|
+
return entry_idx;
|
|
33330
|
+
}
|
|
33331
|
+
|
|
33332
|
+
inline void SetIndex(idx_t entry_idx_p) {
|
|
33333
|
+
const auto new_block_idx = entry_idx_p / block_capacity;
|
|
33334
|
+
if (new_block_idx != scan.block_idx) {
|
|
33335
|
+
scan.SetIndices(new_block_idx, 0);
|
|
33336
|
+
if (new_block_idx < block_count) {
|
|
33337
|
+
scan.PinRadix(scan.block_idx);
|
|
33338
|
+
block_ptr = scan.RadixPtr();
|
|
33339
|
+
if (!all_constant) {
|
|
33340
|
+
scan.PinData(*scan.sb->blob_sorting_data);
|
|
33341
|
+
}
|
|
33342
|
+
}
|
|
33343
|
+
}
|
|
33344
|
+
|
|
33345
|
+
scan.entry_idx = entry_idx_p % block_capacity;
|
|
33346
|
+
entry_ptr = block_ptr + scan.entry_idx * entry_size;
|
|
33347
|
+
entry_idx = entry_idx_p;
|
|
33348
|
+
}
|
|
33349
|
+
|
|
33350
|
+
inline SBIterator &operator++() {
|
|
33351
|
+
if (++scan.entry_idx < block_capacity) {
|
|
33352
|
+
entry_ptr += entry_size;
|
|
33353
|
+
++entry_idx;
|
|
33354
|
+
} else {
|
|
33355
|
+
SetIndex(entry_idx + 1);
|
|
33356
|
+
}
|
|
33357
|
+
|
|
33358
|
+
return *this;
|
|
33359
|
+
}
|
|
33360
|
+
|
|
33361
|
+
inline SBIterator &operator--() {
|
|
33362
|
+
if (scan.entry_idx) {
|
|
33363
|
+
--scan.entry_idx;
|
|
33364
|
+
--entry_idx;
|
|
33365
|
+
entry_ptr -= entry_size;
|
|
33366
|
+
} else {
|
|
33367
|
+
SetIndex(entry_idx - 1);
|
|
33368
|
+
}
|
|
33369
|
+
|
|
33370
|
+
return *this;
|
|
33371
|
+
}
|
|
33372
|
+
|
|
33373
|
+
inline bool Compare(const SBIterator &other) const {
|
|
33374
|
+
int comp_res;
|
|
33375
|
+
if (all_constant) {
|
|
33376
|
+
comp_res = FastMemcmp(entry_ptr, other.entry_ptr, cmp_size);
|
|
33377
|
+
} else {
|
|
33378
|
+
comp_res = Comparators::CompareTuple(scan, other.scan, entry_ptr, other.entry_ptr, sort_layout, external);
|
|
33379
|
+
}
|
|
33380
|
+
|
|
33381
|
+
return comp_res <= cmp;
|
|
33382
|
+
}
|
|
33383
|
+
|
|
33384
|
+
// Fixed comparison parameters
|
|
33385
|
+
const SortLayout &sort_layout;
|
|
33386
|
+
const idx_t block_count;
|
|
33387
|
+
const idx_t block_capacity;
|
|
33388
|
+
const size_t cmp_size;
|
|
33389
|
+
const size_t entry_size;
|
|
33390
|
+
const bool all_constant;
|
|
33391
|
+
const bool external;
|
|
33392
|
+
const int cmp;
|
|
33393
|
+
|
|
33394
|
+
// Iteration state
|
|
33395
|
+
SBScanState scan;
|
|
33396
|
+
idx_t entry_idx;
|
|
33397
|
+
data_ptr_t block_ptr;
|
|
33398
|
+
data_ptr_t entry_ptr;
|
|
33399
|
+
};
|
|
33400
|
+
|
|
33321
33401
|
} // namespace duckdb
|
|
33322
33402
|
|
|
33323
33403
|
|
|
@@ -35661,6 +35741,30 @@ void PayloadScanner::Scan(DataChunk &chunk) {
|
|
|
35661
35741
|
total_scanned += scanned;
|
|
35662
35742
|
}
|
|
35663
35743
|
|
|
35744
|
+
//! Maps a comparison expression type to the threshold that SBIterator::Compare tests against.
//! Strict comparisons (<, >) yield -1, inclusive comparisons (<=, >=) yield 0.
//! Any other expression type raises an InternalException.
int SBIterator::ComparisonValue(ExpressionType comparison) {
	const bool is_strict =
	    comparison == ExpressionType::COMPARE_LESSTHAN || comparison == ExpressionType::COMPARE_GREATERTHAN;
	if (is_strict) {
		return -1;
	}

	const bool is_inclusive = comparison == ExpressionType::COMPARE_LESSTHANOREQUALTO ||
	                          comparison == ExpressionType::COMPARE_GREATERTHANOREQUALTO;
	if (is_inclusive) {
		return 0;
	}

	// NOTE(review): the message names IEJoin only; confirm whether other users of SBIterator can reach this
	throw InternalException("Unimplemented comparison type for IEJoin!");
}
|
|
35756
|
+
|
|
35757
|
+
//! Builds an iterator over the first sorted block of `gss`, positioned at `entry_idx_p`.
//! `comparison` selects the threshold used by Compare (see ComparisonValue).
SBIterator::SBIterator(GlobalSortState &gss, ExpressionType comparison, idx_t entry_idx_p)
    : sort_layout(gss.sort_layout),
      block_count(gss.sorted_blocks[0]->radix_sorting_data.size()),
      block_capacity(gss.block_capacity),
      // The sizes and the all_constant flag are read through the sort_layout member bound above
      cmp_size(sort_layout.comparison_size),
      entry_size(sort_layout.entry_size),
      all_constant(sort_layout.all_constant),
      external(gss.external),
      cmp(ComparisonValue(comparison)),
      scan(gss.buffer_manager, gss),
      block_ptr(nullptr),
      entry_ptr(nullptr) {

	// Iterate over the first (index 0) sorted block of the global sort state
	scan.sb = gss.sorted_blocks[0].get();
	// Start from a block index that no in-range block can have, so the SetIndex call below
	// treats its target block as new (and pins it) whenever that block is in range
	scan.block_idx = block_count;
	// Also initialises entry_idx and entry_ptr, which are not part of the initializer list
	SetIndex(entry_idx_p);
}
|
|
35767
|
+
|
|
35664
35768
|
} // namespace duckdb
|
|
35665
35769
|
|
|
35666
35770
|
|
|
@@ -60152,9 +60256,9 @@ public:
|
|
|
60152
60256
|
using Orders = vector<BoundOrderByNode>;
|
|
60153
60257
|
using Types = vector<LogicalType>;
|
|
60154
60258
|
|
|
60155
|
-
WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &
|
|
60156
|
-
idx_t max_mem, bool external)
|
|
60157
|
-
: memory_per_thread(max_mem), count(0) {
|
|
60259
|
+
WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
|
|
60260
|
+
const Types &payload_types, idx_t max_mem, bool external)
|
|
60261
|
+
: memory_per_thread(max_mem), count(0), partition_layout(partitions) {
|
|
60158
60262
|
|
|
60159
60263
|
RowLayout payload_layout;
|
|
60160
60264
|
payload_layout.Initialize(payload_types);
|
|
@@ -60170,11 +60274,47 @@ public:
|
|
|
60170
60274
|
global_sort->PrepareMergePhase();
|
|
60171
60275
|
}
|
|
60172
60276
|
|
|
60277
|
+
void ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask);
|
|
60278
|
+
|
|
60173
60279
|
const idx_t memory_per_thread;
|
|
60174
60280
|
GlobalSortStatePtr global_sort;
|
|
60175
60281
|
atomic<idx_t> count;
|
|
60282
|
+
|
|
60283
|
+
// Mask computation
|
|
60284
|
+
SortLayout partition_layout;
|
|
60176
60285
|
};
|
|
60177
60286
|
|
|
60287
|
+
//! Marks group boundaries in the sorted rows of this hash group.
//! Bit 0 is set in both masks. For every later row i (i < count):
//!  - partition_mask bit i is set when the partition key of row i differs from that of row i - 1;
//!  - order_mask bit i is set in that same case, or when row i - 1 compares strictly below row i
//!    on the full sort key.
//! The hash group must contain at least one row (asserted).
//! NOTE(review): bits are written with SetValidUnsafe, so the caller presumably has to supply
//! masks backed by enough storage for `count` bits - confirm against the caller.
void WindowGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask) {
	D_ASSERT(count > 0);

	// Number of leading key bytes compared on the all-constant partition path
	// NOTE(review): this assumes the partition columns form a prefix of the sort key - confirm
	const auto partition_size = partition_layout.comparison_size;

	// Two cursors over the sorted data: `trail` stays exactly one row behind `lead`
	SBIterator prev(*global_sort, ExpressionType::COMPARE_LESSTHAN);
	SBIterator curr(*global_sort, ExpressionType::COMPARE_LESSTHAN);

	// The first row always opens both a partition and a peer group
	partition_mask.SetValidUnsafe(0);
	order_mask.SetValidUnsafe(0);

	++curr;
	while (curr.GetIndex() < count) {
		// Look at the partition columns first: if they differ, the full ordering differs as well
		const int part_cmp =
		    partition_layout.all_constant
		        ? FastMemcmp(prev.entry_ptr, curr.entry_ptr, partition_size)
		        : Comparators::CompareTuple(prev.scan, curr.scan, prev.entry_ptr, curr.entry_ptr, partition_layout,
		                                    prev.external);
		const bool starts_partition = part_cmp != 0;

		if (starts_partition) {
			partition_mask.SetValidUnsafe(curr.GetIndex());
		}
		// The full-key comparison only runs when the partition key is unchanged (short-circuit)
		if (starts_partition || prev.Compare(curr)) {
			order_mask.SetValidUnsafe(curr.GetIndex());
		}

		++prev;
		++curr;
	}
}
|
|
60317
|
+
|
|
60178
60318
|
// Global sink state
|
|
60179
60319
|
class WindowGlobalSinkState : public GlobalSinkState {
|
|
60180
60320
|
public:
|
|
@@ -60204,6 +60344,7 @@ public:
|
|
|
60204
60344
|
orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(),
|
|
60205
60345
|
wexpr->partitions_stats[prt_idx]->Copy());
|
|
60206
60346
|
}
|
|
60347
|
+
partitions.emplace_back(orders.back().Copy());
|
|
60207
60348
|
}
|
|
60208
60349
|
|
|
60209
60350
|
for (const auto &order : wexpr->orders) {
|
|
@@ -60220,8 +60361,8 @@ public:
|
|
|
60220
60361
|
lock_guard<mutex> guard(lock);
|
|
60221
60362
|
|
|
60222
60363
|
if (!ungrouped) {
|
|
60223
|
-
ungrouped =
|
|
60224
|
-
|
|
60364
|
+
ungrouped = make_unique<WindowGlobalHashGroup>(buffer_manager, partitions, orders, payload_types,
|
|
60365
|
+
memory_per_thread, external);
|
|
60225
60366
|
}
|
|
60226
60367
|
|
|
60227
60368
|
return ungrouped.get();
|
|
@@ -60249,7 +60390,8 @@ public:
|
|
|
60249
60390
|
auto &hash_group = hash_groups[group];
|
|
60250
60391
|
if (!hash_group) {
|
|
60251
60392
|
const auto maxmem = memory_per_thread / partition_info.n_partitions;
|
|
60252
|
-
hash_group =
|
|
60393
|
+
hash_group =
|
|
60394
|
+
make_unique<WindowGlobalHashGroup>(buffer_manager, partitions, orders, payload_types, maxmem, external);
|
|
60253
60395
|
}
|
|
60254
60396
|
|
|
60255
60397
|
return hash_group.get();
|
|
@@ -60276,6 +60418,7 @@ public:
|
|
|
60276
60418
|
mutex lock;
|
|
60277
60419
|
|
|
60278
60420
|
// Sorting
|
|
60421
|
+
Orders partitions;
|
|
60279
60422
|
Orders orders;
|
|
60280
60423
|
Types payload_types;
|
|
60281
60424
|
HashGroupPtr ungrouped;
|
|
@@ -60676,7 +60819,7 @@ void WindowGlobalSinkState::Finalize() {
|
|
|
60676
60819
|
global_sort.InitializeMergeRound();
|
|
60677
60820
|
MergeSorter merge_sorter(global_sort, global_sort.buffer_manager);
|
|
60678
60821
|
merge_sorter.PerformInMergeRound();
|
|
60679
|
-
global_sort.CompleteMergeRound();
|
|
60822
|
+
global_sort.CompleteMergeRound(true);
|
|
60680
60823
|
}
|
|
60681
60824
|
|
|
60682
60825
|
// Sink it into a temporary local sink state
|
|
@@ -60719,120 +60862,6 @@ PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expr
|
|
|
60719
60862
|
: PhysicalOperator(type, move(types), estimated_cardinality), select_list(move(select_list)) {
|
|
60720
60863
|
}
|
|
60721
60864
|
|
|
60722
|
-
struct MaskColumnOperator {
|
|
60723
|
-
template <class INPUT_TYPE>
|
|
60724
|
-
static INPUT_TYPE GetData(Vector &v, idx_t row_idx) {
|
|
60725
|
-
auto data = FlatVector::GetData<INPUT_TYPE>(v);
|
|
60726
|
-
return data[row_idx];
|
|
60727
|
-
}
|
|
60728
|
-
};
|
|
60729
|
-
|
|
60730
|
-
struct MaskColumnOperatorValue {
|
|
60731
|
-
template <class INPUT_TYPE>
|
|
60732
|
-
static INPUT_TYPE GetData(Vector &v, idx_t row_idx) {
|
|
60733
|
-
return v.GetValue(row_idx);
|
|
60734
|
-
}
|
|
60735
|
-
};
|
|
60736
|
-
|
|
60737
|
-
template <typename INPUT_TYPE, class OP = MaskColumnOperator>
|
|
60738
|
-
static void MaskTypedColumn(ValidityMask &mask, ChunkCollection &over_collection, const idx_t c) {
|
|
60739
|
-
idx_t r = 0;
|
|
60740
|
-
bool prev_valid;
|
|
60741
|
-
INPUT_TYPE prev;
|
|
60742
|
-
bool first_chunk = true;
|
|
60743
|
-
|
|
60744
|
-
for (auto &chunk : over_collection.Chunks()) {
|
|
60745
|
-
auto &v = chunk->data[c];
|
|
60746
|
-
auto validity = FlatVector::Validity(v);
|
|
60747
|
-
if (mask.CheckAllValid(r + chunk->size(), r)) {
|
|
60748
|
-
#ifdef DEBUG
|
|
60749
|
-
for (idx_t i = 0; i < chunk->size(); i++) {
|
|
60750
|
-
D_ASSERT(mask.RowIsValid(r + i));
|
|
60751
|
-
}
|
|
60752
|
-
#endif
|
|
60753
|
-
// all valid for this chunk: we can skip this
|
|
60754
|
-
// we should update the "prev" values though
|
|
60755
|
-
auto last_idx = chunk->size() - 1;
|
|
60756
|
-
prev_valid = validity.RowIsValid(last_idx);
|
|
60757
|
-
prev = OP::template GetData<INPUT_TYPE>(v, last_idx);
|
|
60758
|
-
first_chunk = false;
|
|
60759
|
-
r += chunk->size();
|
|
60760
|
-
continue;
|
|
60761
|
-
}
|
|
60762
|
-
idx_t start_index = 0;
|
|
60763
|
-
if (first_chunk) {
|
|
60764
|
-
// record the first value (if this is the first chunk)
|
|
60765
|
-
prev_valid = validity.RowIsValid(0);
|
|
60766
|
-
prev = OP::template GetData<INPUT_TYPE>(v, 0);
|
|
60767
|
-
first_chunk = false;
|
|
60768
|
-
start_index++;
|
|
60769
|
-
r++;
|
|
60770
|
-
}
|
|
60771
|
-
for (idx_t i = start_index; i < chunk->size(); i++) {
|
|
60772
|
-
auto curr_valid = validity.RowIsValid(i);
|
|
60773
|
-
auto curr = OP::template GetData<INPUT_TYPE>(v, i);
|
|
60774
|
-
|
|
60775
|
-
if (!mask.RowIsValid(r)) {
|
|
60776
|
-
if (curr_valid != prev_valid || (curr_valid && !Equals::Operation(curr, prev))) {
|
|
60777
|
-
mask.SetValidUnsafe(r);
|
|
60778
|
-
}
|
|
60779
|
-
}
|
|
60780
|
-
prev_valid = curr_valid;
|
|
60781
|
-
prev = curr;
|
|
60782
|
-
r++;
|
|
60783
|
-
}
|
|
60784
|
-
}
|
|
60785
|
-
}
|
|
60786
|
-
|
|
60787
|
-
static void MaskColumn(ValidityMask &mask, ChunkCollection &over_collection, const idx_t c) {
|
|
60788
|
-
auto &vector = over_collection.GetChunk(0).data[c];
|
|
60789
|
-
switch (vector.GetType().InternalType()) {
|
|
60790
|
-
case PhysicalType::BOOL:
|
|
60791
|
-
case PhysicalType::INT8:
|
|
60792
|
-
MaskTypedColumn<int8_t>(mask, over_collection, c);
|
|
60793
|
-
break;
|
|
60794
|
-
case PhysicalType::INT16:
|
|
60795
|
-
MaskTypedColumn<int16_t>(mask, over_collection, c);
|
|
60796
|
-
break;
|
|
60797
|
-
case PhysicalType::INT32:
|
|
60798
|
-
MaskTypedColumn<int32_t>(mask, over_collection, c);
|
|
60799
|
-
break;
|
|
60800
|
-
case PhysicalType::INT64:
|
|
60801
|
-
MaskTypedColumn<int64_t>(mask, over_collection, c);
|
|
60802
|
-
break;
|
|
60803
|
-
case PhysicalType::UINT8:
|
|
60804
|
-
MaskTypedColumn<uint8_t>(mask, over_collection, c);
|
|
60805
|
-
break;
|
|
60806
|
-
case PhysicalType::UINT16:
|
|
60807
|
-
MaskTypedColumn<uint16_t>(mask, over_collection, c);
|
|
60808
|
-
break;
|
|
60809
|
-
case PhysicalType::UINT32:
|
|
60810
|
-
MaskTypedColumn<uint32_t>(mask, over_collection, c);
|
|
60811
|
-
break;
|
|
60812
|
-
case PhysicalType::UINT64:
|
|
60813
|
-
MaskTypedColumn<uint64_t>(mask, over_collection, c);
|
|
60814
|
-
break;
|
|
60815
|
-
case PhysicalType::INT128:
|
|
60816
|
-
MaskTypedColumn<hugeint_t>(mask, over_collection, c);
|
|
60817
|
-
break;
|
|
60818
|
-
case PhysicalType::FLOAT:
|
|
60819
|
-
MaskTypedColumn<float>(mask, over_collection, c);
|
|
60820
|
-
break;
|
|
60821
|
-
case PhysicalType::DOUBLE:
|
|
60822
|
-
MaskTypedColumn<double>(mask, over_collection, c);
|
|
60823
|
-
break;
|
|
60824
|
-
case PhysicalType::VARCHAR:
|
|
60825
|
-
MaskTypedColumn<string_t>(mask, over_collection, c);
|
|
60826
|
-
break;
|
|
60827
|
-
case PhysicalType::INTERVAL:
|
|
60828
|
-
MaskTypedColumn<interval_t>(mask, over_collection, c);
|
|
60829
|
-
break;
|
|
60830
|
-
default:
|
|
60831
|
-
MaskTypedColumn<Value, MaskColumnOperatorValue>(mask, over_collection, c);
|
|
60832
|
-
break;
|
|
60833
|
-
}
|
|
60834
|
-
}
|
|
60835
|
-
|
|
60836
60865
|
static idx_t FindNextStart(const ValidityMask &mask, idx_t l, const idx_t r, idx_t &n) {
|
|
60837
60866
|
if (mask.AllValid()) {
|
|
60838
60867
|
auto start = MinValue(l + n - 1, r);
|
|
@@ -61661,31 +61690,12 @@ using WindowExpressions = vector<BoundWindowExpression *>;
|
|
|
61661
61690
|
|
|
61662
61691
|
static void ComputeWindowExpressions(WindowExpressions &window_exprs, ChunkCollection &input,
|
|
61663
61692
|
ChunkCollection &window_results, ChunkCollection &over,
|
|
61693
|
+
const ValidityMask &partition_mask, const ValidityMask &order_mask,
|
|
61664
61694
|
WindowAggregationMode mode) {
|
|
61665
61695
|
// Idempotency
|
|
61666
61696
|
if (input.Count() == 0) {
|
|
61667
61697
|
return;
|
|
61668
61698
|
}
|
|
61669
|
-
// Pick out a function for the OVER clause
|
|
61670
|
-
auto over_expr = window_exprs[0];
|
|
61671
|
-
|
|
61672
|
-
// Set bits for the start of each partition
|
|
61673
|
-
vector<validity_t> partition_bits(ValidityMask::EntryCount(input.Count()), 0);
|
|
61674
|
-
ValidityMask partition_mask(partition_bits.data());
|
|
61675
|
-
partition_mask.SetValid(0);
|
|
61676
|
-
|
|
61677
|
-
for (idx_t c = 0; c < over_expr->partitions.size(); ++c) {
|
|
61678
|
-
MaskColumn(partition_mask, over, c);
|
|
61679
|
-
}
|
|
61680
|
-
|
|
61681
|
-
// Set bits for the start of each peer group.
|
|
61682
|
-
// Partitions also break peer groups, so start with the partition bits.
|
|
61683
|
-
const auto sort_col_count = over_expr->partitions.size() + over_expr->orders.size();
|
|
61684
|
-
ValidityMask order_mask(partition_mask, input.Count());
|
|
61685
|
-
for (idx_t c = over_expr->partitions.size(); c < sort_col_count; ++c) {
|
|
61686
|
-
MaskColumn(order_mask, over, c);
|
|
61687
|
-
}
|
|
61688
|
-
|
|
61689
61699
|
// Compute the functions columnwise
|
|
61690
61700
|
for (idx_t expr_idx = 0; expr_idx < window_exprs.size(); ++expr_idx) {
|
|
61691
61701
|
ChunkCollection output(input.GetAllocator());
|
|
@@ -61719,6 +61729,24 @@ static void GeneratePartition(WindowLocalSourceState &state, WindowGlobalSinkSta
|
|
|
61719
61729
|
// 3. Multiple partitions (sorting and hashing)
|
|
61720
61730
|
const auto &input_types = op.children[0]->types;
|
|
61721
61731
|
|
|
61732
|
+
// How big is the partition?
|
|
61733
|
+
idx_t count = 0;
|
|
61734
|
+
if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
|
|
61735
|
+
count = gstate.hash_groups[hash_bin]->count;
|
|
61736
|
+
} else if (gstate.rows && !hash_bin) {
|
|
61737
|
+
count = gstate.count;
|
|
61738
|
+
} else {
|
|
61739
|
+
return;
|
|
61740
|
+
}
|
|
61741
|
+
|
|
61742
|
+
// Initialise masks to false
|
|
61743
|
+
const auto bit_count = ValidityMask::ValidityMaskSize(count);
|
|
61744
|
+
vector<validity_t> partition_bits(bit_count, 0);
|
|
61745
|
+
ValidityMask partition_mask(partition_bits.data());
|
|
61746
|
+
|
|
61747
|
+
vector<validity_t> order_bits(bit_count, 0);
|
|
61748
|
+
ValidityMask order_mask(order_bits.data());
|
|
61749
|
+
|
|
61722
61750
|
// Scan the sorted data into new Collections
|
|
61723
61751
|
auto &allocator = gstate.allocator;
|
|
61724
61752
|
ChunkCollection input(allocator);
|
|
@@ -61726,9 +61754,12 @@ static void GeneratePartition(WindowLocalSourceState &state, WindowGlobalSinkSta
|
|
|
61726
61754
|
if (gstate.rows && !hash_bin) {
|
|
61727
61755
|
// No partition - convert row collection to chunk collection
|
|
61728
61756
|
ScanRowCollection(*gstate.rows, *gstate.strings, input, input_types);
|
|
61757
|
+
partition_mask.SetValidUnsafe(0);
|
|
61758
|
+
order_mask.SetValidUnsafe(0);
|
|
61729
61759
|
} else if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
|
|
61730
61760
|
// Overwrite the collections with the sorted data
|
|
61731
61761
|
state.hash_group = move(gstate.hash_groups[hash_bin]);
|
|
61762
|
+
state.hash_group->ComputeMasks(partition_mask, order_mask);
|
|
61732
61763
|
const auto over_types = state.hash_group->global_sort->sort_layout.logical_types;
|
|
61733
61764
|
ScanSortedPartition(state, input, input_types, over, over_types);
|
|
61734
61765
|
} else {
|
|
@@ -61736,7 +61767,7 @@ static void GeneratePartition(WindowLocalSourceState &state, WindowGlobalSinkSta
|
|
|
61736
61767
|
}
|
|
61737
61768
|
|
|
61738
61769
|
ChunkCollection output(allocator);
|
|
61739
|
-
ComputeWindowExpressions(window_exprs, input, output, over, gstate.mode);
|
|
61770
|
+
ComputeWindowExpressions(window_exprs, input, output, over, partition_mask, order_mask, gstate.mode);
|
|
61740
61771
|
state.chunks.Merge(input);
|
|
61741
61772
|
state.window_results.Merge(output);
|
|
61742
61773
|
}
|
|
@@ -61837,7 +61868,7 @@ public:
|
|
|
61837
61868
|
}
|
|
61838
61869
|
|
|
61839
61870
|
void FinishEvent() override {
|
|
61840
|
-
hash_group.global_sort->CompleteMergeRound();
|
|
61871
|
+
hash_group.global_sort->CompleteMergeRound(true);
|
|
61841
61872
|
CreateMergeTasks(pipeline, *this, gstate, hash_group);
|
|
61842
61873
|
}
|
|
61843
61874
|
|
|
@@ -66078,7 +66109,6 @@ private:
|
|
|
66078
66109
|
|
|
66079
66110
|
|
|
66080
66111
|
|
|
66081
|
-
|
|
66082
66112
|
#include <thread>
|
|
66083
66113
|
|
|
66084
66114
|
namespace duckdb {
|
|
@@ -66259,104 +66289,6 @@ OperatorResultType PhysicalIEJoin::Execute(ExecutionContext &context, DataChunk
|
|
|
66259
66289
|
//===--------------------------------------------------------------------===//
|
|
66260
66290
|
// Source
|
|
66261
66291
|
//===--------------------------------------------------------------------===//
|
|
66262
|
-
struct SBIterator {
|
|
66263
|
-
static int ComparisonValue(ExpressionType comparison) {
|
|
66264
|
-
switch (comparison) {
|
|
66265
|
-
case ExpressionType::COMPARE_LESSTHAN:
|
|
66266
|
-
case ExpressionType::COMPARE_GREATERTHAN:
|
|
66267
|
-
return -1;
|
|
66268
|
-
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
66269
|
-
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
66270
|
-
return 0;
|
|
66271
|
-
default:
|
|
66272
|
-
throw InternalException("Unimplemented comparison type for IEJoin!");
|
|
66273
|
-
}
|
|
66274
|
-
}
|
|
66275
|
-
|
|
66276
|
-
explicit SBIterator(GlobalSortState &gss, ExpressionType comparison, idx_t entry_idx_p = 0)
|
|
66277
|
-
: sort_layout(gss.sort_layout), block_count(gss.sorted_blocks[0]->radix_sorting_data.size()),
|
|
66278
|
-
block_capacity(gss.block_capacity), cmp_size(sort_layout.comparison_size), entry_size(sort_layout.entry_size),
|
|
66279
|
-
all_constant(sort_layout.all_constant), external(gss.external), cmp(ComparisonValue(comparison)),
|
|
66280
|
-
scan(gss.buffer_manager, gss), block_ptr(nullptr), entry_ptr(nullptr) {
|
|
66281
|
-
|
|
66282
|
-
scan.sb = gss.sorted_blocks[0].get();
|
|
66283
|
-
scan.block_idx = block_count;
|
|
66284
|
-
SetIndex(entry_idx_p);
|
|
66285
|
-
}
|
|
66286
|
-
|
|
66287
|
-
inline idx_t GetIndex() const {
|
|
66288
|
-
return entry_idx;
|
|
66289
|
-
}
|
|
66290
|
-
|
|
66291
|
-
inline void SetIndex(idx_t entry_idx_p) {
|
|
66292
|
-
const auto new_block_idx = entry_idx_p / block_capacity;
|
|
66293
|
-
if (new_block_idx != scan.block_idx) {
|
|
66294
|
-
scan.SetIndices(new_block_idx, 0);
|
|
66295
|
-
if (new_block_idx < block_count) {
|
|
66296
|
-
scan.PinRadix(scan.block_idx);
|
|
66297
|
-
block_ptr = scan.RadixPtr();
|
|
66298
|
-
if (!all_constant) {
|
|
66299
|
-
scan.PinData(*scan.sb->blob_sorting_data);
|
|
66300
|
-
}
|
|
66301
|
-
}
|
|
66302
|
-
}
|
|
66303
|
-
|
|
66304
|
-
scan.entry_idx = entry_idx_p % block_capacity;
|
|
66305
|
-
entry_ptr = block_ptr + scan.entry_idx * entry_size;
|
|
66306
|
-
entry_idx = entry_idx_p;
|
|
66307
|
-
}
|
|
66308
|
-
|
|
66309
|
-
inline SBIterator &operator++() {
|
|
66310
|
-
if (++scan.entry_idx < block_capacity) {
|
|
66311
|
-
entry_ptr += entry_size;
|
|
66312
|
-
++entry_idx;
|
|
66313
|
-
} else {
|
|
66314
|
-
SetIndex(entry_idx + 1);
|
|
66315
|
-
}
|
|
66316
|
-
|
|
66317
|
-
return *this;
|
|
66318
|
-
}
|
|
66319
|
-
|
|
66320
|
-
inline SBIterator &operator--() {
|
|
66321
|
-
if (scan.entry_idx) {
|
|
66322
|
-
--scan.entry_idx;
|
|
66323
|
-
--entry_idx;
|
|
66324
|
-
entry_ptr -= entry_size;
|
|
66325
|
-
} else {
|
|
66326
|
-
SetIndex(entry_idx - 1);
|
|
66327
|
-
}
|
|
66328
|
-
|
|
66329
|
-
return *this;
|
|
66330
|
-
}
|
|
66331
|
-
|
|
66332
|
-
inline bool Compare(const SBIterator &other) const {
|
|
66333
|
-
int comp_res;
|
|
66334
|
-
if (all_constant) {
|
|
66335
|
-
comp_res = FastMemcmp(entry_ptr, other.entry_ptr, cmp_size);
|
|
66336
|
-
} else {
|
|
66337
|
-
comp_res = Comparators::CompareTuple(scan, other.scan, entry_ptr, other.entry_ptr, sort_layout, external);
|
|
66338
|
-
}
|
|
66339
|
-
|
|
66340
|
-
return comp_res <= cmp;
|
|
66341
|
-
}
|
|
66342
|
-
|
|
66343
|
-
// Fixed comparison parameters
|
|
66344
|
-
const SortLayout &sort_layout;
|
|
66345
|
-
const idx_t block_count;
|
|
66346
|
-
const idx_t block_capacity;
|
|
66347
|
-
const size_t cmp_size;
|
|
66348
|
-
const size_t entry_size;
|
|
66349
|
-
const bool all_constant;
|
|
66350
|
-
const bool external;
|
|
66351
|
-
const int cmp;
|
|
66352
|
-
|
|
66353
|
-
// Iteration state
|
|
66354
|
-
SBScanState scan;
|
|
66355
|
-
idx_t entry_idx;
|
|
66356
|
-
data_ptr_t block_ptr;
|
|
66357
|
-
data_ptr_t entry_ptr;
|
|
66358
|
-
};
|
|
66359
|
-
|
|
66360
66292
|
struct IEJoinUnion {
|
|
66361
66293
|
using SortedTable = PhysicalRangeJoin::GlobalSortedTable;
|
|
66362
66294
|
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.4.1-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "d163f7ee3"
|
|
15
|
+
#define DUCKDB_VERSION "v0.4.1-dev790"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|