duckdb 0.5.2-dev1295.0 → 0.5.2-dev1312.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +366 -186
- package/src/duckdb.hpp +5 -2
- package/src/parquet-amalgamation.cpp +37760 -37760
package/src/duckdb.cpp
CHANGED
|
@@ -17668,6 +17668,25 @@ public:
|
|
|
17668
17668
|
|
|
17669
17669
|
|
|
17670
17670
|
|
|
17671
|
+
//===----------------------------------------------------------------------===//
|
|
17672
|
+
// DuckDB
|
|
17673
|
+
//
|
|
17674
|
+
// duckdb/common/likely.hpp
|
|
17675
|
+
//
|
|
17676
|
+
//
|
|
17677
|
+
//===----------------------------------------------------------------------===//
|
|
17678
|
+
|
|
17679
|
+
|
|
17680
|
+
|
|
17681
|
+
#if __GNUC__
|
|
17682
|
+
#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (__builtin_expect(cond, expected_value))
|
|
17683
|
+
#else
|
|
17684
|
+
#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (cond)
|
|
17685
|
+
#endif
|
|
17686
|
+
|
|
17687
|
+
#define DUCKDB_LIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 1)
|
|
17688
|
+
#define DUCKDB_UNLIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 0)
|
|
17689
|
+
|
|
17671
17690
|
|
|
17672
17691
|
namespace duckdb {
|
|
17673
17692
|
struct StringDictionaryContainer {
|
|
@@ -17723,31 +17742,33 @@ public:
|
|
|
17723
17742
|
return StringAppendBase(append_state.handle, segment, stats, data, offset, count);
|
|
17724
17743
|
}
|
|
17725
17744
|
|
|
17726
|
-
template <bool DUPLICATE_ELIMINATE = false>
|
|
17727
17745
|
static idx_t StringAppendBase(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data,
|
|
17728
|
-
idx_t offset, idx_t count
|
|
17729
|
-
std::unordered_map<string, int32_t> *seen_strings = nullptr) {
|
|
17746
|
+
idx_t offset, idx_t count) {
|
|
17730
17747
|
auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
|
|
17731
17748
|
auto handle = buffer_manager.Pin(segment.block);
|
|
17732
17749
|
return StringAppendBase(handle, segment, stats, data, offset, count);
|
|
17733
17750
|
}
|
|
17734
|
-
|
|
17751
|
+
|
|
17735
17752
|
static idx_t StringAppendBase(BufferHandle &handle, ColumnSegment &segment, SegmentStatistics &stats,
|
|
17736
|
-
UnifiedVectorFormat &data, idx_t offset, idx_t count
|
|
17737
|
-
std::unordered_map<string, int32_t> *seen_strings = nullptr) {
|
|
17753
|
+
UnifiedVectorFormat &data, idx_t offset, idx_t count) {
|
|
17738
17754
|
D_ASSERT(segment.GetBlockOffset() == 0);
|
|
17755
|
+
auto handle_ptr = handle.Ptr();
|
|
17739
17756
|
auto source_data = (string_t *)data.data;
|
|
17740
|
-
auto result_data = (int32_t *)(
|
|
17757
|
+
auto result_data = (int32_t *)(handle_ptr + DICTIONARY_HEADER_SIZE);
|
|
17758
|
+
uint32_t *dictionary_size = (uint32_t *)handle_ptr;
|
|
17759
|
+
uint32_t *dictionary_end = (uint32_t *)(handle_ptr + sizeof(uint32_t));
|
|
17760
|
+
|
|
17761
|
+
idx_t remaining_space = RemainingSpace(segment, handle);
|
|
17762
|
+
auto base_count = segment.count.load();
|
|
17741
17763
|
for (idx_t i = 0; i < count; i++) {
|
|
17742
17764
|
auto source_idx = data.sel->get_index(offset + i);
|
|
17743
|
-
auto target_idx =
|
|
17744
|
-
idx_t remaining_space = RemainingSpace(segment, handle);
|
|
17765
|
+
auto target_idx = base_count + i;
|
|
17745
17766
|
if (remaining_space < sizeof(int32_t)) {
|
|
17746
17767
|
// string index does not fit in the block at all
|
|
17768
|
+
segment.count += i;
|
|
17747
17769
|
return i;
|
|
17748
17770
|
}
|
|
17749
17771
|
remaining_space -= sizeof(int32_t);
|
|
17750
|
-
auto dictionary = GetDictionary(segment, handle);
|
|
17751
17772
|
if (!data.validity.RowIsValid(source_idx)) {
|
|
17752
17773
|
// null value is stored as a copy of the last value, this is done to be able to efficiently do the
|
|
17753
17774
|
// string_length calculation
|
|
@@ -17756,82 +17777,68 @@ public:
|
|
|
17756
17777
|
} else {
|
|
17757
17778
|
result_data[target_idx] = 0;
|
|
17758
17779
|
}
|
|
17759
|
-
|
|
17760
|
-
|
|
17761
|
-
|
|
17762
|
-
dictionary.Verify();
|
|
17763
|
-
|
|
17764
|
-
int32_t match;
|
|
17765
|
-
bool found;
|
|
17766
|
-
if (DUPLICATE_ELIMINATE) {
|
|
17767
|
-
auto search = seen_strings->find(source_data[source_idx].GetString());
|
|
17768
|
-
if (search != seen_strings->end()) {
|
|
17769
|
-
match = search->second;
|
|
17770
|
-
found = true;
|
|
17771
|
-
} else {
|
|
17772
|
-
found = false;
|
|
17773
|
-
}
|
|
17774
|
-
}
|
|
17775
|
-
|
|
17776
|
-
if (DUPLICATE_ELIMINATE && found) {
|
|
17777
|
-
// We have seen this string
|
|
17778
|
-
result_data[target_idx] = match;
|
|
17779
|
-
} else {
|
|
17780
|
-
// Unknown string, continue
|
|
17781
|
-
// non-null value, check if we can fit it within the block
|
|
17782
|
-
idx_t string_length = source_data[source_idx].GetSize();
|
|
17783
|
-
idx_t dictionary_length = string_length;
|
|
17784
|
-
|
|
17785
|
-
// determine whether or not we have space in the block for this string
|
|
17786
|
-
bool use_overflow_block = false;
|
|
17787
|
-
idx_t required_space = dictionary_length;
|
|
17788
|
-
if (required_space >= StringUncompressed::STRING_BLOCK_LIMIT) {
|
|
17789
|
-
// string exceeds block limit, store in overflow block and only write a marker here
|
|
17790
|
-
required_space = BIG_STRING_MARKER_SIZE;
|
|
17791
|
-
use_overflow_block = true;
|
|
17792
|
-
}
|
|
17793
|
-
if (required_space > remaining_space) {
|
|
17794
|
-
// no space remaining: return how many tuples we ended up writing
|
|
17795
|
-
return i;
|
|
17796
|
-
}
|
|
17780
|
+
continue;
|
|
17781
|
+
}
|
|
17782
|
+
auto end = handle.Ptr() + *dictionary_end;
|
|
17797
17783
|
|
|
17798
|
-
|
|
17799
|
-
|
|
17784
|
+
#ifdef DEBUG
|
|
17785
|
+
GetDictionary(segment, handle).Verify();
|
|
17786
|
+
#endif
|
|
17787
|
+
// Unknown string, continue
|
|
17788
|
+
// non-null value, check if we can fit it within the block
|
|
17789
|
+
idx_t string_length = source_data[source_idx].GetSize();
|
|
17790
|
+
|
|
17791
|
+
// determine whether or not we have space in the block for this string
|
|
17792
|
+
bool use_overflow_block = false;
|
|
17793
|
+
idx_t required_space = string_length;
|
|
17794
|
+
if (DUCKDB_UNLIKELY(required_space >= StringUncompressed::STRING_BLOCK_LIMIT)) {
|
|
17795
|
+
// string exceeds block limit, store in overflow block and only write a marker here
|
|
17796
|
+
required_space = BIG_STRING_MARKER_SIZE;
|
|
17797
|
+
use_overflow_block = true;
|
|
17798
|
+
}
|
|
17799
|
+
if (DUCKDB_UNLIKELY(required_space > remaining_space)) {
|
|
17800
|
+
// no space remaining: return how many tuples we ended up writing
|
|
17801
|
+
segment.count += i;
|
|
17802
|
+
return i;
|
|
17803
|
+
}
|
|
17800
17804
|
|
|
17801
|
-
|
|
17802
|
-
|
|
17803
|
-
block_id_t block;
|
|
17804
|
-
int32_t offset;
|
|
17805
|
-
// write the string into the current string block
|
|
17806
|
-
WriteString(segment, source_data[source_idx], block, offset);
|
|
17807
|
-
dictionary.size += BIG_STRING_MARKER_SIZE;
|
|
17808
|
-
auto dict_pos = end - dictionary.size;
|
|
17805
|
+
// we have space: write the string
|
|
17806
|
+
UpdateStringStats(stats, source_data[source_idx]);
|
|
17809
17807
|
|
|
17810
|
-
|
|
17811
|
-
|
|
17812
|
-
|
|
17813
|
-
|
|
17814
|
-
|
|
17815
|
-
|
|
17816
|
-
|
|
17817
|
-
|
|
17818
|
-
|
|
17819
|
-
}
|
|
17820
|
-
D_ASSERT(RemainingSpace(segment, handle) <= Storage::BLOCK_SIZE);
|
|
17821
|
-
// place the dictionary offset into the set of vectors
|
|
17822
|
-
dictionary.Verify();
|
|
17808
|
+
if (DUCKDB_UNLIKELY(use_overflow_block)) {
|
|
17809
|
+
// write to overflow blocks
|
|
17810
|
+
block_id_t block;
|
|
17811
|
+
int32_t offset;
|
|
17812
|
+
// write the string into the current string block
|
|
17813
|
+
WriteString(segment, source_data[source_idx], block, offset);
|
|
17814
|
+
*dictionary_size += BIG_STRING_MARKER_SIZE;
|
|
17815
|
+
remaining_space -= BIG_STRING_MARKER_SIZE;
|
|
17816
|
+
auto dict_pos = end - *dictionary_size;
|
|
17823
17817
|
|
|
17824
|
-
|
|
17825
|
-
|
|
17818
|
+
// write a big string marker into the dictionary
|
|
17819
|
+
WriteStringMarker(dict_pos, block, offset);
|
|
17826
17820
|
|
|
17827
|
-
|
|
17828
|
-
|
|
17829
|
-
|
|
17830
|
-
|
|
17831
|
-
|
|
17832
|
-
|
|
17833
|
-
|
|
17821
|
+
// place the dictionary offset into the set of vectors
|
|
17822
|
+
// note: for overflow strings we write a negative value
|
|
17823
|
+
result_data[target_idx] = -(*dictionary_size);
|
|
17824
|
+
} else {
|
|
17825
|
+
// string fits in block, append to dictionary and increment dictionary position
|
|
17826
|
+
D_ASSERT(string_length < NumericLimits<uint16_t>::Maximum());
|
|
17827
|
+
*dictionary_size += required_space;
|
|
17828
|
+
remaining_space -= required_space;
|
|
17829
|
+
auto dict_pos = end - *dictionary_size;
|
|
17830
|
+
// now write the actual string data into the dictionary
|
|
17831
|
+
memcpy(dict_pos, source_data[source_idx].GetDataUnsafe(), string_length);
|
|
17832
|
+
|
|
17833
|
+
// place the dictionary offset into the set of vectors
|
|
17834
|
+
result_data[target_idx] = *dictionary_size;
|
|
17835
|
+
}
|
|
17836
|
+
D_ASSERT(RemainingSpace(segment, handle) <= Storage::BLOCK_SIZE);
|
|
17837
|
+
#ifdef DEBUG
|
|
17838
|
+
GetDictionary(segment, handle).Verify();
|
|
17839
|
+
#endif
|
|
17834
17840
|
}
|
|
17841
|
+
segment.count += count;
|
|
17835
17842
|
return count;
|
|
17836
17843
|
}
|
|
17837
17844
|
|
|
@@ -53611,24 +53618,6 @@ void VectorOperations::Not(Vector &input, Vector &result, idx_t count) {
|
|
|
53611
53618
|
|
|
53612
53619
|
|
|
53613
53620
|
|
|
53614
|
-
//===----------------------------------------------------------------------===//
|
|
53615
|
-
// DuckDB
|
|
53616
|
-
//
|
|
53617
|
-
// duckdb/common/likely.hpp
|
|
53618
|
-
//
|
|
53619
|
-
//
|
|
53620
|
-
//===----------------------------------------------------------------------===//
|
|
53621
|
-
|
|
53622
|
-
|
|
53623
|
-
|
|
53624
|
-
#if __GNUC__
|
|
53625
|
-
#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (__builtin_expect(cond, expected_value))
|
|
53626
|
-
#else
|
|
53627
|
-
#define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (cond)
|
|
53628
|
-
#endif
|
|
53629
|
-
|
|
53630
|
-
#define DUCKDB_LIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 1)
|
|
53631
|
-
#define DUCKDB_UNLIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 0)
|
|
53632
53621
|
|
|
53633
53622
|
|
|
53634
53623
|
namespace duckdb {
|
|
@@ -64202,6 +64191,10 @@ public:
|
|
|
64202
64191
|
OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
|
|
64203
64192
|
GlobalOperatorState &gstate, OperatorState &state) const override;
|
|
64204
64193
|
|
|
64194
|
+
bool IsOrderDependent() const override {
|
|
64195
|
+
return true;
|
|
64196
|
+
}
|
|
64197
|
+
|
|
64205
64198
|
string ParamsToString() const override;
|
|
64206
64199
|
};
|
|
64207
64200
|
|
|
@@ -65145,6 +65138,9 @@ public:
|
|
|
65145
65138
|
|
|
65146
65139
|
//! The projection list of the WINDOW statement (may contain aggregates)
|
|
65147
65140
|
vector<unique_ptr<Expression>> select_list;
|
|
65141
|
+
//! Whether or not the window is order dependent (true if at least one window function contains neither an order nor a
|
|
65142
|
+
//! partition clause)
|
|
65143
|
+
bool is_order_dependent;
|
|
65148
65144
|
|
|
65149
65145
|
public:
|
|
65150
65146
|
// Source interface
|
|
@@ -65178,7 +65174,11 @@ public:
|
|
|
65178
65174
|
}
|
|
65179
65175
|
|
|
65180
65176
|
bool ParallelSink() const override {
|
|
65181
|
-
return
|
|
65177
|
+
return !is_order_dependent;
|
|
65178
|
+
}
|
|
65179
|
+
|
|
65180
|
+
bool IsOrderDependent() const override {
|
|
65181
|
+
return is_order_dependent;
|
|
65182
65182
|
}
|
|
65183
65183
|
|
|
65184
65184
|
public:
|
|
@@ -65873,9 +65873,17 @@ void WindowGlobalSinkState::Finalize() {
|
|
|
65873
65873
|
}
|
|
65874
65874
|
|
|
65875
65875
|
// this implements a sorted window functions variant
|
|
65876
|
-
PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expression>>
|
|
65876
|
+
PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expression>> select_list_p,
|
|
65877
65877
|
idx_t estimated_cardinality, PhysicalOperatorType type)
|
|
65878
|
-
: PhysicalOperator(type, move(types), estimated_cardinality), select_list(move(
|
|
65878
|
+
: PhysicalOperator(type, move(types), estimated_cardinality), select_list(move(select_list_p)) {
|
|
65879
|
+
is_order_dependent = false;
|
|
65880
|
+
for (auto &expr : select_list) {
|
|
65881
|
+
D_ASSERT(expr->expression_class == ExpressionClass::BOUND_WINDOW);
|
|
65882
|
+
auto &bound_window = (BoundWindowExpression &)*expr;
|
|
65883
|
+
if (bound_window.partitions.empty() && bound_window.orders.empty()) {
|
|
65884
|
+
is_order_dependent = true;
|
|
65885
|
+
}
|
|
65886
|
+
}
|
|
65879
65887
|
}
|
|
65880
65888
|
|
|
65881
65889
|
static idx_t FindNextStart(const ValidityMask &mask, idx_t l, const idx_t r, idx_t &n) {
|
|
@@ -78838,6 +78846,84 @@ PhysicalBatchInsert::PhysicalBatchInsert(LogicalOperator &op, SchemaCatalogEntry
|
|
|
78838
78846
|
//===--------------------------------------------------------------------===//
|
|
78839
78847
|
// Sink
|
|
78840
78848
|
//===--------------------------------------------------------------------===//
|
|
78849
|
+
|
|
78850
|
+
class CollectionMerger {
|
|
78851
|
+
public:
|
|
78852
|
+
explicit CollectionMerger(ClientContext &context) : context(context) {
|
|
78853
|
+
}
|
|
78854
|
+
|
|
78855
|
+
ClientContext &context;
|
|
78856
|
+
vector<unique_ptr<RowGroupCollection>> current_collections;
|
|
78857
|
+
|
|
78858
|
+
public:
|
|
78859
|
+
void AddCollection(unique_ptr<RowGroupCollection> collection) {
|
|
78860
|
+
current_collections.push_back(move(collection));
|
|
78861
|
+
}
|
|
78862
|
+
|
|
78863
|
+
bool Empty() {
|
|
78864
|
+
return current_collections.empty();
|
|
78865
|
+
}
|
|
78866
|
+
|
|
78867
|
+
unique_ptr<RowGroupCollection> Flush(OptimisticDataWriter &writer) {
|
|
78868
|
+
if (Empty()) {
|
|
78869
|
+
return nullptr;
|
|
78870
|
+
}
|
|
78871
|
+
unique_ptr<RowGroupCollection> new_collection;
|
|
78872
|
+
if (current_collections.size() == 1) {
|
|
78873
|
+
// we have gathered only one row group collection: merge it directly
|
|
78874
|
+
new_collection = move(current_collections[0]);
|
|
78875
|
+
} else {
|
|
78876
|
+
// we have gathered multiple collections: create one big collection and merge that
|
|
78877
|
+
// find the biggest collection
|
|
78878
|
+
idx_t biggest_index = 0;
|
|
78879
|
+
for (idx_t i = 1; i < current_collections.size(); i++) {
|
|
78880
|
+
D_ASSERT(current_collections[i]);
|
|
78881
|
+
if (current_collections[i]->GetTotalRows() > current_collections[biggest_index]->GetTotalRows()) {
|
|
78882
|
+
biggest_index = i;
|
|
78883
|
+
}
|
|
78884
|
+
}
|
|
78885
|
+
// now append all the other collections to this collection
|
|
78886
|
+
new_collection = move(current_collections[biggest_index]);
|
|
78887
|
+
auto &types = new_collection->GetTypes();
|
|
78888
|
+
TableAppendState append_state;
|
|
78889
|
+
new_collection->InitializeAppend(append_state);
|
|
78890
|
+
|
|
78891
|
+
DataChunk scan_chunk;
|
|
78892
|
+
scan_chunk.Initialize(context, types);
|
|
78893
|
+
|
|
78894
|
+
vector<column_t> column_ids;
|
|
78895
|
+
for (idx_t i = 0; i < types.size(); i++) {
|
|
78896
|
+
column_ids.push_back(i);
|
|
78897
|
+
}
|
|
78898
|
+
for (auto &collection : current_collections) {
|
|
78899
|
+
if (!collection) {
|
|
78900
|
+
continue;
|
|
78901
|
+
}
|
|
78902
|
+
TableScanState scan_state;
|
|
78903
|
+
scan_state.Initialize(column_ids);
|
|
78904
|
+
collection->InitializeScan(scan_state.local_state, column_ids, nullptr);
|
|
78905
|
+
|
|
78906
|
+
while (true) {
|
|
78907
|
+
scan_chunk.Reset();
|
|
78908
|
+
scan_state.local_state.ScanCommitted(scan_chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
|
|
78909
|
+
if (scan_chunk.size() == 0) {
|
|
78910
|
+
break;
|
|
78911
|
+
}
|
|
78912
|
+
auto new_row_group = new_collection->Append(scan_chunk, append_state);
|
|
78913
|
+
if (new_row_group) {
|
|
78914
|
+
writer.CheckFlushToDisk(*new_collection);
|
|
78915
|
+
}
|
|
78916
|
+
}
|
|
78917
|
+
}
|
|
78918
|
+
|
|
78919
|
+
new_collection->FinalizeAppend(TransactionData(0, 0), append_state);
|
|
78920
|
+
writer.FlushToDisk(*new_collection);
|
|
78921
|
+
}
|
|
78922
|
+
current_collections.clear();
|
|
78923
|
+
return new_collection;
|
|
78924
|
+
}
|
|
78925
|
+
};
|
|
78926
|
+
|
|
78841
78927
|
class BatchInsertGlobalState : public GlobalSinkState {
|
|
78842
78928
|
public:
|
|
78843
78929
|
explicit BatchInsertGlobalState() : insert_count(0) {
|
|
@@ -78848,16 +78934,124 @@ public:
|
|
|
78848
78934
|
idx_t insert_count;
|
|
78849
78935
|
map<idx_t, unique_ptr<RowGroupCollection>> collections;
|
|
78850
78936
|
|
|
78851
|
-
|
|
78852
|
-
|
|
78853
|
-
|
|
78937
|
+
bool CheckMergeInternal(idx_t batch_index, vector<unique_ptr<RowGroupCollection>> *result, idx_t *merge_count) {
|
|
78938
|
+
auto entry = collections.find(batch_index);
|
|
78939
|
+
if (entry == collections.end()) {
|
|
78940
|
+
// no collection at this index
|
|
78941
|
+
return false;
|
|
78942
|
+
}
|
|
78943
|
+
auto row_count = entry->second->GetTotalRows();
|
|
78944
|
+
if (row_count >= LocalStorage::MERGE_THRESHOLD) {
|
|
78945
|
+
// the collection at this batch index is large and has already been written
|
|
78946
|
+
return false;
|
|
78947
|
+
}
|
|
78948
|
+
// we can merge this collection!
|
|
78949
|
+
if (merge_count) {
|
|
78950
|
+
// add the count
|
|
78951
|
+
D_ASSERT(!result);
|
|
78952
|
+
*merge_count += row_count;
|
|
78953
|
+
} else {
|
|
78954
|
+
// move the collection into the result vector and remove it from the map
|
|
78955
|
+
D_ASSERT(result);
|
|
78956
|
+
result->push_back(move(entry->second));
|
|
78957
|
+
collections.erase(batch_index);
|
|
78958
|
+
}
|
|
78959
|
+
return true;
|
|
78960
|
+
}
|
|
78961
|
+
|
|
78962
|
+
bool CheckMerge(idx_t batch_index, idx_t &merge_count) {
|
|
78963
|
+
return CheckMergeInternal(batch_index, nullptr, &merge_count);
|
|
78964
|
+
}
|
|
78965
|
+
bool CheckMerge(idx_t batch_index, vector<unique_ptr<RowGroupCollection>> &result) {
|
|
78966
|
+
return CheckMergeInternal(batch_index, &result, nullptr);
|
|
78967
|
+
}
|
|
78968
|
+
|
|
78969
|
+
unique_ptr<RowGroupCollection> MergeCollections(ClientContext &context,
|
|
78970
|
+
vector<unique_ptr<RowGroupCollection>> merge_collections,
|
|
78971
|
+
OptimisticDataWriter &writer) {
|
|
78972
|
+
CollectionMerger merger(context);
|
|
78973
|
+
for (auto &collection : merge_collections) {
|
|
78974
|
+
merger.AddCollection(move(collection));
|
|
78975
|
+
}
|
|
78976
|
+
return merger.Flush(writer);
|
|
78977
|
+
}
|
|
78978
|
+
|
|
78979
|
+
void VerifyUniqueBatch(idx_t batch_index) {
|
|
78854
78980
|
if (collections.find(batch_index) != collections.end()) {
|
|
78855
78981
|
throw InternalException("PhysicalBatchInsert::AddCollection error: batch index %d is present in multiple "
|
|
78856
78982
|
"collections. This occurs when "
|
|
78857
78983
|
"batch indexes are not uniquely distributed over threads",
|
|
78858
78984
|
batch_index);
|
|
78859
78985
|
}
|
|
78860
|
-
|
|
78986
|
+
}
|
|
78987
|
+
|
|
78988
|
+
void AddCollection(ClientContext &context, idx_t batch_index, unique_ptr<RowGroupCollection> current_collection,
|
|
78989
|
+
OptimisticDataWriter *writer = nullptr, bool *written_to_disk = nullptr) {
|
|
78990
|
+
vector<unique_ptr<RowGroupCollection>> merge_collections;
|
|
78991
|
+
idx_t merge_count;
|
|
78992
|
+
{
|
|
78993
|
+
lock_guard<mutex> l(lock);
|
|
78994
|
+
auto new_count = current_collection->GetTotalRows();
|
|
78995
|
+
insert_count += new_count;
|
|
78996
|
+
VerifyUniqueBatch(batch_index);
|
|
78997
|
+
if (writer && new_count < LocalStorage::MERGE_THRESHOLD) {
|
|
78998
|
+
// we are inserting a small collection that has not yet been written to disk
|
|
78999
|
+
// check if there are any collections with adjacent batch indexes that we can merge together
|
|
79000
|
+
|
|
79001
|
+
// first check how many rows we will end up with by performing such a merge
|
|
79002
|
+
// check backwards
|
|
79003
|
+
merge_count = new_count;
|
|
79004
|
+
idx_t start_batch_index;
|
|
79005
|
+
idx_t end_batch_index;
|
|
79006
|
+
for (start_batch_index = batch_index; start_batch_index > 0; start_batch_index--) {
|
|
79007
|
+
if (!CheckMerge(start_batch_index - 1, merge_count)) {
|
|
79008
|
+
break;
|
|
79009
|
+
}
|
|
79010
|
+
}
|
|
79011
|
+
// check forwards
|
|
79012
|
+
for (end_batch_index = batch_index;; end_batch_index++) {
|
|
79013
|
+
if (!CheckMerge(end_batch_index + 1, merge_count)) {
|
|
79014
|
+
break;
|
|
79015
|
+
}
|
|
79016
|
+
}
|
|
79017
|
+
// merging together creates a big enough row group
|
|
79018
|
+
// merge!
|
|
79019
|
+
if (merge_count >= RowGroup::ROW_GROUP_SIZE) {
|
|
79020
|
+
// gather the row groups to merge
|
|
79021
|
+
// note that we need to gather them in order of batch index
|
|
79022
|
+
for (idx_t i = start_batch_index; i <= end_batch_index; i++) {
|
|
79023
|
+
if (i == batch_index) {
|
|
79024
|
+
merge_collections.push_back(move(current_collection));
|
|
79025
|
+
continue;
|
|
79026
|
+
}
|
|
79027
|
+
auto can_merge = CheckMerge(i, merge_collections);
|
|
79028
|
+
if (!can_merge) {
|
|
79029
|
+
throw InternalException("Could not merge row group in batch insert?!");
|
|
79030
|
+
}
|
|
79031
|
+
}
|
|
79032
|
+
}
|
|
79033
|
+
}
|
|
79034
|
+
if (merge_collections.empty()) {
|
|
79035
|
+
// no collections to merge together - add the collection to the batch index
|
|
79036
|
+
collections[batch_index] = move(current_collection);
|
|
79037
|
+
}
|
|
79038
|
+
}
|
|
79039
|
+
if (!merge_collections.empty()) {
|
|
79040
|
+
// merge together the collections
|
|
79041
|
+
D_ASSERT(writer);
|
|
79042
|
+
auto final_collection = MergeCollections(context, move(merge_collections), *writer);
|
|
79043
|
+
D_ASSERT(final_collection->GetTotalRows() == merge_count);
|
|
79044
|
+
D_ASSERT(final_collection->GetTotalRows() >= RowGroup::ROW_GROUP_SIZE);
|
|
79045
|
+
if (written_to_disk) {
|
|
79046
|
+
*written_to_disk = true;
|
|
79047
|
+
}
|
|
79048
|
+
// add the merged-together collection to the global set of collections under this batch index
|
|
79049
|
+
{
|
|
79050
|
+
lock_guard<mutex> l(lock);
|
|
79051
|
+
VerifyUniqueBatch(batch_index);
|
|
79052
|
+
collections[batch_index] = move(final_collection);
|
|
79053
|
+
}
|
|
79054
|
+
}
|
|
78861
79055
|
}
|
|
78862
79056
|
};
|
|
78863
79057
|
|
|
@@ -78877,6 +79071,16 @@ public:
|
|
|
78877
79071
|
unique_ptr<OptimisticDataWriter> writer;
|
|
78878
79072
|
bool written_to_disk;
|
|
78879
79073
|
|
|
79074
|
+
void FlushToDisk() {
|
|
79075
|
+
if (!current_collection) {
|
|
79076
|
+
return;
|
|
79077
|
+
}
|
|
79078
|
+
if (!written_to_disk || current_collection->GetTotalRows() < LocalStorage::MERGE_THRESHOLD) {
|
|
79079
|
+
return;
|
|
79080
|
+
}
|
|
79081
|
+
writer->FlushToDisk(*current_collection);
|
|
79082
|
+
}
|
|
79083
|
+
|
|
78880
79084
|
void CreateNewCollection(TableCatalogEntry *table, const vector<LogicalType> &insert_types) {
|
|
78881
79085
|
auto &table_info = table->storage->info;
|
|
78882
79086
|
auto &block_manager = TableIOManager::Get(*table->storage).GetBlockManagerForRowData();
|
|
@@ -78921,10 +79125,10 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, GlobalSinkSt
|
|
|
78921
79125
|
// batch index has changed: move the old collection to the global state and create a new collection
|
|
78922
79126
|
TransactionData tdata(0, 0);
|
|
78923
79127
|
lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state);
|
|
78924
|
-
|
|
78925
|
-
|
|
78926
|
-
|
|
78927
|
-
|
|
79128
|
+
lstate.FlushToDisk();
|
|
79129
|
+
|
|
79130
|
+
gstate.AddCollection(context.client, lstate.current_index, move(lstate.current_collection), lstate.writer.get(),
|
|
79131
|
+
&lstate.written_to_disk);
|
|
78928
79132
|
lstate.CreateNewCollection(table, insert_types);
|
|
78929
79133
|
}
|
|
78930
79134
|
lstate.current_index = lstate.batch_index;
|
|
@@ -78948,94 +79152,61 @@ void PhysicalBatchInsert::Combine(ExecutionContext &context, GlobalSinkState &gs
|
|
|
78948
79152
|
if (!lstate.current_collection) {
|
|
78949
79153
|
return;
|
|
78950
79154
|
}
|
|
78951
|
-
|
|
78952
|
-
lstate.writer->FlushToDisk(*lstate.current_collection);
|
|
78953
|
-
}
|
|
79155
|
+
lstate.FlushToDisk();
|
|
78954
79156
|
lstate.writer->FinalFlush();
|
|
78955
79157
|
|
|
78956
79158
|
TransactionData tdata(0, 0);
|
|
78957
79159
|
lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state);
|
|
78958
|
-
gstate.AddCollection(lstate.current_index, move(lstate.current_collection));
|
|
79160
|
+
gstate.AddCollection(context.client, lstate.current_index, move(lstate.current_collection));
|
|
78959
79161
|
}
|
|
78960
79162
|
|
|
78961
|
-
struct CollectionMerger {
|
|
78962
|
-
vector<unique_ptr<RowGroupCollection>> current_collections;
|
|
78963
|
-
|
|
78964
|
-
void AddCollection(unique_ptr<RowGroupCollection> collection) {
|
|
78965
|
-
current_collections.push_back(move(collection));
|
|
78966
|
-
}
|
|
78967
|
-
|
|
78968
|
-
bool Empty() {
|
|
78969
|
-
return current_collections.empty();
|
|
78970
|
-
}
|
|
78971
|
-
|
|
78972
|
-
void Flush(ClientContext &context, DataTable &storage) {
|
|
78973
|
-
if (Empty()) {
|
|
78974
|
-
return;
|
|
78975
|
-
}
|
|
78976
|
-
unique_ptr<RowGroupCollection> new_collection;
|
|
78977
|
-
if (current_collections.size() == 1) {
|
|
78978
|
-
// we have gathered only one row group collection: merge it directly
|
|
78979
|
-
new_collection = move(current_collections[0]);
|
|
78980
|
-
} else {
|
|
78981
|
-
// we have gathered multiple collections: create one big collection and merge that
|
|
78982
|
-
auto &table_info = storage.info;
|
|
78983
|
-
auto &block_manager = TableIOManager::Get(storage).GetBlockManagerForRowData();
|
|
78984
|
-
auto types = storage.GetTypes();
|
|
78985
|
-
new_collection = make_unique<RowGroupCollection>(table_info, block_manager, types, MAX_ROW_ID);
|
|
78986
|
-
TableAppendState append_state;
|
|
78987
|
-
new_collection->InitializeEmpty();
|
|
78988
|
-
new_collection->InitializeAppend(append_state);
|
|
78989
|
-
|
|
78990
|
-
DataChunk scan_chunk;
|
|
78991
|
-
scan_chunk.Initialize(context, types);
|
|
78992
|
-
|
|
78993
|
-
vector<column_t> column_ids;
|
|
78994
|
-
for (idx_t i = 0; i < types.size(); i++) {
|
|
78995
|
-
column_ids.push_back(i);
|
|
78996
|
-
}
|
|
78997
|
-
for (auto &collection : current_collections) {
|
|
78998
|
-
TableScanState scan_state;
|
|
78999
|
-
scan_state.Initialize(column_ids);
|
|
79000
|
-
collection->InitializeScan(scan_state.local_state, column_ids, nullptr);
|
|
79001
|
-
|
|
79002
|
-
while (true) {
|
|
79003
|
-
scan_chunk.Reset();
|
|
79004
|
-
scan_state.local_state.ScanCommitted(scan_chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
|
|
79005
|
-
if (scan_chunk.size() == 0) {
|
|
79006
|
-
break;
|
|
79007
|
-
}
|
|
79008
|
-
new_collection->Append(scan_chunk, append_state);
|
|
79009
|
-
}
|
|
79010
|
-
}
|
|
79011
|
-
|
|
79012
|
-
new_collection->FinalizeAppend(TransactionData(0, 0), append_state);
|
|
79013
|
-
}
|
|
79014
|
-
storage.LocalMerge(context, *new_collection);
|
|
79015
|
-
current_collections.clear();
|
|
79016
|
-
}
|
|
79017
|
-
};
|
|
79018
|
-
|
|
79019
79163
|
SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
|
79020
79164
|
GlobalSinkState &gstate_p) const {
|
|
79021
79165
|
auto &gstate = (BatchInsertGlobalState &)gstate_p;
|
|
79022
79166
|
|
|
79023
|
-
|
|
79167
|
+
// in the finalize, do a final pass over all of the collections we created and try to merge smaller collections
|
|
79168
|
+
// together
|
|
79169
|
+
vector<unique_ptr<CollectionMerger>> mergers;
|
|
79170
|
+
unique_ptr<CollectionMerger> current_merger;
|
|
79024
79171
|
|
|
79025
79172
|
auto &storage = *gstate.table->storage;
|
|
79026
79173
|
for (auto &collection : gstate.collections) {
|
|
79027
79174
|
if (collection.second->GetTotalRows() < LocalStorage::MERGE_THRESHOLD) {
|
|
79028
79175
|
// this collection has very few rows: add it to the merge set
|
|
79029
|
-
|
|
79176
|
+
if (!current_merger) {
|
|
79177
|
+
current_merger = make_unique<CollectionMerger>(context);
|
|
79178
|
+
}
|
|
79179
|
+
current_merger->AddCollection(move(collection.second));
|
|
79030
79180
|
} else {
|
|
79031
|
-
|
|
79181
|
+
// this collection has a lot of rows: it does not need to be merged
|
|
79182
|
+
// create a separate collection merger only for this entry
|
|
79183
|
+
if (current_merger) {
|
|
79032
79184
|
// we have small collections remaining: flush them
|
|
79033
|
-
|
|
79185
|
+
mergers.push_back(move(current_merger));
|
|
79186
|
+
current_merger.reset();
|
|
79034
79187
|
}
|
|
79035
|
-
|
|
79188
|
+
auto larger_merger = make_unique<CollectionMerger>(context);
|
|
79189
|
+
larger_merger->AddCollection(move(collection.second));
|
|
79190
|
+
mergers.push_back(move(larger_merger));
|
|
79036
79191
|
}
|
|
79037
79192
|
}
|
|
79038
|
-
|
|
79193
|
+
if (current_merger) {
|
|
79194
|
+
mergers.push_back(move(current_merger));
|
|
79195
|
+
}
|
|
79196
|
+
|
|
79197
|
+
// now that we have created all of the mergers, perform the actual merging
|
|
79198
|
+
vector<unique_ptr<RowGroupCollection>> final_collections;
|
|
79199
|
+
final_collections.reserve(mergers.size());
|
|
79200
|
+
auto writer = make_unique<OptimisticDataWriter>(&storage);
|
|
79201
|
+
for (auto &merger : mergers) {
|
|
79202
|
+
final_collections.push_back(merger->Flush(*writer));
|
|
79203
|
+
}
|
|
79204
|
+
writer->FinalFlush();
|
|
79205
|
+
|
|
79206
|
+
// finally, merge the row groups into the local storage
|
|
79207
|
+
for (auto &collection : final_collections) {
|
|
79208
|
+
storage.LocalMerge(context, *collection);
|
|
79209
|
+
}
|
|
79039
79210
|
return SinkFinalizeType::READY;
|
|
79040
79211
|
}
|
|
79041
79212
|
|
|
@@ -79114,6 +79285,10 @@ public:
|
|
|
79114
79285
|
bool IsSink() const override {
|
|
79115
79286
|
return true;
|
|
79116
79287
|
}
|
|
79288
|
+
|
|
79289
|
+
bool IsOrderDependent() const override {
|
|
79290
|
+
return true;
|
|
79291
|
+
}
|
|
79117
79292
|
};
|
|
79118
79293
|
} // namespace duckdb
|
|
79119
79294
|
|
|
@@ -82913,19 +83088,24 @@ OperatorResultType CachingPhysicalOperator::Execute(ExecutionContext &context, D
|
|
|
82913
83088
|
auto child_result = ExecuteInternal(context, input, chunk, gstate, state);
|
|
82914
83089
|
|
|
82915
83090
|
#if STANDARD_VECTOR_SIZE >= 128
|
|
82916
|
-
if (!
|
|
82917
|
-
|
|
82918
|
-
|
|
83091
|
+
if (!state.initialized) {
|
|
83092
|
+
state.initialized = true;
|
|
83093
|
+
state.can_cache_chunk = true;
|
|
83094
|
+
if (!context.pipeline || !caching_supported) {
|
|
83095
|
+
state.can_cache_chunk = false;
|
|
83096
|
+
}
|
|
82919
83097
|
|
|
82920
|
-
|
|
82921
|
-
|
|
82922
|
-
|
|
82923
|
-
}
|
|
83098
|
+
if (context.pipeline->GetSink() && context.pipeline->GetSink()->RequiresBatchIndex()) {
|
|
83099
|
+
state.can_cache_chunk = false;
|
|
83100
|
+
}
|
|
82924
83101
|
|
|
82925
|
-
|
|
83102
|
+
if (context.pipeline->IsOrderDependent()) {
|
|
83103
|
+
state.can_cache_chunk = false;
|
|
83104
|
+
}
|
|
83105
|
+
}
|
|
83106
|
+
if (!state.can_cache_chunk) {
|
|
82926
83107
|
return child_result;
|
|
82927
83108
|
}
|
|
82928
|
-
|
|
82929
83109
|
if (chunk.size() < CACHE_THRESHOLD) {
|
|
82930
83110
|
// we have filtered out a significant amount of tuples
|
|
82931
83111
|
// add this chunk to the cache and continue
|