duckdb 0.5.2-dev1295.0 → 0.5.2-dev1312.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -17668,6 +17668,25 @@ public:
17668
17668
 
17669
17669
 
17670
17670
 
17671
+ //===----------------------------------------------------------------------===//
17672
+ // DuckDB
17673
+ //
17674
+ // duckdb/common/likely.hpp
17675
+ //
17676
+ //
17677
+ //===----------------------------------------------------------------------===//
17678
+
17679
+
17680
+
17681
+ #if __GNUC__
17682
+ #define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (__builtin_expect(cond, expected_value))
17683
+ #else
17684
+ #define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (cond)
17685
+ #endif
17686
+
17687
+ #define DUCKDB_LIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 1)
17688
+ #define DUCKDB_UNLIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 0)
17689
+
17671
17690
 
17672
17691
  namespace duckdb {
17673
17692
  struct StringDictionaryContainer {
@@ -17723,31 +17742,33 @@ public:
17723
17742
  return StringAppendBase(append_state.handle, segment, stats, data, offset, count);
17724
17743
  }
17725
17744
 
17726
- template <bool DUPLICATE_ELIMINATE = false>
17727
17745
  static idx_t StringAppendBase(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data,
17728
- idx_t offset, idx_t count,
17729
- std::unordered_map<string, int32_t> *seen_strings = nullptr) {
17746
+ idx_t offset, idx_t count) {
17730
17747
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
17731
17748
  auto handle = buffer_manager.Pin(segment.block);
17732
17749
  return StringAppendBase(handle, segment, stats, data, offset, count);
17733
17750
  }
17734
- template <bool DUPLICATE_ELIMINATE = false>
17751
+
17735
17752
  static idx_t StringAppendBase(BufferHandle &handle, ColumnSegment &segment, SegmentStatistics &stats,
17736
- UnifiedVectorFormat &data, idx_t offset, idx_t count,
17737
- std::unordered_map<string, int32_t> *seen_strings = nullptr) {
17753
+ UnifiedVectorFormat &data, idx_t offset, idx_t count) {
17738
17754
  D_ASSERT(segment.GetBlockOffset() == 0);
17755
+ auto handle_ptr = handle.Ptr();
17739
17756
  auto source_data = (string_t *)data.data;
17740
- auto result_data = (int32_t *)(handle.Ptr() + DICTIONARY_HEADER_SIZE);
17757
+ auto result_data = (int32_t *)(handle_ptr + DICTIONARY_HEADER_SIZE);
17758
+ uint32_t *dictionary_size = (uint32_t *)handle_ptr;
17759
+ uint32_t *dictionary_end = (uint32_t *)(handle_ptr + sizeof(uint32_t));
17760
+
17761
+ idx_t remaining_space = RemainingSpace(segment, handle);
17762
+ auto base_count = segment.count.load();
17741
17763
  for (idx_t i = 0; i < count; i++) {
17742
17764
  auto source_idx = data.sel->get_index(offset + i);
17743
- auto target_idx = segment.count.load();
17744
- idx_t remaining_space = RemainingSpace(segment, handle);
17765
+ auto target_idx = base_count + i;
17745
17766
  if (remaining_space < sizeof(int32_t)) {
17746
17767
  // string index does not fit in the block at all
17768
+ segment.count += i;
17747
17769
  return i;
17748
17770
  }
17749
17771
  remaining_space -= sizeof(int32_t);
17750
- auto dictionary = GetDictionary(segment, handle);
17751
17772
  if (!data.validity.RowIsValid(source_idx)) {
17752
17773
  // null value is stored as a copy of the last value, this is done to be able to efficiently do the
17753
17774
  // string_length calculation
@@ -17756,82 +17777,68 @@ public:
17756
17777
  } else {
17757
17778
  result_data[target_idx] = 0;
17758
17779
  }
17759
- } else {
17760
- auto end = handle.Ptr() + dictionary.end;
17761
-
17762
- dictionary.Verify();
17763
-
17764
- int32_t match;
17765
- bool found;
17766
- if (DUPLICATE_ELIMINATE) {
17767
- auto search = seen_strings->find(source_data[source_idx].GetString());
17768
- if (search != seen_strings->end()) {
17769
- match = search->second;
17770
- found = true;
17771
- } else {
17772
- found = false;
17773
- }
17774
- }
17775
-
17776
- if (DUPLICATE_ELIMINATE && found) {
17777
- // We have seen this string
17778
- result_data[target_idx] = match;
17779
- } else {
17780
- // Unknown string, continue
17781
- // non-null value, check if we can fit it within the block
17782
- idx_t string_length = source_data[source_idx].GetSize();
17783
- idx_t dictionary_length = string_length;
17784
-
17785
- // determine whether or not we have space in the block for this string
17786
- bool use_overflow_block = false;
17787
- idx_t required_space = dictionary_length;
17788
- if (required_space >= StringUncompressed::STRING_BLOCK_LIMIT) {
17789
- // string exceeds block limit, store in overflow block and only write a marker here
17790
- required_space = BIG_STRING_MARKER_SIZE;
17791
- use_overflow_block = true;
17792
- }
17793
- if (required_space > remaining_space) {
17794
- // no space remaining: return how many tuples we ended up writing
17795
- return i;
17796
- }
17780
+ continue;
17781
+ }
17782
+ auto end = handle.Ptr() + *dictionary_end;
17797
17783
 
17798
- // we have space: write the string
17799
- UpdateStringStats(stats, source_data[source_idx]);
17784
+ #ifdef DEBUG
17785
+ GetDictionary(segment, handle).Verify();
17786
+ #endif
17787
+ // Unknown string, continue
17788
+ // non-null value, check if we can fit it within the block
17789
+ idx_t string_length = source_data[source_idx].GetSize();
17790
+
17791
+ // determine whether or not we have space in the block for this string
17792
+ bool use_overflow_block = false;
17793
+ idx_t required_space = string_length;
17794
+ if (DUCKDB_UNLIKELY(required_space >= StringUncompressed::STRING_BLOCK_LIMIT)) {
17795
+ // string exceeds block limit, store in overflow block and only write a marker here
17796
+ required_space = BIG_STRING_MARKER_SIZE;
17797
+ use_overflow_block = true;
17798
+ }
17799
+ if (DUCKDB_UNLIKELY(required_space > remaining_space)) {
17800
+ // no space remaining: return how many tuples we ended up writing
17801
+ segment.count += i;
17802
+ return i;
17803
+ }
17800
17804
 
17801
- if (use_overflow_block) {
17802
- // write to overflow blocks
17803
- block_id_t block;
17804
- int32_t offset;
17805
- // write the string into the current string block
17806
- WriteString(segment, source_data[source_idx], block, offset);
17807
- dictionary.size += BIG_STRING_MARKER_SIZE;
17808
- auto dict_pos = end - dictionary.size;
17805
+ // we have space: write the string
17806
+ UpdateStringStats(stats, source_data[source_idx]);
17809
17807
 
17810
- // write a big string marker into the dictionary
17811
- WriteStringMarker(dict_pos, block, offset);
17812
- } else {
17813
- // string fits in block, append to dictionary and increment dictionary position
17814
- D_ASSERT(string_length < NumericLimits<uint16_t>::Maximum());
17815
- dictionary.size += required_space;
17816
- auto dict_pos = end - dictionary.size;
17817
- // now write the actual string data into the dictionary
17818
- memcpy(dict_pos, source_data[source_idx].GetDataUnsafe(), string_length);
17819
- }
17820
- D_ASSERT(RemainingSpace(segment, handle) <= Storage::BLOCK_SIZE);
17821
- // place the dictionary offset into the set of vectors
17822
- dictionary.Verify();
17808
+ if (DUCKDB_UNLIKELY(use_overflow_block)) {
17809
+ // write to overflow blocks
17810
+ block_id_t block;
17811
+ int32_t offset;
17812
+ // write the string into the current string block
17813
+ WriteString(segment, source_data[source_idx], block, offset);
17814
+ *dictionary_size += BIG_STRING_MARKER_SIZE;
17815
+ remaining_space -= BIG_STRING_MARKER_SIZE;
17816
+ auto dict_pos = end - *dictionary_size;
17823
17817
 
17824
- // note: for overflow strings we write negative value
17825
- result_data[target_idx] = use_overflow_block ? -1 * dictionary.size : dictionary.size;
17818
+ // write a big string marker into the dictionary
17819
+ WriteStringMarker(dict_pos, block, offset);
17826
17820
 
17827
- if (DUPLICATE_ELIMINATE) {
17828
- seen_strings->insert({source_data[source_idx].GetString(), dictionary.size});
17829
- }
17830
- SetDictionary(segment, handle, dictionary);
17831
- }
17832
- }
17833
- segment.count++;
17821
+ // place the dictionary offset into the set of vectors
17822
+ // note: for overflow strings we write negative value
17823
+ result_data[target_idx] = -(*dictionary_size);
17824
+ } else {
17825
+ // string fits in block, append to dictionary and increment dictionary position
17826
+ D_ASSERT(string_length < NumericLimits<uint16_t>::Maximum());
17827
+ *dictionary_size += required_space;
17828
+ remaining_space -= required_space;
17829
+ auto dict_pos = end - *dictionary_size;
17830
+ // now write the actual string data into the dictionary
17831
+ memcpy(dict_pos, source_data[source_idx].GetDataUnsafe(), string_length);
17832
+
17833
+ // place the dictionary offset into the set of vectors
17834
+ result_data[target_idx] = *dictionary_size;
17835
+ }
17836
+ D_ASSERT(RemainingSpace(segment, handle) <= Storage::BLOCK_SIZE);
17837
+ #ifdef DEBUG
17838
+ GetDictionary(segment, handle).Verify();
17839
+ #endif
17834
17840
  }
17841
+ segment.count += count;
17835
17842
  return count;
17836
17843
  }
17837
17844
 
@@ -53611,24 +53618,6 @@ void VectorOperations::Not(Vector &input, Vector &result, idx_t count) {
53611
53618
 
53612
53619
 
53613
53620
 
53614
- //===----------------------------------------------------------------------===//
53615
- // DuckDB
53616
- //
53617
- // duckdb/common/likely.hpp
53618
- //
53619
- //
53620
- //===----------------------------------------------------------------------===//
53621
-
53622
-
53623
-
53624
- #if __GNUC__
53625
- #define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (__builtin_expect(cond, expected_value))
53626
- #else
53627
- #define DUCKDB_BUILTIN_EXPECT(cond, expected_value) (cond)
53628
- #endif
53629
-
53630
- #define DUCKDB_LIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 1)
53631
- #define DUCKDB_UNLIKELY(...) DUCKDB_BUILTIN_EXPECT((__VA_ARGS__), 0)
53632
53621
 
53633
53622
 
53634
53623
  namespace duckdb {
@@ -64202,6 +64191,10 @@ public:
64202
64191
  OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
64203
64192
  GlobalOperatorState &gstate, OperatorState &state) const override;
64204
64193
 
64194
+ bool IsOrderDependent() const override {
64195
+ return true;
64196
+ }
64197
+
64205
64198
  string ParamsToString() const override;
64206
64199
  };
64207
64200
 
@@ -65145,6 +65138,9 @@ public:
65145
65138
 
65146
65139
  //! The projection list of the WINDOW statement (may contain aggregates)
65147
65140
  vector<unique_ptr<Expression>> select_list;
65141
+ //! Whether or not the window is order dependent (true as soon as at least one window function contains neither an
65142
+ //! order nor a partition clause)
65143
+ bool is_order_dependent;
65148
65144
 
65149
65145
  public:
65150
65146
  // Source interface
@@ -65178,7 +65174,11 @@ public:
65178
65174
  }
65179
65175
 
65180
65176
  bool ParallelSink() const override {
65181
- return true;
65177
+ return !is_order_dependent;
65178
+ }
65179
+
65180
+ bool IsOrderDependent() const override {
65181
+ return is_order_dependent;
65182
65182
  }
65183
65183
 
65184
65184
  public:
@@ -65873,9 +65873,17 @@ void WindowGlobalSinkState::Finalize() {
65873
65873
  }
65874
65874
 
65875
65875
  // this implements a sorted window functions variant
65876
- PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expression>> select_list,
65876
+ PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expression>> select_list_p,
65877
65877
  idx_t estimated_cardinality, PhysicalOperatorType type)
65878
- : PhysicalOperator(type, move(types), estimated_cardinality), select_list(move(select_list)) {
65878
+ : PhysicalOperator(type, move(types), estimated_cardinality), select_list(move(select_list_p)) {
65879
+ is_order_dependent = false;
65880
+ for (auto &expr : select_list) {
65881
+ D_ASSERT(expr->expression_class == ExpressionClass::BOUND_WINDOW);
65882
+ auto &bound_window = (BoundWindowExpression &)*expr;
65883
+ if (bound_window.partitions.empty() && bound_window.orders.empty()) {
65884
+ is_order_dependent = true;
65885
+ }
65886
+ }
65879
65887
  }
65880
65888
 
65881
65889
  static idx_t FindNextStart(const ValidityMask &mask, idx_t l, const idx_t r, idx_t &n) {
@@ -78838,6 +78846,84 @@ PhysicalBatchInsert::PhysicalBatchInsert(LogicalOperator &op, SchemaCatalogEntry
78838
78846
  //===--------------------------------------------------------------------===//
78839
78847
  // Sink
78840
78848
  //===--------------------------------------------------------------------===//
78849
+
78850
+ class CollectionMerger {
78851
+ public:
78852
+ explicit CollectionMerger(ClientContext &context) : context(context) {
78853
+ }
78854
+
78855
+ ClientContext &context;
78856
+ vector<unique_ptr<RowGroupCollection>> current_collections;
78857
+
78858
+ public:
78859
+ void AddCollection(unique_ptr<RowGroupCollection> collection) {
78860
+ current_collections.push_back(move(collection));
78861
+ }
78862
+
78863
+ bool Empty() {
78864
+ return current_collections.empty();
78865
+ }
78866
+
78867
+ unique_ptr<RowGroupCollection> Flush(OptimisticDataWriter &writer) {
78868
+ if (Empty()) {
78869
+ return nullptr;
78870
+ }
78871
+ unique_ptr<RowGroupCollection> new_collection;
78872
+ if (current_collections.size() == 1) {
78873
+ // we have gathered only one row group collection: return it directly, there is nothing to append
78874
+ new_collection = move(current_collections[0]);
78875
+ } else {
78876
+ // we have gathered multiple collections: create one big collection and merge that
78877
+ // find the biggest collection
78878
+ idx_t biggest_index = 0;
78879
+ for (idx_t i = 1; i < current_collections.size(); i++) {
78880
+ D_ASSERT(current_collections[i]);
78881
+ if (current_collections[i]->GetTotalRows() > current_collections[biggest_index]->GetTotalRows()) {
78882
+ biggest_index = i;
78883
+ }
78884
+ }
78885
+ // now append all the other collections to this collection
78886
+ new_collection = move(current_collections[biggest_index]);
78887
+ auto &types = new_collection->GetTypes();
78888
+ TableAppendState append_state;
78889
+ new_collection->InitializeAppend(append_state);
78890
+
78891
+ DataChunk scan_chunk;
78892
+ scan_chunk.Initialize(context, types);
78893
+
78894
+ vector<column_t> column_ids;
78895
+ for (idx_t i = 0; i < types.size(); i++) {
78896
+ column_ids.push_back(i);
78897
+ }
78898
+ for (auto &collection : current_collections) {
78899
+ if (!collection) {
78900
+ continue;
78901
+ }
78902
+ TableScanState scan_state;
78903
+ scan_state.Initialize(column_ids);
78904
+ collection->InitializeScan(scan_state.local_state, column_ids, nullptr);
78905
+
78906
+ while (true) {
78907
+ scan_chunk.Reset();
78908
+ scan_state.local_state.ScanCommitted(scan_chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
78909
+ if (scan_chunk.size() == 0) {
78910
+ break;
78911
+ }
78912
+ auto new_row_group = new_collection->Append(scan_chunk, append_state);
78913
+ if (new_row_group) {
78914
+ writer.CheckFlushToDisk(*new_collection);
78915
+ }
78916
+ }
78917
+ }
78918
+
78919
+ new_collection->FinalizeAppend(TransactionData(0, 0), append_state);
78920
+ writer.FlushToDisk(*new_collection);
78921
+ }
78922
+ current_collections.clear();
78923
+ return new_collection;
78924
+ }
78925
+ };
78926
+
78841
78927
  class BatchInsertGlobalState : public GlobalSinkState {
78842
78928
  public:
78843
78929
  explicit BatchInsertGlobalState() : insert_count(0) {
@@ -78848,16 +78934,124 @@ public:
78848
78934
  idx_t insert_count;
78849
78935
  map<idx_t, unique_ptr<RowGroupCollection>> collections;
78850
78936
 
78851
- void AddCollection(idx_t batch_index, unique_ptr<RowGroupCollection> current_collection) {
78852
- lock_guard<mutex> l(lock);
78853
- insert_count += current_collection->GetTotalRows();
78937
+ bool CheckMergeInternal(idx_t batch_index, vector<unique_ptr<RowGroupCollection>> *result, idx_t *merge_count) {
78938
+ auto entry = collections.find(batch_index);
78939
+ if (entry == collections.end()) {
78940
+ // no collection at this index
78941
+ return false;
78942
+ }
78943
+ auto row_count = entry->second->GetTotalRows();
78944
+ if (row_count >= LocalStorage::MERGE_THRESHOLD) {
78945
+ // the collection at this batch index is large and has already been written
78946
+ return false;
78947
+ }
78948
+ // we can merge this collection!
78949
+ if (merge_count) {
78950
+ // add the count
78951
+ D_ASSERT(!result);
78952
+ *merge_count += row_count;
78953
+ } else {
78954
+ // add the collection to the result vector and remove its entry from the map
78955
+ D_ASSERT(result);
78956
+ result->push_back(move(entry->second));
78957
+ collections.erase(batch_index);
78958
+ }
78959
+ return true;
78960
+ }
78961
+
78962
+ bool CheckMerge(idx_t batch_index, idx_t &merge_count) {
78963
+ return CheckMergeInternal(batch_index, nullptr, &merge_count);
78964
+ }
78965
+ bool CheckMerge(idx_t batch_index, vector<unique_ptr<RowGroupCollection>> &result) {
78966
+ return CheckMergeInternal(batch_index, &result, nullptr);
78967
+ }
78968
+
78969
+ unique_ptr<RowGroupCollection> MergeCollections(ClientContext &context,
78970
+ vector<unique_ptr<RowGroupCollection>> merge_collections,
78971
+ OptimisticDataWriter &writer) {
78972
+ CollectionMerger merger(context);
78973
+ for (auto &collection : merge_collections) {
78974
+ merger.AddCollection(move(collection));
78975
+ }
78976
+ return merger.Flush(writer);
78977
+ }
78978
+
78979
+ void VerifyUniqueBatch(idx_t batch_index) {
78854
78980
  if (collections.find(batch_index) != collections.end()) {
78855
78981
  throw InternalException("PhysicalBatchInsert::AddCollection error: batch index %d is present in multiple "
78856
78982
  "collections. This occurs when "
78857
78983
  "batch indexes are not uniquely distributed over threads",
78858
78984
  batch_index);
78859
78985
  }
78860
- collections[batch_index] = move(current_collection);
78986
+ }
78987
+
78988
+ void AddCollection(ClientContext &context, idx_t batch_index, unique_ptr<RowGroupCollection> current_collection,
78989
+ OptimisticDataWriter *writer = nullptr, bool *written_to_disk = nullptr) {
78990
+ vector<unique_ptr<RowGroupCollection>> merge_collections;
78991
+ idx_t merge_count;
78992
+ {
78993
+ lock_guard<mutex> l(lock);
78994
+ auto new_count = current_collection->GetTotalRows();
78995
+ insert_count += new_count;
78996
+ VerifyUniqueBatch(batch_index);
78997
+ if (writer && new_count < LocalStorage::MERGE_THRESHOLD) {
78998
+ // we are inserting a small collection that has not yet been written to disk
78999
+ // check if there are any collections with adjacent batch indexes that we can merge together
79000
+
79001
+ // first check how many rows we will end up with by performing such a merge
79002
+ // check backwards
79003
+ merge_count = new_count;
79004
+ idx_t start_batch_index;
79005
+ idx_t end_batch_index;
79006
+ for (start_batch_index = batch_index; start_batch_index > 0; start_batch_index--) {
79007
+ if (!CheckMerge(start_batch_index - 1, merge_count)) {
79008
+ break;
79009
+ }
79010
+ }
79011
+ // check forwards
79012
+ for (end_batch_index = batch_index;; end_batch_index++) {
79013
+ if (!CheckMerge(end_batch_index + 1, merge_count)) {
79014
+ break;
79015
+ }
79016
+ }
79017
+ // merging together creates a big enough row group
79018
+ // merge!
79019
+ if (merge_count >= RowGroup::ROW_GROUP_SIZE) {
79020
+ // gather the row groups to merge
79021
+ // note that we need to gather them in order of batch index
79022
+ for (idx_t i = start_batch_index; i <= end_batch_index; i++) {
79023
+ if (i == batch_index) {
79024
+ merge_collections.push_back(move(current_collection));
79025
+ continue;
79026
+ }
79027
+ auto can_merge = CheckMerge(i, merge_collections);
79028
+ if (!can_merge) {
79029
+ throw InternalException("Could not merge row group in batch insert?!");
79030
+ }
79031
+ }
79032
+ }
79033
+ }
79034
+ if (merge_collections.empty()) {
79035
+ // no collections to merge together - add the collection to the batch index
79036
+ collections[batch_index] = move(current_collection);
79037
+ }
79038
+ }
79039
+ if (!merge_collections.empty()) {
79040
+ // merge together the collections
79041
+ D_ASSERT(writer);
79042
+ auto final_collection = MergeCollections(context, move(merge_collections), *writer);
79043
+ D_ASSERT(final_collection->GetTotalRows() == merge_count);
79044
+ D_ASSERT(final_collection->GetTotalRows() >= RowGroup::ROW_GROUP_SIZE);
79045
+ if (written_to_disk) {
79046
+ *written_to_disk = true;
79047
+ }
79048
+ // add the merged-together collection to the set of collections, stored under this batch index
79049
+ {
79050
+ lock_guard<mutex> l(lock);
79051
+ VerifyUniqueBatch(batch_index);
79052
+ collections[batch_index] = move(final_collection);
79053
+ }
79054
+ }
78861
79055
  }
78862
79056
  };
78863
79057
 
@@ -78877,6 +79071,16 @@ public:
78877
79071
  unique_ptr<OptimisticDataWriter> writer;
78878
79072
  bool written_to_disk;
78879
79073
 
79074
+ void FlushToDisk() {
79075
+ if (!current_collection) {
79076
+ return;
79077
+ }
79078
+ if (!written_to_disk || current_collection->GetTotalRows() < LocalStorage::MERGE_THRESHOLD) {
79079
+ return;
79080
+ }
79081
+ writer->FlushToDisk(*current_collection);
79082
+ }
79083
+
78880
79084
  void CreateNewCollection(TableCatalogEntry *table, const vector<LogicalType> &insert_types) {
78881
79085
  auto &table_info = table->storage->info;
78882
79086
  auto &block_manager = TableIOManager::Get(*table->storage).GetBlockManagerForRowData();
@@ -78921,10 +79125,10 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, GlobalSinkSt
78921
79125
  // batch index has changed: move the old collection to the global state and create a new collection
78922
79126
  TransactionData tdata(0, 0);
78923
79127
  lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state);
78924
- if (lstate.written_to_disk) {
78925
- lstate.writer->FlushToDisk(*lstate.current_collection);
78926
- }
78927
- gstate.AddCollection(lstate.current_index, move(lstate.current_collection));
79128
+ lstate.FlushToDisk();
79129
+
79130
+ gstate.AddCollection(context.client, lstate.current_index, move(lstate.current_collection), lstate.writer.get(),
79131
+ &lstate.written_to_disk);
78928
79132
  lstate.CreateNewCollection(table, insert_types);
78929
79133
  }
78930
79134
  lstate.current_index = lstate.batch_index;
@@ -78948,94 +79152,61 @@ void PhysicalBatchInsert::Combine(ExecutionContext &context, GlobalSinkState &gs
78948
79152
  if (!lstate.current_collection) {
78949
79153
  return;
78950
79154
  }
78951
- if (lstate.written_to_disk) {
78952
- lstate.writer->FlushToDisk(*lstate.current_collection);
78953
- }
79155
+ lstate.FlushToDisk();
78954
79156
  lstate.writer->FinalFlush();
78955
79157
 
78956
79158
  TransactionData tdata(0, 0);
78957
79159
  lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state);
78958
- gstate.AddCollection(lstate.current_index, move(lstate.current_collection));
79160
+ gstate.AddCollection(context.client, lstate.current_index, move(lstate.current_collection));
78959
79161
  }
78960
79162
 
78961
- struct CollectionMerger {
78962
- vector<unique_ptr<RowGroupCollection>> current_collections;
78963
-
78964
- void AddCollection(unique_ptr<RowGroupCollection> collection) {
78965
- current_collections.push_back(move(collection));
78966
- }
78967
-
78968
- bool Empty() {
78969
- return current_collections.empty();
78970
- }
78971
-
78972
- void Flush(ClientContext &context, DataTable &storage) {
78973
- if (Empty()) {
78974
- return;
78975
- }
78976
- unique_ptr<RowGroupCollection> new_collection;
78977
- if (current_collections.size() == 1) {
78978
- // we have gathered only one row group collection: merge it directly
78979
- new_collection = move(current_collections[0]);
78980
- } else {
78981
- // we have gathered multiple collections: create one big collection and merge that
78982
- auto &table_info = storage.info;
78983
- auto &block_manager = TableIOManager::Get(storage).GetBlockManagerForRowData();
78984
- auto types = storage.GetTypes();
78985
- new_collection = make_unique<RowGroupCollection>(table_info, block_manager, types, MAX_ROW_ID);
78986
- TableAppendState append_state;
78987
- new_collection->InitializeEmpty();
78988
- new_collection->InitializeAppend(append_state);
78989
-
78990
- DataChunk scan_chunk;
78991
- scan_chunk.Initialize(context, types);
78992
-
78993
- vector<column_t> column_ids;
78994
- for (idx_t i = 0; i < types.size(); i++) {
78995
- column_ids.push_back(i);
78996
- }
78997
- for (auto &collection : current_collections) {
78998
- TableScanState scan_state;
78999
- scan_state.Initialize(column_ids);
79000
- collection->InitializeScan(scan_state.local_state, column_ids, nullptr);
79001
-
79002
- while (true) {
79003
- scan_chunk.Reset();
79004
- scan_state.local_state.ScanCommitted(scan_chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
79005
- if (scan_chunk.size() == 0) {
79006
- break;
79007
- }
79008
- new_collection->Append(scan_chunk, append_state);
79009
- }
79010
- }
79011
-
79012
- new_collection->FinalizeAppend(TransactionData(0, 0), append_state);
79013
- }
79014
- storage.LocalMerge(context, *new_collection);
79015
- current_collections.clear();
79016
- }
79017
- };
79018
-
79019
79163
  SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
79020
79164
  GlobalSinkState &gstate_p) const {
79021
79165
  auto &gstate = (BatchInsertGlobalState &)gstate_p;
79022
79166
 
79023
- CollectionMerger merger;
79167
+ // in the finalize, do a final pass over all of the collections we created and try to merge smaller collections
79168
+ // together
79169
+ vector<unique_ptr<CollectionMerger>> mergers;
79170
+ unique_ptr<CollectionMerger> current_merger;
79024
79171
 
79025
79172
  auto &storage = *gstate.table->storage;
79026
79173
  for (auto &collection : gstate.collections) {
79027
79174
  if (collection.second->GetTotalRows() < LocalStorage::MERGE_THRESHOLD) {
79028
79175
  // this collection has very few rows: add it to the merge set
79029
- merger.AddCollection(move(collection.second));
79176
+ if (!current_merger) {
79177
+ current_merger = make_unique<CollectionMerger>(context);
79178
+ }
79179
+ current_merger->AddCollection(move(collection.second));
79030
79180
  } else {
79031
- if (!merger.Empty()) {
79181
+ // this collection has a lot of rows: it does not need to be merged
79182
+ // create a separate collection merger only for this entry
79183
+ if (current_merger) {
79032
79184
  // we have small collections remaining: flush them
79033
- merger.Flush(context, storage);
79185
+ mergers.push_back(move(current_merger));
79186
+ current_merger.reset();
79034
79187
  }
79035
- storage.LocalMerge(context, *collection.second);
79188
+ auto larger_merger = make_unique<CollectionMerger>(context);
79189
+ larger_merger->AddCollection(move(collection.second));
79190
+ mergers.push_back(move(larger_merger));
79036
79191
  }
79037
79192
  }
79038
- merger.Flush(context, storage);
79193
+ if (current_merger) {
79194
+ mergers.push_back(move(current_merger));
79195
+ }
79196
+
79197
+ // now that we have created all of the mergers, perform the actual merging
79198
+ vector<unique_ptr<RowGroupCollection>> final_collections;
79199
+ final_collections.reserve(mergers.size());
79200
+ auto writer = make_unique<OptimisticDataWriter>(&storage);
79201
+ for (auto &merger : mergers) {
79202
+ final_collections.push_back(merger->Flush(*writer));
79203
+ }
79204
+ writer->FinalFlush();
79205
+
79206
+ // finally, merge the row groups into the local storage
79207
+ for (auto &collection : final_collections) {
79208
+ storage.LocalMerge(context, *collection);
79209
+ }
79039
79210
  return SinkFinalizeType::READY;
79040
79211
  }
79041
79212
 
@@ -79114,6 +79285,10 @@ public:
79114
79285
  bool IsSink() const override {
79115
79286
  return true;
79116
79287
  }
79288
+
79289
+ bool IsOrderDependent() const override {
79290
+ return true;
79291
+ }
79117
79292
  };
79118
79293
  } // namespace duckdb
79119
79294
 
@@ -82913,19 +83088,24 @@ OperatorResultType CachingPhysicalOperator::Execute(ExecutionContext &context, D
82913
83088
  auto child_result = ExecuteInternal(context, input, chunk, gstate, state);
82914
83089
 
82915
83090
  #if STANDARD_VECTOR_SIZE >= 128
82916
- if (!context.pipeline || !caching_supported) {
82917
- return child_result;
82918
- }
83091
+ if (!state.initialized) {
83092
+ state.initialized = true;
83093
+ state.can_cache_chunk = true;
83094
+ if (!context.pipeline || !caching_supported) {
83095
+ state.can_cache_chunk = false;
83096
+ }
82919
83097
 
82920
- if (context.pipeline->GetSink() && context.pipeline->GetSink()->RequiresBatchIndex() &&
82921
- context.pipeline->GetSource()->SupportsBatchIndex()) {
82922
- return child_result;
82923
- }
83098
+ if (context.pipeline->GetSink() && context.pipeline->GetSink()->RequiresBatchIndex()) {
83099
+ state.can_cache_chunk = false;
83100
+ }
82924
83101
 
82925
- if (context.pipeline->IsOrderDependent()) {
83102
+ if (context.pipeline->IsOrderDependent()) {
83103
+ state.can_cache_chunk = false;
83104
+ }
83105
+ }
83106
+ if (!state.can_cache_chunk) {
82926
83107
  return child_result;
82927
83108
  }
82928
-
82929
83109
  if (chunk.size() < CACHE_THRESHOLD) {
82930
83110
  // we have filtered out a significant amount of tuples
82931
83111
  // add this chunk to the cache and continue