duckdb 0.5.2-dev733.0 → 0.5.2-dev746.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "37960df94"
15
- #define DUCKDB_VERSION "v0.5.2-dev733"
14
+ #define DUCKDB_SOURCE_ID "90356fc58"
15
+ #define DUCKDB_VERSION "v0.5.2-dev746"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -3681,8 +3681,16 @@ struct string_t {
3681
3681
  friend class StringSegment;
3682
3682
 
3683
3683
  public:
3684
- static constexpr idx_t PREFIX_LENGTH = 4 * sizeof(char);
3685
- static constexpr idx_t INLINE_LENGTH = 12;
3684
+ static constexpr idx_t PREFIX_BYTES = 4 * sizeof(char);
3685
+ static constexpr idx_t INLINE_BYTES = 12 * sizeof(char);
3686
+ static constexpr idx_t HEADER_SIZE = sizeof(uint32_t) + PREFIX_BYTES;
3687
+ #ifndef DUCKDB_DEBUG_NO_INLINE
3688
+ static constexpr idx_t PREFIX_LENGTH = PREFIX_BYTES;
3689
+ static constexpr idx_t INLINE_LENGTH = INLINE_BYTES;
3690
+ #else
3691
+ static constexpr idx_t PREFIX_LENGTH = 0;
3692
+ static constexpr idx_t INLINE_LENGTH = 0;
3693
+ #endif
3686
3694
 
3687
3695
  string_t() = default;
3688
3696
  explicit string_t(uint32_t len) {
@@ -3694,7 +3702,7 @@ public:
3694
3702
  if (IsInlined()) {
3695
3703
  // zero initialize the prefix first
3696
3704
  // this makes sure that strings with length smaller than 4 still have an equal prefix
3697
- memset(value.inlined.inlined, 0, INLINE_LENGTH);
3705
+ memset(value.inlined.inlined, 0, INLINE_BYTES);
3698
3706
  if (GetSize() == 0) {
3699
3707
  return;
3700
3708
  }
@@ -3702,7 +3710,11 @@ public:
3702
3710
  memcpy(value.inlined.inlined, data, GetSize());
3703
3711
  } else {
3704
3712
  // large string: store pointer
3713
+ #ifndef DUCKDB_DEBUG_NO_INLINE
3705
3714
  memcpy(value.pointer.prefix, data, PREFIX_LENGTH);
3715
+ #else
3716
+ memset(value.pointer.prefix, 0, PREFIX_BYTES);
3717
+ #endif
3706
3718
  value.pointer.ptr = (char *)data;
3707
3719
  }
3708
3720
  }
@@ -3743,15 +3755,19 @@ public:
3743
3755
 
3744
3756
  void Finalize() {
3745
3757
  // set trailing NULL byte
3746
- auto dataptr = (char *)GetDataUnsafe();
3747
3758
  if (GetSize() <= INLINE_LENGTH) {
3748
3759
  // fill prefix with zeros if the length is smaller than the prefix length
3749
- for (idx_t i = GetSize(); i < INLINE_LENGTH; i++) {
3760
+ for (idx_t i = GetSize(); i < INLINE_BYTES; i++) {
3750
3761
  value.inlined.inlined[i] = '\0';
3751
3762
  }
3752
3763
  } else {
3753
3764
  // copy the data into the prefix
3765
+ #ifndef DUCKDB_DEBUG_NO_INLINE
3766
+ auto dataptr = (char *)GetDataUnsafe();
3754
3767
  memcpy(value.pointer.prefix, dataptr, PREFIX_LENGTH);
3768
+ #else
3769
+ memset(value.pointer.prefix, 0, PREFIX_BYTES);
3770
+ #endif
3755
3771
  }
3756
3772
  }
3757
3773
 
@@ -7865,7 +7881,7 @@ struct StringComparisonOperators {
7865
7881
  }
7866
7882
  } else {
7867
7883
  // large string: first check prefix and length
7868
- if (memcmp(&a, &b, sizeof(uint32_t) + string_t::PREFIX_LENGTH) == 0) {
7884
+ if (memcmp(&a, &b, string_t::HEADER_SIZE) == 0) {
7869
7885
  // prefix and length are equal: check main string
7870
7886
  if (memcmp(a.value.pointer.ptr, b.value.pointer.ptr, a.GetSize()) == 0) {
7871
7887
  // entire string is equal
@@ -14740,13 +14756,235 @@ private:
14740
14756
 
14741
14757
 
14742
14758
 
14759
+ //===----------------------------------------------------------------------===//
14760
+ // DuckDB
14761
+ //
14762
+ // duckdb/function/compression_function.hpp
14763
+ //
14764
+ //
14765
+ //===----------------------------------------------------------------------===//
14766
+
14767
+
14768
+
14769
+
14770
+
14771
+
14772
+ //===----------------------------------------------------------------------===//
14773
+ // DuckDB
14774
+ //
14775
+ // duckdb/common/map.hpp
14776
+ //
14777
+ //
14778
+ //===----------------------------------------------------------------------===//
14779
+
14780
+
14781
+
14782
+ #include <map>
14783
+
14784
+ namespace duckdb {
14785
+ using std::map;
14786
+ using std::multimap;
14787
+ } // namespace duckdb
14788
+
14789
+
14790
+
14791
+
14792
+ namespace duckdb {
14793
+ class DatabaseInstance;
14794
+ class ColumnData;
14795
+ class ColumnDataCheckpointer;
14796
+ class ColumnSegment;
14797
+ class SegmentStatistics;
14798
+
14799
+ struct ColumnFetchState;
14800
+ struct ColumnScanState;
14801
+ struct SegmentScanState;
14802
+
14803
+ struct AnalyzeState {
14804
+ virtual ~AnalyzeState() {
14805
+ }
14806
+ };
14807
+
14808
+ struct CompressionState {
14809
+ virtual ~CompressionState() {
14810
+ }
14811
+ };
14812
+
14813
+ struct CompressedSegmentState {
14814
+ virtual ~CompressedSegmentState() {
14815
+ }
14816
+ };
14817
+
14818
+ struct CompressionAppendState {
14819
+ CompressionAppendState(BufferHandle handle_p) : handle(move(handle_p)) {
14820
+ }
14821
+ virtual ~CompressionAppendState() {
14822
+ }
14823
+
14824
+ BufferHandle handle;
14825
+ };
14826
+
14827
+ //===--------------------------------------------------------------------===//
14828
+ // Analyze
14829
+ //===--------------------------------------------------------------------===//
14830
+ //! The analyze functions are used to determine whether or not to use this compression method
14831
+ //! The system first determines the potential compression methods to use based on the physical type of the column
14832
+ //! After that the following steps are taken:
14833
+ //! 1. The init_analyze is called to initialize the analyze state of every candidate compression method
14834
+ //! 2. The analyze method is called with all of the input data in the order in which it must be stored.
14835
+ //! analyze can return "false". In that case, the compression method is taken out of consideration early.
14836
+ //! 3. The final_analyze method is called, which should return a score for the compression method
14837
+
14838
+ //! The system then decides which compression function to use based on the analyzed score (returned from final_analyze)
14839
+ typedef unique_ptr<AnalyzeState> (*compression_init_analyze_t)(ColumnData &col_data, PhysicalType type);
14840
+ typedef bool (*compression_analyze_t)(AnalyzeState &state, Vector &input, idx_t count);
14841
+ typedef idx_t (*compression_final_analyze_t)(AnalyzeState &state);
14842
+
14843
+ //===--------------------------------------------------------------------===//
14844
+ // Compress
14845
+ //===--------------------------------------------------------------------===//
14846
+ typedef unique_ptr<CompressionState> (*compression_init_compression_t)(ColumnDataCheckpointer &checkpointer,
14847
+ unique_ptr<AnalyzeState> state);
14848
+ typedef void (*compression_compress_data_t)(CompressionState &state, Vector &scan_vector, idx_t count);
14849
+ typedef void (*compression_compress_finalize_t)(CompressionState &state);
14850
+
14851
+ //===--------------------------------------------------------------------===//
14852
+ // Uncompress / Scan
14853
+ //===--------------------------------------------------------------------===//
14854
+ typedef unique_ptr<SegmentScanState> (*compression_init_segment_scan_t)(ColumnSegment &segment);
14855
+ typedef void (*compression_scan_vector_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
14856
+ Vector &result);
14857
+ typedef void (*compression_scan_partial_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
14858
+ Vector &result, idx_t result_offset);
14859
+ typedef void (*compression_fetch_row_t)(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result,
14860
+ idx_t result_idx);
14861
+ typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count);
14862
+
14863
+ //===--------------------------------------------------------------------===//
14864
+ // Append (optional)
14865
+ //===--------------------------------------------------------------------===//
14866
+ typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
14867
+ typedef unique_ptr<CompressionAppendState> (*compression_init_append_t)(ColumnSegment &segment);
14868
+ typedef idx_t (*compression_append_t)(CompressionAppendState &append_state, ColumnSegment &segment,
14869
+ SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count);
14870
+ typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
14871
+ typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
14872
+
14873
+ class CompressionFunction {
14874
+ public:
14875
+ CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,
14876
+ compression_analyze_t analyze, compression_final_analyze_t final_analyze,
14877
+ compression_init_compression_t init_compression, compression_compress_data_t compress,
14878
+ compression_compress_finalize_t compress_finalize, compression_init_segment_scan_t init_scan,
14879
+ compression_scan_vector_t scan_vector, compression_scan_partial_t scan_partial,
14880
+ compression_fetch_row_t fetch_row, compression_skip_t skip,
14881
+ compression_init_segment_t init_segment = nullptr,
14882
+ compression_init_append_t init_append = nullptr, compression_append_t append = nullptr,
14883
+ compression_finalize_append_t finalize_append = nullptr,
14884
+ compression_revert_append_t revert_append = nullptr)
14885
+ : type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
14886
+ init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
14887
+ init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
14888
+ init_segment(init_segment), init_append(init_append), append(append), finalize_append(finalize_append),
14889
+ revert_append(revert_append) {
14890
+ }
14891
+
14892
+ //! Compression type
14893
+ CompressionType type;
14894
+ //! The data type this function can compress
14895
+ PhysicalType data_type;
14896
+
14897
+ //! Analyze step: determine which compression function is the most effective
14898
+ //! init_analyze is called once to set up the analyze state
14899
+ compression_init_analyze_t init_analyze;
14900
+ //! analyze is called several times (once per vector in the row group)
14901
+ //! analyze should return true, unless compression is no longer possible with this compression method
14902
+ //! in that case false should be returned
14903
+ compression_analyze_t analyze;
14904
+ //! final_analyze should return the score of the compression function
14905
+ //! ideally this is the exact number of bytes required to store the data
14906
+ //! this is not required/enforced: it can be an estimate as well
14907
+ //! also this function can return DConstants::INVALID_INDEX to skip this compression method
14908
+ compression_final_analyze_t final_analyze;
14909
+
14910
+ //! Compression step: actually compress the data
14911
+ //! init_compression is called once to set up the compression state
14912
+ compression_init_compression_t init_compression;
14913
+ //! compress is called several times (once per vector in the row group)
14914
+ compression_compress_data_t compress;
14915
+ //! compress_finalize is called after
14916
+ compression_compress_finalize_t compress_finalize;
14917
+
14918
+ //! init_scan is called to set up the scan state
14919
+ compression_init_segment_scan_t init_scan;
14920
+ //! scan_vector scans an entire vector using the scan state
14921
+ compression_scan_vector_t scan_vector;
14922
+ //! scan_partial scans a subset of a vector
14923
+ //! this can request > vector_size as well
14924
+ //! this is used if a vector crosses segment boundaries, or for child columns of lists
14925
+ compression_scan_partial_t scan_partial;
14926
+ //! fetch an individual row from the compressed vector
14927
+ //! used for index lookups
14928
+ compression_fetch_row_t fetch_row;
14929
+ //! Skip forward in the compressed segment
14930
+ compression_skip_t skip;
14931
+
14932
+ // Append functions
14933
+ //! This only really needs to be defined for uncompressed segments
14934
+
14935
+ //! Initialize a compressed segment (optional)
14936
+ compression_init_segment_t init_segment;
14937
+ //! Initialize the append state (optional)
14938
+ compression_init_append_t init_append;
14939
+ //! Append to the compressed segment (optional)
14940
+ compression_append_t append;
14941
+ //! Finalize an append to the segment
14942
+ compression_finalize_append_t finalize_append;
14943
+ //! Revert append (optional)
14944
+ compression_revert_append_t revert_append;
14945
+ };
14946
+
14947
+ //! The set of compression functions
14948
+ struct CompressionFunctionSet {
14949
+ mutex lock;
14950
+ map<CompressionType, map<PhysicalType, CompressionFunction>> functions;
14951
+ };
14952
+
14953
+ } // namespace duckdb
14954
+
14955
+ //===----------------------------------------------------------------------===//
14956
+ // DuckDB
14957
+ //
14958
+ // duckdb/transaction/transaction_data.hpp
14959
+ //
14960
+ //
14961
+ //===----------------------------------------------------------------------===//
14962
+
14963
+
14964
+
14965
+
14966
+
14967
+ namespace duckdb {
14968
+ class Transaction;
14969
+
14970
+ struct TransactionData {
14971
+ TransactionData(Transaction &transaction_p);
14972
+ TransactionData(transaction_t transaction_id_p, transaction_t start_time_p);
14973
+
14974
+ Transaction *transaction;
14975
+ transaction_t transaction_id;
14976
+ transaction_t start_time;
14977
+ };
14978
+
14979
+ } // namespace duckdb
14980
+
14743
14981
 
14744
14982
  namespace duckdb {
14745
14983
  class ColumnSegment;
14746
14984
  class DataTable;
14985
+ class LocalTableStorage;
14747
14986
  class RowGroup;
14748
14987
  class UpdateSegment;
14749
- class ValiditySegment;
14750
14988
 
14751
14989
  struct TableAppendState;
14752
14990
 
@@ -14757,6 +14995,8 @@ struct ColumnAppendState {
14757
14995
  vector<ColumnAppendState> child_appends;
14758
14996
  //! The write lock that is held by the append
14759
14997
  unique_ptr<StorageLockKey> lock;
14998
+ //! The compression append state
14999
+ unique_ptr<CompressionAppendState> append_state;
14760
15000
  };
14761
15001
 
14762
15002
  struct RowGroupAppendState {
@@ -14778,14 +15018,26 @@ struct IndexLock {
14778
15018
  };
14779
15019
 
14780
15020
  struct TableAppendState {
14781
- TableAppendState() : row_group_append_state(*this) {
14782
- }
15021
+ TableAppendState();
15022
+ ~TableAppendState();
14783
15023
 
14784
15024
  RowGroupAppendState row_group_append_state;
14785
15025
  unique_lock<mutex> append_lock;
14786
15026
  row_t row_start;
14787
15027
  row_t current_row;
14788
- idx_t remaining_append_count;
15028
+ //! The total number of rows appended by the append operation
15029
+ idx_t total_append_count;
15030
+ //! The first row-group that has been appended to
15031
+ RowGroup *start_row_group;
15032
+ //! The transaction data
15033
+ TransactionData transaction;
15034
+ //! The remaining append count, only if the append count is known beforehand
15035
+ idx_t remaining;
15036
+ };
15037
+
15038
+ struct LocalAppendState {
15039
+ TableAppendState append_state;
15040
+ LocalTableStorage *storage;
14789
15041
  };
14790
15042
 
14791
15043
  } // namespace duckdb
@@ -17959,7 +18211,7 @@ public:
17959
18211
  row_t row_id, DataChunk &result, idx_t result_idx);
17960
18212
 
17961
18213
  //! Append count rows to the version info
17962
- void AppendVersionInfo(TransactionData transaction, idx_t start, idx_t count, transaction_t commit_id);
18214
+ void AppendVersionInfo(TransactionData transaction, idx_t count);
17963
18215
  //! Commit a previous append made by RowGroup::AppendVersionInfo
17964
18216
  void CommitAppend(transaction_t commit_id, idx_t start, idx_t count);
17965
18217
  //! Revert a previous append made by RowGroup::AppendVersionInfo
@@ -17972,7 +18224,7 @@ public:
17972
18224
  static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
17973
18225
  static RowGroupPointer Deserialize(Deserializer &source, const vector<ColumnDefinition> &columns);
17974
18226
 
17975
- void InitializeAppend(TransactionData transaction, RowGroupAppendState &append_state, idx_t remaining_append_count);
18227
+ void InitializeAppend(RowGroupAppendState &append_state);
17976
18228
  void Append(RowGroupAppendState &append_state, DataChunk &chunk, idx_t append_count);
17977
18229
 
17978
18230
  void Update(TransactionData transaction, DataChunk &updates, row_t *ids, idx_t offset, idx_t count,
@@ -18130,8 +18382,14 @@ public:
18130
18382
  void Fetch(TransactionData transaction, DataChunk &result, const vector<column_t> &column_ids,
18131
18383
  Vector &row_identifiers, idx_t fetch_count, ColumnFetchState &state);
18132
18384
 
18385
+ //! Initialize an append of a variable number of rows. FinalizeAppend must be called after appending is done.
18386
+ void InitializeAppend(TableAppendState &state);
18387
+ //! Initialize an append with a known number of rows. FinalizeAppend should not be called after appending is done.
18133
18388
  void InitializeAppend(TransactionData transaction, TableAppendState &state, idx_t append_count);
18134
- void Append(TransactionData transaction, DataChunk &chunk, TableAppendState &state, TableStatistics &stats);
18389
+ //! Append a chunk to a table.
18390
+ void Append(DataChunk &chunk, TableAppendState &state, TableStatistics &stats);
18391
+ //! FinalizeAppend flushes an append with a variable number of rows.
18392
+ void FinalizeAppend(TransactionData transaction, TableAppendState &state);
18135
18393
  void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count);
18136
18394
  void RevertAppendInternal(idx_t start_row, idx_t count);
18137
18395
 
@@ -20525,16 +20783,19 @@ public:
20525
20783
  bool NextParallelScan(ClientContext &context, DataTable *table, ParallelCollectionScanState &state,
20526
20784
  CollectionScanState &scan_state);
20527
20785
 
20786
+ //! Begin appending to the local storage
20787
+ void InitializeAppend(LocalAppendState &state, DataTable *table);
20528
20788
  //! Append a chunk to the local storage
20529
- void Append(DataTable *table, DataChunk &chunk);
20789
+ static void Append(LocalAppendState &state, DataChunk &chunk);
20790
+ //! Finish appending to the local storage
20791
+ static void FinalizeAppend(LocalAppendState &state);
20530
20792
  //! Delete a set of rows from the local storage
20531
20793
  idx_t Delete(DataTable *table, Vector &row_ids, idx_t count);
20532
20794
  //! Update a set of rows in the local storage
20533
20795
  void Update(DataTable *table, Vector &row_ids, const vector<column_t> &column_ids, DataChunk &data);
20534
20796
 
20535
20797
  //! Commits the local storage, writing it to the WAL and completing the commit
20536
- void Commit(LocalStorage::CommitState &commit_state, Transaction &transaction, WriteAheadLog *log,
20537
- transaction_t commit_id);
20798
+ void Commit(LocalStorage::CommitState &commit_state, Transaction &transaction);
20538
20799
 
20539
20800
  bool ChangesMade() noexcept {
20540
20801
  return table_storage.size() > 0;
@@ -20575,6 +20836,7 @@ private:
20575
20836
 
20576
20837
 
20577
20838
 
20839
+
20578
20840
  namespace duckdb {
20579
20841
  class SequenceCatalogEntry;
20580
20842
 
@@ -20665,20 +20927,6 @@ private:
20665
20927
  Transaction(const Transaction &) = delete;
20666
20928
  };
20667
20929
 
20668
- struct TransactionData {
20669
- TransactionData(Transaction &transaction_p) // NOLINT
20670
- : transaction(&transaction_p), transaction_id(transaction_p.transaction_id),
20671
- start_time(transaction_p.start_time) {
20672
- }
20673
- TransactionData(transaction_t transaction_id_p, transaction_t start_time_p)
20674
- : transaction(nullptr), transaction_id(transaction_id_p), start_time(start_time_p) {
20675
- }
20676
-
20677
- Transaction *transaction;
20678
- transaction_t transaction_id;
20679
- transaction_t start_time;
20680
- };
20681
-
20682
20930
  } // namespace duckdb
20683
20931
 
20684
20932
  #include <functional>
@@ -24599,7 +24847,7 @@ private:
24599
24847
  unique_ptr<ConnectionManager> connection_manager;
24600
24848
  unordered_set<std::string> loaded_extensions;
24601
24849
  //! Set to true if a fatal exception has occurred
24602
- bool is_invalidated = false;
24850
+ atomic<bool> is_invalidated;
24603
24851
  };
24604
24852
 
24605
24853
  //! The database object. This object holds the catalog and all the
@@ -25827,198 +26075,6 @@ public:
25827
26075
 
25828
26076
 
25829
26077
 
25830
- //===----------------------------------------------------------------------===//
25831
- // DuckDB
25832
- //
25833
- // duckdb/function/compression_function.hpp
25834
- //
25835
- //
25836
- //===----------------------------------------------------------------------===//
25837
-
25838
-
25839
-
25840
-
25841
-
25842
-
25843
- //===----------------------------------------------------------------------===//
25844
- // DuckDB
25845
- //
25846
- // duckdb/common/map.hpp
25847
- //
25848
- //
25849
- //===----------------------------------------------------------------------===//
25850
-
25851
-
25852
-
25853
- #include <map>
25854
-
25855
- namespace duckdb {
25856
- using std::map;
25857
- using std::multimap;
25858
- } // namespace duckdb
25859
-
25860
-
25861
-
25862
-
25863
- namespace duckdb {
25864
- class DatabaseInstance;
25865
- class ColumnData;
25866
- class ColumnDataCheckpointer;
25867
- class ColumnSegment;
25868
- class SegmentStatistics;
25869
-
25870
- struct ColumnFetchState;
25871
- struct ColumnScanState;
25872
- struct SegmentScanState;
25873
-
25874
- struct AnalyzeState {
25875
- virtual ~AnalyzeState() {
25876
- }
25877
- };
25878
-
25879
- struct CompressionState {
25880
- virtual ~CompressionState() {
25881
- }
25882
- };
25883
-
25884
- struct CompressedSegmentState {
25885
- virtual ~CompressedSegmentState() {
25886
- }
25887
- };
25888
-
25889
- struct UncompressedCompressState : public CompressionState {
25890
- explicit UncompressedCompressState(ColumnDataCheckpointer &checkpointer);
25891
-
25892
- ColumnDataCheckpointer &checkpointer;
25893
- unique_ptr<ColumnSegment> current_segment;
25894
-
25895
- virtual void CreateEmptySegment(idx_t row_start);
25896
- void FlushSegment(idx_t segment_size);
25897
- void Finalize(idx_t segment_size);
25898
- };
25899
-
25900
- //===--------------------------------------------------------------------===//
25901
- // Analyze
25902
- //===--------------------------------------------------------------------===//
25903
- //! The analyze functions are used to determine whether or not to use this compression method
25904
- //! The system first determines the potential compression methods to use based on the physical type of the column
25905
- //! After that the following steps are taken:
25906
- //! 1. The init_analyze is called to initialize the analyze state of every candidate compression method
25907
- //! 2. The analyze method is called with all of the input data in the order in which it must be stored.
25908
- //! analyze can return "false". In that case, the compression method is taken out of consideration early.
25909
- //! 3. The final_analyze method is called, which should return a score for the compression method
25910
-
25911
- //! The system then decides which compression function to use based on the analyzed score (returned from final_analyze)
25912
- typedef unique_ptr<AnalyzeState> (*compression_init_analyze_t)(ColumnData &col_data, PhysicalType type);
25913
- typedef bool (*compression_analyze_t)(AnalyzeState &state, Vector &input, idx_t count);
25914
- typedef idx_t (*compression_final_analyze_t)(AnalyzeState &state);
25915
-
25916
- //===--------------------------------------------------------------------===//
25917
- // Compress
25918
- //===--------------------------------------------------------------------===//
25919
- typedef unique_ptr<CompressionState> (*compression_init_compression_t)(ColumnDataCheckpointer &checkpointer,
25920
- unique_ptr<AnalyzeState> state);
25921
- typedef void (*compression_compress_data_t)(CompressionState &state, Vector &scan_vector, idx_t count);
25922
- typedef void (*compression_compress_finalize_t)(CompressionState &state);
25923
-
25924
- //===--------------------------------------------------------------------===//
25925
- // Uncompress / Scan
25926
- //===--------------------------------------------------------------------===//
25927
- typedef unique_ptr<SegmentScanState> (*compression_init_segment_scan_t)(ColumnSegment &segment);
25928
- typedef void (*compression_scan_vector_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
25929
- Vector &result);
25930
- typedef void (*compression_scan_partial_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
25931
- Vector &result, idx_t result_offset);
25932
- typedef void (*compression_fetch_row_t)(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result,
25933
- idx_t result_idx);
25934
- typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count);
25935
-
25936
- //===--------------------------------------------------------------------===//
25937
- // Append (optional)
25938
- //===--------------------------------------------------------------------===//
25939
- typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
25940
- typedef idx_t (*compression_append_t)(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data,
25941
- idx_t offset, idx_t count);
25942
- typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
25943
- typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
25944
-
25945
- class CompressionFunction {
25946
- public:
25947
- CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,
25948
- compression_analyze_t analyze, compression_final_analyze_t final_analyze,
25949
- compression_init_compression_t init_compression, compression_compress_data_t compress,
25950
- compression_compress_finalize_t compress_finalize, compression_init_segment_scan_t init_scan,
25951
- compression_scan_vector_t scan_vector, compression_scan_partial_t scan_partial,
25952
- compression_fetch_row_t fetch_row, compression_skip_t skip,
25953
- compression_init_segment_t init_segment = nullptr, compression_append_t append = nullptr,
25954
- compression_finalize_append_t finalize_append = nullptr,
25955
- compression_revert_append_t revert_append = nullptr)
25956
- : type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
25957
- init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
25958
- init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
25959
- init_segment(init_segment), append(append), finalize_append(finalize_append), revert_append(revert_append) {
25960
- }
25961
-
25962
- //! Compression type
25963
- CompressionType type;
25964
- //! The data type this function can compress
25965
- PhysicalType data_type;
25966
-
25967
- //! Analyze step: determine which compression function is the most effective
25968
- //! init_analyze is called once to set up the analyze state
25969
- compression_init_analyze_t init_analyze;
25970
- //! analyze is called several times (once per vector in the row group)
25971
- //! analyze should return true, unless compression is no longer possible with this compression method
25972
- //! in that case false should be returned
25973
- compression_analyze_t analyze;
25974
- //! final_analyze should return the score of the compression function
25975
- //! ideally this is the exact number of bytes required to store the data
25976
- //! this is not required/enforced: it can be an estimate as well
25977
- //! also this function can return DConstants::INVALID_INDEX to skip this compression method
25978
- compression_final_analyze_t final_analyze;
25979
-
25980
- //! Compression step: actually compress the data
25981
- //! init_compression is called once to set up the comperssion state
25982
- compression_init_compression_t init_compression;
25983
- //! compress is called several times (once per vector in the row group)
25984
- compression_compress_data_t compress;
25985
- //! compress_finalize is called after
25986
- compression_compress_finalize_t compress_finalize;
25987
-
25988
- //! init_scan is called to set up the scan state
25989
- compression_init_segment_scan_t init_scan;
25990
- //! scan_vector scans an entire vector using the scan state
25991
- compression_scan_vector_t scan_vector;
25992
- //! scan_partial scans a subset of a vector
25993
- //! this can request > vector_size as well
25994
- //! this is used if a vector crosses segment boundaries, or for child columns of lists
25995
- compression_scan_partial_t scan_partial;
25996
- //! fetch an individual row from the compressed vector
25997
- //! used for index lookups
25998
- compression_fetch_row_t fetch_row;
25999
- //! Skip forward in the compressed segment
26000
- compression_skip_t skip;
26001
-
26002
- // Append functions
26003
- //! This only really needs to be defined for uncompressed segments
26004
-
26005
- //! Initialize a compressed segment (optional)
26006
- compression_init_segment_t init_segment;
26007
- //! Append to the compressed segment (optional)
26008
- compression_append_t append;
26009
- //! Finalize an append to the segment
26010
- compression_finalize_append_t finalize_append;
26011
- //! Revert append (optional)
26012
- compression_revert_append_t revert_append;
26013
- };
26014
-
26015
- //! The set of compression functions
26016
- struct CompressionFunctionSet {
26017
- mutex lock;
26018
- map<CompressionType, map<PhysicalType, CompressionFunction>> functions;
26019
- };
26020
-
26021
- } // namespace duckdb
26022
26078
 
26023
26079
 
26024
26080
  namespace duckdb {
@@ -26084,7 +26140,7 @@ public:
26084
26140
  //! Finalize the segment for appending - no more appends can follow on this segment
26085
26141
  //! The segment should be compacted as much as possible
26086
26142
  //! Returns the number of bytes occupied within the segment
26087
- idx_t FinalizeAppend();
26143
+ idx_t FinalizeAppend(ColumnAppendState &state);
26088
26144
  //! Revert an append made to this segment
26089
26145
  void RevertAppend(idx_t start_row);
26090
26146
 
@@ -26191,6 +26247,7 @@ struct DataTableInfo {
26191
26247
 
26192
26248
  namespace duckdb {
26193
26249
  class ClientContext;
26250
+ class ColumnDataCollection;
26194
26251
  class ColumnDefinition;
26195
26252
  class DataTable;
26196
26253
  class RowGroup;
@@ -26249,8 +26306,17 @@ public:
26249
26306
  void Fetch(Transaction &transaction, DataChunk &result, const vector<column_t> &column_ids, Vector &row_ids,
26250
26307
  idx_t fetch_count, ColumnFetchState &state);
26251
26308
 
26252
- //! Append a DataChunk to the table. Throws an exception if the columns don't match the tables' columns.
26253
- void Append(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);
26309
+ //! Initializes an append to transaction-local storage
26310
+ void InitializeLocalAppend(LocalAppendState &state, ClientContext &context);
26311
+ //! Append a DataChunk to the transaction-local storage of the table.
26312
+ void LocalAppend(LocalAppendState &state, TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);
26313
+ //! Finalizes a transaction-local append
26314
+ void FinalizeLocalAppend(LocalAppendState &state);
26315
+ //! Append a chunk to the transaction-local storage of this table
26316
+ void LocalAppend(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);
26317
+ //! Append a column data collection to the transaction-local storage of this table
26318
+ void LocalAppend(TableCatalogEntry &table, ClientContext &context, ColumnDataCollection &collection);
26319
+
26254
26320
  //! Delete the entries with the specified row identifier from the table
26255
26321
  idx_t Delete(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, idx_t count);
26256
26322
  //! Update the entries with the specified row identifier from the table
@@ -26272,8 +26338,8 @@ public:
26272
26338
  void AppendLock(TableAppendState &state);
26273
26339
  //! Begin appending structs to this table, obtaining necessary locks, etc
26274
26340
  void InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count);
26275
- //! Append a chunk to the table using the AppendState obtained from BeginAppend
26276
- void Append(Transaction &transaction, DataChunk &chunk, TableAppendState &state);
26341
+ //! Append a chunk to the table using the AppendState obtained from InitializeAppend
26342
+ void Append(DataChunk &chunk, TableAppendState &state);
26277
26343
  //! Commit the append
26278
26344
  void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count);
26279
26345
  //! Write a segment of the table to the WAL