duckdb 0.5.2-dev733.0 → 0.5.2-dev746.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +252 -105
- package/src/duckdb.hpp +296 -230
- package/src/parquet-amalgamation.cpp +20320 -20320
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "90356fc58"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev746"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -3681,8 +3681,16 @@ struct string_t {
|
|
|
3681
3681
|
friend class StringSegment;
|
|
3682
3682
|
|
|
3683
3683
|
public:
|
|
3684
|
-
static constexpr idx_t
|
|
3685
|
-
static constexpr idx_t
|
|
3684
|
+
static constexpr idx_t PREFIX_BYTES = 4 * sizeof(char);
|
|
3685
|
+
static constexpr idx_t INLINE_BYTES = 12 * sizeof(char);
|
|
3686
|
+
static constexpr idx_t HEADER_SIZE = sizeof(uint32_t) + PREFIX_BYTES;
|
|
3687
|
+
#ifndef DUCKDB_DEBUG_NO_INLINE
|
|
3688
|
+
static constexpr idx_t PREFIX_LENGTH = PREFIX_BYTES;
|
|
3689
|
+
static constexpr idx_t INLINE_LENGTH = INLINE_BYTES;
|
|
3690
|
+
#else
|
|
3691
|
+
static constexpr idx_t PREFIX_LENGTH = 0;
|
|
3692
|
+
static constexpr idx_t INLINE_LENGTH = 0;
|
|
3693
|
+
#endif
|
|
3686
3694
|
|
|
3687
3695
|
string_t() = default;
|
|
3688
3696
|
explicit string_t(uint32_t len) {
|
|
@@ -3694,7 +3702,7 @@ public:
|
|
|
3694
3702
|
if (IsInlined()) {
|
|
3695
3703
|
// zero initialize the prefix first
|
|
3696
3704
|
// this makes sure that strings with length smaller than 4 still have an equal prefix
|
|
3697
|
-
memset(value.inlined.inlined, 0,
|
|
3705
|
+
memset(value.inlined.inlined, 0, INLINE_BYTES);
|
|
3698
3706
|
if (GetSize() == 0) {
|
|
3699
3707
|
return;
|
|
3700
3708
|
}
|
|
@@ -3702,7 +3710,11 @@ public:
|
|
|
3702
3710
|
memcpy(value.inlined.inlined, data, GetSize());
|
|
3703
3711
|
} else {
|
|
3704
3712
|
// large string: store pointer
|
|
3713
|
+
#ifndef DUCKDB_DEBUG_NO_INLINE
|
|
3705
3714
|
memcpy(value.pointer.prefix, data, PREFIX_LENGTH);
|
|
3715
|
+
#else
|
|
3716
|
+
memset(value.pointer.prefix, 0, PREFIX_BYTES);
|
|
3717
|
+
#endif
|
|
3706
3718
|
value.pointer.ptr = (char *)data;
|
|
3707
3719
|
}
|
|
3708
3720
|
}
|
|
@@ -3743,15 +3755,19 @@ public:
|
|
|
3743
3755
|
|
|
3744
3756
|
void Finalize() {
|
|
3745
3757
|
// set trailing NULL byte
|
|
3746
|
-
auto dataptr = (char *)GetDataUnsafe();
|
|
3747
3758
|
if (GetSize() <= INLINE_LENGTH) {
|
|
3748
3759
|
// fill prefix with zeros if the length is smaller than the prefix length
|
|
3749
|
-
for (idx_t i = GetSize(); i <
|
|
3760
|
+
for (idx_t i = GetSize(); i < INLINE_BYTES; i++) {
|
|
3750
3761
|
value.inlined.inlined[i] = '\0';
|
|
3751
3762
|
}
|
|
3752
3763
|
} else {
|
|
3753
3764
|
// copy the data into the prefix
|
|
3765
|
+
#ifndef DUCKDB_DEBUG_NO_INLINE
|
|
3766
|
+
auto dataptr = (char *)GetDataUnsafe();
|
|
3754
3767
|
memcpy(value.pointer.prefix, dataptr, PREFIX_LENGTH);
|
|
3768
|
+
#else
|
|
3769
|
+
memset(value.pointer.prefix, 0, PREFIX_BYTES);
|
|
3770
|
+
#endif
|
|
3755
3771
|
}
|
|
3756
3772
|
}
|
|
3757
3773
|
|
|
@@ -7865,7 +7881,7 @@ struct StringComparisonOperators {
|
|
|
7865
7881
|
}
|
|
7866
7882
|
} else {
|
|
7867
7883
|
// large string: first check prefix and length
|
|
7868
|
-
if (memcmp(&a, &b,
|
|
7884
|
+
if (memcmp(&a, &b, string_t::HEADER_SIZE) == 0) {
|
|
7869
7885
|
// prefix and length are equal: check main string
|
|
7870
7886
|
if (memcmp(a.value.pointer.ptr, b.value.pointer.ptr, a.GetSize()) == 0) {
|
|
7871
7887
|
// entire string is equal
|
|
@@ -14740,13 +14756,235 @@ private:
|
|
|
14740
14756
|
|
|
14741
14757
|
|
|
14742
14758
|
|
|
14759
|
+
//===----------------------------------------------------------------------===//
|
|
14760
|
+
// DuckDB
|
|
14761
|
+
//
|
|
14762
|
+
// duckdb/function/compression_function.hpp
|
|
14763
|
+
//
|
|
14764
|
+
//
|
|
14765
|
+
//===----------------------------------------------------------------------===//
|
|
14766
|
+
|
|
14767
|
+
|
|
14768
|
+
|
|
14769
|
+
|
|
14770
|
+
|
|
14771
|
+
|
|
14772
|
+
//===----------------------------------------------------------------------===//
|
|
14773
|
+
// DuckDB
|
|
14774
|
+
//
|
|
14775
|
+
// duckdb/common/map.hpp
|
|
14776
|
+
//
|
|
14777
|
+
//
|
|
14778
|
+
//===----------------------------------------------------------------------===//
|
|
14779
|
+
|
|
14780
|
+
|
|
14781
|
+
|
|
14782
|
+
#include <map>
|
|
14783
|
+
|
|
14784
|
+
namespace duckdb {
|
|
14785
|
+
using std::map;
|
|
14786
|
+
using std::multimap;
|
|
14787
|
+
} // namespace duckdb
|
|
14788
|
+
|
|
14789
|
+
|
|
14790
|
+
|
|
14791
|
+
|
|
14792
|
+
namespace duckdb {
|
|
14793
|
+
class DatabaseInstance;
|
|
14794
|
+
class ColumnData;
|
|
14795
|
+
class ColumnDataCheckpointer;
|
|
14796
|
+
class ColumnSegment;
|
|
14797
|
+
class SegmentStatistics;
|
|
14798
|
+
|
|
14799
|
+
struct ColumnFetchState;
|
|
14800
|
+
struct ColumnScanState;
|
|
14801
|
+
struct SegmentScanState;
|
|
14802
|
+
|
|
14803
|
+
struct AnalyzeState {
|
|
14804
|
+
virtual ~AnalyzeState() {
|
|
14805
|
+
}
|
|
14806
|
+
};
|
|
14807
|
+
|
|
14808
|
+
struct CompressionState {
|
|
14809
|
+
virtual ~CompressionState() {
|
|
14810
|
+
}
|
|
14811
|
+
};
|
|
14812
|
+
|
|
14813
|
+
struct CompressedSegmentState {
|
|
14814
|
+
virtual ~CompressedSegmentState() {
|
|
14815
|
+
}
|
|
14816
|
+
};
|
|
14817
|
+
|
|
14818
|
+
struct CompressionAppendState {
|
|
14819
|
+
CompressionAppendState(BufferHandle handle_p) : handle(move(handle_p)) {
|
|
14820
|
+
}
|
|
14821
|
+
virtual ~CompressionAppendState() {
|
|
14822
|
+
}
|
|
14823
|
+
|
|
14824
|
+
BufferHandle handle;
|
|
14825
|
+
};
|
|
14826
|
+
|
|
14827
|
+
//===--------------------------------------------------------------------===//
|
|
14828
|
+
// Analyze
|
|
14829
|
+
//===--------------------------------------------------------------------===//
|
|
14830
|
+
//! The analyze functions are used to determine whether or not to use this compression method
|
|
14831
|
+
//! The system first determines the potential compression methods to use based on the physical type of the column
|
|
14832
|
+
//! After that the following steps are taken:
|
|
14833
|
+
//! 1. The init_analyze is called to initialize the analyze state of every candidate compression method
|
|
14834
|
+
//! 2. The analyze method is called with all of the input data in the order in which it must be stored.
|
|
14835
|
+
//! analyze can return "false". In that case, the compression method is taken out of consideration early.
|
|
14836
|
+
//! 3. The final_analyze method is called, which should return a score for the compression method
|
|
14837
|
+
|
|
14838
|
+
//! The system then decides which compression function to use based on the analyzed score (returned from final_analyze)
|
|
14839
|
+
typedef unique_ptr<AnalyzeState> (*compression_init_analyze_t)(ColumnData &col_data, PhysicalType type);
|
|
14840
|
+
typedef bool (*compression_analyze_t)(AnalyzeState &state, Vector &input, idx_t count);
|
|
14841
|
+
typedef idx_t (*compression_final_analyze_t)(AnalyzeState &state);
|
|
14842
|
+
|
|
14843
|
+
//===--------------------------------------------------------------------===//
|
|
14844
|
+
// Compress
|
|
14845
|
+
//===--------------------------------------------------------------------===//
|
|
14846
|
+
typedef unique_ptr<CompressionState> (*compression_init_compression_t)(ColumnDataCheckpointer &checkpointer,
|
|
14847
|
+
unique_ptr<AnalyzeState> state);
|
|
14848
|
+
typedef void (*compression_compress_data_t)(CompressionState &state, Vector &scan_vector, idx_t count);
|
|
14849
|
+
typedef void (*compression_compress_finalize_t)(CompressionState &state);
|
|
14850
|
+
|
|
14851
|
+
//===--------------------------------------------------------------------===//
|
|
14852
|
+
// Uncompress / Scan
|
|
14853
|
+
//===--------------------------------------------------------------------===//
|
|
14854
|
+
typedef unique_ptr<SegmentScanState> (*compression_init_segment_scan_t)(ColumnSegment &segment);
|
|
14855
|
+
typedef void (*compression_scan_vector_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
|
|
14856
|
+
Vector &result);
|
|
14857
|
+
typedef void (*compression_scan_partial_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
|
|
14858
|
+
Vector &result, idx_t result_offset);
|
|
14859
|
+
typedef void (*compression_fetch_row_t)(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result,
|
|
14860
|
+
idx_t result_idx);
|
|
14861
|
+
typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count);
|
|
14862
|
+
|
|
14863
|
+
//===--------------------------------------------------------------------===//
|
|
14864
|
+
// Append (optional)
|
|
14865
|
+
//===--------------------------------------------------------------------===//
|
|
14866
|
+
typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
|
|
14867
|
+
typedef unique_ptr<CompressionAppendState> (*compression_init_append_t)(ColumnSegment &segment);
|
|
14868
|
+
typedef idx_t (*compression_append_t)(CompressionAppendState &append_state, ColumnSegment &segment,
|
|
14869
|
+
SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count);
|
|
14870
|
+
typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
|
|
14871
|
+
typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
|
|
14872
|
+
|
|
14873
|
+
class CompressionFunction {
|
|
14874
|
+
public:
|
|
14875
|
+
CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,
|
|
14876
|
+
compression_analyze_t analyze, compression_final_analyze_t final_analyze,
|
|
14877
|
+
compression_init_compression_t init_compression, compression_compress_data_t compress,
|
|
14878
|
+
compression_compress_finalize_t compress_finalize, compression_init_segment_scan_t init_scan,
|
|
14879
|
+
compression_scan_vector_t scan_vector, compression_scan_partial_t scan_partial,
|
|
14880
|
+
compression_fetch_row_t fetch_row, compression_skip_t skip,
|
|
14881
|
+
compression_init_segment_t init_segment = nullptr,
|
|
14882
|
+
compression_init_append_t init_append = nullptr, compression_append_t append = nullptr,
|
|
14883
|
+
compression_finalize_append_t finalize_append = nullptr,
|
|
14884
|
+
compression_revert_append_t revert_append = nullptr)
|
|
14885
|
+
: type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
|
|
14886
|
+
init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
|
|
14887
|
+
init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
|
|
14888
|
+
init_segment(init_segment), init_append(init_append), append(append), finalize_append(finalize_append),
|
|
14889
|
+
revert_append(revert_append) {
|
|
14890
|
+
}
|
|
14891
|
+
|
|
14892
|
+
//! Compression type
|
|
14893
|
+
CompressionType type;
|
|
14894
|
+
//! The data type this function can compress
|
|
14895
|
+
PhysicalType data_type;
|
|
14896
|
+
|
|
14897
|
+
//! Analyze step: determine which compression function is the most effective
|
|
14898
|
+
//! init_analyze is called once to set up the analyze state
|
|
14899
|
+
compression_init_analyze_t init_analyze;
|
|
14900
|
+
//! analyze is called several times (once per vector in the row group)
|
|
14901
|
+
//! analyze should return true, unless compression is no longer possible with this compression method
|
|
14902
|
+
//! in that case false should be returned
|
|
14903
|
+
compression_analyze_t analyze;
|
|
14904
|
+
//! final_analyze should return the score of the compression function
|
|
14905
|
+
//! ideally this is the exact number of bytes required to store the data
|
|
14906
|
+
//! this is not required/enforced: it can be an estimate as well
|
|
14907
|
+
//! also this function can return DConstants::INVALID_INDEX to skip this compression method
|
|
14908
|
+
compression_final_analyze_t final_analyze;
|
|
14909
|
+
|
|
14910
|
+
//! Compression step: actually compress the data
|
|
14911
|
+
//! init_compression is called once to set up the compression state
|
|
14912
|
+
compression_init_compression_t init_compression;
|
|
14913
|
+
//! compress is called several times (once per vector in the row group)
|
|
14914
|
+
compression_compress_data_t compress;
|
|
14915
|
+
//! compress_finalize is called after
|
|
14916
|
+
compression_compress_finalize_t compress_finalize;
|
|
14917
|
+
|
|
14918
|
+
//! init_scan is called to set up the scan state
|
|
14919
|
+
compression_init_segment_scan_t init_scan;
|
|
14920
|
+
//! scan_vector scans an entire vector using the scan state
|
|
14921
|
+
compression_scan_vector_t scan_vector;
|
|
14922
|
+
//! scan_partial scans a subset of a vector
|
|
14923
|
+
//! this can request > vector_size as well
|
|
14924
|
+
//! this is used if a vector crosses segment boundaries, or for child columns of lists
|
|
14925
|
+
compression_scan_partial_t scan_partial;
|
|
14926
|
+
//! fetch an individual row from the compressed vector
|
|
14927
|
+
//! used for index lookups
|
|
14928
|
+
compression_fetch_row_t fetch_row;
|
|
14929
|
+
//! Skip forward in the compressed segment
|
|
14930
|
+
compression_skip_t skip;
|
|
14931
|
+
|
|
14932
|
+
// Append functions
|
|
14933
|
+
//! This only really needs to be defined for uncompressed segments
|
|
14934
|
+
|
|
14935
|
+
//! Initialize a compressed segment (optional)
|
|
14936
|
+
compression_init_segment_t init_segment;
|
|
14937
|
+
//! Initialize the append state (optional)
|
|
14938
|
+
compression_init_append_t init_append;
|
|
14939
|
+
//! Append to the compressed segment (optional)
|
|
14940
|
+
compression_append_t append;
|
|
14941
|
+
//! Finalize an append to the segment
|
|
14942
|
+
compression_finalize_append_t finalize_append;
|
|
14943
|
+
//! Revert append (optional)
|
|
14944
|
+
compression_revert_append_t revert_append;
|
|
14945
|
+
};
|
|
14946
|
+
|
|
14947
|
+
//! The set of compression functions
|
|
14948
|
+
struct CompressionFunctionSet {
|
|
14949
|
+
mutex lock;
|
|
14950
|
+
map<CompressionType, map<PhysicalType, CompressionFunction>> functions;
|
|
14951
|
+
};
|
|
14952
|
+
|
|
14953
|
+
} // namespace duckdb
|
|
14954
|
+
|
|
14955
|
+
//===----------------------------------------------------------------------===//
|
|
14956
|
+
// DuckDB
|
|
14957
|
+
//
|
|
14958
|
+
// duckdb/transaction/transaction_data.hpp
|
|
14959
|
+
//
|
|
14960
|
+
//
|
|
14961
|
+
//===----------------------------------------------------------------------===//
|
|
14962
|
+
|
|
14963
|
+
|
|
14964
|
+
|
|
14965
|
+
|
|
14966
|
+
|
|
14967
|
+
namespace duckdb {
|
|
14968
|
+
class Transaction;
|
|
14969
|
+
|
|
14970
|
+
struct TransactionData {
|
|
14971
|
+
TransactionData(Transaction &transaction_p);
|
|
14972
|
+
TransactionData(transaction_t transaction_id_p, transaction_t start_time_p);
|
|
14973
|
+
|
|
14974
|
+
Transaction *transaction;
|
|
14975
|
+
transaction_t transaction_id;
|
|
14976
|
+
transaction_t start_time;
|
|
14977
|
+
};
|
|
14978
|
+
|
|
14979
|
+
} // namespace duckdb
|
|
14980
|
+
|
|
14743
14981
|
|
|
14744
14982
|
namespace duckdb {
|
|
14745
14983
|
class ColumnSegment;
|
|
14746
14984
|
class DataTable;
|
|
14985
|
+
class LocalTableStorage;
|
|
14747
14986
|
class RowGroup;
|
|
14748
14987
|
class UpdateSegment;
|
|
14749
|
-
class ValiditySegment;
|
|
14750
14988
|
|
|
14751
14989
|
struct TableAppendState;
|
|
14752
14990
|
|
|
@@ -14757,6 +14995,8 @@ struct ColumnAppendState {
|
|
|
14757
14995
|
vector<ColumnAppendState> child_appends;
|
|
14758
14996
|
//! The write lock that is held by the append
|
|
14759
14997
|
unique_ptr<StorageLockKey> lock;
|
|
14998
|
+
//! The compression append state
|
|
14999
|
+
unique_ptr<CompressionAppendState> append_state;
|
|
14760
15000
|
};
|
|
14761
15001
|
|
|
14762
15002
|
struct RowGroupAppendState {
|
|
@@ -14778,14 +15018,26 @@ struct IndexLock {
|
|
|
14778
15018
|
};
|
|
14779
15019
|
|
|
14780
15020
|
struct TableAppendState {
|
|
14781
|
-
TableAppendState()
|
|
14782
|
-
|
|
15021
|
+
TableAppendState();
|
|
15022
|
+
~TableAppendState();
|
|
14783
15023
|
|
|
14784
15024
|
RowGroupAppendState row_group_append_state;
|
|
14785
15025
|
unique_lock<mutex> append_lock;
|
|
14786
15026
|
row_t row_start;
|
|
14787
15027
|
row_t current_row;
|
|
14788
|
-
|
|
15028
|
+
//! The total number of rows appended by the append operation
|
|
15029
|
+
idx_t total_append_count;
|
|
15030
|
+
//! The first row-group that has been appended to
|
|
15031
|
+
RowGroup *start_row_group;
|
|
15032
|
+
//! The transaction data
|
|
15033
|
+
TransactionData transaction;
|
|
15034
|
+
//! The remaining append count, only if the append count is known beforehand
|
|
15035
|
+
idx_t remaining;
|
|
15036
|
+
};
|
|
15037
|
+
|
|
15038
|
+
struct LocalAppendState {
|
|
15039
|
+
TableAppendState append_state;
|
|
15040
|
+
LocalTableStorage *storage;
|
|
14789
15041
|
};
|
|
14790
15042
|
|
|
14791
15043
|
} // namespace duckdb
|
|
@@ -17959,7 +18211,7 @@ public:
|
|
|
17959
18211
|
row_t row_id, DataChunk &result, idx_t result_idx);
|
|
17960
18212
|
|
|
17961
18213
|
//! Append count rows to the version info
|
|
17962
|
-
void AppendVersionInfo(TransactionData transaction, idx_t
|
|
18214
|
+
void AppendVersionInfo(TransactionData transaction, idx_t count);
|
|
17963
18215
|
//! Commit a previous append made by RowGroup::AppendVersionInfo
|
|
17964
18216
|
void CommitAppend(transaction_t commit_id, idx_t start, idx_t count);
|
|
17965
18217
|
//! Revert a previous append made by RowGroup::AppendVersionInfo
|
|
@@ -17972,7 +18224,7 @@ public:
|
|
|
17972
18224
|
static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
|
|
17973
18225
|
static RowGroupPointer Deserialize(Deserializer &source, const vector<ColumnDefinition> &columns);
|
|
17974
18226
|
|
|
17975
|
-
void InitializeAppend(
|
|
18227
|
+
void InitializeAppend(RowGroupAppendState &append_state);
|
|
17976
18228
|
void Append(RowGroupAppendState &append_state, DataChunk &chunk, idx_t append_count);
|
|
17977
18229
|
|
|
17978
18230
|
void Update(TransactionData transaction, DataChunk &updates, row_t *ids, idx_t offset, idx_t count,
|
|
@@ -18130,8 +18382,14 @@ public:
|
|
|
18130
18382
|
void Fetch(TransactionData transaction, DataChunk &result, const vector<column_t> &column_ids,
|
|
18131
18383
|
Vector &row_identifiers, idx_t fetch_count, ColumnFetchState &state);
|
|
18132
18384
|
|
|
18385
|
+
//! Initialize an append of a variable number of rows. FinalizeAppend must be called after appending is done.
|
|
18386
|
+
void InitializeAppend(TableAppendState &state);
|
|
18387
|
+
//! Initialize an append with a known number of rows. FinalizeAppend should not be called after appending is done.
|
|
18133
18388
|
void InitializeAppend(TransactionData transaction, TableAppendState &state, idx_t append_count);
|
|
18134
|
-
|
|
18389
|
+
//! Append a chunk to a table.
|
|
18390
|
+
void Append(DataChunk &chunk, TableAppendState &state, TableStatistics &stats);
|
|
18391
|
+
//! FinalizeAppend flushes an append with a variable number of rows.
|
|
18392
|
+
void FinalizeAppend(TransactionData transaction, TableAppendState &state);
|
|
18135
18393
|
void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count);
|
|
18136
18394
|
void RevertAppendInternal(idx_t start_row, idx_t count);
|
|
18137
18395
|
|
|
@@ -20525,16 +20783,19 @@ public:
|
|
|
20525
20783
|
bool NextParallelScan(ClientContext &context, DataTable *table, ParallelCollectionScanState &state,
|
|
20526
20784
|
CollectionScanState &scan_state);
|
|
20527
20785
|
|
|
20786
|
+
//! Begin appending to the local storage
|
|
20787
|
+
void InitializeAppend(LocalAppendState &state, DataTable *table);
|
|
20528
20788
|
//! Append a chunk to the local storage
|
|
20529
|
-
void Append(
|
|
20789
|
+
static void Append(LocalAppendState &state, DataChunk &chunk);
|
|
20790
|
+
//! Finish appending to the local storage
|
|
20791
|
+
static void FinalizeAppend(LocalAppendState &state);
|
|
20530
20792
|
//! Delete a set of rows from the local storage
|
|
20531
20793
|
idx_t Delete(DataTable *table, Vector &row_ids, idx_t count);
|
|
20532
20794
|
//! Update a set of rows in the local storage
|
|
20533
20795
|
void Update(DataTable *table, Vector &row_ids, const vector<column_t> &column_ids, DataChunk &data);
|
|
20534
20796
|
|
|
20535
20797
|
//! Commits the local storage, writing it to the WAL and completing the commit
|
|
20536
|
-
void Commit(LocalStorage::CommitState &commit_state, Transaction &transaction
|
|
20537
|
-
transaction_t commit_id);
|
|
20798
|
+
void Commit(LocalStorage::CommitState &commit_state, Transaction &transaction);
|
|
20538
20799
|
|
|
20539
20800
|
bool ChangesMade() noexcept {
|
|
20540
20801
|
return table_storage.size() > 0;
|
|
@@ -20575,6 +20836,7 @@ private:
|
|
|
20575
20836
|
|
|
20576
20837
|
|
|
20577
20838
|
|
|
20839
|
+
|
|
20578
20840
|
namespace duckdb {
|
|
20579
20841
|
class SequenceCatalogEntry;
|
|
20580
20842
|
|
|
@@ -20665,20 +20927,6 @@ private:
|
|
|
20665
20927
|
Transaction(const Transaction &) = delete;
|
|
20666
20928
|
};
|
|
20667
20929
|
|
|
20668
|
-
struct TransactionData {
|
|
20669
|
-
TransactionData(Transaction &transaction_p) // NOLINT
|
|
20670
|
-
: transaction(&transaction_p), transaction_id(transaction_p.transaction_id),
|
|
20671
|
-
start_time(transaction_p.start_time) {
|
|
20672
|
-
}
|
|
20673
|
-
TransactionData(transaction_t transaction_id_p, transaction_t start_time_p)
|
|
20674
|
-
: transaction(nullptr), transaction_id(transaction_id_p), start_time(start_time_p) {
|
|
20675
|
-
}
|
|
20676
|
-
|
|
20677
|
-
Transaction *transaction;
|
|
20678
|
-
transaction_t transaction_id;
|
|
20679
|
-
transaction_t start_time;
|
|
20680
|
-
};
|
|
20681
|
-
|
|
20682
20930
|
} // namespace duckdb
|
|
20683
20931
|
|
|
20684
20932
|
#include <functional>
|
|
@@ -24599,7 +24847,7 @@ private:
|
|
|
24599
24847
|
unique_ptr<ConnectionManager> connection_manager;
|
|
24600
24848
|
unordered_set<std::string> loaded_extensions;
|
|
24601
24849
|
//! Set to true if a fatal exception has occurred
|
|
24602
|
-
bool is_invalidated
|
|
24850
|
+
atomic<bool> is_invalidated;
|
|
24603
24851
|
};
|
|
24604
24852
|
|
|
24605
24853
|
//! The database object. This object holds the catalog and all the
|
|
@@ -25827,198 +26075,6 @@ public:
|
|
|
25827
26075
|
|
|
25828
26076
|
|
|
25829
26077
|
|
|
25830
|
-
//===----------------------------------------------------------------------===//
|
|
25831
|
-
// DuckDB
|
|
25832
|
-
//
|
|
25833
|
-
// duckdb/function/compression_function.hpp
|
|
25834
|
-
//
|
|
25835
|
-
//
|
|
25836
|
-
//===----------------------------------------------------------------------===//
|
|
25837
|
-
|
|
25838
|
-
|
|
25839
|
-
|
|
25840
|
-
|
|
25841
|
-
|
|
25842
|
-
|
|
25843
|
-
//===----------------------------------------------------------------------===//
|
|
25844
|
-
// DuckDB
|
|
25845
|
-
//
|
|
25846
|
-
// duckdb/common/map.hpp
|
|
25847
|
-
//
|
|
25848
|
-
//
|
|
25849
|
-
//===----------------------------------------------------------------------===//
|
|
25850
|
-
|
|
25851
|
-
|
|
25852
|
-
|
|
25853
|
-
#include <map>
|
|
25854
|
-
|
|
25855
|
-
namespace duckdb {
|
|
25856
|
-
using std::map;
|
|
25857
|
-
using std::multimap;
|
|
25858
|
-
} // namespace duckdb
|
|
25859
|
-
|
|
25860
|
-
|
|
25861
|
-
|
|
25862
|
-
|
|
25863
|
-
namespace duckdb {
|
|
25864
|
-
class DatabaseInstance;
|
|
25865
|
-
class ColumnData;
|
|
25866
|
-
class ColumnDataCheckpointer;
|
|
25867
|
-
class ColumnSegment;
|
|
25868
|
-
class SegmentStatistics;
|
|
25869
|
-
|
|
25870
|
-
struct ColumnFetchState;
|
|
25871
|
-
struct ColumnScanState;
|
|
25872
|
-
struct SegmentScanState;
|
|
25873
|
-
|
|
25874
|
-
struct AnalyzeState {
|
|
25875
|
-
virtual ~AnalyzeState() {
|
|
25876
|
-
}
|
|
25877
|
-
};
|
|
25878
|
-
|
|
25879
|
-
struct CompressionState {
|
|
25880
|
-
virtual ~CompressionState() {
|
|
25881
|
-
}
|
|
25882
|
-
};
|
|
25883
|
-
|
|
25884
|
-
struct CompressedSegmentState {
|
|
25885
|
-
virtual ~CompressedSegmentState() {
|
|
25886
|
-
}
|
|
25887
|
-
};
|
|
25888
|
-
|
|
25889
|
-
struct UncompressedCompressState : public CompressionState {
|
|
25890
|
-
explicit UncompressedCompressState(ColumnDataCheckpointer &checkpointer);
|
|
25891
|
-
|
|
25892
|
-
ColumnDataCheckpointer &checkpointer;
|
|
25893
|
-
unique_ptr<ColumnSegment> current_segment;
|
|
25894
|
-
|
|
25895
|
-
virtual void CreateEmptySegment(idx_t row_start);
|
|
25896
|
-
void FlushSegment(idx_t segment_size);
|
|
25897
|
-
void Finalize(idx_t segment_size);
|
|
25898
|
-
};
|
|
25899
|
-
|
|
25900
|
-
//===--------------------------------------------------------------------===//
|
|
25901
|
-
// Analyze
|
|
25902
|
-
//===--------------------------------------------------------------------===//
|
|
25903
|
-
//! The analyze functions are used to determine whether or not to use this compression method
|
|
25904
|
-
//! The system first determines the potential compression methods to use based on the physical type of the column
|
|
25905
|
-
//! After that the following steps are taken:
|
|
25906
|
-
//! 1. The init_analyze is called to initialize the analyze state of every candidate compression method
|
|
25907
|
-
//! 2. The analyze method is called with all of the input data in the order in which it must be stored.
|
|
25908
|
-
//! analyze can return "false". In that case, the compression method is taken out of consideration early.
|
|
25909
|
-
//! 3. The final_analyze method is called, which should return a score for the compression method
|
|
25910
|
-
|
|
25911
|
-
//! The system then decides which compression function to use based on the analyzed score (returned from final_analyze)
|
|
25912
|
-
typedef unique_ptr<AnalyzeState> (*compression_init_analyze_t)(ColumnData &col_data, PhysicalType type);
|
|
25913
|
-
typedef bool (*compression_analyze_t)(AnalyzeState &state, Vector &input, idx_t count);
|
|
25914
|
-
typedef idx_t (*compression_final_analyze_t)(AnalyzeState &state);
|
|
25915
|
-
|
|
25916
|
-
//===--------------------------------------------------------------------===//
|
|
25917
|
-
// Compress
|
|
25918
|
-
//===--------------------------------------------------------------------===//
|
|
25919
|
-
typedef unique_ptr<CompressionState> (*compression_init_compression_t)(ColumnDataCheckpointer &checkpointer,
|
|
25920
|
-
unique_ptr<AnalyzeState> state);
|
|
25921
|
-
typedef void (*compression_compress_data_t)(CompressionState &state, Vector &scan_vector, idx_t count);
|
|
25922
|
-
typedef void (*compression_compress_finalize_t)(CompressionState &state);
|
|
25923
|
-
|
|
25924
|
-
//===--------------------------------------------------------------------===//
|
|
25925
|
-
// Uncompress / Scan
|
|
25926
|
-
//===--------------------------------------------------------------------===//
|
|
25927
|
-
typedef unique_ptr<SegmentScanState> (*compression_init_segment_scan_t)(ColumnSegment &segment);
|
|
25928
|
-
typedef void (*compression_scan_vector_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
|
|
25929
|
-
Vector &result);
|
|
25930
|
-
typedef void (*compression_scan_partial_t)(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count,
|
|
25931
|
-
Vector &result, idx_t result_offset);
|
|
25932
|
-
typedef void (*compression_fetch_row_t)(ColumnSegment &segment, ColumnFetchState &state, row_t row_id, Vector &result,
|
|
25933
|
-
idx_t result_idx);
|
|
25934
|
-
typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count);
|
|
25935
|
-
|
|
25936
|
-
//===--------------------------------------------------------------------===//
|
|
25937
|
-
// Append (optional)
|
|
25938
|
-
//===--------------------------------------------------------------------===//
|
|
25939
|
-
typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
|
|
25940
|
-
typedef idx_t (*compression_append_t)(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data,
|
|
25941
|
-
idx_t offset, idx_t count);
|
|
25942
|
-
typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
|
|
25943
|
-
typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
|
|
25944
|
-
|
|
25945
|
-
class CompressionFunction {
|
|
25946
|
-
public:
|
|
25947
|
-
CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,
|
|
25948
|
-
compression_analyze_t analyze, compression_final_analyze_t final_analyze,
|
|
25949
|
-
compression_init_compression_t init_compression, compression_compress_data_t compress,
|
|
25950
|
-
compression_compress_finalize_t compress_finalize, compression_init_segment_scan_t init_scan,
|
|
25951
|
-
compression_scan_vector_t scan_vector, compression_scan_partial_t scan_partial,
|
|
25952
|
-
compression_fetch_row_t fetch_row, compression_skip_t skip,
|
|
25953
|
-
compression_init_segment_t init_segment = nullptr, compression_append_t append = nullptr,
|
|
25954
|
-
compression_finalize_append_t finalize_append = nullptr,
|
|
25955
|
-
compression_revert_append_t revert_append = nullptr)
|
|
25956
|
-
: type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
|
|
25957
|
-
init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
|
|
25958
|
-
init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
|
|
25959
|
-
init_segment(init_segment), append(append), finalize_append(finalize_append), revert_append(revert_append) {
|
|
25960
|
-
}
|
|
25961
|
-
|
|
25962
|
-
//! Compression type
|
|
25963
|
-
CompressionType type;
|
|
25964
|
-
//! The data type this function can compress
|
|
25965
|
-
PhysicalType data_type;
|
|
25966
|
-
|
|
25967
|
-
//! Analyze step: determine which compression function is the most effective
|
|
25968
|
-
//! init_analyze is called once to set up the analyze state
|
|
25969
|
-
compression_init_analyze_t init_analyze;
|
|
25970
|
-
//! analyze is called several times (once per vector in the row group)
|
|
25971
|
-
//! analyze should return true, unless compression is no longer possible with this compression method
|
|
25972
|
-
//! in that case false should be returned
|
|
25973
|
-
compression_analyze_t analyze;
|
|
25974
|
-
//! final_analyze should return the score of the compression function
|
|
25975
|
-
//! ideally this is the exact number of bytes required to store the data
|
|
25976
|
-
//! this is not required/enforced: it can be an estimate as well
|
|
25977
|
-
//! also this function can return DConstants::INVALID_INDEX to skip this compression method
|
|
25978
|
-
compression_final_analyze_t final_analyze;
|
|
25979
|
-
|
|
25980
|
-
//! Compression step: actually compress the data
|
|
25981
|
-
//! init_compression is called once to set up the compression state
|
|
25982
|
-
compression_init_compression_t init_compression;
|
|
25983
|
-
//! compress is called several times (once per vector in the row group)
|
|
25984
|
-
compression_compress_data_t compress;
|
|
25985
|
-
//! compress_finalize is called after all calls to compress have completed
|
|
25986
|
-
compression_compress_finalize_t compress_finalize;
|
|
25987
|
-
|
|
25988
|
-
//! init_scan is called to set up the scan state
|
|
25989
|
-
compression_init_segment_scan_t init_scan;
|
|
25990
|
-
//! scan_vector scans an entire vector using the scan state
|
|
25991
|
-
compression_scan_vector_t scan_vector;
|
|
25992
|
-
//! scan_partial scans a subset of a vector
|
|
25993
|
-
//! this can request > vector_size as well
|
|
25994
|
-
//! this is used if a vector crosses segment boundaries, or for child columns of lists
|
|
25995
|
-
compression_scan_partial_t scan_partial;
|
|
25996
|
-
//! fetch an individual row from the compressed vector
|
|
25997
|
-
//! used for index lookups
|
|
25998
|
-
compression_fetch_row_t fetch_row;
|
|
25999
|
-
//! Skip forward in the compressed segment
|
|
26000
|
-
compression_skip_t skip;
|
|
26001
|
-
|
|
26002
|
-
// Append functions
|
|
26003
|
-
//! This only really needs to be defined for uncompressed segments
|
|
26004
|
-
|
|
26005
|
-
//! Initialize a compressed segment (optional)
|
|
26006
|
-
compression_init_segment_t init_segment;
|
|
26007
|
-
//! Append to the compressed segment (optional)
|
|
26008
|
-
compression_append_t append;
|
|
26009
|
-
//! Finalize an append to the segment
|
|
26010
|
-
compression_finalize_append_t finalize_append;
|
|
26011
|
-
//! Revert append (optional)
|
|
26012
|
-
compression_revert_append_t revert_append;
|
|
26013
|
-
};
|
|
26014
|
-
|
|
26015
|
-
//! The set of compression functions
|
|
26016
|
-
struct CompressionFunctionSet {
|
|
26017
|
-
mutex lock;
|
|
26018
|
-
map<CompressionType, map<PhysicalType, CompressionFunction>> functions;
|
|
26019
|
-
};
|
|
26020
|
-
|
|
26021
|
-
} // namespace duckdb
|
|
26022
26078
|
|
|
26023
26079
|
|
|
26024
26080
|
namespace duckdb {
|
|
@@ -26084,7 +26140,7 @@ public:
|
|
|
26084
26140
|
//! Finalize the segment for appending - no more appends can follow on this segment
|
|
26085
26141
|
//! The segment should be compacted as much as possible
|
|
26086
26142
|
//! Returns the number of bytes occupied within the segment
|
|
26087
|
-
idx_t FinalizeAppend();
|
|
26143
|
+
idx_t FinalizeAppend(ColumnAppendState &state);
|
|
26088
26144
|
//! Revert an append made to this segment
|
|
26089
26145
|
void RevertAppend(idx_t start_row);
|
|
26090
26146
|
|
|
@@ -26191,6 +26247,7 @@ struct DataTableInfo {
|
|
|
26191
26247
|
|
|
26192
26248
|
namespace duckdb {
|
|
26193
26249
|
class ClientContext;
|
|
26250
|
+
class ColumnDataCollection;
|
|
26194
26251
|
class ColumnDefinition;
|
|
26195
26252
|
class DataTable;
|
|
26196
26253
|
class RowGroup;
|
|
@@ -26249,8 +26306,17 @@ public:
|
|
|
26249
26306
|
void Fetch(Transaction &transaction, DataChunk &result, const vector<column_t> &column_ids, Vector &row_ids,
|
|
26250
26307
|
idx_t fetch_count, ColumnFetchState &state);
|
|
26251
26308
|
|
|
26252
|
-
//!
|
|
26253
|
-
void
|
|
26309
|
+
//! Initializes an append to transaction-local storage
|
|
26310
|
+
void InitializeLocalAppend(LocalAppendState &state, ClientContext &context);
|
|
26311
|
+
//! Append a DataChunk to the transaction-local storage of the table.
|
|
26312
|
+
void LocalAppend(LocalAppendState &state, TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);
|
|
26313
|
+
//! Finalizes a transaction-local append
|
|
26314
|
+
void FinalizeLocalAppend(LocalAppendState &state);
|
|
26315
|
+
//! Append a chunk to the transaction-local storage of this table
|
|
26316
|
+
void LocalAppend(TableCatalogEntry &table, ClientContext &context, DataChunk &chunk);
|
|
26317
|
+
//! Append a column data collection to the transaction-local storage of this table
|
|
26318
|
+
void LocalAppend(TableCatalogEntry &table, ClientContext &context, ColumnDataCollection &collection);
|
|
26319
|
+
|
|
26254
26320
|
//! Delete the entries with the specified row identifier from the table
|
|
26255
26321
|
idx_t Delete(TableCatalogEntry &table, ClientContext &context, Vector &row_ids, idx_t count);
|
|
26256
26322
|
//! Update the entries with the specified row identifier from the table
|
|
@@ -26272,8 +26338,8 @@ public:
|
|
|
26272
26338
|
void AppendLock(TableAppendState &state);
|
|
26273
26339
|
//! Begin appending structs to this table, obtaining necessary locks, etc
|
|
26274
26340
|
void InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count);
|
|
26275
|
-
//! Append a chunk to the table using the AppendState obtained from
|
|
26276
|
-
void Append(
|
|
26341
|
+
//! Append a chunk to the table using the AppendState obtained from InitializeAppend
|
|
26342
|
+
void Append(DataChunk &chunk, TableAppendState &state);
|
|
26277
26343
|
//! Commit the append
|
|
26278
26344
|
void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count);
|
|
26279
26345
|
//! Write a segment of the table to the WAL
|