duckdb 0.5.2-dev547.0 → 0.5.2-dev561.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +1248 -737
- package/src/duckdb.hpp +90 -44
- package/src/parquet-amalgamation.cpp +26280 -26280
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-dev547"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "c2e70c43d"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev561"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -17831,12 +17831,13 @@ public:
|
|
|
17831
17831
|
|
|
17832
17832
|
|
|
17833
17833
|
namespace duckdb {
|
|
17834
|
+
class BlockManager;
|
|
17834
17835
|
class ColumnData;
|
|
17835
17836
|
class DatabaseInstance;
|
|
17836
17837
|
class DataTable;
|
|
17837
17838
|
struct DataTableInfo;
|
|
17838
17839
|
class ExpressionExecutor;
|
|
17839
|
-
class
|
|
17840
|
+
class RowGroupWriter;
|
|
17840
17841
|
class UpdateSegment;
|
|
17841
17842
|
class Vector;
|
|
17842
17843
|
struct RowGroupPointer;
|
|
@@ -17853,15 +17854,17 @@ public:
|
|
|
17853
17854
|
static constexpr const idx_t ROW_GROUP_SIZE = STANDARD_VECTOR_SIZE * ROW_GROUP_VECTOR_COUNT;
|
|
17854
17855
|
|
|
17855
17856
|
public:
|
|
17856
|
-
RowGroup(DatabaseInstance &db, DataTableInfo &table_info, idx_t start, idx_t count);
|
|
17857
|
-
RowGroup(DatabaseInstance &db,
|
|
17858
|
-
RowGroupPointer
|
|
17857
|
+
RowGroup(DatabaseInstance &db, BlockManager &block_manager, DataTableInfo &table_info, idx_t start, idx_t count);
|
|
17858
|
+
RowGroup(DatabaseInstance &db, BlockManager &block_manager, DataTableInfo &table_info,
|
|
17859
|
+
const vector<LogicalType> &types, RowGroupPointer &&pointer);
|
|
17859
17860
|
RowGroup(RowGroup &row_group, idx_t start);
|
|
17860
17861
|
~RowGroup();
|
|
17861
17862
|
|
|
17862
17863
|
private:
|
|
17863
17864
|
//! The database instance
|
|
17864
17865
|
DatabaseInstance &db;
|
|
17866
|
+
//! The block manager
|
|
17867
|
+
BlockManager &block_manager;
|
|
17865
17868
|
//! The table info of this row_group
|
|
17866
17869
|
DataTableInfo &table_info;
|
|
17867
17870
|
//! The version info of the row_group (inserted and deleted tuple info)
|
|
@@ -17875,6 +17878,9 @@ public:
|
|
|
17875
17878
|
DatabaseInstance &GetDatabase() {
|
|
17876
17879
|
return db;
|
|
17877
17880
|
}
|
|
17881
|
+
BlockManager &GetBlockManager() {
|
|
17882
|
+
return block_manager;
|
|
17883
|
+
}
|
|
17878
17884
|
DataTableInfo &GetTableInfo() {
|
|
17879
17885
|
return table_info;
|
|
17880
17886
|
}
|
|
@@ -17930,7 +17936,7 @@ public:
|
|
|
17930
17936
|
//! Delete the given set of rows in the version manager
|
|
17931
17937
|
idx_t Delete(TransactionData transaction, DataTable *table, row_t *row_ids, idx_t count);
|
|
17932
17938
|
|
|
17933
|
-
RowGroupPointer Checkpoint(
|
|
17939
|
+
RowGroupPointer Checkpoint(RowGroupWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats);
|
|
17934
17940
|
static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
|
|
17935
17941
|
static RowGroupPointer Deserialize(Deserializer &source, const vector<ColumnDefinition> &columns);
|
|
17936
17942
|
|
|
@@ -18060,13 +18066,14 @@ namespace duckdb {
|
|
|
18060
18066
|
struct ParallelTableScanState;
|
|
18061
18067
|
|
|
18062
18068
|
class PersistentTableData;
|
|
18069
|
+
class TableDataWriter;
|
|
18063
18070
|
class TableIndexList;
|
|
18064
18071
|
class TableStatistics;
|
|
18065
18072
|
|
|
18066
18073
|
class RowGroupCollection {
|
|
18067
18074
|
public:
|
|
18068
|
-
RowGroupCollection(shared_ptr<DataTableInfo> info, vector<LogicalType> types,
|
|
18069
|
-
idx_t total_rows = 0);
|
|
18075
|
+
RowGroupCollection(shared_ptr<DataTableInfo> info, BlockManager &block_manager, vector<LogicalType> types,
|
|
18076
|
+
idx_t row_start, idx_t total_rows = 0);
|
|
18070
18077
|
|
|
18071
18078
|
public:
|
|
18072
18079
|
idx_t GetTotalRows() const;
|
|
@@ -18106,8 +18113,7 @@ public:
|
|
|
18106
18113
|
void UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,
|
|
18107
18114
|
DataChunk &updates, TableStatistics &stats);
|
|
18108
18115
|
|
|
18109
|
-
void Checkpoint(TableDataWriter &writer, vector<
|
|
18110
|
-
vector<unique_ptr<BaseStatistics>> &global_stats);
|
|
18116
|
+
void Checkpoint(TableDataWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats);
|
|
18111
18117
|
|
|
18112
18118
|
void CommitDropColumn(idx_t index);
|
|
18113
18119
|
void CommitDropTable();
|
|
@@ -18124,6 +18130,8 @@ public:
|
|
|
18124
18130
|
void VerifyNewConstraint(DataTable &parent, const BoundConstraint &constraint);
|
|
18125
18131
|
|
|
18126
18132
|
private:
|
|
18133
|
+
//! The block manager
|
|
18134
|
+
BlockManager &block_manager;
|
|
18127
18135
|
//! The number of rows in the table
|
|
18128
18136
|
atomic<idx_t> total_rows;
|
|
18129
18137
|
shared_ptr<DataTableInfo> info;
|
|
@@ -19611,9 +19619,8 @@ public:
|
|
|
19611
19619
|
BufferManager &buffer_manager;
|
|
19612
19620
|
|
|
19613
19621
|
public:
|
|
19614
|
-
virtual void StartCheckpoint() = 0;
|
|
19615
19622
|
//! Creates a new block inside the block manager
|
|
19616
|
-
virtual unique_ptr<Block> CreateBlock(block_id_t block_id) = 0;
|
|
19623
|
+
virtual unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) = 0;
|
|
19617
19624
|
//! Return the next free block id
|
|
19618
19625
|
virtual block_id_t GetFreeBlockId() = 0;
|
|
19619
19626
|
//! Returns whether or not a specified block is the root block
|
|
@@ -20189,25 +20196,34 @@ private:
|
|
|
20189
20196
|
namespace duckdb {
|
|
20190
20197
|
class DatabaseInstance;
|
|
20191
20198
|
|
|
20192
|
-
//! This struct is responsible for writing
|
|
20199
|
+
//! This class is responsible for writing data to disk in a stream of blocks.
|
|
20193
20200
|
class MetaBlockWriter : public Serializer {
|
|
20194
20201
|
public:
|
|
20195
|
-
MetaBlockWriter(
|
|
20202
|
+
MetaBlockWriter(BlockManager &block_manager, block_id_t initial_block_id = INVALID_BLOCK);
|
|
20196
20203
|
~MetaBlockWriter() override;
|
|
20197
20204
|
|
|
20198
|
-
|
|
20205
|
+
BlockManager &block_manager;
|
|
20206
|
+
|
|
20207
|
+
protected:
|
|
20199
20208
|
unique_ptr<Block> block;
|
|
20200
20209
|
set<block_id_t> written_blocks;
|
|
20201
20210
|
idx_t offset;
|
|
20202
20211
|
|
|
20203
20212
|
public:
|
|
20204
20213
|
BlockPointer GetBlockPointer();
|
|
20205
|
-
void Flush();
|
|
20214
|
+
virtual void Flush();
|
|
20206
20215
|
|
|
20207
20216
|
void WriteData(const_data_ptr_t buffer, idx_t write_size) override;
|
|
20208
20217
|
|
|
20218
|
+
void MarkWrittenBlocks() {
|
|
20219
|
+
for (auto &block_id : written_blocks) {
|
|
20220
|
+
block_manager.MarkBlockAsModified(block_id);
|
|
20221
|
+
}
|
|
20222
|
+
}
|
|
20223
|
+
|
|
20209
20224
|
protected:
|
|
20210
20225
|
virtual block_id_t GetNextBlockId();
|
|
20226
|
+
void AdvanceBlock();
|
|
20211
20227
|
};
|
|
20212
20228
|
|
|
20213
20229
|
} // namespace duckdb
|
|
@@ -20217,6 +20233,7 @@ protected:
|
|
|
20217
20233
|
namespace duckdb {
|
|
20218
20234
|
|
|
20219
20235
|
class ClientContext;
|
|
20236
|
+
class TableIOManager;
|
|
20220
20237
|
class Transaction;
|
|
20221
20238
|
|
|
20222
20239
|
struct IndexLock;
|
|
@@ -20224,12 +20241,14 @@ struct IndexLock;
|
|
|
20224
20241
|
//! The index is an abstract base class that serves as the basis for indexes
|
|
20225
20242
|
class Index {
|
|
20226
20243
|
public:
|
|
20227
|
-
Index(IndexType type,
|
|
20228
|
-
IndexConstraintType constraint_type);
|
|
20244
|
+
Index(IndexType type, TableIOManager &table_io_manager, const vector<column_t> &column_ids,
|
|
20245
|
+
const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type);
|
|
20229
20246
|
virtual ~Index() = default;
|
|
20230
20247
|
|
|
20231
20248
|
//! The type of the index
|
|
20232
20249
|
IndexType type;
|
|
20250
|
+
//! Associated table io manager
|
|
20251
|
+
TableIOManager &table_io_manager;
|
|
20233
20252
|
//! Column identifiers to extract from the base table
|
|
20234
20253
|
vector<column_t> column_ids;
|
|
20235
20254
|
//! unordered_set of column_ids used by the index
|
|
@@ -25740,7 +25759,6 @@ public:
|
|
|
25740
25759
|
|
|
25741
25760
|
|
|
25742
25761
|
|
|
25743
|
-
|
|
25744
25762
|
//===----------------------------------------------------------------------===//
|
|
25745
25763
|
// DuckDB
|
|
25746
25764
|
//
|
|
@@ -25989,9 +26007,9 @@ public:
|
|
|
25989
26007
|
//! The block that this segment relates to
|
|
25990
26008
|
shared_ptr<BlockHandle> block;
|
|
25991
26009
|
|
|
25992
|
-
static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db,
|
|
25993
|
-
|
|
25994
|
-
CompressionType compression_type,
|
|
26010
|
+
static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
|
|
26011
|
+
block_id_t id, idx_t offset, const LogicalType &type_p,
|
|
26012
|
+
idx_t start, idx_t count, CompressionType compression_type,
|
|
25995
26013
|
unique_ptr<BaseStatistics> statistics);
|
|
25996
26014
|
static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start);
|
|
25997
26015
|
static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
|
|
@@ -26022,15 +26040,20 @@ public:
|
|
|
26022
26040
|
|
|
26023
26041
|
//! Convert a transient in-memory segment into a persistent segment backed by an on-disk block.
|
|
26024
26042
|
//! Only used during checkpointing.
|
|
26025
|
-
void ConvertToPersistent(block_id_t block_id);
|
|
26026
|
-
//!
|
|
26027
|
-
|
|
26043
|
+
void ConvertToPersistent(BlockManager *block_manager, block_id_t block_id);
|
|
26044
|
+
//! Updates pointers to refer to the given block and offset. This is only used
|
|
26045
|
+
//! when sharing a block among segments. This is invoked only AFTER the block is written.
|
|
26046
|
+
void MarkAsPersistent(shared_ptr<BlockHandle> block, uint32_t offset_in_block);
|
|
26028
26047
|
|
|
26029
26048
|
block_id_t GetBlockId() {
|
|
26030
26049
|
D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT);
|
|
26031
26050
|
return block_id;
|
|
26032
26051
|
}
|
|
26033
26052
|
|
|
26053
|
+
BlockManager &GetBlockManager() const {
|
|
26054
|
+
return block->block_manager;
|
|
26055
|
+
}
|
|
26056
|
+
|
|
26034
26057
|
idx_t GetBlockOffset() {
|
|
26035
26058
|
D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT || offset == 0);
|
|
26036
26059
|
return offset;
|
|
@@ -26047,9 +26070,9 @@ public:
|
|
|
26047
26070
|
}
|
|
26048
26071
|
|
|
26049
26072
|
public:
|
|
26050
|
-
ColumnSegment(DatabaseInstance &db, LogicalType type, ColumnSegmentType segment_type,
|
|
26051
|
-
CompressionFunction *function, unique_ptr<BaseStatistics> statistics,
|
|
26052
|
-
idx_t offset);
|
|
26073
|
+
ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
|
|
26074
|
+
idx_t start, idx_t count, CompressionFunction *function, unique_ptr<BaseStatistics> statistics,
|
|
26075
|
+
block_id_t block_id, idx_t offset);
|
|
26053
26076
|
ColumnSegment(ColumnSegment &other, idx_t start);
|
|
26054
26077
|
|
|
26055
26078
|
private:
|
|
@@ -26071,26 +26094,33 @@ private:
|
|
|
26071
26094
|
|
|
26072
26095
|
|
|
26073
26096
|
|
|
26097
|
+
//===----------------------------------------------------------------------===//
|
|
26098
|
+
// DuckDB
|
|
26099
|
+
//
|
|
26100
|
+
// duckdb/storage/table/data_table_info.hpp
|
|
26101
|
+
//
|
|
26102
|
+
//
|
|
26103
|
+
//===----------------------------------------------------------------------===//
|
|
26104
|
+
|
|
26105
|
+
|
|
26106
|
+
|
|
26107
|
+
|
|
26108
|
+
|
|
26074
26109
|
|
|
26075
26110
|
|
|
26076
26111
|
namespace duckdb {
|
|
26077
|
-
class
|
|
26078
|
-
class
|
|
26079
|
-
class DataTable;
|
|
26080
|
-
class RowGroup;
|
|
26081
|
-
class StorageManager;
|
|
26082
|
-
class TableCatalogEntry;
|
|
26083
|
-
class Transaction;
|
|
26084
|
-
class WriteAheadLog;
|
|
26085
|
-
class TableDataWriter;
|
|
26112
|
+
class DatabaseInstance;
|
|
26113
|
+
class TableIOManager;
|
|
26086
26114
|
|
|
26087
26115
|
struct DataTableInfo {
|
|
26088
|
-
DataTableInfo(DatabaseInstance &db, string schema, string table)
|
|
26089
|
-
: db(db), cardinality(0), schema(move(schema)), table(move(table)) {
|
|
26116
|
+
DataTableInfo(DatabaseInstance &db, shared_ptr<TableIOManager> table_io_manager_p, string schema, string table)
|
|
26117
|
+
: db(db), table_io_manager(move(table_io_manager_p)), cardinality(0), schema(move(schema)), table(move(table)) {
|
|
26090
26118
|
}
|
|
26091
26119
|
|
|
26092
26120
|
//! The database instance of the table
|
|
26093
26121
|
DatabaseInstance &db;
|
|
26122
|
+
//! The table IO manager
|
|
26123
|
+
shared_ptr<TableIOManager> table_io_manager;
|
|
26094
26124
|
//! The amount of elements in the table. Note that this number signifies the amount of COMMITTED entries in the
|
|
26095
26125
|
//! table. It can be inaccurate inside of transactions. More work is needed to properly support that.
|
|
26096
26126
|
atomic<idx_t> cardinality;
|
|
@@ -26106,12 +26136,28 @@ struct DataTableInfo {
|
|
|
26106
26136
|
}
|
|
26107
26137
|
};
|
|
26108
26138
|
|
|
26139
|
+
} // namespace duckdb
|
|
26140
|
+
|
|
26141
|
+
|
|
26142
|
+
namespace duckdb {
|
|
26143
|
+
class ClientContext;
|
|
26144
|
+
class ColumnDefinition;
|
|
26145
|
+
class DataTable;
|
|
26146
|
+
class RowGroup;
|
|
26147
|
+
class StorageManager;
|
|
26148
|
+
class TableCatalogEntry;
|
|
26149
|
+
class TableIOManager;
|
|
26150
|
+
class Transaction;
|
|
26151
|
+
class WriteAheadLog;
|
|
26152
|
+
class TableDataWriter;
|
|
26153
|
+
|
|
26109
26154
|
//! DataTable represents a physical table on disk
|
|
26110
26155
|
class DataTable {
|
|
26111
26156
|
public:
|
|
26112
26157
|
//! Constructs a new data table from an (optional) set of persistent segments
|
|
26113
|
-
DataTable(DatabaseInstance &db,
|
|
26114
|
-
vector<ColumnDefinition> column_definitions_p,
|
|
26158
|
+
DataTable(DatabaseInstance &db, shared_ptr<TableIOManager> table_io_manager, const string &schema,
|
|
26159
|
+
const string &table, vector<ColumnDefinition> column_definitions_p,
|
|
26160
|
+
unique_ptr<PersistentTableData> data = nullptr);
|
|
26115
26161
|
//! Constructs a DataTable as a delta on an existing data table with a newly added column
|
|
26116
26162
|
DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression *default_value);
|
|
26117
26163
|
//! Constructs a DataTable as a delta on an existing data table but with one column removed
|
|
@@ -26122,10 +26168,10 @@ public:
|
|
|
26122
26168
|
//! Constructs a DataTable as a delta on an existing data table but with one newly added constraint
|
|
26123
26169
|
DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint);
|
|
26124
26170
|
|
|
26171
|
+
//! The table info
|
|
26125
26172
|
shared_ptr<DataTableInfo> info;
|
|
26126
|
-
|
|
26173
|
+
//! The set of physical columns stored by this DataTable
|
|
26127
26174
|
vector<ColumnDefinition> column_definitions;
|
|
26128
|
-
|
|
26129
26175
|
//! A reference to the database instance
|
|
26130
26176
|
DatabaseInstance &db;
|
|
26131
26177
|
|