duckdb 0.5.2-dev547.0 → 0.5.2-dev561.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +1248 -737
- package/src/duckdb.hpp +90 -44
- package/src/parquet-amalgamation.cpp +26280 -26280
package/src/duckdb.cpp
CHANGED
|
@@ -1711,20 +1711,22 @@ unique_ptr<Key> Key::CreateKey(const char *value);
|
|
|
1711
1711
|
|
|
1712
1712
|
namespace duckdb {
|
|
1713
1713
|
class BlockHandle;
|
|
1714
|
+
class BlockManager;
|
|
1714
1715
|
class BufferHandle;
|
|
1715
1716
|
class DatabaseInstance;
|
|
1716
1717
|
|
|
1717
1718
|
//! This struct is responsible for reading meta data from disk
|
|
1718
1719
|
class MetaBlockReader : public Deserializer {
|
|
1719
1720
|
public:
|
|
1720
|
-
MetaBlockReader(
|
|
1721
|
+
MetaBlockReader(BlockManager &block_manager, block_id_t block, bool free_blocks_on_read = true);
|
|
1721
1722
|
~MetaBlockReader() override;
|
|
1722
1723
|
|
|
1723
|
-
|
|
1724
|
+
BlockManager &block_manager;
|
|
1724
1725
|
shared_ptr<BlockHandle> block;
|
|
1725
1726
|
BufferHandle handle;
|
|
1726
1727
|
idx_t offset;
|
|
1727
1728
|
block_id_t next_block;
|
|
1729
|
+
bool free_blocks_on_read;
|
|
1728
1730
|
|
|
1729
1731
|
public:
|
|
1730
1732
|
//! Read content of size read_size into the buffer
|
|
@@ -2241,8 +2243,9 @@ enum VerifyExistenceType : uint8_t {
|
|
|
2241
2243
|
|
|
2242
2244
|
class ART : public Index {
|
|
2243
2245
|
public:
|
|
2244
|
-
ART(const vector<column_t> &column_ids,
|
|
2245
|
-
|
|
2246
|
+
ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
2247
|
+
const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type,
|
|
2248
|
+
DatabaseInstance &db, idx_t block_id = DConstants::INVALID_INDEX,
|
|
2246
2249
|
idx_t block_offset = DConstants::INVALID_INDEX);
|
|
2247
2250
|
~ART() override;
|
|
2248
2251
|
|
|
@@ -2251,6 +2254,7 @@ public:
|
|
|
2251
2254
|
|
|
2252
2255
|
DatabaseInstance &db;
|
|
2253
2256
|
|
|
2257
|
+
public:
|
|
2254
2258
|
//! Initialize a scan on the index with the given expression and column ids
|
|
2255
2259
|
//! to fetch from the base table for a single predicate
|
|
2256
2260
|
unique_ptr<IndexScanState> InitializeScanSinglePredicate(Transaction &transaction, Value value,
|
|
@@ -3487,6 +3491,39 @@ protected:
|
|
|
3487
3491
|
|
|
3488
3492
|
|
|
3489
3493
|
|
|
3494
|
+
//===----------------------------------------------------------------------===//
|
|
3495
|
+
// DuckDB
|
|
3496
|
+
//
|
|
3497
|
+
// duckdb/storage/table_io_manager.hpp
|
|
3498
|
+
//
|
|
3499
|
+
//
|
|
3500
|
+
//===----------------------------------------------------------------------===//
|
|
3501
|
+
|
|
3502
|
+
|
|
3503
|
+
|
|
3504
|
+
|
|
3505
|
+
|
|
3506
|
+
namespace duckdb {
|
|
3507
|
+
class BlockManager;
|
|
3508
|
+
class DataTable;
|
|
3509
|
+
|
|
3510
|
+
class TableIOManager {
|
|
3511
|
+
public:
|
|
3512
|
+
virtual ~TableIOManager() {
|
|
3513
|
+
}
|
|
3514
|
+
|
|
3515
|
+
//! Obtains a reference to the TableIOManager of a specific table
|
|
3516
|
+
static TableIOManager &Get(DataTable &table);
|
|
3517
|
+
|
|
3518
|
+
//! The block manager used for managing index data
|
|
3519
|
+
virtual BlockManager &GetIndexBlockManager() = 0;
|
|
3520
|
+
|
|
3521
|
+
//! The block manager used for storing row group data
|
|
3522
|
+
virtual BlockManager &GetBlockManagerForRowData() = 0;
|
|
3523
|
+
};
|
|
3524
|
+
|
|
3525
|
+
} // namespace duckdb
|
|
3526
|
+
|
|
3490
3527
|
//===----------------------------------------------------------------------===//
|
|
3491
3528
|
// DuckDB
|
|
3492
3529
|
//
|
|
@@ -3586,10 +3623,10 @@ class TransactionManager;
|
|
|
3586
3623
|
//! server crashes or is shut down.
|
|
3587
3624
|
class WriteAheadLog {
|
|
3588
3625
|
public:
|
|
3589
|
-
|
|
3626
|
+
//! Initialize the WAL in the specified directory
|
|
3627
|
+
explicit WriteAheadLog(DatabaseInstance &database, const string &path);
|
|
3628
|
+
virtual ~WriteAheadLog();
|
|
3590
3629
|
|
|
3591
|
-
//! Whether or not the WAL has been initialized
|
|
3592
|
-
bool initialized;
|
|
3593
3630
|
//! Skip writing to the WAL
|
|
3594
3631
|
bool skip_writing;
|
|
3595
3632
|
|
|
@@ -3597,14 +3634,12 @@ public:
|
|
|
3597
3634
|
//! Replay the WAL
|
|
3598
3635
|
static bool Replay(DatabaseInstance &database, string &path);
|
|
3599
3636
|
|
|
3600
|
-
//! Initialize the WAL in the specified directory
|
|
3601
|
-
void Initialize(string &path);
|
|
3602
3637
|
//! Returns the current size of the WAL in bytes
|
|
3603
3638
|
int64_t GetWALSize();
|
|
3604
3639
|
//! Gets the total bytes written to the WAL since startup
|
|
3605
3640
|
idx_t GetTotalWritten();
|
|
3606
3641
|
|
|
3607
|
-
void WriteCreateTable(TableCatalogEntry *entry);
|
|
3642
|
+
virtual void WriteCreateTable(TableCatalogEntry *entry);
|
|
3608
3643
|
void WriteDropTable(TableCatalogEntry *entry);
|
|
3609
3644
|
|
|
3610
3645
|
void WriteCreateSchema(SchemaCatalogEntry *entry);
|
|
@@ -3650,7 +3685,7 @@ public:
|
|
|
3650
3685
|
|
|
3651
3686
|
void WriteCheckpoint(block_id_t meta_block);
|
|
3652
3687
|
|
|
3653
|
-
|
|
3688
|
+
protected:
|
|
3654
3689
|
DatabaseInstance &database;
|
|
3655
3690
|
unique_ptr<BufferedFileWriter> writer;
|
|
3656
3691
|
string wal_path;
|
|
@@ -3662,19 +3697,38 @@ private:
|
|
|
3662
3697
|
namespace duckdb {
|
|
3663
3698
|
class BlockManager;
|
|
3664
3699
|
class Catalog;
|
|
3700
|
+
class CheckpointWriter;
|
|
3665
3701
|
class DatabaseInstance;
|
|
3666
3702
|
class TransactionManager;
|
|
3667
3703
|
class TableCatalogEntry;
|
|
3668
3704
|
|
|
3705
|
+
struct DatabaseSize {
|
|
3706
|
+
idx_t total_blocks = 0;
|
|
3707
|
+
idx_t block_size = 0;
|
|
3708
|
+
idx_t free_blocks = 0;
|
|
3709
|
+
idx_t used_blocks = 0;
|
|
3710
|
+
idx_t bytes = 0;
|
|
3711
|
+
idx_t wal_size = 0;
|
|
3712
|
+
};
|
|
3713
|
+
|
|
3714
|
+
class StorageCommitState {
|
|
3715
|
+
public:
|
|
3716
|
+
// Destruction of this object, without prior call to FlushCommit,
|
|
3717
|
+
// will roll back the committed changes.
|
|
3718
|
+
virtual ~StorageCommitState() {
|
|
3719
|
+
}
|
|
3720
|
+
|
|
3721
|
+
// Make the commit persistent
|
|
3722
|
+
virtual void FlushCommit() = 0;
|
|
3723
|
+
};
|
|
3724
|
+
|
|
3669
3725
|
//! StorageManager is responsible for managing the physical storage of the
|
|
3670
3726
|
//! database on disk
|
|
3671
3727
|
class StorageManager {
|
|
3672
3728
|
public:
|
|
3673
3729
|
StorageManager(DatabaseInstance &db, string path, bool read_only);
|
|
3674
|
-
~StorageManager();
|
|
3730
|
+
virtual ~StorageManager();
|
|
3675
3731
|
|
|
3676
|
-
//! The BlockManager to read/store meta information and data in blocks
|
|
3677
|
-
unique_ptr<BlockManager> block_manager;
|
|
3678
3732
|
//! The BufferManager of the database
|
|
3679
3733
|
unique_ptr<BufferManager> buffer_manager;
|
|
3680
3734
|
//! The database this storagemanager belongs to
|
|
@@ -3686,35 +3740,62 @@ public:
|
|
|
3686
3740
|
|
|
3687
3741
|
//! Initialize a database or load an existing database from the given path
|
|
3688
3742
|
void Initialize();
|
|
3689
|
-
//! Get the WAL of the StorageManager, returns nullptr if in-memory
|
|
3690
|
-
WriteAheadLog *GetWriteAheadLog() {
|
|
3691
|
-
return wal.initialized ? &wal : nullptr;
|
|
3692
|
-
}
|
|
3693
3743
|
|
|
3694
3744
|
DatabaseInstance &GetDatabase() {
|
|
3695
3745
|
return db;
|
|
3696
3746
|
}
|
|
3697
3747
|
|
|
3698
|
-
|
|
3748
|
+
//! Get the WAL of the StorageManager, returns nullptr if in-memory
|
|
3749
|
+
WriteAheadLog *GetWriteAheadLog() {
|
|
3750
|
+
return wal.get();
|
|
3751
|
+
}
|
|
3699
3752
|
|
|
3700
3753
|
string GetDBPath() {
|
|
3701
3754
|
return path;
|
|
3702
3755
|
}
|
|
3703
3756
|
bool InMemory();
|
|
3704
3757
|
|
|
3705
|
-
|
|
3706
|
-
|
|
3707
|
-
|
|
3758
|
+
virtual bool AutomaticCheckpoint(idx_t estimated_wal_bytes) = 0;
|
|
3759
|
+
virtual unique_ptr<StorageCommitState> GenStorageCommitState(Transaction &transaction, bool checkpoint) = 0;
|
|
3760
|
+
virtual bool IsCheckpointClean(block_id_t checkpoint_id) = 0;
|
|
3761
|
+
virtual void CreateCheckpoint(bool delete_wal = false, bool force_checkpoint = false) = 0;
|
|
3762
|
+
virtual DatabaseSize GetDatabaseSize() = 0;
|
|
3763
|
+
virtual shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) = 0;
|
|
3764
|
+
|
|
3765
|
+
protected:
|
|
3766
|
+
virtual void LoadDatabase() = 0;
|
|
3767
|
+
virtual void CreateBufferManager();
|
|
3708
3768
|
|
|
3709
3769
|
//! The path of the database
|
|
3710
3770
|
string path;
|
|
3711
3771
|
//! The WriteAheadLog of the storage manager
|
|
3712
|
-
WriteAheadLog wal;
|
|
3772
|
+
unique_ptr<WriteAheadLog> wal;
|
|
3713
3773
|
|
|
3714
3774
|
//! Whether or not the database is opened in read-only mode
|
|
3715
3775
|
bool read_only;
|
|
3716
3776
|
};
|
|
3717
3777
|
|
|
3778
|
+
//! Stores database in a single file.
|
|
3779
|
+
class SingleFileStorageManager : public StorageManager {
|
|
3780
|
+
public:
|
|
3781
|
+
SingleFileStorageManager(DatabaseInstance &db, string path, bool read_only);
|
|
3782
|
+
|
|
3783
|
+
//! The BlockManager to read/store meta information and data in blocks
|
|
3784
|
+
unique_ptr<BlockManager> block_manager;
|
|
3785
|
+
//! TableIoManager
|
|
3786
|
+
unique_ptr<TableIOManager> table_io_manager;
|
|
3787
|
+
|
|
3788
|
+
public:
|
|
3789
|
+
bool AutomaticCheckpoint(idx_t estimated_wal_bytes) override;
|
|
3790
|
+
unique_ptr<StorageCommitState> GenStorageCommitState(Transaction &transaction, bool checkpoint) override;
|
|
3791
|
+
bool IsCheckpointClean(block_id_t checkpoint_id) override;
|
|
3792
|
+
void CreateCheckpoint(bool delete_wal, bool force_checkpoint) override;
|
|
3793
|
+
DatabaseSize GetDatabaseSize() override;
|
|
3794
|
+
shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) override;
|
|
3795
|
+
|
|
3796
|
+
protected:
|
|
3797
|
+
void LoadDatabase() override;
|
|
3798
|
+
};
|
|
3718
3799
|
} // namespace duckdb
|
|
3719
3800
|
|
|
3720
3801
|
|
|
@@ -3766,13 +3847,14 @@ void AddDataTableIndex(DataTable *storage, vector<ColumnDefinition> &columns, ve
|
|
|
3766
3847
|
bound_expressions.push_back(make_unique<BoundReferenceExpression>(columns[key].Type(), key_nr++));
|
|
3767
3848
|
column_ids.push_back(column.StorageOid());
|
|
3768
3849
|
}
|
|
3769
|
-
// create an adaptive radix tree around the expressions
|
|
3770
3850
|
unique_ptr<ART> art;
|
|
3851
|
+
// create an adaptive radix tree around the expressions
|
|
3771
3852
|
if (index_block) {
|
|
3772
|
-
art = make_unique<ART>(column_ids, move(unbound_expressions), constraint_type,
|
|
3773
|
-
index_block->block_id, index_block->offset);
|
|
3853
|
+
art = make_unique<ART>(column_ids, TableIOManager::Get(*storage), move(unbound_expressions), constraint_type,
|
|
3854
|
+
storage->db, index_block->block_id, index_block->offset);
|
|
3774
3855
|
} else {
|
|
3775
|
-
art = make_unique<ART>(column_ids, move(unbound_expressions), constraint_type,
|
|
3856
|
+
art = make_unique<ART>(column_ids, TableIOManager::Get(*storage), move(unbound_expressions), constraint_type,
|
|
3857
|
+
storage->db);
|
|
3776
3858
|
if (!storage->IsRoot()) {
|
|
3777
3859
|
throw TransactionException("Transaction conflict: cannot add an index to a table that has been altered!");
|
|
3778
3860
|
}
|
|
@@ -3810,7 +3892,10 @@ TableCatalogEntry::TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schem
|
|
|
3810
3892
|
}
|
|
3811
3893
|
storage_columns.push_back(col_def.Copy());
|
|
3812
3894
|
}
|
|
3813
|
-
storage =
|
|
3895
|
+
storage =
|
|
3896
|
+
make_shared<DataTable>(catalog->db, StorageManager::GetStorageManager(catalog->db).GetTableIOManager(info),
|
|
3897
|
+
schema->name, name, move(storage_columns), move(info->data));
|
|
3898
|
+
|
|
3814
3899
|
// create the unique indexes for the UNIQUE and PRIMARY KEY and FOREIGN KEY constraints
|
|
3815
3900
|
idx_t indexes_idx = 0;
|
|
3816
3901
|
for (idx_t i = 0; i < bound_constraints.size(); i++) {
|
|
@@ -58079,15 +58164,17 @@ ExpressionExecutorState::ExpressionExecutorState(const string &name) : profiler(
|
|
|
58079
58164
|
|
|
58080
58165
|
|
|
58081
58166
|
|
|
58167
|
+
|
|
58082
58168
|
#include <algorithm>
|
|
58083
58169
|
#include <cstring>
|
|
58084
58170
|
#include <ctgmath>
|
|
58085
58171
|
|
|
58086
58172
|
namespace duckdb {
|
|
58087
58173
|
|
|
58088
|
-
ART::ART(const vector<column_t> &column_ids,
|
|
58089
|
-
|
|
58090
|
-
|
|
58174
|
+
ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
58175
|
+
const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type,
|
|
58176
|
+
DatabaseInstance &db, idx_t block_id, idx_t block_offset)
|
|
58177
|
+
: Index(IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type), db(db) {
|
|
58091
58178
|
if (block_id != DConstants::INVALID_INDEX) {
|
|
58092
58179
|
tree = Node::Deserialize(*this, block_id, block_offset);
|
|
58093
58180
|
} else {
|
|
@@ -58377,7 +58464,8 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
|
|
|
58377
58464
|
payload_types.emplace_back(LogicalType::ROW_TYPE);
|
|
58378
58465
|
|
|
58379
58466
|
auto skipped_all_nulls = false;
|
|
58380
|
-
auto temp_art = make_unique<ART>(this->column_ids, this->
|
|
58467
|
+
auto temp_art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
|
|
58468
|
+
this->constraint_type, this->db);
|
|
58381
58469
|
for (;;) {
|
|
58382
58470
|
DataChunk ordered_chunk;
|
|
58383
58471
|
ordered_chunk.Initialize(allocator, payload_types);
|
|
@@ -58420,7 +58508,8 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
|
|
|
58420
58508
|
auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
|
|
58421
58509
|
|
|
58422
58510
|
// construct the ART of this chunk
|
|
58423
|
-
auto art = make_unique<ART>(this->column_ids, this->
|
|
58511
|
+
auto art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
|
|
58512
|
+
this->constraint_type, this->db);
|
|
58424
58513
|
auto key_section = KeySection(start_idx, ordered_chunk.size() - 1, 0, 0);
|
|
58425
58514
|
auto has_constraint = IsPrimary() || IsUnique();
|
|
58426
58515
|
Construct(keys, row_ids, art->tree, key_section, has_constraint);
|
|
@@ -59404,8 +59493,7 @@ void Leaf::Remove(row_t row_id) {
|
|
|
59404
59493
|
}
|
|
59405
59494
|
|
|
59406
59495
|
BlockPointer Leaf::Serialize(duckdb::MetaBlockWriter &writer) {
|
|
59407
|
-
auto
|
|
59408
|
-
uint32_t offset = writer.offset;
|
|
59496
|
+
auto ptr = writer.GetBlockPointer();
|
|
59409
59497
|
// Write Node Type
|
|
59410
59498
|
writer.Write(type);
|
|
59411
59499
|
// Write compression Info
|
|
@@ -59417,7 +59505,7 @@ BlockPointer Leaf::Serialize(duckdb::MetaBlockWriter &writer) {
|
|
|
59417
59505
|
for (idx_t i = 0; i < count; i++) {
|
|
59418
59506
|
writer.Write(row_ids[i]);
|
|
59419
59507
|
}
|
|
59420
|
-
return
|
|
59508
|
+
return ptr;
|
|
59421
59509
|
}
|
|
59422
59510
|
|
|
59423
59511
|
Leaf *Leaf::Deserialize(MetaBlockReader &reader) {
|
|
@@ -59450,8 +59538,6 @@ void Leaf::Merge(bool &has_constraint, Node *&l_node, Node *&r_node) {
|
|
|
59450
59538
|
|
|
59451
59539
|
|
|
59452
59540
|
|
|
59453
|
-
|
|
59454
|
-
|
|
59455
59541
|
//===----------------------------------------------------------------------===//
|
|
59456
59542
|
// DuckDB
|
|
59457
59543
|
//
|
|
@@ -59469,6 +59555,9 @@ using std::swap;
|
|
|
59469
59555
|
}
|
|
59470
59556
|
|
|
59471
59557
|
|
|
59558
|
+
|
|
59559
|
+
|
|
59560
|
+
|
|
59472
59561
|
namespace duckdb {
|
|
59473
59562
|
|
|
59474
59563
|
InternalType::InternalType(Node *n) {
|
|
@@ -59602,8 +59691,7 @@ BlockPointer Node::SerializeInternal(ART &art, duckdb::MetaBlockWriter &writer,
|
|
|
59602
59691
|
for (idx_t i = 0; i < internal_type.children_size; i++) {
|
|
59603
59692
|
child_offsets.emplace_back(internal_type.children[i].Serialize(art, writer));
|
|
59604
59693
|
}
|
|
59605
|
-
auto
|
|
59606
|
-
uint32_t offset = writer.offset;
|
|
59694
|
+
auto ptr = writer.GetBlockPointer();
|
|
59607
59695
|
// Write Node Type
|
|
59608
59696
|
writer.Write(type);
|
|
59609
59697
|
// Write count
|
|
@@ -59619,7 +59707,7 @@ BlockPointer Node::SerializeInternal(ART &art, duckdb::MetaBlockWriter &writer,
|
|
|
59619
59707
|
writer.Write(offsets.block_id);
|
|
59620
59708
|
writer.Write(offsets.offset);
|
|
59621
59709
|
}
|
|
59622
|
-
return
|
|
59710
|
+
return ptr;
|
|
59623
59711
|
}
|
|
59624
59712
|
|
|
59625
59713
|
BlockPointer Node::Serialize(ART &art, duckdb::MetaBlockWriter &writer) {
|
|
@@ -59655,7 +59743,7 @@ void Node::DeserializeInternal(duckdb::MetaBlockReader &reader) {
|
|
|
59655
59743
|
}
|
|
59656
59744
|
|
|
59657
59745
|
Node *Node::Deserialize(ART &art, idx_t block_id, idx_t offset) {
|
|
59658
|
-
MetaBlockReader reader(art.
|
|
59746
|
+
MetaBlockReader reader(art.table_io_manager.GetIndexBlockManager(), block_id);
|
|
59659
59747
|
reader.offset = offset;
|
|
59660
59748
|
auto n = reader.Read<uint8_t>();
|
|
59661
59749
|
NodeType node_type(static_cast<NodeType>(n));
|
|
@@ -79673,6 +79761,7 @@ public:
|
|
|
79673
79761
|
|
|
79674
79762
|
|
|
79675
79763
|
|
|
79764
|
+
|
|
79676
79765
|
namespace duckdb {
|
|
79677
79766
|
|
|
79678
79767
|
//===--------------------------------------------------------------------===//
|
|
@@ -79710,7 +79799,8 @@ unique_ptr<GlobalSinkState> PhysicalCreateIndex::GetGlobalSinkState(ClientContex
|
|
|
79710
79799
|
// create the global index
|
|
79711
79800
|
switch (info->index_type) {
|
|
79712
79801
|
case IndexType::ART: {
|
|
79713
|
-
state->global_index = make_unique<ART>(storage_ids,
|
|
79802
|
+
state->global_index = make_unique<ART>(storage_ids, TableIOManager::Get(*table.storage), unbound_expressions,
|
|
79803
|
+
info->constraint_type, *context.db);
|
|
79714
79804
|
break;
|
|
79715
79805
|
}
|
|
79716
79806
|
default:
|
|
@@ -79729,8 +79819,8 @@ unique_ptr<LocalSinkState> PhysicalCreateIndex::GetLocalSinkState(ExecutionConte
|
|
|
79729
79819
|
// create the local index
|
|
79730
79820
|
switch (info->index_type) {
|
|
79731
79821
|
case IndexType::ART: {
|
|
79732
|
-
state->local_index =
|
|
79733
|
-
|
|
79822
|
+
state->local_index = make_unique<ART>(storage_ids, TableIOManager::Get(*table.storage), unbound_expressions,
|
|
79823
|
+
info->constraint_type, *context.client.db);
|
|
79734
79824
|
break;
|
|
79735
79825
|
}
|
|
79736
79826
|
default:
|
|
@@ -122487,32 +122577,18 @@ void PragmaDatabaseSizeFunction(ClientContext &context, TableFunctionInput &data
|
|
|
122487
122577
|
return;
|
|
122488
122578
|
}
|
|
122489
122579
|
auto &storage = StorageManager::GetStorageManager(context);
|
|
122490
|
-
auto &block_manager = BlockManager::GetBlockManager(context);
|
|
122491
122580
|
auto &buffer_manager = BufferManager::GetBufferManager(context);
|
|
122492
122581
|
|
|
122582
|
+
auto ds = storage.GetDatabaseSize();
|
|
122583
|
+
|
|
122493
122584
|
output.SetCardinality(1);
|
|
122494
|
-
|
|
122495
|
-
|
|
122496
|
-
|
|
122497
|
-
|
|
122498
|
-
|
|
122499
|
-
|
|
122500
|
-
|
|
122501
|
-
auto wal_size = wal ? wal->GetWALSize() : 0;
|
|
122502
|
-
output.data[0].SetValue(0, Value(StringUtil::BytesToHumanReadableString(bytes)));
|
|
122503
|
-
output.data[1].SetValue(0, Value::BIGINT(block_size));
|
|
122504
|
-
output.data[2].SetValue(0, Value::BIGINT(total_blocks));
|
|
122505
|
-
output.data[3].SetValue(0, Value::BIGINT(used_blocks));
|
|
122506
|
-
output.data[4].SetValue(0, Value::BIGINT(free_blocks));
|
|
122507
|
-
output.data[5].SetValue(0, Value(StringUtil::BytesToHumanReadableString(wal_size)));
|
|
122508
|
-
} else {
|
|
122509
|
-
output.data[0].SetValue(0, Value());
|
|
122510
|
-
output.data[1].SetValue(0, Value());
|
|
122511
|
-
output.data[2].SetValue(0, Value());
|
|
122512
|
-
output.data[3].SetValue(0, Value());
|
|
122513
|
-
output.data[4].SetValue(0, Value());
|
|
122514
|
-
output.data[5].SetValue(0, Value());
|
|
122515
|
-
}
|
|
122585
|
+
output.data[0].SetValue(0, Value(StringUtil::BytesToHumanReadableString(ds.bytes)));
|
|
122586
|
+
output.data[1].SetValue(0, Value::BIGINT(ds.block_size));
|
|
122587
|
+
output.data[2].SetValue(0, Value::BIGINT(ds.total_blocks));
|
|
122588
|
+
output.data[3].SetValue(0, Value::BIGINT(ds.used_blocks));
|
|
122589
|
+
output.data[4].SetValue(0, Value::BIGINT(ds.free_blocks));
|
|
122590
|
+
output.data[5].SetValue(0, Value(StringUtil::BytesToHumanReadableString(ds.wal_size)));
|
|
122591
|
+
|
|
122516
122592
|
output.data[6].SetValue(0, Value(StringUtil::BytesToHumanReadableString(buffer_manager.GetUsedMemory())));
|
|
122517
122593
|
auto max_memory = buffer_manager.GetMaxMemory();
|
|
122518
122594
|
output.data[7].SetValue(0, max_memory == (idx_t)-1 ? Value("Unlimited")
|
|
@@ -130510,14 +130586,6 @@ BufferManager &BufferManager::GetBufferManager(DatabaseInstance &db) {
|
|
|
130510
130586
|
return *db.GetStorageManager().buffer_manager;
|
|
130511
130587
|
}
|
|
130512
130588
|
|
|
130513
|
-
BlockManager &BlockManager::GetBlockManager(DatabaseInstance &db) {
|
|
130514
|
-
return *db.GetStorageManager().block_manager;
|
|
130515
|
-
}
|
|
130516
|
-
|
|
130517
|
-
BlockManager &BlockManager::GetBlockManager(ClientContext &context) {
|
|
130518
|
-
return BlockManager::GetBlockManager(DatabaseInstance::GetDatabase(context));
|
|
130519
|
-
}
|
|
130520
|
-
|
|
130521
130589
|
DatabaseInstance &DatabaseInstance::GetDatabase(ClientContext &context) {
|
|
130522
130590
|
return *context.db;
|
|
130523
130591
|
}
|
|
@@ -130604,10 +130672,11 @@ void DatabaseInstance::Initialize(const char *database_path, DBConfig *user_conf
|
|
|
130604
130672
|
config.options.temporary_directory = string();
|
|
130605
130673
|
}
|
|
130606
130674
|
|
|
130607
|
-
//
|
|
130608
|
-
//
|
|
130609
|
-
|
|
130610
|
-
|
|
130675
|
+
// TODO: Support an extension here, to generate different storage managers
|
|
130676
|
+
// depending on the DB path structure/prefix.
|
|
130677
|
+
const string dbPath = config.options.database_path;
|
|
130678
|
+
storage = make_unique<SingleFileStorageManager>(*this, dbPath, config.options.access_mode == AccessMode::READ_ONLY);
|
|
130679
|
+
|
|
130611
130680
|
catalog = make_unique<Catalog>(*this);
|
|
130612
130681
|
transaction_manager = make_unique<TransactionManager>(*this);
|
|
130613
130682
|
scheduler = make_unique<TaskScheduler>(*this);
|
|
@@ -192836,10 +192905,7 @@ public:
|
|
|
192836
192905
|
using BlockManager::BlockManager;
|
|
192837
192906
|
|
|
192838
192907
|
// LCOV_EXCL_START
|
|
192839
|
-
|
|
192840
|
-
throw InternalException("Cannot perform IO in in-memory database!");
|
|
192841
|
-
}
|
|
192842
|
-
unique_ptr<Block> CreateBlock(block_id_t block_id) override {
|
|
192908
|
+
unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) override {
|
|
192843
192909
|
throw InternalException("Cannot perform IO in in-memory database!");
|
|
192844
192910
|
}
|
|
192845
192911
|
block_id_t GetFreeBlockId() override {
|
|
@@ -193756,7 +193822,17 @@ Allocator &BufferManager::GetBufferAllocator() {
|
|
|
193756
193822
|
//===----------------------------------------------------------------------===//
|
|
193757
193823
|
// DuckDB
|
|
193758
193824
|
//
|
|
193759
|
-
// duckdb/storage/checkpoint/
|
|
193825
|
+
// duckdb/storage/checkpoint/table_data_writer.hpp
|
|
193826
|
+
//
|
|
193827
|
+
//
|
|
193828
|
+
//===----------------------------------------------------------------------===//
|
|
193829
|
+
|
|
193830
|
+
|
|
193831
|
+
|
|
193832
|
+
//===----------------------------------------------------------------------===//
|
|
193833
|
+
// DuckDB
|
|
193834
|
+
//
|
|
193835
|
+
// duckdb/storage/checkpoint/row_group_writer.hpp
|
|
193760
193836
|
//
|
|
193761
193837
|
//
|
|
193762
193838
|
//===----------------------------------------------------------------------===//
|
|
@@ -193773,6 +193849,14 @@ Allocator &BufferManager::GetBufferAllocator() {
|
|
|
193773
193849
|
|
|
193774
193850
|
|
|
193775
193851
|
|
|
193852
|
+
//===----------------------------------------------------------------------===//
|
|
193853
|
+
// DuckDB
|
|
193854
|
+
//
|
|
193855
|
+
// duckdb/storage/partial_block_manager.hpp
|
|
193856
|
+
//
|
|
193857
|
+
//
|
|
193858
|
+
//===----------------------------------------------------------------------===//
|
|
193859
|
+
|
|
193776
193860
|
|
|
193777
193861
|
|
|
193778
193862
|
|
|
@@ -193792,83 +193876,390 @@ class TableCatalogEntry;
|
|
|
193792
193876
|
class ViewCatalogEntry;
|
|
193793
193877
|
class TypeCatalogEntry;
|
|
193794
193878
|
|
|
193795
|
-
struct
|
|
193796
|
-
|
|
193879
|
+
struct PartialBlockState {
|
|
193880
|
+
block_id_t block_id;
|
|
193881
|
+
//! How big is the block we're writing to. (Total bytes to assign).
|
|
193882
|
+
uint32_t block_size;
|
|
193883
|
+
//! How many bytes of the allocation are used. (offset_in_block of next allocation)
|
|
193797
193884
|
uint32_t offset_in_block;
|
|
193885
|
+
//! How many times has the block been used?
|
|
193886
|
+
uint32_t block_use_count;
|
|
193798
193887
|
};
|
|
193799
193888
|
|
|
193800
193889
|
struct PartialBlock {
|
|
193801
|
-
|
|
193802
|
-
|
|
193803
|
-
|
|
193804
|
-
|
|
193805
|
-
|
|
193890
|
+
explicit PartialBlock(PartialBlockState state) : state(move(state)) {
|
|
193891
|
+
}
|
|
193892
|
+
virtual ~PartialBlock() {
|
|
193893
|
+
}
|
|
193894
|
+
|
|
193895
|
+
PartialBlockState state;
|
|
193806
193896
|
|
|
193807
|
-
|
|
193897
|
+
public:
|
|
193898
|
+
virtual void Flush() = 0;
|
|
193899
|
+
};
|
|
193900
|
+
|
|
193901
|
+
struct PartialBlockAllocation {
|
|
193902
|
+
// BlockManager owning the block_id
|
|
193903
|
+
BlockManager *block_manager {nullptr};
|
|
193904
|
+
//! How many bytes assigned to the caller?
|
|
193905
|
+
uint32_t allocation_size;
|
|
193906
|
+
//! State of assigned block.
|
|
193907
|
+
PartialBlockState state;
|
|
193908
|
+
//! Arbitrary state related to partial block storage.
|
|
193909
|
+
unique_ptr<PartialBlock> partial_block;
|
|
193808
193910
|
};
|
|
193809
193911
|
|
|
193810
|
-
//!
|
|
193811
|
-
|
|
193912
|
+
//! Enables sharing blocks across some scope. Scope is whatever we want to share
|
|
193913
|
+
//! blocks across. It may be an entire checkpoint or just a single row group.
|
|
193914
|
+
//! In any case, they must share a block manager.
|
|
193915
|
+
class PartialBlockManager {
|
|
193812
193916
|
public:
|
|
193813
|
-
|
|
193917
|
+
// 20% free / 80% utilization
|
|
193918
|
+
static constexpr const idx_t DEFAULT_MAX_PARTIAL_BLOCK_SIZE = Storage::BLOCK_SIZE / 5 * 4;
|
|
193919
|
+
// Max number of shared references to a block. No effective limit by default.
|
|
193920
|
+
static constexpr const idx_t DEFAULT_MAX_USE_COUNT = 1 << 20;
|
|
193921
|
+
// No point letting map size grow unbounded. We'll drop blocks with the
|
|
193922
|
+
// least free space first.
|
|
193923
|
+
static constexpr const idx_t MAX_BLOCK_MAP_SIZE = 1 << 31;
|
|
193814
193924
|
|
|
193815
193925
|
public:
|
|
193816
|
-
|
|
193926
|
+
PartialBlockManager(BlockManager &block_manager, uint32_t max_partial_block_size = DEFAULT_MAX_PARTIAL_BLOCK_SIZE,
|
|
193927
|
+
uint32_t max_use_count = DEFAULT_MAX_USE_COUNT)
|
|
193928
|
+
: block_manager(block_manager), max_partial_block_size(max_partial_block_size), max_use_count(max_use_count) {
|
|
193929
|
+
}
|
|
193930
|
+
|
|
193931
|
+
public:
|
|
193932
|
+
//! Flush any remaining partial blocks to disk
|
|
193933
|
+
void FlushPartialBlocks();
|
|
193934
|
+
|
|
193935
|
+
PartialBlockAllocation GetBlockAllocation(uint32_t segment_size);
|
|
193936
|
+
|
|
193937
|
+
virtual void AllocateBlock(PartialBlockState &state, uint32_t segment_size);
|
|
193938
|
+
|
|
193939
|
+
//! Register a partially filled block that is filled with "segment_size" entries
|
|
193940
|
+
void RegisterPartialBlock(PartialBlockAllocation &&allocation);
|
|
193941
|
+
|
|
193942
|
+
protected:
|
|
193943
|
+
BlockManager &block_manager;
|
|
193944
|
+
//! A map of (available space -> PartialBlock) for partially filled blocks
|
|
193945
|
+
//! This is a multimap because there might be outstanding partial blocks with
|
|
193946
|
+
//! the same amount of left-over space
|
|
193947
|
+
multimap<idx_t, unique_ptr<PartialBlock>> partially_filled_blocks;
|
|
193948
|
+
|
|
193949
|
+
//! The maximum size (in bytes) at which a partial block will be considered a partial block
|
|
193950
|
+
uint32_t max_partial_block_size;
|
|
193951
|
+
uint32_t max_use_count;
|
|
193952
|
+
|
|
193953
|
+
protected:
|
|
193954
|
+
//! Try to obtain a partially filled block that can fit "segment_size" bytes
|
|
193955
|
+
//! If successful, returns true and returns the block_id and offset_in_block to write to
|
|
193956
|
+
//! Otherwise, returns false
|
|
193957
|
+
bool GetPartialBlock(idx_t segment_size, unique_ptr<PartialBlock> &state);
|
|
193958
|
+
};
|
|
193959
|
+
|
|
193960
|
+
} // namespace duckdb
|
|
193961
|
+
|
|
193962
|
+
|
|
193963
|
+
|
|
193964
|
+
namespace duckdb {
|
|
193965
|
+
class DatabaseInstance;
|
|
193966
|
+
class ClientContext;
|
|
193967
|
+
class ColumnSegment;
|
|
193968
|
+
class MetaBlockReader;
|
|
193969
|
+
class SchemaCatalogEntry;
|
|
193970
|
+
class SequenceCatalogEntry;
|
|
193971
|
+
class TableCatalogEntry;
|
|
193972
|
+
class ViewCatalogEntry;
|
|
193973
|
+
class TypeCatalogEntry;
|
|
193974
|
+
|
|
193975
|
+
class CheckpointWriter {
|
|
193976
|
+
public:
|
|
193977
|
+
explicit CheckpointWriter(DatabaseInstance &db) : db(db) {
|
|
193978
|
+
}
|
|
193979
|
+
virtual ~CheckpointWriter() {
|
|
193980
|
+
}
|
|
193817
193981
|
|
|
193818
193982
|
//! The database
|
|
193819
193983
|
DatabaseInstance &db;
|
|
193984
|
+
|
|
193985
|
+
virtual MetaBlockWriter &GetMetaBlockWriter() = 0;
|
|
193986
|
+
virtual unique_ptr<TableDataWriter> GetTableDataWriter(TableCatalogEntry &table) = 0;
|
|
193987
|
+
virtual BlockPointer WriteIndexData(IndexCatalogEntry &index_catalog) = 0;
|
|
193988
|
+
|
|
193989
|
+
protected:
|
|
193990
|
+
virtual void WriteSchema(SchemaCatalogEntry &schema);
|
|
193991
|
+
virtual void WriteTable(TableCatalogEntry &table);
|
|
193992
|
+
virtual void WriteView(ViewCatalogEntry &table);
|
|
193993
|
+
virtual void WriteSequence(SequenceCatalogEntry &table);
|
|
193994
|
+
virtual void WriteMacro(ScalarMacroCatalogEntry &table);
|
|
193995
|
+
virtual void WriteTableMacro(TableMacroCatalogEntry &table);
|
|
193996
|
+
virtual void WriteIndex(IndexCatalogEntry &index_catalog);
|
|
193997
|
+
virtual void WriteType(TypeCatalogEntry &table);
|
|
193998
|
+
};
|
|
193999
|
+
|
|
194000
|
+
class CheckpointReader {
|
|
194001
|
+
public:
|
|
194002
|
+
virtual ~CheckpointReader() {
|
|
194003
|
+
}
|
|
194004
|
+
|
|
194005
|
+
protected:
|
|
194006
|
+
virtual void LoadCheckpoint(ClientContext &context, MetaBlockReader &reader);
|
|
194007
|
+
virtual void ReadSchema(ClientContext &context, MetaBlockReader &reader);
|
|
194008
|
+
virtual void ReadTable(ClientContext &context, MetaBlockReader &reader);
|
|
194009
|
+
virtual void ReadView(ClientContext &context, MetaBlockReader &reader);
|
|
194010
|
+
virtual void ReadSequence(ClientContext &context, MetaBlockReader &reader);
|
|
194011
|
+
virtual void ReadMacro(ClientContext &context, MetaBlockReader &reader);
|
|
194012
|
+
virtual void ReadTableMacro(ClientContext &context, MetaBlockReader &reader);
|
|
194013
|
+
virtual void ReadIndex(ClientContext &context, MetaBlockReader &reader);
|
|
194014
|
+
virtual void ReadType(ClientContext &context, MetaBlockReader &reader);
|
|
194015
|
+
|
|
194016
|
+
virtual void ReadTableData(ClientContext &context, MetaBlockReader &reader, BoundCreateTableInfo &bound_info);
|
|
194017
|
+
};
|
|
194018
|
+
|
|
194019
|
+
class SingleFileCheckpointReader final : public CheckpointReader {
|
|
194020
|
+
public:
|
|
194021
|
+
explicit SingleFileCheckpointReader(SingleFileStorageManager &storage) : storage(storage) {
|
|
194022
|
+
}
|
|
194023
|
+
|
|
194024
|
+
void LoadFromStorage();
|
|
194025
|
+
|
|
194026
|
+
//! The database
|
|
194027
|
+
SingleFileStorageManager &storage;
|
|
194028
|
+
};
|
|
194029
|
+
|
|
194030
|
+
//! CheckpointWriter is responsible for checkpointing the database
|
|
194031
|
+
class SingleFileRowGroupWriter;
|
|
194032
|
+
class SingleFileTableDataWriter;
|
|
194033
|
+
|
|
194034
|
+
class SingleFileCheckpointWriter final : public CheckpointWriter {
|
|
194035
|
+
friend class SingleFileRowGroupWriter;
|
|
194036
|
+
friend class SingleFileTableDataWriter;
|
|
194037
|
+
|
|
194038
|
+
public:
|
|
194039
|
+
explicit SingleFileCheckpointWriter(DatabaseInstance &db, BlockManager &block_manager)
|
|
194040
|
+
: CheckpointWriter(db), partial_block_manager(block_manager) {
|
|
194041
|
+
}
|
|
194042
|
+
|
|
194043
|
+
//! Checkpoint the current state of the WAL and flush it to the main storage. This should be called BEFORE any
|
|
194044
|
+
//! connection is available because right now the checkpointing cannot be done online. (TODO)
|
|
194045
|
+
void CreateCheckpoint();
|
|
194046
|
+
|
|
194047
|
+
virtual MetaBlockWriter &GetMetaBlockWriter() override;
|
|
194048
|
+
virtual unique_ptr<TableDataWriter> GetTableDataWriter(TableCatalogEntry &table) override;
|
|
194049
|
+
virtual BlockPointer WriteIndexData(IndexCatalogEntry &index_catalog) override;
|
|
194050
|
+
|
|
194051
|
+
BlockManager &GetBlockManager();
|
|
194052
|
+
|
|
194053
|
+
private:
|
|
193820
194054
|
//! The metadata writer is responsible for writing schema information
|
|
193821
194055
|
unique_ptr<MetaBlockWriter> metadata_writer;
|
|
193822
194056
|
//! The table data writer is responsible for writing the DataPointers used by the table chunks
|
|
193823
|
-
unique_ptr<MetaBlockWriter>
|
|
194057
|
+
unique_ptr<MetaBlockWriter> table_metadata_writer;
|
|
194058
|
+
//! Because this is single-file storage, we can share partial blocks across
|
|
194059
|
+
//! an entire checkpoint.
|
|
194060
|
+
PartialBlockManager partial_block_manager;
|
|
194061
|
+
};
|
|
193824
194062
|
|
|
194063
|
+
} // namespace duckdb
|
|
194064
|
+
|
|
194065
|
+
|
|
194066
|
+
namespace duckdb {
|
|
194067
|
+
struct ColumnCheckpointState;
|
|
194068
|
+
class CheckpointWriter;
|
|
194069
|
+
class ColumnData;
|
|
194070
|
+
class ColumnSegment;
|
|
194071
|
+
class RowGroup;
|
|
194072
|
+
class BaseStatistics;
|
|
194073
|
+
class SegmentStatistics;
|
|
194074
|
+
|
|
194075
|
+
// Writes data for an entire row group.
|
|
194076
|
+
class RowGroupWriter {
|
|
193825
194077
|
public:
|
|
193826
|
-
|
|
193827
|
-
|
|
193828
|
-
|
|
193829
|
-
|
|
193830
|
-
|
|
194078
|
+
RowGroupWriter(TableCatalogEntry &table, PartialBlockManager &partial_block_manager)
|
|
194079
|
+
: table(table), partial_block_manager(partial_block_manager) {
|
|
194080
|
+
}
|
|
194081
|
+
virtual ~RowGroupWriter() {
|
|
194082
|
+
}
|
|
193831
194083
|
|
|
193832
|
-
|
|
193833
|
-
//! If successful, returns true and returns the block_id and offset_in_block to write to
|
|
193834
|
-
//! Otherwise, returns false
|
|
193835
|
-
bool GetPartialBlock(ColumnSegment *segment, idx_t segment_size, block_id_t &block_id, uint32_t &offset_in_block,
|
|
193836
|
-
PartialBlock *&partial_block_ptr, unique_ptr<PartialBlock> &owned_partial_block);
|
|
194084
|
+
CompressionType GetColumnCompressionType(idx_t i);
|
|
193837
194085
|
|
|
193838
|
-
|
|
193839
|
-
void RegisterPartialBlock(ColumnSegment *segment, idx_t segment_size, block_id_t block_id);
|
|
194086
|
+
virtual void WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state) = 0;
|
|
193840
194087
|
|
|
193841
|
-
|
|
193842
|
-
void FlushPartialSegments();
|
|
194088
|
+
virtual MetaBlockWriter &GetPayloadWriter() = 0;
|
|
193843
194089
|
|
|
193844
|
-
|
|
193845
|
-
|
|
193846
|
-
|
|
193847
|
-
|
|
193848
|
-
|
|
193849
|
-
|
|
193850
|
-
|
|
193851
|
-
|
|
193852
|
-
|
|
193853
|
-
|
|
193854
|
-
|
|
193855
|
-
|
|
193856
|
-
|
|
193857
|
-
|
|
193858
|
-
|
|
193859
|
-
|
|
193860
|
-
|
|
193861
|
-
|
|
194090
|
+
void RegisterPartialBlock(PartialBlockAllocation &&allocation);
|
|
194091
|
+
PartialBlockAllocation GetBlockAllocation(uint32_t segment_size);
|
|
194092
|
+
|
|
194093
|
+
protected:
|
|
194094
|
+
TableCatalogEntry &table;
|
|
194095
|
+
PartialBlockManager &partial_block_manager;
|
|
194096
|
+
};
|
|
194097
|
+
|
|
194098
|
+
// Writes data for an entire row group.
|
|
194099
|
+
class SingleFileRowGroupWriter : public RowGroupWriter {
|
|
194100
|
+
public:
|
|
194101
|
+
SingleFileRowGroupWriter(TableCatalogEntry &table, PartialBlockManager &partial_block_manager,
|
|
194102
|
+
MetaBlockWriter &table_data_writer)
|
|
194103
|
+
: RowGroupWriter(table, partial_block_manager), table_data_writer(table_data_writer) {
|
|
194104
|
+
}
|
|
194105
|
+
|
|
194106
|
+
//! MetaBlockWriter is a cursor on a given BlockManager. This returns the
|
|
194107
|
+
//! cursor against which we should write payload data for the specified RowGroup.
|
|
194108
|
+
MetaBlockWriter &table_data_writer;
|
|
194109
|
+
|
|
194110
|
+
public:
|
|
194111
|
+
virtual void WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state) override;
|
|
194112
|
+
|
|
194113
|
+
virtual MetaBlockWriter &GetPayloadWriter() override;
|
|
194114
|
+
};
|
|
194115
|
+
|
|
194116
|
+
} // namespace duckdb
|
|
194117
|
+
|
|
194118
|
+
|
|
194119
|
+
namespace duckdb {
|
|
194120
|
+
|
|
194121
|
+
//! The table data writer is responsible for writing the data of a table to
|
|
194122
|
+
//! storage.
|
|
194123
|
+
//
|
|
194124
|
+
//! This is meant to encapsulate and abstract:
|
|
194125
|
+
//! - Storage/encoding of table metadata (block pointers)
|
|
194126
|
+
//! - Mapping management of data block locations
|
|
194127
|
+
//! Abstraction will support, for example: tiering, versioning, or splitting into multiple block managers.
|
|
194128
|
+
class TableDataWriter {
|
|
194129
|
+
public:
|
|
194130
|
+
explicit TableDataWriter(TableCatalogEntry &table);
|
|
194131
|
+
virtual ~TableDataWriter();
|
|
194132
|
+
|
|
194133
|
+
public:
|
|
194134
|
+
void WriteTableData();
|
|
194135
|
+
|
|
194136
|
+
CompressionType GetColumnCompressionType(idx_t i);
|
|
194137
|
+
|
|
194138
|
+
virtual void FinalizeTable(vector<unique_ptr<BaseStatistics>> &&global_stats, DataTableInfo *info) = 0;
|
|
194139
|
+
virtual unique_ptr<RowGroupWriter> GetRowGroupWriter(RowGroup &row_group) = 0;
|
|
194140
|
+
|
|
194141
|
+
virtual void AddRowGroup(RowGroupPointer &&row_group_pointer, unique_ptr<RowGroupWriter> &&writer);
|
|
194142
|
+
|
|
194143
|
+
protected:
|
|
194144
|
+
TableCatalogEntry &table;
|
|
194145
|
+
// Pointers to the start of each row group.
|
|
194146
|
+
vector<RowGroupPointer> row_group_pointers;
|
|
194147
|
+
};
|
|
194148
|
+
|
|
194149
|
+
class SingleFileTableDataWriter : public TableDataWriter {
|
|
194150
|
+
public:
|
|
194151
|
+
SingleFileTableDataWriter(SingleFileCheckpointWriter &checkpoint_manager, TableCatalogEntry &table,
|
|
194152
|
+
MetaBlockWriter &table_data_writer, MetaBlockWriter &meta_data_writer);
|
|
194153
|
+
|
|
194154
|
+
public:
|
|
194155
|
+
virtual void FinalizeTable(vector<unique_ptr<BaseStatistics>> &&global_stats, DataTableInfo *info) override;
|
|
194156
|
+
virtual unique_ptr<RowGroupWriter> GetRowGroupWriter(RowGroup &row_group) override;
|
|
193862
194157
|
|
|
193863
194158
|
private:
|
|
193864
|
-
|
|
193865
|
-
|
|
193866
|
-
|
|
194159
|
+
SingleFileCheckpointWriter &checkpoint_manager;
|
|
194160
|
+
// Writes the actual table data
|
|
194161
|
+
MetaBlockWriter &table_data_writer;
|
|
194162
|
+
// Writes the metadata of the table
|
|
194163
|
+
MetaBlockWriter &meta_data_writer;
|
|
194164
|
+
};
|
|
194165
|
+
|
|
194166
|
+
} // namespace duckdb
|
|
194167
|
+
|
|
194168
|
+
|
|
194169
|
+
|
|
194170
|
+
|
|
194171
|
+
|
|
194172
|
+
//===----------------------------------------------------------------------===//
|
|
194173
|
+
// DuckDB
|
|
194174
|
+
//
|
|
194175
|
+
// duckdb/storage/table/column_checkpoint_state.hpp
|
|
194176
|
+
//
|
|
194177
|
+
//
|
|
194178
|
+
//===----------------------------------------------------------------------===//
|
|
194179
|
+
|
|
194180
|
+
|
|
194181
|
+
|
|
194182
|
+
|
|
194183
|
+
|
|
194184
|
+
|
|
194185
|
+
|
|
194186
|
+
|
|
194187
|
+
|
|
194188
|
+
namespace duckdb {
|
|
194189
|
+
class ColumnData;
|
|
194190
|
+
class DatabaseInstance;
|
|
194191
|
+
class RowGroup;
|
|
194192
|
+
class TableDataWriter;
|
|
194193
|
+
|
|
194194
|
+
struct ColumnCheckpointState {
|
|
194195
|
+
ColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, RowGroupWriter &writer);
|
|
194196
|
+
virtual ~ColumnCheckpointState();
|
|
194197
|
+
|
|
194198
|
+
RowGroup &row_group;
|
|
194199
|
+
ColumnData &column_data;
|
|
194200
|
+
RowGroupWriter &writer;
|
|
194201
|
+
SegmentTree new_tree;
|
|
194202
|
+
vector<DataPointer> data_pointers;
|
|
194203
|
+
unique_ptr<BaseStatistics> global_stats;
|
|
194204
|
+
|
|
194205
|
+
public:
|
|
194206
|
+
virtual unique_ptr<BaseStatistics> GetStatistics();
|
|
194207
|
+
|
|
194208
|
+
virtual void FlushSegment(unique_ptr<ColumnSegment> segment, idx_t segment_size);
|
|
194209
|
+
virtual void WriteDataPointers();
|
|
193867
194210
|
};
|
|
193868
194211
|
|
|
193869
194212
|
} // namespace duckdb
|
|
193870
194213
|
|
|
193871
194214
|
|
|
194215
|
+
namespace duckdb {
|
|
194216
|
+
|
|
194217
|
+
CompressionType RowGroupWriter::GetColumnCompressionType(idx_t i) {
|
|
194218
|
+
return table.columns[i].CompressionType();
|
|
194219
|
+
}
|
|
194220
|
+
|
|
194221
|
+
void RowGroupWriter::RegisterPartialBlock(PartialBlockAllocation &&allocation) {
|
|
194222
|
+
partial_block_manager.RegisterPartialBlock(move(allocation));
|
|
194223
|
+
}
|
|
194224
|
+
|
|
194225
|
+
PartialBlockAllocation RowGroupWriter::GetBlockAllocation(uint32_t segment_size) {
|
|
194226
|
+
return partial_block_manager.GetBlockAllocation(segment_size);
|
|
194227
|
+
}
|
|
194228
|
+
|
|
194229
|
+
void SingleFileRowGroupWriter::WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state) {
|
|
194230
|
+
auto &meta_writer = table_data_writer;
|
|
194231
|
+
const auto &data_pointers = column_checkpoint_state.data_pointers;
|
|
194232
|
+
|
|
194233
|
+
meta_writer.Write<idx_t>(data_pointers.size());
|
|
194234
|
+
// then write the data pointers themselves
|
|
194235
|
+
for (idx_t k = 0; k < data_pointers.size(); k++) {
|
|
194236
|
+
auto &data_pointer = data_pointers[k];
|
|
194237
|
+
meta_writer.Write<idx_t>(data_pointer.row_start);
|
|
194238
|
+
meta_writer.Write<idx_t>(data_pointer.tuple_count);
|
|
194239
|
+
meta_writer.Write<block_id_t>(data_pointer.block_pointer.block_id);
|
|
194240
|
+
meta_writer.Write<uint32_t>(data_pointer.block_pointer.offset);
|
|
194241
|
+
meta_writer.Write<CompressionType>(data_pointer.compression_type);
|
|
194242
|
+
data_pointer.statistics->Serialize(meta_writer);
|
|
194243
|
+
}
|
|
194244
|
+
}
|
|
194245
|
+
|
|
194246
|
+
MetaBlockWriter &SingleFileRowGroupWriter::GetPayloadWriter() {
|
|
194247
|
+
return table_data_writer;
|
|
194248
|
+
}
|
|
194249
|
+
|
|
194250
|
+
} // namespace duckdb
|
|
194251
|
+
//===----------------------------------------------------------------------===//
|
|
194252
|
+
// DuckDB
|
|
194253
|
+
//
|
|
194254
|
+
// duckdb/storage/checkpoint/table_data_reader.hpp
|
|
194255
|
+
//
|
|
194256
|
+
//
|
|
194257
|
+
//===----------------------------------------------------------------------===//
|
|
194258
|
+
|
|
194259
|
+
|
|
194260
|
+
|
|
194261
|
+
|
|
194262
|
+
|
|
193872
194263
|
namespace duckdb {
|
|
193873
194264
|
struct BoundCreateTableInfo;
|
|
193874
194265
|
|
|
@@ -193931,61 +194322,6 @@ void TableDataReader::ReadTableData() {
|
|
|
193931
194322
|
}
|
|
193932
194323
|
|
|
193933
194324
|
} // namespace duckdb
|
|
193934
|
-
//===----------------------------------------------------------------------===//
|
|
193935
|
-
// DuckDB
|
|
193936
|
-
//
|
|
193937
|
-
// duckdb/storage/checkpoint/table_data_writer.hpp
|
|
193938
|
-
//
|
|
193939
|
-
//
|
|
193940
|
-
//===----------------------------------------------------------------------===//
|
|
193941
|
-
|
|
193942
|
-
|
|
193943
|
-
|
|
193944
|
-
|
|
193945
|
-
|
|
193946
|
-
namespace duckdb {
|
|
193947
|
-
class CheckpointManager;
|
|
193948
|
-
class ColumnData;
|
|
193949
|
-
class ColumnSegment;
|
|
193950
|
-
class RowGroup;
|
|
193951
|
-
class BaseStatistics;
|
|
193952
|
-
class SegmentStatistics;
|
|
193953
|
-
|
|
193954
|
-
//! The table data writer is responsible for writing the data of a table to the block manager
|
|
193955
|
-
class TableDataWriter {
|
|
193956
|
-
friend class ColumnData;
|
|
193957
|
-
|
|
193958
|
-
public:
|
|
193959
|
-
TableDataWriter(DatabaseInstance &db, CheckpointManager &checkpoint_manager, TableCatalogEntry &table,
|
|
193960
|
-
MetaBlockWriter &table_data_writer, MetaBlockWriter &meta_data_writer);
|
|
193961
|
-
~TableDataWriter();
|
|
193962
|
-
|
|
193963
|
-
void WriteTableData();
|
|
193964
|
-
|
|
193965
|
-
MetaBlockWriter &GetTableWriter() {
|
|
193966
|
-
return table_data_writer;
|
|
193967
|
-
}
|
|
193968
|
-
MetaBlockWriter &GetMetaWriter() {
|
|
193969
|
-
return meta_data_writer;
|
|
193970
|
-
}
|
|
193971
|
-
|
|
193972
|
-
CheckpointManager &GetCheckpointManager() {
|
|
193973
|
-
return checkpoint_manager;
|
|
193974
|
-
}
|
|
193975
|
-
|
|
193976
|
-
CompressionType GetColumnCompressionType(idx_t i);
|
|
193977
|
-
|
|
193978
|
-
private:
|
|
193979
|
-
CheckpointManager &checkpoint_manager;
|
|
193980
|
-
TableCatalogEntry &table;
|
|
193981
|
-
// Writes the actual table data
|
|
193982
|
-
MetaBlockWriter &table_data_writer;
|
|
193983
|
-
// Writes the metadata of the table
|
|
193984
|
-
MetaBlockWriter &meta_data_writer;
|
|
193985
|
-
};
|
|
193986
|
-
|
|
193987
|
-
} // namespace duckdb
|
|
193988
|
-
|
|
193989
194325
|
|
|
193990
194326
|
|
|
193991
194327
|
|
|
@@ -193995,10 +194331,7 @@ private:
|
|
|
193995
194331
|
|
|
193996
194332
|
namespace duckdb {
|
|
193997
194333
|
|
|
193998
|
-
TableDataWriter::TableDataWriter(
|
|
193999
|
-
MetaBlockWriter &table_data_writer, MetaBlockWriter &meta_data_writer)
|
|
194000
|
-
: checkpoint_manager(checkpoint_manager), table(table), table_data_writer(table_data_writer),
|
|
194001
|
-
meta_data_writer(meta_data_writer) {
|
|
194334
|
+
TableDataWriter::TableDataWriter(TableCatalogEntry &table) : table(table) {
|
|
194002
194335
|
}
|
|
194003
194336
|
|
|
194004
194337
|
TableDataWriter::~TableDataWriter() {
|
|
@@ -194013,6 +194346,51 @@ CompressionType TableDataWriter::GetColumnCompressionType(idx_t i) {
|
|
|
194013
194346
|
return table.columns[i].CompressionType();
|
|
194014
194347
|
}
|
|
194015
194348
|
|
|
194349
|
+
void TableDataWriter::AddRowGroup(RowGroupPointer &&row_group_pointer, unique_ptr<RowGroupWriter> &&writer) {
|
|
194350
|
+
row_group_pointers.push_back(move(row_group_pointer));
|
|
194351
|
+
writer.reset();
|
|
194352
|
+
}
|
|
194353
|
+
|
|
194354
|
+
SingleFileTableDataWriter::SingleFileTableDataWriter(SingleFileCheckpointWriter &checkpoint_manager,
|
|
194355
|
+
TableCatalogEntry &table, MetaBlockWriter &table_data_writer,
|
|
194356
|
+
MetaBlockWriter &meta_data_writer)
|
|
194357
|
+
: TableDataWriter(table), checkpoint_manager(checkpoint_manager), table_data_writer(table_data_writer),
|
|
194358
|
+
meta_data_writer(meta_data_writer) {
|
|
194359
|
+
}
|
|
194360
|
+
|
|
194361
|
+
unique_ptr<RowGroupWriter> SingleFileTableDataWriter::GetRowGroupWriter(RowGroup &row_group) {
|
|
194362
|
+
return make_unique<SingleFileRowGroupWriter>(table, checkpoint_manager.partial_block_manager, table_data_writer);
|
|
194363
|
+
}
|
|
194364
|
+
|
|
194365
|
+
void SingleFileTableDataWriter::FinalizeTable(vector<unique_ptr<BaseStatistics>> &&global_stats, DataTableInfo *info) {
|
|
194366
|
+
// store the current position in the metadata writer
|
|
194367
|
+
// this is where the row groups for this table start
|
|
194368
|
+
auto pointer = table_data_writer.GetBlockPointer();
|
|
194369
|
+
|
|
194370
|
+
for (auto &stats : global_stats) {
|
|
194371
|
+
stats->Serialize(table_data_writer);
|
|
194372
|
+
}
|
|
194373
|
+
// now start writing the row group pointers to disk
|
|
194374
|
+
table_data_writer.Write<uint64_t>(row_group_pointers.size());
|
|
194375
|
+
for (auto &row_group_pointer : row_group_pointers) {
|
|
194376
|
+
RowGroup::Serialize(row_group_pointer, table_data_writer);
|
|
194377
|
+
}
|
|
194378
|
+
|
|
194379
|
+
// Pointer to the table itself goes to the metadata stream.
|
|
194380
|
+
meta_data_writer.Write<block_id_t>(pointer.block_id);
|
|
194381
|
+
meta_data_writer.Write<uint64_t>(pointer.offset);
|
|
194382
|
+
|
|
194383
|
+
// Now we serialize indexes in the table_metadata_writer
|
|
194384
|
+
std::vector<BlockPointer> index_pointers = info->indexes.SerializeIndexes(table_data_writer);
|
|
194385
|
+
|
|
194386
|
+
// Write-off to metadata block ids and offsets of indexes
|
|
194387
|
+
meta_data_writer.Write<idx_t>(index_pointers.size());
|
|
194388
|
+
for (auto &block_info : index_pointers) {
|
|
194389
|
+
meta_data_writer.Write<idx_t>(block_info.block_id);
|
|
194390
|
+
meta_data_writer.Write<idx_t>(block_info.offset);
|
|
194391
|
+
}
|
|
194392
|
+
}
|
|
194393
|
+
|
|
194016
194394
|
} // namespace duckdb
|
|
194017
194395
|
//===----------------------------------------------------------------------===//
|
|
194018
194396
|
// DuckDB
|
|
@@ -194030,11 +194408,11 @@ namespace duckdb {
|
|
|
194030
194408
|
|
|
194031
194409
|
class WriteOverflowStringsToDisk : public OverflowStringWriter {
|
|
194032
194410
|
public:
|
|
194033
|
-
explicit WriteOverflowStringsToDisk(
|
|
194411
|
+
explicit WriteOverflowStringsToDisk(BlockManager &block_manager);
|
|
194034
194412
|
~WriteOverflowStringsToDisk() override;
|
|
194035
194413
|
|
|
194036
|
-
//! The
|
|
194037
|
-
|
|
194414
|
+
//! The block manager
|
|
194415
|
+
BlockManager &block_manager;
|
|
194038
194416
|
|
|
194039
194417
|
//! Temporary buffer
|
|
194040
194418
|
BufferHandle handle;
|
|
@@ -194060,20 +194438,18 @@ private:
|
|
|
194060
194438
|
|
|
194061
194439
|
namespace duckdb {
|
|
194062
194440
|
|
|
194063
|
-
WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(
|
|
194064
|
-
:
|
|
194441
|
+
WriteOverflowStringsToDisk::WriteOverflowStringsToDisk(BlockManager &block_manager)
|
|
194442
|
+
: block_manager(block_manager), block_id(INVALID_BLOCK), offset(0) {
|
|
194065
194443
|
}
|
|
194066
194444
|
|
|
194067
194445
|
WriteOverflowStringsToDisk::~WriteOverflowStringsToDisk() {
|
|
194068
|
-
auto &block_manager = BlockManager::GetBlockManager(db);
|
|
194069
194446
|
if (offset > 0) {
|
|
194070
194447
|
block_manager.Write(handle.GetFileBuffer(), block_id);
|
|
194071
194448
|
}
|
|
194072
194449
|
}
|
|
194073
194450
|
|
|
194074
194451
|
void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result_block, int32_t &result_offset) {
|
|
194075
|
-
auto &buffer_manager =
|
|
194076
|
-
auto &block_manager = BlockManager::GetBlockManager(db);
|
|
194452
|
+
auto &buffer_manager = block_manager.buffer_manager;
|
|
194077
194453
|
if (!handle.IsValid()) {
|
|
194078
194454
|
handle = buffer_manager.Allocate(Storage::BLOCK_SIZE);
|
|
194079
194455
|
}
|
|
@@ -194123,7 +194499,6 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result
|
|
|
194123
194499
|
}
|
|
194124
194500
|
|
|
194125
194501
|
void WriteOverflowStringsToDisk::AllocateNewBlock(block_id_t new_block_id) {
|
|
194126
|
-
auto &block_manager = BlockManager::GetBlockManager(db);
|
|
194127
194502
|
if (block_id != INVALID_BLOCK) {
|
|
194128
194503
|
// there is an old block, write it first
|
|
194129
194504
|
block_manager.Write(handle.GetFileBuffer(), block_id);
|
|
@@ -194163,33 +194538,47 @@ void WriteOverflowStringsToDisk::AllocateNewBlock(block_id_t new_block_id) {
|
|
|
194163
194538
|
|
|
194164
194539
|
|
|
194165
194540
|
|
|
194541
|
+
|
|
194166
194542
|
|
|
194167
194543
|
|
|
194168
194544
|
namespace duckdb {
|
|
194169
194545
|
|
|
194170
194546
|
void ReorderTableEntries(vector<TableCatalogEntry *> &tables);
|
|
194171
194547
|
|
|
194172
|
-
|
|
194548
|
+
BlockManager &SingleFileCheckpointWriter::GetBlockManager() {
|
|
194549
|
+
auto &storage_manager = (SingleFileStorageManager &)db.GetStorageManager();
|
|
194550
|
+
return *storage_manager.block_manager;
|
|
194551
|
+
}
|
|
194552
|
+
|
|
194553
|
+
BlockPointer SingleFileCheckpointWriter::WriteIndexData(IndexCatalogEntry &index_catalog) {
|
|
194554
|
+
return index_catalog.index->Serialize(*table_metadata_writer);
|
|
194173
194555
|
}
|
|
194174
194556
|
|
|
194175
|
-
|
|
194557
|
+
MetaBlockWriter &SingleFileCheckpointWriter::GetMetaBlockWriter() {
|
|
194558
|
+
return *metadata_writer;
|
|
194559
|
+
}
|
|
194560
|
+
|
|
194561
|
+
unique_ptr<TableDataWriter> SingleFileCheckpointWriter::GetTableDataWriter(TableCatalogEntry &table) {
|
|
194562
|
+
return make_unique<SingleFileTableDataWriter>(*this, table, *table_metadata_writer, GetMetaBlockWriter());
|
|
194563
|
+
}
|
|
194564
|
+
|
|
194565
|
+
void SingleFileCheckpointWriter::CreateCheckpoint() {
|
|
194176
194566
|
auto &config = DBConfig::GetConfig(db);
|
|
194177
|
-
auto &storage_manager =
|
|
194567
|
+
auto &storage_manager = (SingleFileStorageManager &)db.GetStorageManager();
|
|
194178
194568
|
if (storage_manager.InMemory()) {
|
|
194179
194569
|
return;
|
|
194180
194570
|
}
|
|
194181
194571
|
// assert that the checkpoint manager hasn't been used before
|
|
194182
194572
|
D_ASSERT(!metadata_writer);
|
|
194183
194573
|
|
|
194184
|
-
auto &block_manager =
|
|
194185
|
-
block_manager.StartCheckpoint();
|
|
194574
|
+
auto &block_manager = GetBlockManager();
|
|
194186
194575
|
|
|
194187
194576
|
//! Set up the writers for the checkpoints
|
|
194188
|
-
metadata_writer = make_unique<MetaBlockWriter>(
|
|
194189
|
-
|
|
194577
|
+
metadata_writer = make_unique<MetaBlockWriter>(block_manager);
|
|
194578
|
+
table_metadata_writer = make_unique<MetaBlockWriter>(block_manager);
|
|
194190
194579
|
|
|
194191
194580
|
// get the id of the first meta block
|
|
194192
|
-
block_id_t meta_block = metadata_writer->
|
|
194581
|
+
block_id_t meta_block = metadata_writer->GetBlockPointer().block_id;
|
|
194193
194582
|
|
|
194194
194583
|
vector<SchemaCatalogEntry *> schemas;
|
|
194195
194584
|
// we scan the set of committed schemas
|
|
@@ -194201,10 +194590,10 @@ void CheckpointManager::CreateCheckpoint() {
|
|
|
194201
194590
|
for (auto &schema : schemas) {
|
|
194202
194591
|
WriteSchema(*schema);
|
|
194203
194592
|
}
|
|
194204
|
-
|
|
194593
|
+
partial_block_manager.FlushPartialBlocks();
|
|
194205
194594
|
// flush the meta data to disk
|
|
194206
194595
|
metadata_writer->Flush();
|
|
194207
|
-
|
|
194596
|
+
table_metadata_writer->Flush();
|
|
194208
194597
|
|
|
194209
194598
|
// write a checkpoint flag to the WAL
|
|
194210
194599
|
// this protects against the rare event that the database crashes AFTER writing the file, but BEFORE truncating the
|
|
@@ -194232,39 +194621,39 @@ void CheckpointManager::CreateCheckpoint() {
|
|
|
194232
194621
|
wal->Truncate(0);
|
|
194233
194622
|
|
|
194234
194623
|
// mark all blocks written as part of the metadata as modified
|
|
194235
|
-
|
|
194236
|
-
|
|
194237
|
-
}
|
|
194238
|
-
for (auto &block_id : tabledata_writer->written_blocks) {
|
|
194239
|
-
block_manager.MarkBlockAsModified(block_id);
|
|
194240
|
-
}
|
|
194624
|
+
metadata_writer->MarkWrittenBlocks();
|
|
194625
|
+
table_metadata_writer->MarkWrittenBlocks();
|
|
194241
194626
|
}
|
|
194242
194627
|
|
|
194243
|
-
void
|
|
194244
|
-
auto &block_manager =
|
|
194628
|
+
void SingleFileCheckpointReader::LoadFromStorage() {
|
|
194629
|
+
auto &block_manager = *storage.block_manager;
|
|
194245
194630
|
block_id_t meta_block = block_manager.GetMetaBlock();
|
|
194246
194631
|
if (meta_block < 0) {
|
|
194247
194632
|
// storage is empty
|
|
194248
194633
|
return;
|
|
194249
194634
|
}
|
|
194250
194635
|
|
|
194251
|
-
Connection con(db);
|
|
194636
|
+
Connection con(storage.db);
|
|
194252
194637
|
con.BeginTransaction();
|
|
194253
194638
|
// create the MetaBlockReader to read from the storage
|
|
194254
|
-
MetaBlockReader reader(
|
|
194639
|
+
MetaBlockReader reader(block_manager, meta_block);
|
|
194640
|
+
LoadCheckpoint(*con.context, reader);
|
|
194641
|
+
con.Commit();
|
|
194642
|
+
}
|
|
194643
|
+
|
|
194644
|
+
void CheckpointReader::LoadCheckpoint(ClientContext &context, MetaBlockReader &reader) {
|
|
194255
194645
|
uint32_t schema_count = reader.Read<uint32_t>();
|
|
194256
194646
|
for (uint32_t i = 0; i < schema_count; i++) {
|
|
194257
|
-
ReadSchema(
|
|
194647
|
+
ReadSchema(context, reader);
|
|
194258
194648
|
}
|
|
194259
|
-
con.Commit();
|
|
194260
194649
|
}
|
|
194261
194650
|
|
|
194262
194651
|
//===--------------------------------------------------------------------===//
|
|
194263
194652
|
// Schema
|
|
194264
194653
|
//===--------------------------------------------------------------------===//
|
|
194265
|
-
void
|
|
194654
|
+
void CheckpointWriter::WriteSchema(SchemaCatalogEntry &schema) {
|
|
194266
194655
|
// write the schema data
|
|
194267
|
-
schema.Serialize(
|
|
194656
|
+
schema.Serialize(GetMetaBlockWriter());
|
|
194268
194657
|
// then, we fetch the tables/views/sequences information
|
|
194269
194658
|
vector<TableCatalogEntry *> tables;
|
|
194270
194659
|
vector<ViewCatalogEntry *> views;
|
|
@@ -194322,7 +194711,7 @@ void CheckpointManager::WriteSchema(SchemaCatalogEntry &schema) {
|
|
|
194322
194711
|
indexes.push_back((IndexCatalogEntry *)entry);
|
|
194323
194712
|
});
|
|
194324
194713
|
|
|
194325
|
-
FieldWriter writer(
|
|
194714
|
+
FieldWriter writer(GetMetaBlockWriter());
|
|
194326
194715
|
writer.WriteField<uint32_t>(custom_types.size());
|
|
194327
194716
|
writer.WriteField<uint32_t>(sequences.size());
|
|
194328
194717
|
writer.WriteField<uint32_t>(tables.size());
|
|
@@ -194367,8 +194756,8 @@ void CheckpointManager::WriteSchema(SchemaCatalogEntry &schema) {
|
|
|
194367
194756
|
}
|
|
194368
194757
|
}
|
|
194369
194758
|
|
|
194370
|
-
void
|
|
194371
|
-
auto &catalog = Catalog::GetCatalog(
|
|
194759
|
+
void CheckpointReader::ReadSchema(ClientContext &context, MetaBlockReader &reader) {
|
|
194760
|
+
auto &catalog = Catalog::GetCatalog(context);
|
|
194372
194761
|
|
|
194373
194762
|
// read the schema and create it in the catalog
|
|
194374
194763
|
auto info = SchemaCatalogEntry::Deserialize(reader);
|
|
@@ -194421,51 +194810,52 @@ void CheckpointManager::ReadSchema(ClientContext &context, MetaBlockReader &read
|
|
|
194421
194810
|
//===--------------------------------------------------------------------===//
|
|
194422
194811
|
// Views
|
|
194423
194812
|
//===--------------------------------------------------------------------===//
|
|
194424
|
-
void
|
|
194425
|
-
view.Serialize(
|
|
194813
|
+
void CheckpointWriter::WriteView(ViewCatalogEntry &view) {
|
|
194814
|
+
view.Serialize(GetMetaBlockWriter());
|
|
194426
194815
|
}
|
|
194427
194816
|
|
|
194428
|
-
void
|
|
194817
|
+
void CheckpointReader::ReadView(ClientContext &context, MetaBlockReader &reader) {
|
|
194429
194818
|
auto info = ViewCatalogEntry::Deserialize(reader, context);
|
|
194430
194819
|
|
|
194431
|
-
auto &catalog = Catalog::GetCatalog(
|
|
194820
|
+
auto &catalog = Catalog::GetCatalog(context);
|
|
194432
194821
|
catalog.CreateView(context, info.get());
|
|
194433
194822
|
}
|
|
194434
194823
|
|
|
194435
194824
|
//===--------------------------------------------------------------------===//
|
|
194436
194825
|
// Sequences
|
|
194437
194826
|
//===--------------------------------------------------------------------===//
|
|
194438
|
-
void
|
|
194439
|
-
seq.Serialize(
|
|
194827
|
+
void CheckpointWriter::WriteSequence(SequenceCatalogEntry &seq) {
|
|
194828
|
+
seq.Serialize(GetMetaBlockWriter());
|
|
194440
194829
|
}
|
|
194441
194830
|
|
|
194442
|
-
void
|
|
194831
|
+
void CheckpointReader::ReadSequence(ClientContext &context, MetaBlockReader &reader) {
|
|
194443
194832
|
auto info = SequenceCatalogEntry::Deserialize(reader);
|
|
194444
194833
|
|
|
194445
|
-
auto &catalog = Catalog::GetCatalog(
|
|
194834
|
+
auto &catalog = Catalog::GetCatalog(context);
|
|
194446
194835
|
catalog.CreateSequence(context, info.get());
|
|
194447
194836
|
}
|
|
194448
194837
|
|
|
194449
194838
|
//===--------------------------------------------------------------------===//
|
|
194450
194839
|
// Indexes
|
|
194451
194840
|
//===--------------------------------------------------------------------===//
|
|
194452
|
-
void
|
|
194841
|
+
void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
|
|
194453
194842
|
// Write the index data and metadata
|
|
194454
194843
|
// Serialize the necessary meta data for index catalog construction.
|
|
194455
|
-
auto root_offset = index_catalog
|
|
194456
|
-
|
|
194844
|
+
auto root_offset = WriteIndexData(index_catalog);
|
|
194845
|
+
auto &metadata_writer = GetMetaBlockWriter();
|
|
194846
|
+
index_catalog.Serialize(metadata_writer);
|
|
194457
194847
|
// Serialize the Block id and offset of root node
|
|
194458
|
-
metadata_writer
|
|
194459
|
-
metadata_writer
|
|
194848
|
+
metadata_writer.Write(root_offset.block_id);
|
|
194849
|
+
metadata_writer.Write(root_offset.offset);
|
|
194460
194850
|
}
|
|
194461
194851
|
|
|
194462
|
-
void
|
|
194852
|
+
void CheckpointReader::ReadIndex(ClientContext &context, MetaBlockReader &reader) {
|
|
194463
194853
|
|
|
194464
194854
|
// Deserialize the index meta data
|
|
194465
194855
|
auto info = IndexCatalogEntry::Deserialize(reader, context);
|
|
194466
194856
|
|
|
194467
194857
|
// Create index in the catalog
|
|
194468
|
-
auto &catalog = Catalog::GetCatalog(
|
|
194858
|
+
auto &catalog = Catalog::GetCatalog(context);
|
|
194469
194859
|
auto schema_catalog = catalog.GetSchema(context, info->schema);
|
|
194470
194860
|
auto table_catalog =
|
|
194471
194861
|
(TableCatalogEntry *)catalog.GetEntry(context, CatalogType::TABLE_ENTRY, info->schema, info->table->table_name);
|
|
@@ -194506,8 +194896,9 @@ void CheckpointManager::ReadIndex(ClientContext &context, MetaBlockReader &reade
|
|
|
194506
194896
|
|
|
194507
194897
|
switch (info->index_type) {
|
|
194508
194898
|
case IndexType::ART: {
|
|
194509
|
-
auto art =
|
|
194510
|
-
|
|
194899
|
+
auto art =
|
|
194900
|
+
make_unique<ART>(info->column_ids, TableIOManager::Get(*table_catalog->storage), move(unbound_expressions),
|
|
194901
|
+
info->constraint_type, *context.db, root_block_id, root_offset);
|
|
194511
194902
|
index_catalog->index = art.get();
|
|
194512
194903
|
table_catalog->storage->info->indexes.AddIndex(move(art));
|
|
194513
194904
|
break;
|
|
@@ -194520,52 +194911,53 @@ void CheckpointManager::ReadIndex(ClientContext &context, MetaBlockReader &reade
|
|
|
194520
194911
|
//===--------------------------------------------------------------------===//
|
|
194521
194912
|
// Custom Types
|
|
194522
194913
|
//===--------------------------------------------------------------------===//
|
|
194523
|
-
void
|
|
194524
|
-
table.Serialize(
|
|
194914
|
+
void CheckpointWriter::WriteType(TypeCatalogEntry &table) {
|
|
194915
|
+
table.Serialize(GetMetaBlockWriter());
|
|
194525
194916
|
}
|
|
194526
194917
|
|
|
194527
|
-
void
|
|
194918
|
+
void CheckpointReader::ReadType(ClientContext &context, MetaBlockReader &reader) {
|
|
194528
194919
|
auto info = TypeCatalogEntry::Deserialize(reader);
|
|
194529
194920
|
|
|
194530
|
-
auto &catalog = Catalog::GetCatalog(
|
|
194921
|
+
auto &catalog = Catalog::GetCatalog(context);
|
|
194531
194922
|
catalog.CreateType(context, info.get());
|
|
194532
194923
|
}
|
|
194533
194924
|
|
|
194534
194925
|
//===--------------------------------------------------------------------===//
|
|
194535
194926
|
// Macro's
|
|
194536
194927
|
//===--------------------------------------------------------------------===//
|
|
194537
|
-
void
|
|
194538
|
-
macro.Serialize(
|
|
194928
|
+
void CheckpointWriter::WriteMacro(ScalarMacroCatalogEntry &macro) {
|
|
194929
|
+
macro.Serialize(GetMetaBlockWriter());
|
|
194539
194930
|
}
|
|
194540
194931
|
|
|
194541
|
-
void
|
|
194932
|
+
void CheckpointReader::ReadMacro(ClientContext &context, MetaBlockReader &reader) {
|
|
194542
194933
|
auto info = ScalarMacroCatalogEntry::Deserialize(reader, context);
|
|
194543
|
-
auto &catalog = Catalog::GetCatalog(
|
|
194934
|
+
auto &catalog = Catalog::GetCatalog(context);
|
|
194544
194935
|
catalog.CreateFunction(context, info.get());
|
|
194545
194936
|
}
|
|
194546
194937
|
|
|
194547
|
-
void
|
|
194548
|
-
macro.Serialize(
|
|
194938
|
+
void CheckpointWriter::WriteTableMacro(TableMacroCatalogEntry &macro) {
|
|
194939
|
+
macro.Serialize(GetMetaBlockWriter());
|
|
194549
194940
|
}
|
|
194550
194941
|
|
|
194551
|
-
void
|
|
194942
|
+
void CheckpointReader::ReadTableMacro(ClientContext &context, MetaBlockReader &reader) {
|
|
194552
194943
|
auto info = TableMacroCatalogEntry::Deserialize(reader, context);
|
|
194553
|
-
auto &catalog = Catalog::GetCatalog(
|
|
194944
|
+
auto &catalog = Catalog::GetCatalog(context);
|
|
194554
194945
|
catalog.CreateFunction(context, info.get());
|
|
194555
194946
|
}
|
|
194556
194947
|
|
|
194557
194948
|
//===--------------------------------------------------------------------===//
|
|
194558
194949
|
// Table Metadata
|
|
194559
194950
|
//===--------------------------------------------------------------------===//
|
|
194560
|
-
void
|
|
194951
|
+
void CheckpointWriter::WriteTable(TableCatalogEntry &table) {
|
|
194561
194952
|
// write the table meta data
|
|
194562
|
-
table.Serialize(
|
|
194563
|
-
// now we need to write the table data
|
|
194564
|
-
|
|
194565
|
-
|
|
194953
|
+
table.Serialize(GetMetaBlockWriter());
|
|
194954
|
+
// now we need to write the table data.
|
|
194955
|
+
if (auto writer = GetTableDataWriter(table)) {
|
|
194956
|
+
writer->WriteTableData();
|
|
194957
|
+
}
|
|
194566
194958
|
}
|
|
194567
194959
|
|
|
194568
|
-
void
|
|
194960
|
+
void CheckpointReader::ReadTable(ClientContext &context, MetaBlockReader &reader) {
|
|
194569
194961
|
// deserialize the table meta data
|
|
194570
194962
|
auto info = TableCatalogEntry::Deserialize(reader, context);
|
|
194571
194963
|
// bind the info
|
|
@@ -194573,11 +194965,22 @@ void CheckpointManager::ReadTable(ClientContext &context, MetaBlockReader &reade
|
|
|
194573
194965
|
auto bound_info = binder->BindCreateTableInfo(move(info));
|
|
194574
194966
|
|
|
194575
194967
|
// now read the actual table data and place it into the create table info
|
|
194968
|
+
ReadTableData(context, reader, *bound_info);
|
|
194969
|
+
|
|
194970
|
+
// finally create the table in the catalog
|
|
194971
|
+
auto &catalog = Catalog::GetCatalog(context);
|
|
194972
|
+
catalog.CreateTable(context, bound_info.get());
|
|
194973
|
+
}
|
|
194974
|
+
|
|
194975
|
+
void CheckpointReader::ReadTableData(ClientContext &context, MetaBlockReader &reader,
|
|
194976
|
+
BoundCreateTableInfo &bound_info) {
|
|
194576
194977
|
auto block_id = reader.Read<block_id_t>();
|
|
194577
194978
|
auto offset = reader.Read<uint64_t>();
|
|
194578
|
-
|
|
194979
|
+
|
|
194980
|
+
MetaBlockReader table_data_reader(reader.block_manager, block_id);
|
|
194579
194981
|
table_data_reader.offset = offset;
|
|
194580
|
-
TableDataReader data_reader(table_data_reader,
|
|
194982
|
+
TableDataReader data_reader(table_data_reader, bound_info);
|
|
194983
|
+
|
|
194581
194984
|
data_reader.ReadTableData();
|
|
194582
194985
|
|
|
194583
194986
|
// Get any indexes block info
|
|
@@ -194585,82 +194988,7 @@ void CheckpointManager::ReadTable(ClientContext &context, MetaBlockReader &reade
|
|
|
194585
194988
|
for (idx_t i = 0; i < num_indexes; i++) {
|
|
194586
194989
|
auto idx_block_id = reader.Read<idx_t>();
|
|
194587
194990
|
auto idx_offset = reader.Read<idx_t>();
|
|
194588
|
-
bound_info
|
|
194589
|
-
}
|
|
194590
|
-
|
|
194591
|
-
// finally create the table in the catalog
|
|
194592
|
-
auto &catalog = Catalog::GetCatalog(db);
|
|
194593
|
-
catalog.CreateTable(context, bound_info.get());
|
|
194594
|
-
}
|
|
194595
|
-
|
|
194596
|
-
//===--------------------------------------------------------------------===//
|
|
194597
|
-
// Partial Blocks
|
|
194598
|
-
//===--------------------------------------------------------------------===//
|
|
194599
|
-
bool CheckpointManager::GetPartialBlock(ColumnSegment *segment, idx_t segment_size, block_id_t &block_id,
|
|
194600
|
-
uint32_t &offset_in_block, PartialBlock *&partial_block_ptr,
|
|
194601
|
-
unique_ptr<PartialBlock> &owned_partial_block) {
|
|
194602
|
-
auto entry = partially_filled_blocks.lower_bound(segment_size);
|
|
194603
|
-
if (entry == partially_filled_blocks.end()) {
|
|
194604
|
-
return false;
|
|
194605
|
-
}
|
|
194606
|
-
// found a partially filled block! fill in the info
|
|
194607
|
-
auto partial_block = move(entry->second);
|
|
194608
|
-
partial_block_ptr = partial_block.get();
|
|
194609
|
-
block_id = partial_block->block_id;
|
|
194610
|
-
offset_in_block = Storage::BLOCK_SIZE - entry->first;
|
|
194611
|
-
partially_filled_blocks.erase(entry);
|
|
194612
|
-
PartialColumnSegment partial_segment;
|
|
194613
|
-
partial_segment.segment = segment;
|
|
194614
|
-
partial_segment.offset_in_block = offset_in_block;
|
|
194615
|
-
partial_block->segments.push_back(partial_segment);
|
|
194616
|
-
|
|
194617
|
-
D_ASSERT(offset_in_block > 0);
|
|
194618
|
-
D_ASSERT(ValueIsAligned(offset_in_block));
|
|
194619
|
-
|
|
194620
|
-
// check if the block is STILL partially filled after adding the segment_size
|
|
194621
|
-
auto new_size = AlignValue(offset_in_block + segment_size);
|
|
194622
|
-
if (new_size <= CheckpointManager::PARTIAL_BLOCK_THRESHOLD) {
|
|
194623
|
-
// the block is still partially filled: add it to the partially_filled_blocks list
|
|
194624
|
-
auto new_space_left = Storage::BLOCK_SIZE - new_size;
|
|
194625
|
-
partially_filled_blocks.insert(make_pair(new_space_left, move(partial_block)));
|
|
194626
|
-
// should not write the block yet: perhaps more columns will be added
|
|
194627
|
-
} else {
|
|
194628
|
-
// we are done with this block after the current write: write it to disk
|
|
194629
|
-
owned_partial_block = move(partial_block);
|
|
194630
|
-
}
|
|
194631
|
-
return true;
|
|
194632
|
-
}
|
|
194633
|
-
|
|
194634
|
-
void CheckpointManager::RegisterPartialBlock(ColumnSegment *segment, idx_t segment_size, block_id_t block_id) {
|
|
194635
|
-
D_ASSERT(segment_size <= CheckpointManager::PARTIAL_BLOCK_THRESHOLD);
|
|
194636
|
-
auto partial_block = make_unique<PartialBlock>();
|
|
194637
|
-
partial_block->block_id = block_id;
|
|
194638
|
-
partial_block->block = segment->block;
|
|
194639
|
-
|
|
194640
|
-
PartialColumnSegment partial_segment;
|
|
194641
|
-
partial_segment.segment = segment;
|
|
194642
|
-
partial_segment.offset_in_block = 0;
|
|
194643
|
-
partial_block->segments.push_back(partial_segment);
|
|
194644
|
-
auto space_left = Storage::BLOCK_SIZE - AlignValue(segment_size);
|
|
194645
|
-
partially_filled_blocks.insert(make_pair(space_left, move(partial_block)));
|
|
194646
|
-
}
|
|
194647
|
-
|
|
194648
|
-
void CheckpointManager::FlushPartialSegments() {
|
|
194649
|
-
for (auto &entry : partially_filled_blocks) {
|
|
194650
|
-
entry.second->FlushToDisk(db);
|
|
194651
|
-
}
|
|
194652
|
-
}
|
|
194653
|
-
|
|
194654
|
-
void PartialBlock::FlushToDisk(DatabaseInstance &db) {
|
|
194655
|
-
auto &block_manager = BlockManager::GetBlockManager(db);
|
|
194656
|
-
|
|
194657
|
-
// the data for the block might already exists in-memory of our block
|
|
194658
|
-
// instead of copying the data we alter some metadata so the buffer points to an on-disk block
|
|
194659
|
-
block = block_manager.ConvertToPersistent(block_id, move(block));
|
|
194660
|
-
|
|
194661
|
-
// now set this block as the block for all segments
|
|
194662
|
-
for (auto &seg : segments) {
|
|
194663
|
-
seg.segment->ConvertToPersistent(block, block_id, seg.offset_in_block);
|
|
194991
|
+
bound_info.indexes.emplace_back(idx_block_id, idx_offset);
|
|
194664
194992
|
}
|
|
194665
194993
|
}
|
|
194666
194994
|
|
|
@@ -196111,47 +196439,6 @@ private:
|
|
|
196111
196439
|
|
|
196112
196440
|
|
|
196113
196441
|
|
|
196114
|
-
//===----------------------------------------------------------------------===//
|
|
196115
|
-
// DuckDB
|
|
196116
|
-
//
|
|
196117
|
-
// duckdb/storage/table/column_checkpoint_state.hpp
|
|
196118
|
-
//
|
|
196119
|
-
//
|
|
196120
|
-
//===----------------------------------------------------------------------===//
|
|
196121
|
-
|
|
196122
|
-
|
|
196123
|
-
|
|
196124
|
-
|
|
196125
|
-
|
|
196126
|
-
|
|
196127
|
-
|
|
196128
|
-
|
|
196129
|
-
|
|
196130
|
-
namespace duckdb {
|
|
196131
|
-
class ColumnData;
|
|
196132
|
-
class DatabaseInstance;
|
|
196133
|
-
class RowGroup;
|
|
196134
|
-
class TableDataWriter;
|
|
196135
|
-
|
|
196136
|
-
struct ColumnCheckpointState {
|
|
196137
|
-
ColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, TableDataWriter &writer);
|
|
196138
|
-
virtual ~ColumnCheckpointState();
|
|
196139
|
-
|
|
196140
|
-
RowGroup &row_group;
|
|
196141
|
-
ColumnData &column_data;
|
|
196142
|
-
TableDataWriter &writer;
|
|
196143
|
-
SegmentTree new_tree;
|
|
196144
|
-
vector<DataPointer> data_pointers;
|
|
196145
|
-
unique_ptr<BaseStatistics> global_stats;
|
|
196146
|
-
|
|
196147
|
-
public:
|
|
196148
|
-
virtual unique_ptr<BaseStatistics> GetStatistics();
|
|
196149
|
-
|
|
196150
|
-
virtual void FlushSegment(unique_ptr<ColumnSegment> segment, idx_t segment_size);
|
|
196151
|
-
virtual void FlushToDisk();
|
|
196152
|
-
};
|
|
196153
|
-
|
|
196154
|
-
} // namespace duckdb
|
|
196155
196442
|
|
|
196156
196443
|
|
|
196157
196444
|
|
|
@@ -196160,6 +196447,7 @@ class ColumnData;
|
|
|
196160
196447
|
class ColumnSegment;
|
|
196161
196448
|
class DatabaseInstance;
|
|
196162
196449
|
class RowGroup;
|
|
196450
|
+
class RowGroupWriter;
|
|
196163
196451
|
class TableDataWriter;
|
|
196164
196452
|
struct TransactionData;
|
|
196165
196453
|
|
|
@@ -196174,11 +196462,13 @@ class ColumnData {
|
|
|
196174
196462
|
friend class ColumnDataCheckpointer;
|
|
196175
196463
|
|
|
196176
196464
|
public:
|
|
196177
|
-
ColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type,
|
|
196465
|
+
ColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row, LogicalType type,
|
|
196466
|
+
ColumnData *parent);
|
|
196178
196467
|
ColumnData(ColumnData &other, idx_t start, ColumnData *parent);
|
|
196179
|
-
|
|
196180
196468
|
virtual ~ColumnData();
|
|
196181
196469
|
|
|
196470
|
+
//! The block manager
|
|
196471
|
+
BlockManager &block_manager;
|
|
196182
196472
|
//! Table info for the column
|
|
196183
196473
|
DataTableInfo &info;
|
|
196184
196474
|
//! The column index of the column, either within the parent table or within the parent
|
|
@@ -196242,25 +196532,27 @@ public:
|
|
|
196242
196532
|
|
|
196243
196533
|
virtual void CommitDropColumn();
|
|
196244
196534
|
|
|
196245
|
-
virtual unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group,
|
|
196246
|
-
virtual unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group,
|
|
196535
|
+
virtual unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group, RowGroupWriter &writer);
|
|
196536
|
+
virtual unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group, RowGroupWriter &writer,
|
|
196247
196537
|
ColumnCheckpointInfo &checkpoint_info);
|
|
196248
196538
|
|
|
196249
196539
|
virtual void CheckpointScan(ColumnSegment *segment, ColumnScanState &state, idx_t row_group_start, idx_t count,
|
|
196250
196540
|
Vector &scan_vector);
|
|
196251
196541
|
|
|
196252
196542
|
virtual void DeserializeColumn(Deserializer &source);
|
|
196253
|
-
static shared_ptr<ColumnData> Deserialize(DataTableInfo &info, idx_t column_index,
|
|
196254
|
-
Deserializer &source, const LogicalType &type,
|
|
196543
|
+
static shared_ptr<ColumnData> Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
|
|
196544
|
+
idx_t start_row, Deserializer &source, const LogicalType &type,
|
|
196545
|
+
ColumnData *parent);
|
|
196255
196546
|
|
|
196256
196547
|
virtual void GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, vector<vector<Value>> &result);
|
|
196257
196548
|
virtual void Verify(RowGroup &parent);
|
|
196258
196549
|
|
|
196259
|
-
static shared_ptr<ColumnData> CreateColumn(DataTableInfo &info, idx_t column_index,
|
|
196260
|
-
const LogicalType &type, ColumnData *parent = nullptr);
|
|
196550
|
+
static shared_ptr<ColumnData> CreateColumn(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
|
|
196551
|
+
idx_t start_row, const LogicalType &type, ColumnData *parent = nullptr);
|
|
196261
196552
|
static shared_ptr<ColumnData> CreateColumn(ColumnData &other, idx_t start_row, ColumnData *parent = nullptr);
|
|
196262
|
-
static unique_ptr<ColumnData> CreateColumnUnique(
|
|
196263
|
-
const LogicalType &type,
|
|
196553
|
+
static unique_ptr<ColumnData> CreateColumnUnique(BlockManager &block_manager, DataTableInfo &info,
|
|
196554
|
+
idx_t column_index, idx_t start_row, const LogicalType &type,
|
|
196555
|
+
ColumnData *parent = nullptr);
|
|
196264
196556
|
static unique_ptr<ColumnData> CreateColumnUnique(ColumnData &other, idx_t start_row, ColumnData *parent = nullptr);
|
|
196265
196557
|
|
|
196266
196558
|
protected:
|
|
@@ -197032,7 +197324,7 @@ public:
|
|
|
197032
197324
|
next_width = 0;
|
|
197033
197325
|
|
|
197034
197326
|
// Reset the pointers into the current segment
|
|
197035
|
-
auto &buffer_manager = BufferManager::GetBufferManager(
|
|
197327
|
+
auto &buffer_manager = BufferManager::GetBufferManager(checkpointer.GetDatabase());
|
|
197036
197328
|
current_handle = buffer_manager.Pin(current_segment->block);
|
|
197037
197329
|
current_dictionary = DictionaryCompressionStorage::GetDictionary(*current_segment, current_handle);
|
|
197038
197330
|
current_end_ptr = current_handle.Ptr() + current_dictionary.end;
|
|
@@ -197116,7 +197408,7 @@ public:
|
|
|
197116
197408
|
}
|
|
197117
197409
|
|
|
197118
197410
|
idx_t Finalize() {
|
|
197119
|
-
auto &buffer_manager = BufferManager::GetBufferManager(
|
|
197411
|
+
auto &buffer_manager = BufferManager::GetBufferManager(checkpointer.GetDatabase());
|
|
197120
197412
|
auto handle = buffer_manager.Pin(current_segment->block);
|
|
197121
197413
|
D_ASSERT(current_dictionary.end == Storage::BLOCK_SIZE);
|
|
197122
197414
|
|
|
@@ -197559,7 +197851,7 @@ void UncompressedCompressState::CreateEmptySegment(idx_t row_start) {
|
|
|
197559
197851
|
auto compressed_segment = ColumnSegment::CreateTransientSegment(db, type, row_start);
|
|
197560
197852
|
if (type.InternalType() == PhysicalType::VARCHAR) {
|
|
197561
197853
|
auto &state = (UncompressedStringSegmentState &)*compressed_segment->GetSegmentState();
|
|
197562
|
-
state.overflow_writer = make_unique<WriteOverflowStringsToDisk>(
|
|
197854
|
+
state.overflow_writer = make_unique<WriteOverflowStringsToDisk>(checkpointer.GetColumnData().block_manager);
|
|
197563
197855
|
}
|
|
197564
197856
|
current_segment = move(compressed_segment);
|
|
197565
197857
|
}
|
|
@@ -199357,8 +199649,8 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
|
|
|
199357
199649
|
D_ASSERT(block != INVALID_BLOCK);
|
|
199358
199650
|
D_ASSERT(offset < Storage::BLOCK_SIZE);
|
|
199359
199651
|
|
|
199360
|
-
auto &block_manager =
|
|
199361
|
-
auto &buffer_manager =
|
|
199652
|
+
auto &block_manager = segment.GetBlockManager();
|
|
199653
|
+
auto &buffer_manager = block_manager.buffer_manager;
|
|
199362
199654
|
auto &state = (UncompressedStringSegmentState &)*segment.GetSegmentState();
|
|
199363
199655
|
if (block < MAXIMUM_BLOCK) {
|
|
199364
199656
|
// read the overflow string from disk
|
|
@@ -200042,7 +200334,8 @@ namespace duckdb {
|
|
|
200042
200334
|
//! Validity column data represents the validity data (i.e. which values are null)
|
|
200043
200335
|
class ValidityColumnData : public ColumnData {
|
|
200044
200336
|
public:
|
|
200045
|
-
ValidityColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
200337
|
+
ValidityColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
200338
|
+
ColumnData *parent);
|
|
200046
200339
|
ValidityColumnData(ColumnData &original, idx_t start_row, ColumnData *parent = nullptr);
|
|
200047
200340
|
|
|
200048
200341
|
public:
|
|
@@ -200057,8 +200350,8 @@ namespace duckdb {
|
|
|
200057
200350
|
//! Standard column data represents a regular flat column (e.g. a column of type INTEGER or STRING)
|
|
200058
200351
|
class StandardColumnData : public ColumnData {
|
|
200059
200352
|
public:
|
|
200060
|
-
StandardColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
200061
|
-
ColumnData *parent = nullptr);
|
|
200353
|
+
StandardColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
200354
|
+
LogicalType type, ColumnData *parent = nullptr);
|
|
200062
200355
|
StandardColumnData(ColumnData &original, idx_t start_row, ColumnData *parent = nullptr);
|
|
200063
200356
|
|
|
200064
200357
|
//! The validity column data
|
|
@@ -200088,8 +200381,8 @@ public:
|
|
|
200088
200381
|
|
|
200089
200382
|
void CommitDropColumn() override;
|
|
200090
200383
|
|
|
200091
|
-
unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group,
|
|
200092
|
-
unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group,
|
|
200384
|
+
unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group, RowGroupWriter &writer) override;
|
|
200385
|
+
unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group, RowGroupWriter &writer,
|
|
200093
200386
|
ColumnCheckpointInfo &checkpoint_info) override;
|
|
200094
200387
|
void CheckpointScan(ColumnSegment *segment, ColumnScanState &state, idx_t row_group_start, idx_t count,
|
|
200095
200388
|
Vector &scan_vector) override;
|
|
@@ -200112,13 +200405,15 @@ private:
|
|
|
200112
200405
|
|
|
200113
200406
|
namespace duckdb {
|
|
200114
200407
|
|
|
200115
|
-
DataTable::DataTable(DatabaseInstance &db,
|
|
200116
|
-
vector<ColumnDefinition> column_definitions_p,
|
|
200117
|
-
|
|
200118
|
-
|
|
200408
|
+
DataTable::DataTable(DatabaseInstance &db, shared_ptr<TableIOManager> table_io_manager_p, const string &schema,
|
|
200409
|
+
const string &table, vector<ColumnDefinition> column_definitions_p,
|
|
200410
|
+
unique_ptr<PersistentTableData> data)
|
|
200411
|
+
: info(make_shared<DataTableInfo>(db, move(table_io_manager_p), schema, table)),
|
|
200412
|
+
column_definitions(move(column_definitions_p)), db(db), is_root(true) {
|
|
200119
200413
|
// initialize the table with the existing data from disk, if any
|
|
200120
200414
|
auto types = GetTypes();
|
|
200121
|
-
this->row_groups =
|
|
200415
|
+
this->row_groups =
|
|
200416
|
+
make_shared<RowGroupCollection>(info, TableIOManager::Get(*this).GetBlockManagerForRowData(), types, 0);
|
|
200122
200417
|
if (data && !data->row_groups.empty()) {
|
|
200123
200418
|
this->row_groups->Initialize(*data);
|
|
200124
200419
|
stats.Initialize(types, *data);
|
|
@@ -200268,6 +200563,10 @@ vector<LogicalType> DataTable::GetTypes() {
|
|
|
200268
200563
|
return types;
|
|
200269
200564
|
}
|
|
200270
200565
|
|
|
200566
|
+
TableIOManager &TableIOManager::Get(DataTable &table) {
|
|
200567
|
+
return *table.info->table_io_manager;
|
|
200568
|
+
}
|
|
200569
|
+
|
|
200271
200570
|
//===--------------------------------------------------------------------===//
|
|
200272
200571
|
// Scan
|
|
200273
200572
|
//===--------------------------------------------------------------------===//
|
|
@@ -201006,38 +201305,14 @@ void DataTable::Checkpoint(TableDataWriter &writer) {
|
|
|
201006
201305
|
global_stats.push_back(stats.CopyStats(i));
|
|
201007
201306
|
}
|
|
201008
201307
|
|
|
201009
|
-
|
|
201010
|
-
row_groups->Checkpoint(writer, row_group_pointers, global_stats);
|
|
201011
|
-
|
|
201012
|
-
// store the current position in the metadata writer
|
|
201013
|
-
// this is where the row groups for this table start
|
|
201014
|
-
auto &data_writer = writer.GetTableWriter();
|
|
201015
|
-
auto pointer = data_writer.GetBlockPointer();
|
|
201016
|
-
|
|
201017
|
-
for (auto &stats : global_stats) {
|
|
201018
|
-
stats->Serialize(data_writer);
|
|
201019
|
-
}
|
|
201020
|
-
// now start writing the row group pointers to disk
|
|
201021
|
-
data_writer.Write<uint64_t>(row_group_pointers.size());
|
|
201022
|
-
for (auto &row_group_pointer : row_group_pointers) {
|
|
201023
|
-
RowGroup::Serialize(row_group_pointer, data_writer);
|
|
201024
|
-
}
|
|
201025
|
-
// Now we serialize indexes in the tabledata_writer
|
|
201026
|
-
auto blocks_info = info->indexes.SerializeIndexes(data_writer);
|
|
201027
|
-
|
|
201028
|
-
// metadata writing time
|
|
201029
|
-
auto &metadata_writer = writer.GetMetaWriter();
|
|
201308
|
+
row_groups->Checkpoint(writer, global_stats);
|
|
201030
201309
|
|
|
201031
|
-
//
|
|
201032
|
-
|
|
201033
|
-
|
|
201034
|
-
|
|
201035
|
-
//
|
|
201036
|
-
|
|
201037
|
-
for (auto &block_info : blocks_info) {
|
|
201038
|
-
metadata_writer.Write<idx_t>(block_info.block_id);
|
|
201039
|
-
metadata_writer.Write<idx_t>(block_info.offset);
|
|
201040
|
-
}
|
|
201310
|
+
// The rowgroup payload data has been written. Now write:
|
|
201311
|
+
// column stats
|
|
201312
|
+
// row-group pointers
|
|
201313
|
+
// table pointer
|
|
201314
|
+
// index data
|
|
201315
|
+
writer.FinalizeTable(move(global_stats), info.get());
|
|
201041
201316
|
}
|
|
201042
201317
|
|
|
201043
201318
|
void DataTable::CommitDropColumn(idx_t index) {
|
|
@@ -201071,9 +201346,9 @@ vector<vector<Value>> DataTable::GetStorageInfo() {
|
|
|
201071
201346
|
|
|
201072
201347
|
namespace duckdb {
|
|
201073
201348
|
|
|
201074
|
-
Index::Index(IndexType type, const vector<column_t> &column_ids_p,
|
|
201349
|
+
Index::Index(IndexType type, TableIOManager &table_io_manager, const vector<column_t> &column_ids_p,
|
|
201075
201350
|
const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type_p)
|
|
201076
|
-
: type(type), column_ids(column_ids_p), constraint_type(constraint_type_p),
|
|
201351
|
+
: type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
|
|
201077
201352
|
executor(Allocator::DefaultAllocator()) {
|
|
201078
201353
|
for (auto &expr : unbound_expressions) {
|
|
201079
201354
|
types.push_back(expr->return_type.InternalType());
|
|
@@ -201160,12 +201435,15 @@ BlockPointer Index::Serialize(duckdb::MetaBlockWriter &writer) {
|
|
|
201160
201435
|
|
|
201161
201436
|
|
|
201162
201437
|
|
|
201438
|
+
|
|
201163
201439
|
namespace duckdb {
|
|
201164
201440
|
|
|
201165
201441
|
LocalTableStorage::LocalTableStorage(DataTable &table)
|
|
201166
201442
|
: table(table), allocator(Allocator::Get(table.db)), deleted_rows(0) {
|
|
201167
201443
|
auto types = table.GetTypes();
|
|
201168
|
-
row_groups = make_shared<RowGroupCollection>(table.info,
|
|
201444
|
+
row_groups = make_shared<RowGroupCollection>(table.info, TableIOManager::Get(table).GetBlockManagerForRowData(),
|
|
201445
|
+
types, MAX_ROW_ID, 0);
|
|
201446
|
+
|
|
201169
201447
|
stats.InitializeEmpty(types);
|
|
201170
201448
|
table.info->indexes.Scan([&](Index &index) {
|
|
201171
201449
|
D_ASSERT(index.type == IndexType::ART);
|
|
@@ -201176,7 +201454,8 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
|
|
|
201176
201454
|
for (auto &expr : art.unbound_expressions) {
|
|
201177
201455
|
unbound_expressions.push_back(expr->Copy());
|
|
201178
201456
|
}
|
|
201179
|
-
indexes.AddIndex(make_unique<ART>(art.column_ids, move(unbound_expressions),
|
|
201457
|
+
indexes.AddIndex(make_unique<ART>(art.column_ids, art.table_io_manager, move(unbound_expressions),
|
|
201458
|
+
art.constraint_type, art.db));
|
|
201180
201459
|
}
|
|
201181
201460
|
return false;
|
|
201182
201461
|
});
|
|
@@ -201518,7 +201797,8 @@ void LocalStorage::VerifyNewConstraint(DataTable &parent, const BoundConstraint
|
|
|
201518
201797
|
|
|
201519
201798
|
namespace duckdb {
|
|
201520
201799
|
|
|
201521
|
-
MetaBlockReader::MetaBlockReader(
|
|
201800
|
+
MetaBlockReader::MetaBlockReader(BlockManager &block_manager, block_id_t block_id, bool free_blocks_on_read)
|
|
201801
|
+
: block_manager(block_manager), offset(0), next_block(-1), free_blocks_on_read(free_blocks_on_read) {
|
|
201522
201802
|
ReadNewBlock(block_id);
|
|
201523
201803
|
}
|
|
201524
201804
|
|
|
@@ -201536,6 +201816,9 @@ void MetaBlockReader::ReadData(data_ptr_t buffer, idx_t read_size) {
|
|
|
201536
201816
|
buffer += to_read;
|
|
201537
201817
|
}
|
|
201538
201818
|
// then move to the next block
|
|
201819
|
+
if (next_block == INVALID_BLOCK) {
|
|
201820
|
+
throw IOException("Cannot read from INVALID_BLOCK.");
|
|
201821
|
+
}
|
|
201539
201822
|
ReadNewBlock(next_block);
|
|
201540
201823
|
}
|
|
201541
201824
|
// we have enough left in this block to read from the buffer
|
|
@@ -201544,10 +201827,15 @@ void MetaBlockReader::ReadData(data_ptr_t buffer, idx_t read_size) {
|
|
|
201544
201827
|
}
|
|
201545
201828
|
|
|
201546
201829
|
void MetaBlockReader::ReadNewBlock(block_id_t id) {
|
|
201547
|
-
auto &
|
|
201548
|
-
auto &buffer_manager = BufferManager::GetBufferManager(db);
|
|
201830
|
+
auto &buffer_manager = block_manager.buffer_manager;
|
|
201549
201831
|
|
|
201550
|
-
|
|
201832
|
+
// Marking these blocks as modified will cause them to be moved to the free
|
|
201833
|
+
// list upon the next successful checkpoint. Marking them modified here
|
|
201834
|
+
// assumes MetaBlockReader is exclusively used for reading checkpoint data,
|
|
201835
|
+
// and thus any blocks we're reading will be obviated by the next checkpoint.
|
|
201836
|
+
if (free_blocks_on_read) {
|
|
201837
|
+
block_manager.MarkBlockAsModified(id);
|
|
201838
|
+
}
|
|
201551
201839
|
block = block_manager.RegisterBlock(id);
|
|
201552
201840
|
handle = buffer_manager.Pin(block);
|
|
201553
201841
|
|
|
@@ -201563,28 +201851,26 @@ void MetaBlockReader::ReadNewBlock(block_id_t id) {
|
|
|
201563
201851
|
|
|
201564
201852
|
namespace duckdb {
|
|
201565
201853
|
|
|
201566
|
-
MetaBlockWriter::MetaBlockWriter(
|
|
201854
|
+
MetaBlockWriter::MetaBlockWriter(BlockManager &block_manager, block_id_t initial_block_id)
|
|
201855
|
+
: block_manager(block_manager) {
|
|
201567
201856
|
if (initial_block_id == INVALID_BLOCK) {
|
|
201568
201857
|
initial_block_id = GetNextBlockId();
|
|
201569
201858
|
}
|
|
201570
|
-
|
|
201571
|
-
block = block_manager.CreateBlock(initial_block_id);
|
|
201859
|
+
block = block_manager.CreateBlock(initial_block_id, nullptr);
|
|
201572
201860
|
Store<block_id_t>(-1, block->buffer);
|
|
201573
201861
|
offset = sizeof(block_id_t);
|
|
201574
201862
|
}
|
|
201575
201863
|
|
|
201576
201864
|
MetaBlockWriter::~MetaBlockWriter() {
|
|
201577
|
-
|
|
201578
|
-
|
|
201579
|
-
|
|
201580
|
-
|
|
201581
|
-
|
|
201582
|
-
|
|
201583
|
-
}
|
|
201865
|
+
// If there's an exception during checkpoint, this can get destroyed without
|
|
201866
|
+
// flushing the data...which is fine, because none of the unwritten data
|
|
201867
|
+
// will be referenced.
|
|
201868
|
+
//
|
|
201869
|
+
// Otherwise, we should have explicitly flushed (and thereby nulled the block).
|
|
201870
|
+
D_ASSERT(!block || Exception::UncaughtException());
|
|
201584
201871
|
}
|
|
201585
201872
|
|
|
201586
201873
|
block_id_t MetaBlockWriter::GetNextBlockId() {
|
|
201587
|
-
auto &block_manager = BlockManager::GetBlockManager(db);
|
|
201588
201874
|
return block_manager.GetFreeBlockId();
|
|
201589
201875
|
}
|
|
201590
201876
|
|
|
@@ -201596,9 +201882,13 @@ BlockPointer MetaBlockWriter::GetBlockPointer() {
|
|
|
201596
201882
|
}
|
|
201597
201883
|
|
|
201598
201884
|
void MetaBlockWriter::Flush() {
|
|
201885
|
+
AdvanceBlock();
|
|
201886
|
+
block = nullptr;
|
|
201887
|
+
}
|
|
201888
|
+
|
|
201889
|
+
void MetaBlockWriter::AdvanceBlock() {
|
|
201599
201890
|
written_blocks.insert(block->id);
|
|
201600
201891
|
if (offset > sizeof(block_id_t)) {
|
|
201601
|
-
auto &block_manager = BlockManager::GetBlockManager(db);
|
|
201602
201892
|
block_manager.Write(*block);
|
|
201603
201893
|
offset = sizeof(block_id_t);
|
|
201604
201894
|
}
|
|
@@ -201621,8 +201911,8 @@ void MetaBlockWriter::WriteData(const_data_ptr_t buffer, idx_t write_size) {
|
|
|
201621
201911
|
// write the block id of the new block to the start of the current block
|
|
201622
201912
|
Store<block_id_t>(new_block_id, block->buffer);
|
|
201623
201913
|
// first flush the old block
|
|
201624
|
-
|
|
201625
|
-
// now update the block id of the
|
|
201914
|
+
AdvanceBlock();
|
|
201915
|
+
// now update the block id of the block
|
|
201626
201916
|
block->id = new_block_id;
|
|
201627
201917
|
Store<block_id_t>(-1, block->buffer);
|
|
201628
201918
|
}
|
|
@@ -201630,6 +201920,87 @@ void MetaBlockWriter::WriteData(const_data_ptr_t buffer, idx_t write_size) {
|
|
|
201630
201920
|
offset += write_size;
|
|
201631
201921
|
}
|
|
201632
201922
|
|
|
201923
|
+
} // namespace duckdb
|
|
201924
|
+
|
|
201925
|
+
|
|
201926
|
+
namespace duckdb {
|
|
201927
|
+
|
|
201928
|
+
//===--------------------------------------------------------------------===//
|
|
201929
|
+
// Partial Blocks
|
|
201930
|
+
//===--------------------------------------------------------------------===//
|
|
201931
|
+
PartialBlockAllocation PartialBlockManager::GetBlockAllocation(uint32_t segment_size) {
|
|
201932
|
+
PartialBlockAllocation allocation;
|
|
201933
|
+
allocation.block_manager = &block_manager;
|
|
201934
|
+
allocation.allocation_size = segment_size;
|
|
201935
|
+
|
|
201936
|
+
// if the block is less than 80% full, we consider it a "partial block"
|
|
201937
|
+
// which means we will try to fit it with other blocks
|
|
201938
|
+
// check if there is a partial block available we can write to
|
|
201939
|
+
if (segment_size <= max_partial_block_size && GetPartialBlock(segment_size, allocation.partial_block)) {
|
|
201940
|
+
//! there is! increase the reference count of this block
|
|
201941
|
+
allocation.partial_block->state.block_use_count += 1;
|
|
201942
|
+
allocation.state = allocation.partial_block->state;
|
|
201943
|
+
block_manager.IncreaseBlockReferenceCount(allocation.state.block_id);
|
|
201944
|
+
} else {
|
|
201945
|
+
// full block: get a free block to write to
|
|
201946
|
+
AllocateBlock(allocation.state, segment_size);
|
|
201947
|
+
}
|
|
201948
|
+
return allocation;
|
|
201949
|
+
}
|
|
201950
|
+
|
|
201951
|
+
void PartialBlockManager::AllocateBlock(PartialBlockState &state, uint32_t segment_size) {
|
|
201952
|
+
D_ASSERT(segment_size <= Storage::BLOCK_SIZE);
|
|
201953
|
+
state.block_id = block_manager.GetFreeBlockId();
|
|
201954
|
+
state.block_size = Storage::BLOCK_SIZE;
|
|
201955
|
+
state.offset_in_block = 0;
|
|
201956
|
+
state.block_use_count = 1;
|
|
201957
|
+
}
|
|
201958
|
+
|
|
201959
|
+
//! Look up a partially filled block whose key is at least `segment_size`.
//! On success the block is removed from partially_filled_blocks, handed to the caller through
//! `partial_block`, and true is returned; otherwise `partial_block` is left untouched and false is returned.
bool PartialBlockManager::GetPartialBlock(idx_t segment_size, unique_ptr<PartialBlock> &partial_block) {
	const auto candidate = partially_filled_blocks.lower_bound(segment_size);
	const bool found = candidate != partially_filled_blocks.end();
	if (found) {
		// take ownership of the block and drop its (now empty) map entry
		partial_block = move(candidate->second);
		partially_filled_blocks.erase(candidate);

		// a registered partial block always has data in it, ending on an aligned offset
		D_ASSERT(partial_block->state.offset_in_block > 0);
		D_ASSERT(ValueIsAligned(partial_block->state.offset_in_block));
	}
	return found;
}
|
|
201972
|
+
|
|
201973
|
+
//! Hand a filled-in allocation back to the manager.
//! The block is kept for further reuse when its use count is below max_use_count and enough space
//! remains after accounting for this allocation; otherwise it is flushed right away. If keeping the
//! block grows the map beyond MAX_BLOCK_MAP_SIZE, the first map entry is evicted and flushed instead.
void PartialBlockManager::RegisterPartialBlock(PartialBlockAllocation &&allocation) {
	auto &block_state = allocation.partial_block->state;
	if (block_state.block_use_count < max_use_count) {
		// advance the write offset past the segment that was just placed in the block
		auto aligned_end = AlignValue(allocation.allocation_size + block_state.offset_in_block);
		block_state.offset_in_block = aligned_end;
		auto remaining = block_state.block_size - aligned_end;
		// only keep the block around if it still counts as "partially filled"
		const bool still_partial = remaining >= Storage::BLOCK_SIZE - max_partial_block_size;
		if (still_partial) {
			partially_filled_blocks.insert(make_pair(remaining, move(allocation.partial_block)));
		}
	}

	// if the block was not stored above, the allocation still owns it and it must be flushed
	auto pending_flush = move(allocation.partial_block);
	if (!pending_flush && partially_filled_blocks.size() > MAX_BLOCK_MAP_SIZE) {
		// the map grew too large: evict the first entry (the one with the least space free)
		auto evicted = partially_filled_blocks.begin();
		pending_flush = move(evicted->second);
		partially_filled_blocks.erase(evicted);
	}

	// write out whichever block will not be reused
	if (pending_flush) {
		pending_flush->Flush();
	}
}
|
|
201997
|
+
|
|
201998
|
+
void PartialBlockManager::FlushPartialBlocks() {
|
|
201999
|
+
for (auto &e : partially_filled_blocks) {
|
|
202000
|
+
e.second->Flush();
|
|
202001
|
+
}
|
|
202002
|
+
}
|
|
202003
|
+
|
|
201633
202004
|
} // namespace duckdb
|
|
201634
202005
|
//===----------------------------------------------------------------------===//
|
|
201635
202006
|
// DuckDB
|
|
@@ -201660,9 +202031,8 @@ class SingleFileBlockManager : public BlockManager {
|
|
|
201660
202031
|
public:
|
|
201661
202032
|
SingleFileBlockManager(DatabaseInstance &db, string path, bool read_only, bool create_new, bool use_direct_io);
|
|
201662
202033
|
|
|
201663
|
-
void StartCheckpoint() override;
|
|
201664
202034
|
//! Creates a new Block using the specified block_id and returns a pointer
|
|
201665
|
-
unique_ptr<Block> CreateBlock(block_id_t block_id) override;
|
|
202035
|
+
unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) override;
|
|
201666
202036
|
//! Return the next free block id
|
|
201667
202037
|
block_id_t GetFreeBlockId() override;
|
|
201668
202038
|
//! Returns whether or not a specified block is the root block
|
|
@@ -201688,10 +202058,11 @@ public:
|
|
|
201688
202058
|
idx_t FreeBlocks() override {
|
|
201689
202059
|
return free_list.size();
|
|
201690
202060
|
}
|
|
202061
|
+
|
|
202062
|
+
private:
|
|
201691
202063
|
//! Load the free list from the file
|
|
201692
202064
|
void LoadFreeList();
|
|
201693
202065
|
|
|
201694
|
-
private:
|
|
201695
202066
|
void Initialize(DatabaseHeader &header);
|
|
201696
202067
|
|
|
201697
202068
|
//! Return the blocks to which we will write the free list and modified blocks
|
|
@@ -201911,6 +202282,7 @@ SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path
|
|
|
201911
202282
|
active_header = 1;
|
|
201912
202283
|
Initialize(h2);
|
|
201913
202284
|
}
|
|
202285
|
+
LoadFreeList();
|
|
201914
202286
|
}
|
|
201915
202287
|
}
|
|
201916
202288
|
|
|
@@ -201930,7 +202302,7 @@ void SingleFileBlockManager::LoadFreeList() {
|
|
|
201930
202302
|
// no free list
|
|
201931
202303
|
return;
|
|
201932
202304
|
}
|
|
201933
|
-
MetaBlockReader reader(
|
|
202305
|
+
MetaBlockReader reader(*this, free_list_id);
|
|
201934
202306
|
auto free_list_count = reader.Read<uint64_t>();
|
|
201935
202307
|
free_list.clear();
|
|
201936
202308
|
for (idx_t i = 0; i < free_list_count; i++) {
|
|
@@ -201945,9 +202317,6 @@ void SingleFileBlockManager::LoadFreeList() {
|
|
|
201945
202317
|
}
|
|
201946
202318
|
}
|
|
201947
202319
|
|
|
201948
|
-
void SingleFileBlockManager::StartCheckpoint() {
|
|
201949
|
-
}
|
|
201950
|
-
|
|
201951
202320
|
bool SingleFileBlockManager::IsRootBlock(block_id_t root) {
|
|
201952
202321
|
return root == meta_block;
|
|
201953
202322
|
}
|
|
@@ -201981,6 +202350,10 @@ void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) {
|
|
|
201981
202350
|
}
|
|
201982
202351
|
return;
|
|
201983
202352
|
}
|
|
202353
|
+
// Check for multi-free
|
|
202354
|
+
// TODO: Fix the bug that causes this assert to fire, then uncomment it.
|
|
202355
|
+
// D_ASSERT(modified_blocks.find(block_id) == modified_blocks.end());
|
|
202356
|
+
D_ASSERT(free_list.find(block_id) == free_list.end());
|
|
201984
202357
|
modified_blocks.insert(block_id);
|
|
201985
202358
|
}
|
|
201986
202359
|
|
|
@@ -201998,8 +202371,13 @@ block_id_t SingleFileBlockManager::GetMetaBlock() {
|
|
|
201998
202371
|
return meta_block;
|
|
201999
202372
|
}
|
|
202000
202373
|
|
|
202001
|
-
unique_ptr<Block> SingleFileBlockManager::CreateBlock(block_id_t block_id) {
|
|
202002
|
-
|
|
202374
|
+
//! Create a Block object for `block_id`.
//! When `source_buffer` is non-null the Block is constructed from that buffer (which must have
//! the block allocation size); otherwise the Block is constructed with the database allocator.
unique_ptr<Block> SingleFileBlockManager::CreateBlock(block_id_t block_id, FileBuffer *source_buffer) {
	if (!source_buffer) {
		// no buffer supplied: construct the block using the database allocator
		return make_unique<Block>(Allocator::Get(db), block_id);
	}
	D_ASSERT(source_buffer->AllocSize() == Storage::BLOCK_ALLOC_SIZE);
	return make_unique<Block>(*source_buffer, block_id);
}
|
|
202004
202382
|
|
|
202005
202383
|
void SingleFileBlockManager::Read(Block &block) {
|
|
@@ -202047,8 +202425,8 @@ vector<block_id_t> SingleFileBlockManager::GetFreeListBlocks() {
|
|
|
202047
202425
|
|
|
202048
202426
|
class FreeListBlockWriter : public MetaBlockWriter {
|
|
202049
202427
|
public:
|
|
202050
|
-
FreeListBlockWriter(
|
|
202051
|
-
: MetaBlockWriter(
|
|
202428
|
+
FreeListBlockWriter(BlockManager &block_manager, vector<block_id_t> &free_list_blocks_p)
|
|
202429
|
+
: MetaBlockWriter(block_manager, free_list_blocks_p[0]), free_list_blocks(free_list_blocks_p), index(1) {
|
|
202052
202430
|
}
|
|
202053
202431
|
|
|
202054
202432
|
vector<block_id_t> &free_list_blocks;
|
|
@@ -202083,10 +202461,11 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
|
|
|
202083
202461
|
// a normal MetaBlockWriter will fetch blocks to use from the free_list
|
|
202084
202462
|
// but since we are WRITING the free_list, this behavior is sub-optimal
|
|
202085
202463
|
|
|
202086
|
-
FreeListBlockWriter writer(
|
|
202464
|
+
FreeListBlockWriter writer(*this, free_list_blocks);
|
|
202087
202465
|
|
|
202088
|
-
|
|
202089
|
-
|
|
202466
|
+
auto ptr = writer.GetBlockPointer();
|
|
202467
|
+
D_ASSERT(ptr.block_id == free_list_blocks[0]);
|
|
202468
|
+
header.free_list = ptr.block_id;
|
|
202090
202469
|
for (auto &block_id : free_list_blocks) {
|
|
202091
202470
|
modified_blocks.insert(block_id);
|
|
202092
202471
|
}
|
|
@@ -203268,7 +203647,7 @@ void StorageLock::ReleaseSharedLock() {
|
|
|
203268
203647
|
namespace duckdb {
|
|
203269
203648
|
|
|
203270
203649
|
//! Record the database instance, the storage path and the read-only flag.
//! The constructor body is empty; nothing else is set up here.
StorageManager::StorageManager(DatabaseInstance &db, string path, bool read_only)
    : db(db),
      path(move(path)),
      read_only(read_only) {
}
|
|
203273
203652
|
|
|
203274
203653
|
StorageManager::~StorageManager() {
|
|
@@ -203294,14 +203673,20 @@ bool StorageManager::InMemory() {
|
|
|
203294
203673
|
return path.empty() || path == ":memory:";
|
|
203295
203674
|
}
|
|
203296
203675
|
|
|
203676
|
+
void StorageManager::CreateBufferManager() {
|
|
203677
|
+
auto &config = DBConfig::GetConfig(db);
|
|
203678
|
+
buffer_manager = make_unique<BufferManager>(db, config.options.temporary_directory, config.options.maximum_memory);
|
|
203679
|
+
}
|
|
203680
|
+
|
|
203297
203681
|
void StorageManager::Initialize() {
|
|
203298
203682
|
bool in_memory = InMemory();
|
|
203299
203683
|
if (in_memory && read_only) {
|
|
203300
203684
|
throw CatalogException("Cannot launch in-memory database in read-only mode!");
|
|
203301
203685
|
}
|
|
203686
|
+
CreateBufferManager();
|
|
203687
|
+
|
|
203302
203688
|
auto &config = DBConfig::GetConfig(db);
|
|
203303
203689
|
auto &catalog = Catalog::GetCatalog(db);
|
|
203304
|
-
buffer_manager = make_unique<BufferManager>(db, config.options.temporary_directory, config.options.maximum_memory);
|
|
203305
203690
|
|
|
203306
203691
|
// first initialize the base system catalogs
|
|
203307
203692
|
// these are never written to the WAL
|
|
@@ -203323,15 +203708,38 @@ void StorageManager::Initialize() {
|
|
|
203323
203708
|
// commit transactions
|
|
203324
203709
|
con.Commit();
|
|
203325
203710
|
|
|
203326
|
-
if
|
|
203327
|
-
|
|
203328
|
-
|
|
203329
|
-
|
|
203330
|
-
|
|
203711
|
+
// create or load the database from disk, if not in-memory mode
|
|
203712
|
+
LoadDatabase();
|
|
203713
|
+
}
|
|
203714
|
+
|
|
203715
|
+
///////////////////////////////////////////////////////////////////////////
|
|
203716
|
+
class SingleFileTableIOManager : public TableIOManager {
|
|
203717
|
+
public:
|
|
203718
|
+
explicit SingleFileTableIOManager(BlockManager &block_manager) : block_manager(block_manager) {
|
|
203331
203719
|
}
|
|
203720
|
+
|
|
203721
|
+
BlockManager &block_manager;
|
|
203722
|
+
|
|
203723
|
+
public:
|
|
203724
|
+
BlockManager &GetIndexBlockManager() override {
|
|
203725
|
+
return block_manager;
|
|
203726
|
+
}
|
|
203727
|
+
BlockManager &GetBlockManagerForRowData() override {
|
|
203728
|
+
return block_manager;
|
|
203729
|
+
}
|
|
203730
|
+
};
|
|
203731
|
+
|
|
203732
|
+
//! Forward all arguments to the StorageManager base class; the body is empty.
SingleFileStorageManager::SingleFileStorageManager(DatabaseInstance &db, string path, bool read_only)
    : StorageManager(db, move(path), read_only) {
}
|
|
203333
203735
|
|
|
203334
|
-
void
|
|
203736
|
+
void SingleFileStorageManager::LoadDatabase() {
|
|
203737
|
+
if (InMemory()) {
|
|
203738
|
+
block_manager = make_unique<InMemoryBlockManager>(*buffer_manager);
|
|
203739
|
+
table_io_manager = make_unique<SingleFileTableIOManager>(*block_manager);
|
|
203740
|
+
return;
|
|
203741
|
+
}
|
|
203742
|
+
|
|
203335
203743
|
string wal_path = path + ".wal";
|
|
203336
203744
|
auto &fs = db.GetFileSystem();
|
|
203337
203745
|
auto &config = db.config;
|
|
@@ -203349,15 +203757,14 @@ void StorageManager::LoadDatabase() {
|
|
|
203349
203757
|
}
|
|
203350
203758
|
// initialize the block manager while creating a new db file
|
|
203351
203759
|
block_manager = make_unique<SingleFileBlockManager>(db, path, read_only, true, config.options.use_direct_io);
|
|
203760
|
+
table_io_manager = make_unique<SingleFileTableIOManager>(*block_manager);
|
|
203352
203761
|
} else {
|
|
203353
203762
|
// initialize the block manager while loading the current db file
|
|
203354
|
-
|
|
203355
|
-
|
|
203356
|
-
block_manager = move(sf_bm);
|
|
203357
|
-
sf->LoadFreeList();
|
|
203763
|
+
block_manager = make_unique<SingleFileBlockManager>(db, path, read_only, false, config.options.use_direct_io);
|
|
203764
|
+
table_io_manager = make_unique<SingleFileTableIOManager>(*block_manager);
|
|
203358
203765
|
|
|
203359
203766
|
//! Load from storage
|
|
203360
|
-
|
|
203767
|
+
auto checkpointer = SingleFileCheckpointReader(*this);
|
|
203361
203768
|
checkpointer.LoadFromStorage();
|
|
203362
203769
|
// check if the WAL file exists
|
|
203363
203770
|
if (fs.FileExists(wal_path)) {
|
|
@@ -203367,27 +203774,131 @@ void StorageManager::LoadDatabase() {
|
|
|
203367
203774
|
}
|
|
203368
203775
|
// initialize the WAL file
|
|
203369
203776
|
if (!read_only) {
|
|
203370
|
-
wal
|
|
203777
|
+
wal = make_unique<WriteAheadLog>(db, wal_path);
|
|
203371
203778
|
if (truncate_wal) {
|
|
203372
|
-
wal
|
|
203779
|
+
wal->Truncate(0);
|
|
203373
203780
|
}
|
|
203374
203781
|
}
|
|
203375
203782
|
}
|
|
203376
203783
|
|
|
203377
|
-
|
|
203378
|
-
|
|
203784
|
+
///////////////////////////////////////////////////////////////////////////////
|
|
203785
|
+
|
|
203786
|
+
class SingleFileStorageCommitState : public StorageCommitState {
|
|
203787
|
+
idx_t initial_wal_size = 0;
|
|
203788
|
+
idx_t initial_written = 0;
|
|
203789
|
+
WriteAheadLog *log;
|
|
203790
|
+
bool checkpoint;
|
|
203791
|
+
|
|
203792
|
+
public:
|
|
203793
|
+
SingleFileStorageCommitState(StorageManager &storage_manager, bool checkpoint);
|
|
203794
|
+
~SingleFileStorageCommitState() override;
|
|
203795
|
+
|
|
203796
|
+
// Make the commit persistent
|
|
203797
|
+
void FlushCommit() override;
|
|
203798
|
+
};
|
|
203799
|
+
|
|
203800
|
+
//! Capture the current WAL position and, when a checkpoint follows the commit,
//! switch the WAL into skip-writing mode.
SingleFileStorageCommitState::SingleFileStorageCommitState(StorageManager &storage_manager, bool checkpoint)
    : checkpoint(checkpoint) {
	log = storage_manager.GetWriteAheadLog();
	if (!log) {
		// without a WAL there is nothing to record; a checkpoint is not expected in this case
		D_ASSERT(!checkpoint);
		return;
	}

	const auto wal_bytes = log->GetWALSize();
	initial_written = log->GetTotalWritten();
	// a negative size is treated as an empty WAL
	if (wal_bytes < 0) {
		initial_wal_size = 0;
	} else {
		initial_wal_size = idx_t(wal_bytes);
	}

	if (checkpoint) {
		// a checkpoint follows this commit, so nothing needs to be written to the WAL;
		// this avoids a lot of unnecessary disk writes for large commits
		log->skip_writing = true;
	}
}
|
|
203818
|
+
|
|
203819
|
+
// Make the commit persistent
|
|
203820
|
+
void SingleFileStorageCommitState::FlushCommit() {
|
|
203821
|
+
if (log) {
|
|
203822
|
+
// flush the WAL if any changes were made
|
|
203823
|
+
if (log->GetTotalWritten() > initial_written) {
|
|
203824
|
+
D_ASSERT(!checkpoint);
|
|
203825
|
+
D_ASSERT(!log->skip_writing);
|
|
203826
|
+
log->Flush();
|
|
203827
|
+
}
|
|
203828
|
+
log->skip_writing = false;
|
|
203829
|
+
}
|
|
203830
|
+
// Null so that the destructor will not truncate the log.
|
|
203831
|
+
log = nullptr;
|
|
203832
|
+
}
|
|
203833
|
+
|
|
203834
|
+
//! If FlushCommit was never reached (log is still set), undo whatever this commit wrote
//! to the WAL by truncating it back to its initial size.
SingleFileStorageCommitState::~SingleFileStorageCommitState() {
	if (!log) {
		// the commit was flushed (or there is no WAL): nothing to roll back
		return;
	}
	log->skip_writing = false;
	const bool wrote_to_wal = log->GetTotalWritten() > initial_written;
	if (wrote_to_wal) {
		// drop the entries added by the failed commit
		log->Truncate(initial_wal_size);
	}
}
|
|
203844
|
+
|
|
203845
|
+
//! Create the commit state for this storage manager. `transaction` is not used here.
unique_ptr<StorageCommitState> SingleFileStorageManager::GenStorageCommitState(Transaction &transaction,
                                                                               bool checkpoint) {
	auto &self = *this;
	return make_unique<SingleFileStorageCommitState>(self, checkpoint);
}
|
|
203849
|
+
|
|
203850
|
+
//! A checkpoint id is "clean" when the block manager reports it as its root block.
bool SingleFileStorageManager::IsCheckpointClean(block_id_t checkpoint_id) {
	const bool is_root = block_manager->IsRootBlock(checkpoint_id);
	return is_root;
}
|
|
203853
|
+
|
|
203854
|
+
void SingleFileStorageManager::CreateCheckpoint(bool delete_wal, bool force_checkpoint) {
|
|
203855
|
+
if (InMemory() || read_only || !wal) {
|
|
203379
203856
|
return;
|
|
203380
203857
|
}
|
|
203381
|
-
if (wal
|
|
203858
|
+
if (wal->GetWALSize() > 0 || db.config.options.force_checkpoint || force_checkpoint) {
|
|
203382
203859
|
// we only need to checkpoint if there is anything in the WAL
|
|
203383
|
-
|
|
203860
|
+
SingleFileCheckpointWriter checkpointer(db, *block_manager);
|
|
203384
203861
|
checkpointer.CreateCheckpoint();
|
|
203385
203862
|
}
|
|
203386
203863
|
if (delete_wal) {
|
|
203387
|
-
wal
|
|
203864
|
+
wal->Delete();
|
|
203865
|
+
wal.reset();
|
|
203388
203866
|
}
|
|
203389
203867
|
}
|
|
203390
203868
|
|
|
203869
|
+
//! Report block counts, block size, total bytes and WAL size of the database file.
//! For an in-memory database a default-constructed DatabaseSize is returned
//! (its members are expected to default to zero).
DatabaseSize SingleFileStorageManager::GetDatabaseSize() {
	DatabaseSize result;
	if (InMemory()) {
		return result;
	}

	result.total_blocks = block_manager->TotalBlocks();
	result.block_size = Storage::BLOCK_ALLOC_SIZE;
	result.free_blocks = block_manager->FreeBlocks();
	// derived figures
	result.used_blocks = result.total_blocks - result.free_blocks;
	result.bytes = (result.total_blocks * result.block_size);

	// the WAL size is only filled in when a WAL exists
	auto write_ahead_log = GetWriteAheadLog();
	if (write_ahead_log) {
		result.wal_size = write_ahead_log->GetWALSize();
	}
	return result;
}
|
|
203884
|
+
|
|
203885
|
+
//! Decide whether a commit that is estimated to add `estimated_wal_bytes` to the WAL should
//! trigger an automatic checkpoint.
//! \param estimated_wal_bytes estimated number of bytes the commit will append to the WAL
//! \return true if the expected WAL size exceeds the configured checkpoint_wal_size;
//!         false if it does not, or if there is no WAL
bool SingleFileStorageManager::AutomaticCheckpoint(idx_t estimated_wal_bytes) {
	auto log = GetWriteAheadLog();
	if (!log) {
		return false;
	}

	auto initial_size = log->GetWALSize();
	// Treat a negative WAL size as an empty WAL, exactly like SingleFileStorageCommitState does.
	// Adding a negative value to the unsigned estimate would otherwise wrap around
	// (e.g. -1 + 0 becomes the largest idx_t) and force a spurious checkpoint.
	idx_t current_wal_size = initial_size < 0 ? 0 : idx_t(initial_size);
	idx_t expected_wal_size = current_wal_size + estimated_wal_bytes;
	return expected_wal_size > db.config.options.checkpoint_wal_size;
}
|
|
203895
|
+
|
|
203896
|
+
//! Return the table IO manager as a shared_ptr that does not own it.
//! The pointer is built with the aliasing constructor on top of an empty shared_ptr, so no
//! reference counting takes place; the TableIOManager lives as long as this StorageManager.
//! `info` is not used.
shared_ptr<TableIOManager> SingleFileStorageManager::GetTableIOManager(BoundCreateTableInfo *info /*info*/) {
	// empty owner: the resulting shared_ptr manages nothing
	shared_ptr<char> no_owner(nullptr);
	return shared_ptr<TableIOManager>(no_owner, table_io_manager.get());
}
|
|
203901
|
+
|
|
203391
203902
|
} // namespace duckdb
|
|
203392
203903
|
|
|
203393
203904
|
|
|
@@ -203679,8 +204190,8 @@ namespace duckdb {
|
|
|
203679
204190
|
//! List column data represents a list
|
|
203680
204191
|
class ListColumnData : public ColumnData {
|
|
203681
204192
|
public:
|
|
203682
|
-
ListColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
203683
|
-
ColumnData *parent = nullptr);
|
|
204193
|
+
ListColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
204194
|
+
LogicalType type, ColumnData *parent = nullptr);
|
|
203684
204195
|
ListColumnData(ColumnData &original, idx_t start_row, ColumnData *parent = nullptr);
|
|
203685
204196
|
|
|
203686
204197
|
//! The child-column of the list
|
|
@@ -203714,8 +204225,8 @@ public:
|
|
|
203714
204225
|
|
|
203715
204226
|
void CommitDropColumn() override;
|
|
203716
204227
|
|
|
203717
|
-
unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group,
|
|
203718
|
-
unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group,
|
|
204228
|
+
unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group, RowGroupWriter &writer) override;
|
|
204229
|
+
unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group, RowGroupWriter &writer,
|
|
203719
204230
|
ColumnCheckpointInfo &checkpoint_info) override;
|
|
203720
204231
|
|
|
203721
204232
|
void DeserializeColumn(Deserializer &source) override;
|
|
@@ -203736,7 +204247,7 @@ private:
|
|
|
203736
204247
|
|
|
203737
204248
|
namespace duckdb {
|
|
203738
204249
|
|
|
203739
|
-
ColumnCheckpointState::ColumnCheckpointState(RowGroup &row_group, ColumnData &column_data,
|
|
204250
|
+
//! Bind the checkpoint state to its row group, column data and row group writer
//! (all three are kept as given; the body is empty).
ColumnCheckpointState::ColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, RowGroupWriter &writer)
    : row_group(row_group),
      column_data(column_data),
      writer(writer) {
}
|
|
203742
204253
|
|
|
@@ -203748,6 +204259,56 @@ unique_ptr<BaseStatistics> ColumnCheckpointState::GetStatistics() {
|
|
|
203748
204259
|
return move(global_stats);
|
|
203749
204260
|
}
|
|
203750
204261
|
|
|
204262
|
+
struct PartialBlockForCheckpoint : PartialBlock {
|
|
204263
|
+
struct PartialColumnSegment {
|
|
204264
|
+
ColumnSegment *segment;
|
|
204265
|
+
uint32_t offset_in_block;
|
|
204266
|
+
};
|
|
204267
|
+
|
|
204268
|
+
public:
|
|
204269
|
+
PartialBlockForCheckpoint(ColumnSegment *first_segment, BlockManager &block_manager, PartialBlockState state)
|
|
204270
|
+
: PartialBlock(state), first_segment(first_segment), block_manager(block_manager) {
|
|
204271
|
+
}
|
|
204272
|
+
|
|
204273
|
+
~PartialBlockForCheckpoint() override {
|
|
204274
|
+
D_ASSERT(IsFlushed() || Exception::UncaughtException());
|
|
204275
|
+
}
|
|
204276
|
+
|
|
204277
|
+
// We will copy all subsequent segment data into the memory corresponding
|
|
204278
|
+
// to the first segment. Once the block is full (or checkpoint is complete)
|
|
204279
|
+
// we'll invoke Flush(), which will cause
|
|
204280
|
+
// the block to get written to storage (via BlockManger::ConvertToPersistent),
|
|
204281
|
+
// and all segments to have their references updated
|
|
204282
|
+
// (via ColumnSegment::ConvertToPersistent)
|
|
204283
|
+
ColumnSegment *first_segment;
|
|
204284
|
+
BlockManager &block_manager;
|
|
204285
|
+
vector<PartialColumnSegment> tail_segments;
|
|
204286
|
+
|
|
204287
|
+
public:
|
|
204288
|
+
bool IsFlushed() {
|
|
204289
|
+
// first_segment is zeroed on Flush
|
|
204290
|
+
return !first_segment;
|
|
204291
|
+
}
|
|
204292
|
+
|
|
204293
|
+
void Flush() override {
|
|
204294
|
+
// At this point, we've already copied all data from tail_segments
|
|
204295
|
+
// into the page owned by first_segment. We flush all segment data to
|
|
204296
|
+
// disk with the following call.
|
|
204297
|
+
first_segment->ConvertToPersistent(&block_manager, state.block_id);
|
|
204298
|
+
// Now that the page is persistent, update tail_segments to point to the
|
|
204299
|
+
// newly persistent block.
|
|
204300
|
+
for (auto e : tail_segments) {
|
|
204301
|
+
e.segment->MarkAsPersistent(first_segment->block, e.offset_in_block);
|
|
204302
|
+
}
|
|
204303
|
+
first_segment = nullptr;
|
|
204304
|
+
tail_segments.clear();
|
|
204305
|
+
}
|
|
204306
|
+
|
|
204307
|
+
void AddSegmentToTail(ColumnSegment *segment, uint32_t offset_in_block) {
|
|
204308
|
+
tail_segments.push_back({segment, offset_in_block});
|
|
204309
|
+
}
|
|
204310
|
+
};
|
|
204311
|
+
|
|
203751
204312
|
void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_t segment_size) {
|
|
203752
204313
|
D_ASSERT(segment_size <= Storage::BLOCK_SIZE);
|
|
203753
204314
|
auto tuple_count = segment->count.load();
|
|
@@ -203761,46 +204322,41 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
|
|
|
203761
204322
|
// get the buffer of the segment and pin it
|
|
203762
204323
|
auto &db = column_data.GetDatabase();
|
|
203763
204324
|
auto &buffer_manager = BufferManager::GetBufferManager(db);
|
|
203764
|
-
auto &block_manager = BlockManager::GetBlockManager(db);
|
|
203765
|
-
auto &checkpoint_manager = writer.GetCheckpointManager();
|
|
203766
|
-
|
|
203767
|
-
bool block_is_constant = segment->stats.statistics->IsConstant();
|
|
203768
|
-
|
|
203769
204325
|
block_id_t block_id = INVALID_BLOCK;
|
|
203770
204326
|
uint32_t offset_in_block = 0;
|
|
203771
|
-
|
|
203772
|
-
|
|
203773
|
-
unique_ptr<PartialBlock> owned_partial_block;
|
|
203774
|
-
if (!block_is_constant) {
|
|
204327
|
+
|
|
204328
|
+
if (!segment->stats.statistics->IsConstant()) {
|
|
203775
204329
|
// non-constant block
|
|
203776
|
-
|
|
203777
|
-
|
|
203778
|
-
|
|
203779
|
-
|
|
203780
|
-
|
|
203781
|
-
|
|
203782
|
-
|
|
203783
|
-
|
|
203784
|
-
|
|
203785
|
-
|
|
203786
|
-
|
|
203787
|
-
|
|
203788
|
-
|
|
203789
|
-
|
|
203790
|
-
|
|
203791
|
-
checkpoint_manager.RegisterPartialBlock(segment.get(), segment_size, block_id);
|
|
203792
|
-
}
|
|
204330
|
+
PartialBlockAllocation allocation = writer.GetBlockAllocation(segment_size);
|
|
204331
|
+
block_id = allocation.state.block_id;
|
|
204332
|
+
offset_in_block = allocation.state.offset_in_block;
|
|
204333
|
+
|
|
204334
|
+
if (allocation.partial_block) {
|
|
204335
|
+
// Use an existing block.
|
|
204336
|
+
D_ASSERT(offset_in_block > 0);
|
|
204337
|
+
auto pstate = (PartialBlockForCheckpoint *)allocation.partial_block.get();
|
|
204338
|
+
// pin the source block
|
|
204339
|
+
auto old_handle = buffer_manager.Pin(segment->block);
|
|
204340
|
+
// pin the target block
|
|
204341
|
+
auto new_handle = buffer_manager.Pin(pstate->first_segment->block);
|
|
204342
|
+
// memcpy the contents of the old block to the new block
|
|
204343
|
+
memcpy(new_handle.Ptr() + offset_in_block, old_handle.Ptr(), segment_size);
|
|
204344
|
+
pstate->AddSegmentToTail(segment.get(), offset_in_block);
|
|
203793
204345
|
} else {
|
|
203794
|
-
//
|
|
203795
|
-
|
|
203796
|
-
|
|
204346
|
+
// Create a new block for future reuse.
|
|
204347
|
+
D_ASSERT(offset_in_block == 0);
|
|
204348
|
+
allocation.partial_block =
|
|
204349
|
+
make_unique<PartialBlockForCheckpoint>(segment.get(), *allocation.block_manager, allocation.state);
|
|
203797
204350
|
}
|
|
204351
|
+
// Writer will decide whether to reuse this block.
|
|
204352
|
+
writer.RegisterPartialBlock(move(allocation));
|
|
203798
204353
|
} else {
|
|
203799
204354
|
// constant block: no need to write anything to disk besides the stats
|
|
203800
204355
|
// set up the compression function to constant
|
|
203801
204356
|
auto &config = DBConfig::GetConfig(db);
|
|
203802
204357
|
segment->function =
|
|
203803
204358
|
config.GetCompressionFunction(CompressionType::COMPRESSION_CONSTANT, segment->type.InternalType());
|
|
204359
|
+
segment->ConvertToPersistent(nullptr, INVALID_BLOCK);
|
|
203804
204360
|
}
|
|
203805
204361
|
|
|
203806
204362
|
// construct the data pointer
|
|
@@ -203816,43 +204372,13 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
|
|
|
203816
204372
|
data_pointer.compression_type = segment->function->type;
|
|
203817
204373
|
data_pointer.statistics = segment->stats.statistics->Copy();
|
|
203818
204374
|
|
|
203819
|
-
if (need_to_write) {
|
|
203820
|
-
if (partial_block) {
|
|
203821
|
-
// pin the current block
|
|
203822
|
-
auto old_handle = buffer_manager.Pin(segment->block);
|
|
203823
|
-
// pin the new block
|
|
203824
|
-
auto new_handle = buffer_manager.Pin(partial_block->block);
|
|
203825
|
-
// memcpy the contents of the old block to the new block
|
|
203826
|
-
memcpy(new_handle.Ptr() + offset_in_block, old_handle.Ptr(), segment_size);
|
|
203827
|
-
} else {
|
|
203828
|
-
// convert the segment into a persistent segment that points to this block
|
|
203829
|
-
segment->ConvertToPersistent(block_id);
|
|
203830
|
-
}
|
|
203831
|
-
}
|
|
203832
|
-
if (owned_partial_block) {
|
|
203833
|
-
// the partial block has become full: write it to disk
|
|
203834
|
-
owned_partial_block->FlushToDisk(db);
|
|
203835
|
-
}
|
|
203836
|
-
|
|
203837
204375
|
// append the segment to the new segment tree
|
|
203838
204376
|
new_tree.AppendSegment(move(segment));
|
|
203839
204377
|
data_pointers.push_back(move(data_pointer));
|
|
203840
204378
|
}
|
|
203841
204379
|
|
|
203842
|
-
void ColumnCheckpointState::
|
|
203843
|
-
|
|
203844
|
-
|
|
203845
|
-
meta_writer.Write<idx_t>(data_pointers.size());
|
|
203846
|
-
// then write the data pointers themselves
|
|
203847
|
-
for (idx_t k = 0; k < data_pointers.size(); k++) {
|
|
203848
|
-
auto &data_pointer = data_pointers[k];
|
|
203849
|
-
meta_writer.Write<idx_t>(data_pointer.row_start);
|
|
203850
|
-
meta_writer.Write<idx_t>(data_pointer.tuple_count);
|
|
203851
|
-
meta_writer.Write<block_id_t>(data_pointer.block_pointer.block_id);
|
|
203852
|
-
meta_writer.Write<uint32_t>(data_pointer.block_pointer.offset);
|
|
203853
|
-
meta_writer.Write<CompressionType>(data_pointer.compression_type);
|
|
203854
|
-
data_pointer.statistics->Serialize(meta_writer);
|
|
203855
|
-
}
|
|
204380
|
+
//! Delegate writing of this column's data pointers to the row group writer.
void ColumnCheckpointState::WriteDataPointers() {
	auto &self = *this;
	writer.WriteColumnDataPointers(self);
}
|
|
203857
204383
|
|
|
203858
204384
|
} // namespace duckdb
|
|
@@ -203888,8 +204414,8 @@ namespace duckdb {
|
|
|
203888
204414
|
//! Struct column data represents a struct
|
|
203889
204415
|
class StructColumnData : public ColumnData {
|
|
203890
204416
|
public:
|
|
203891
|
-
StructColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
203892
|
-
ColumnData *parent = nullptr);
|
|
204417
|
+
StructColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
204418
|
+
LogicalType type, ColumnData *parent = nullptr);
|
|
203893
204419
|
StructColumnData(ColumnData &original, idx_t start_row, ColumnData *parent = nullptr);
|
|
203894
204420
|
|
|
203895
204421
|
//! The sub-columns of the struct
|
|
@@ -203924,8 +204450,8 @@ public:
|
|
|
203924
204450
|
|
|
203925
204451
|
void CommitDropColumn() override;
|
|
203926
204452
|
|
|
203927
|
-
unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group,
|
|
203928
|
-
unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group,
|
|
204453
|
+
unique_ptr<ColumnCheckpointState> CreateCheckpointState(RowGroup &row_group, RowGroupWriter &writer) override;
|
|
204454
|
+
unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group, RowGroupWriter &writer,
|
|
203929
204455
|
ColumnCheckpointInfo &checkpoint_info) override;
|
|
203930
204456
|
|
|
203931
204457
|
void DeserializeColumn(Deserializer &source) override;
|
|
@@ -204049,13 +204575,15 @@ struct UpdateNode {
|
|
|
204049
204575
|
|
|
204050
204576
|
namespace duckdb {
|
|
204051
204577
|
|
|
204052
|
-
ColumnData::ColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
204053
|
-
|
|
204578
|
+
//! Construct column data bound to the given block manager and table info.
//! All arguments are stored as members (`type` is moved in); the body is empty.
ColumnData::ColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
                       LogicalType type, ColumnData *parent)
    : block_manager(block_manager),
      info(info),
      column_index(column_index),
      start(start_row),
      type(move(type)),
      parent(parent) {
}
|
|
204055
204583
|
|
|
204056
204584
|
ColumnData::ColumnData(ColumnData &other, idx_t start, ColumnData *parent)
|
|
204057
|
-
: info(other.info), column_index(other.column_index), start(start),
|
|
204058
|
-
updates(move(other.updates)) {
|
|
204585
|
+
: block_manager(other.block_manager), info(other.info), column_index(other.column_index), start(start),
|
|
204586
|
+
type(move(other.type)), parent(parent), updates(move(other.updates)) {
|
|
204059
204587
|
idx_t offset = 0;
|
|
204060
204588
|
for (auto segment = other.data.GetRootSegment(); segment; segment = segment->next.get()) {
|
|
204061
204589
|
auto &other = (ColumnSegment &)*segment;
|
|
@@ -204251,6 +204779,7 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) {
|
|
|
204251
204779
|
|
|
204252
204780
|
D_ASSERT(state.current->segment_type == ColumnSegmentType::TRANSIENT);
|
|
204253
204781
|
state.current->InitializeAppend(state);
|
|
204782
|
+
D_ASSERT(state.current->function->append);
|
|
204254
204783
|
}
|
|
204255
204784
|
|
|
204256
204785
|
void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, idx_t count) {
|
|
@@ -204353,7 +204882,6 @@ void ColumnData::AppendTransientSegment(idx_t start_row) {
|
|
|
204353
204882
|
}
|
|
204354
204883
|
|
|
204355
204884
|
void ColumnData::CommitDropColumn() {
|
|
204356
|
-
auto &block_manager = BlockManager::GetBlockManager(GetDatabase());
|
|
204357
204885
|
auto segment = (ColumnSegment *)data.GetRootSegment();
|
|
204358
204886
|
while (segment) {
|
|
204359
204887
|
if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
|
|
@@ -204366,7 +204894,7 @@ void ColumnData::CommitDropColumn() {
|
|
|
204366
204894
|
}
|
|
204367
204895
|
}
|
|
204368
204896
|
|
|
204369
|
-
unique_ptr<ColumnCheckpointState> ColumnData::CreateCheckpointState(RowGroup &row_group,
|
|
204897
|
+
unique_ptr<ColumnCheckpointState> ColumnData::CreateCheckpointState(RowGroup &row_group, RowGroupWriter &writer) {
|
|
204370
204898
|
return make_unique<ColumnCheckpointState>(row_group, *this, writer);
|
|
204371
204899
|
}
|
|
204372
204900
|
|
|
@@ -204379,7 +204907,7 @@ void ColumnData::CheckpointScan(ColumnSegment *segment, ColumnScanState &state,
|
|
|
204379
204907
|
}
|
|
204380
204908
|
}
|
|
204381
204909
|
|
|
204382
|
-
unique_ptr<ColumnCheckpointState> ColumnData::Checkpoint(RowGroup &row_group,
|
|
204910
|
+
unique_ptr<ColumnCheckpointState> ColumnData::Checkpoint(RowGroup &row_group, RowGroupWriter &writer,
|
|
204383
204911
|
ColumnCheckpointInfo &checkpoint_info) {
|
|
204384
204912
|
// scan the segments of the column data
|
|
204385
204913
|
// set up the checkpoint state
|
|
@@ -204416,16 +204944,17 @@ void ColumnData::DeserializeColumn(Deserializer &source) {
|
|
|
204416
204944
|
|
|
204417
204945
|
// create a persistent segment
|
|
204418
204946
|
auto segment = ColumnSegment::CreatePersistentSegment(
|
|
204419
|
-
GetDatabase(), data_pointer.block_pointer.block_id, data_pointer.block_pointer.offset, type,
|
|
204947
|
+
GetDatabase(), block_manager, data_pointer.block_pointer.block_id, data_pointer.block_pointer.offset, type,
|
|
204420
204948
|
data_pointer.row_start, data_pointer.tuple_count, data_pointer.compression_type,
|
|
204421
204949
|
move(data_pointer.statistics));
|
|
204422
204950
|
data.AppendSegment(move(segment));
|
|
204423
204951
|
}
|
|
204424
204952
|
}
|
|
204425
204953
|
|
|
204426
|
-
shared_ptr<ColumnData> ColumnData::Deserialize(DataTableInfo &info, idx_t column_index,
|
|
204427
|
-
Deserializer &source, const LogicalType &type,
|
|
204428
|
-
|
|
204954
|
+
shared_ptr<ColumnData> ColumnData::Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
|
|
204955
|
+
idx_t start_row, Deserializer &source, const LogicalType &type,
|
|
204956
|
+
ColumnData *parent) {
|
|
204957
|
+
auto entry = ColumnData::CreateColumn(block_manager, info, column_index, start_row, type, parent);
|
|
204429
204958
|
entry->DeserializeColumn(source);
|
|
204430
204959
|
return entry;
|
|
204431
204960
|
}
|
|
@@ -204510,16 +205039,16 @@ void ColumnData::Verify(RowGroup &parent) {
|
|
|
204510
205039
|
}
|
|
204511
205040
|
|
|
204512
205041
|
template <class RET, class OP>
|
|
204513
|
-
static RET CreateColumnInternal(DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
204514
|
-
ColumnData *parent) {
|
|
205042
|
+
static RET CreateColumnInternal(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
205043
|
+
const LogicalType &type, ColumnData *parent) {
|
|
204515
205044
|
if (type.InternalType() == PhysicalType::STRUCT) {
|
|
204516
|
-
return OP::template Create<StructColumnData>(info, column_index, start_row, type, parent);
|
|
205045
|
+
return OP::template Create<StructColumnData>(block_manager, info, column_index, start_row, type, parent);
|
|
204517
205046
|
} else if (type.InternalType() == PhysicalType::LIST) {
|
|
204518
|
-
return OP::template Create<ListColumnData>(info, column_index, start_row, type, parent);
|
|
205047
|
+
return OP::template Create<ListColumnData>(block_manager, info, column_index, start_row, type, parent);
|
|
204519
205048
|
} else if (type.id() == LogicalTypeId::VALIDITY) {
|
|
204520
|
-
return OP::template Create<ValidityColumnData>(info, column_index, start_row, parent);
|
|
205049
|
+
return OP::template Create<ValidityColumnData>(block_manager, info, column_index, start_row, parent);
|
|
204521
205050
|
}
|
|
204522
|
-
return OP::template Create<StandardColumnData>(info, column_index, start_row, type, parent);
|
|
205051
|
+
return OP::template Create<StandardColumnData>(block_manager, info, column_index, start_row, type, parent);
|
|
204523
205052
|
}
|
|
204524
205053
|
|
|
204525
205054
|
template <class RET, class OP>
|
|
@@ -204534,18 +205063,21 @@ static RET CreateColumnInternal(ColumnData &other, idx_t start_row, ColumnData *
|
|
|
204534
205063
|
return OP::template Create<StandardColumnData>(other, start_row, parent);
|
|
204535
205064
|
}
|
|
204536
205065
|
|
|
204537
|
-
shared_ptr<ColumnData> ColumnData::CreateColumn(DataTableInfo &info, idx_t column_index,
|
|
204538
|
-
const LogicalType &type, ColumnData *parent) {
|
|
204539
|
-
return CreateColumnInternal<shared_ptr<ColumnData>, SharedConstructor>(info, column_index, start_row,
|
|
205066
|
+
shared_ptr<ColumnData> ColumnData::CreateColumn(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
|
|
205067
|
+
idx_t start_row, const LogicalType &type, ColumnData *parent) {
|
|
205068
|
+
return CreateColumnInternal<shared_ptr<ColumnData>, SharedConstructor>(block_manager, info, column_index, start_row,
|
|
205069
|
+
type, parent);
|
|
204540
205070
|
}
|
|
204541
205071
|
|
|
204542
205072
|
shared_ptr<ColumnData> ColumnData::CreateColumn(ColumnData &other, idx_t start_row, ColumnData *parent) {
|
|
204543
205073
|
return CreateColumnInternal<shared_ptr<ColumnData>, SharedConstructor>(other, start_row, parent);
|
|
204544
205074
|
}
|
|
204545
205075
|
|
|
204546
|
-
unique_ptr<ColumnData> ColumnData::CreateColumnUnique(
|
|
204547
|
-
const LogicalType &type,
|
|
204548
|
-
|
|
205076
|
+
unique_ptr<ColumnData> ColumnData::CreateColumnUnique(BlockManager &block_manager, DataTableInfo &info,
|
|
205077
|
+
idx_t column_index, idx_t start_row, const LogicalType &type,
|
|
205078
|
+
ColumnData *parent) {
|
|
205079
|
+
return CreateColumnInternal<unique_ptr<ColumnData>, UniqueConstructor>(block_manager, info, column_index, start_row,
|
|
205080
|
+
type, parent);
|
|
204549
205081
|
}
|
|
204550
205082
|
|
|
204551
205083
|
unique_ptr<ColumnData> ColumnData::CreateColumnUnique(ColumnData &other, idx_t start_row, ColumnData *parent) {
|
|
@@ -204715,7 +205247,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
|
|
|
204715
205247
|
// first we check the current segments
|
|
204716
205248
|
// if there are any persistent segments, we will mark their old block ids as modified
|
|
204717
205249
|
// since the segments will be rewritten their old on disk data is no longer required
|
|
204718
|
-
auto &block_manager =
|
|
205250
|
+
auto &block_manager = col_data.block_manager;
|
|
204719
205251
|
for (auto segment = (ColumnSegment *)owned_segment.get(); segment; segment = (ColumnSegment *)segment->next.get()) {
|
|
204720
205252
|
if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
|
|
204721
205253
|
// persistent segment has updates: mark it as modified and rewrite the block with the merged updates
|
|
@@ -204825,26 +205357,32 @@ void ColumnDataCheckpointer::Checkpoint(unique_ptr<SegmentBase> segment) {
|
|
|
204825
205357
|
|
|
204826
205358
|
namespace duckdb {
|
|
204827
205359
|
|
|
204828
|
-
unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstance &db,
|
|
204829
|
-
|
|
204830
|
-
idx_t
|
|
205360
|
+
unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
|
|
205361
|
+
block_id_t block_id, idx_t offset,
|
|
205362
|
+
const LogicalType &type, idx_t start, idx_t count,
|
|
205363
|
+
CompressionType compression_type,
|
|
204831
205364
|
unique_ptr<BaseStatistics> statistics) {
|
|
204832
205365
|
auto &config = DBConfig::GetConfig(db);
|
|
204833
205366
|
CompressionFunction *function;
|
|
205367
|
+
shared_ptr<BlockHandle> block;
|
|
204834
205368
|
if (block_id == INVALID_BLOCK) {
|
|
205369
|
+
// constant segment, no need to allocate an actual block
|
|
204835
205370
|
function = config.GetCompressionFunction(CompressionType::COMPRESSION_CONSTANT, type.InternalType());
|
|
204836
205371
|
} else {
|
|
204837
205372
|
function = config.GetCompressionFunction(compression_type, type.InternalType());
|
|
205373
|
+
block = block_manager.RegisterBlock(block_id);
|
|
204838
205374
|
}
|
|
204839
|
-
return make_unique<ColumnSegment>(db, type, ColumnSegmentType::PERSISTENT, start, count, function,
|
|
204840
|
-
block_id, offset);
|
|
205375
|
+
return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::PERSISTENT, start, count, function,
|
|
205376
|
+
move(statistics), block_id, offset);
|
|
204841
205377
|
}
|
|
204842
205378
|
|
|
204843
205379
|
unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance &db, const LogicalType &type,
|
|
204844
205380
|
idx_t start) {
|
|
204845
205381
|
auto &config = DBConfig::GetConfig(db);
|
|
204846
205382
|
auto function = config.GetCompressionFunction(CompressionType::COMPRESSION_UNCOMPRESSED, type.InternalType());
|
|
204847
|
-
|
|
205383
|
+
// transient: allocate a buffer for the uncompressed segment
|
|
205384
|
+
auto block = BufferManager::GetBufferManager(db).RegisterMemory(Storage::BLOCK_SIZE, false);
|
|
205385
|
+
return make_unique<ColumnSegment>(db, block, type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr,
|
|
204848
205386
|
INVALID_BLOCK, 0);
|
|
204849
205387
|
}
|
|
204850
205388
|
|
|
@@ -204852,27 +205390,13 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx
|
|
|
204852
205390
|
return make_unique<ColumnSegment>(other, start);
|
|
204853
205391
|
}
|
|
204854
205392
|
|
|
204855
|
-
ColumnSegment::ColumnSegment(DatabaseInstance &db,
|
|
204856
|
-
idx_t count, CompressionFunction *function_p,
|
|
204857
|
-
block_id_t block_id_p, idx_t offset_p)
|
|
205393
|
+
ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
|
|
205394
|
+
ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction *function_p,
|
|
205395
|
+
unique_ptr<BaseStatistics> statistics, block_id_t block_id_p, idx_t offset_p)
|
|
204858
205396
|
: SegmentBase(start, count), db(db), type(move(type_p)), type_size(GetTypeIdSize(type.InternalType())),
|
|
204859
|
-
segment_type(segment_type), function(function_p), stats(type, move(statistics)),
|
|
204860
|
-
offset(offset_p) {
|
|
205397
|
+
segment_type(segment_type), function(function_p), stats(type, move(statistics)), block(move(block)),
|
|
205398
|
+
block_id(block_id_p), offset(offset_p) {
|
|
204861
205399
|
D_ASSERT(function);
|
|
204862
|
-
auto &block_manager = BlockManager::GetBlockManager(db);
|
|
204863
|
-
auto &buffer_manager = BufferManager::GetBufferManager(db);
|
|
204864
|
-
if (block_id == INVALID_BLOCK) {
|
|
204865
|
-
// no block id specified
|
|
204866
|
-
// there are two cases here:
|
|
204867
|
-
// transient: allocate a buffer for the uncompressed segment
|
|
204868
|
-
// persistent: constant segment, no need to allocate anything
|
|
204869
|
-
if (segment_type == ColumnSegmentType::TRANSIENT) {
|
|
204870
|
-
this->block = buffer_manager.RegisterMemory(Storage::BLOCK_SIZE, false);
|
|
204871
|
-
}
|
|
204872
|
-
} else {
|
|
204873
|
-
D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT);
|
|
204874
|
-
this->block = block_manager.RegisterBlock(block_id);
|
|
204875
|
-
}
|
|
204876
205400
|
if (function->init_segment) {
|
|
204877
205401
|
segment_state = function->init_segment(*this, block_id);
|
|
204878
205402
|
}
|
|
@@ -204960,22 +205484,23 @@ void ColumnSegment::RevertAppend(idx_t start_row) {
|
|
|
204960
205484
|
//===--------------------------------------------------------------------===//
|
|
204961
205485
|
// Convert To Persistent
|
|
204962
205486
|
//===--------------------------------------------------------------------===//
|
|
204963
|
-
void ColumnSegment::ConvertToPersistent(block_id_t block_id_p) {
|
|
205487
|
+
void ColumnSegment::ConvertToPersistent(BlockManager *block_manager, block_id_t block_id_p) {
|
|
204964
205488
|
D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT);
|
|
204965
205489
|
segment_type = ColumnSegmentType::PERSISTENT;
|
|
205490
|
+
|
|
204966
205491
|
block_id = block_id_p;
|
|
204967
205492
|
offset = 0;
|
|
204968
205493
|
|
|
204969
205494
|
if (block_id == INVALID_BLOCK) {
|
|
204970
205495
|
// constant block: reset the block buffer
|
|
205496
|
+
D_ASSERT(stats.statistics->IsConstant());
|
|
204971
205497
|
block.reset();
|
|
204972
205498
|
} else {
|
|
205499
|
+
D_ASSERT(!stats.statistics->IsConstant());
|
|
204973
205500
|
// non-constant block: write the block to disk
|
|
204974
|
-
auto &block_manager = BlockManager::GetBlockManager(db);
|
|
204975
|
-
|
|
204976
205501
|
// the data for the block already exists in-memory of our block
|
|
204977
205502
|
// instead of copying the data we alter some metadata so the buffer points to an on-disk block
|
|
204978
|
-
block = block_manager
|
|
205503
|
+
block = block_manager->ConvertToPersistent(block_id, move(block));
|
|
204979
205504
|
}
|
|
204980
205505
|
|
|
204981
205506
|
segment_state.reset();
|
|
@@ -204984,10 +205509,11 @@ void ColumnSegment::ConvertToPersistent(block_id_t block_id_p) {
|
|
|
204984
205509
|
}
|
|
204985
205510
|
}
|
|
204986
205511
|
|
|
204987
|
-
void ColumnSegment::
|
|
205512
|
+
void ColumnSegment::MarkAsPersistent(shared_ptr<BlockHandle> block_p, uint32_t offset_p) {
|
|
204988
205513
|
D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT);
|
|
204989
205514
|
segment_type = ColumnSegmentType::PERSISTENT;
|
|
204990
|
-
|
|
205515
|
+
|
|
205516
|
+
block_id = block_p->BlockId();
|
|
204991
205517
|
offset = offset_p;
|
|
204992
205518
|
block = move(block_p);
|
|
204993
205519
|
|
|
@@ -205278,13 +205804,14 @@ idx_t ColumnSegment::FilterSelection(SelectionVector &sel, Vector &result, const
|
|
|
205278
205804
|
|
|
205279
205805
|
namespace duckdb {
|
|
205280
205806
|
|
|
205281
|
-
ListColumnData::ListColumnData(DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
205282
|
-
ColumnData *parent)
|
|
205283
|
-
: ColumnData(info, column_index, start_row, move(type_p), parent),
|
|
205807
|
+
ListColumnData::ListColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
|
|
205808
|
+
LogicalType type_p, ColumnData *parent)
|
|
205809
|
+
: ColumnData(block_manager, info, column_index, start_row, move(type_p), parent),
|
|
205810
|
+
validity(block_manager, info, 0, start_row, this) {
|
|
205284
205811
|
D_ASSERT(type.InternalType() == PhysicalType::LIST);
|
|
205285
205812
|
auto &child_type = ListType::GetChildType(type);
|
|
205286
205813
|
// the child column, with column index 1 (0 is the validity mask)
|
|
205287
|
-
child_column = ColumnData::CreateColumnUnique(info, 1, start_row, child_type, this);
|
|
205814
|
+
child_column = ColumnData::CreateColumnUnique(block_manager, info, 1, start_row, child_type, this);
|
|
205288
205815
|
}
|
|
205289
205816
|
|
|
205290
205817
|
ListColumnData::ListColumnData(ColumnData &original, idx_t start_row, ColumnData *parent)
|
|
@@ -205595,7 +206122,7 @@ void ListColumnData::CommitDropColumn() {
|
|
|
205595
206122
|
}
|
|
205596
206123
|
|
|
205597
206124
|
struct ListColumnCheckpointState : public ColumnCheckpointState {
|
|
205598
|
-
ListColumnCheckpointState(RowGroup &row_group, ColumnData &column_data,
|
|
206125
|
+
ListColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, RowGroupWriter &writer)
|
|
205599
206126
|
: ColumnCheckpointState(row_group, column_data, writer) {
|
|
205600
206127
|
global_stats = make_unique<ListStatistics>(column_data.type);
|
|
205601
206128
|
}
|
|
@@ -205612,18 +206139,18 @@ public:
|
|
|
205612
206139
|
return stats;
|
|
205613
206140
|
}
|
|
205614
206141
|
|
|
205615
|
-
void
|
|
205616
|
-
ColumnCheckpointState::
|
|
205617
|
-
validity_state->
|
|
205618
|
-
child_state->
|
|
206142
|
+
void WriteDataPointers() override {
|
|
206143
|
+
ColumnCheckpointState::WriteDataPointers();
|
|
206144
|
+
validity_state->WriteDataPointers();
|
|
206145
|
+
child_state->WriteDataPointers();
|
|
205619
206146
|
}
|
|
205620
206147
|
};
|
|
205621
206148
|
|
|
205622
|
-
unique_ptr<ColumnCheckpointState> ListColumnData::CreateCheckpointState(RowGroup &row_group,
|
|
206149
|
+
unique_ptr<ColumnCheckpointState> ListColumnData::CreateCheckpointState(RowGroup &row_group, RowGroupWriter &writer) {
|
|
205623
206150
|
return make_unique<ListColumnCheckpointState>(row_group, *this, writer);
|
|
205624
206151
|
}
|
|
205625
206152
|
|
|
205626
|
-
unique_ptr<ColumnCheckpointState> ListColumnData::Checkpoint(RowGroup &row_group,
|
|
206153
|
+
unique_ptr<ColumnCheckpointState> ListColumnData::Checkpoint(RowGroup &row_group, RowGroupWriter &writer,
|
|
205627
206154
|
ColumnCheckpointInfo &checkpoint_info) {
|
|
205628
206155
|
auto validity_state = validity.Checkpoint(row_group, writer, checkpoint_info);
|
|
205629
206156
|
auto base_state = ColumnData::Checkpoint(row_group, writer, checkpoint_info);
|
|
@@ -205682,24 +206209,27 @@ namespace duckdb {
|
|
|
205682
206209
|
constexpr const idx_t RowGroup::ROW_GROUP_VECTOR_COUNT;
|
|
205683
206210
|
constexpr const idx_t RowGroup::ROW_GROUP_SIZE;
|
|
205684
206211
|
|
|
205685
|
-
RowGroup::RowGroup(DatabaseInstance &db, DataTableInfo &table_info, idx_t start,
|
|
205686
|
-
|
|
206212
|
+
RowGroup::RowGroup(DatabaseInstance &db, BlockManager &block_manager, DataTableInfo &table_info, idx_t start,
|
|
206213
|
+
idx_t count)
|
|
206214
|
+
: SegmentBase(start, count), db(db), block_manager(block_manager), table_info(table_info) {
|
|
205687
206215
|
|
|
205688
206216
|
Verify();
|
|
205689
206217
|
}
|
|
205690
206218
|
|
|
205691
|
-
RowGroup::RowGroup(DatabaseInstance &db,
|
|
205692
|
-
RowGroupPointer
|
|
205693
|
-
: SegmentBase(pointer.row_start, pointer.tuple_count), db(db),
|
|
206219
|
+
RowGroup::RowGroup(DatabaseInstance &db, BlockManager &block_manager, DataTableInfo &table_info,
|
|
206220
|
+
const vector<LogicalType> &types, RowGroupPointer &&pointer)
|
|
206221
|
+
: SegmentBase(pointer.row_start, pointer.tuple_count), db(db), block_manager(block_manager),
|
|
206222
|
+
table_info(table_info) {
|
|
205694
206223
|
// deserialize the columns
|
|
205695
206224
|
if (pointer.data_pointers.size() != types.size()) {
|
|
205696
206225
|
throw IOException("Row group column count is unaligned with table column count. Corrupt file?");
|
|
205697
206226
|
}
|
|
205698
206227
|
for (idx_t i = 0; i < pointer.data_pointers.size(); i++) {
|
|
205699
206228
|
auto &block_pointer = pointer.data_pointers[i];
|
|
205700
|
-
MetaBlockReader column_data_reader(
|
|
206229
|
+
MetaBlockReader column_data_reader(block_manager, block_pointer.block_id);
|
|
205701
206230
|
column_data_reader.offset = block_pointer.offset;
|
|
205702
|
-
this->columns.push_back(
|
|
206231
|
+
this->columns.push_back(
|
|
206232
|
+
ColumnData::Deserialize(block_manager, table_info, i, start, column_data_reader, types[i], nullptr));
|
|
205703
206233
|
}
|
|
205704
206234
|
|
|
205705
206235
|
// set up the statistics
|
|
@@ -205713,8 +206243,8 @@ RowGroup::RowGroup(DatabaseInstance &db, DataTableInfo &table_info, const vector
|
|
|
205713
206243
|
}
|
|
205714
206244
|
|
|
205715
206245
|
RowGroup::RowGroup(RowGroup &row_group, idx_t start)
|
|
205716
|
-
: SegmentBase(start, row_group.count), db(row_group.db),
|
|
205717
|
-
version_info(move(row_group.version_info)), stats(move(row_group.stats)) {
|
|
206246
|
+
: SegmentBase(start, row_group.count), db(row_group.db), block_manager(row_group.block_manager),
|
|
206247
|
+
table_info(row_group.table_info), version_info(move(row_group.version_info)), stats(move(row_group.stats)) {
|
|
205718
206248
|
for (auto &column : row_group.columns) {
|
|
205719
206249
|
this->columns.push_back(ColumnData::CreateColumn(*column, start));
|
|
205720
206250
|
}
|
|
@@ -205740,7 +206270,7 @@ RowGroup::~RowGroup() {
|
|
|
205740
206270
|
void RowGroup::InitializeEmpty(const vector<LogicalType> &types) {
|
|
205741
206271
|
// set up the segment trees for the column segments
|
|
205742
206272
|
for (idx_t i = 0; i < types.size(); i++) {
|
|
205743
|
-
auto column_data = ColumnData::CreateColumn(GetTableInfo(), i, start, types[i]);
|
|
206273
|
+
auto column_data = ColumnData::CreateColumn(block_manager, GetTableInfo(), i, start, types[i]);
|
|
205744
206274
|
stats.push_back(make_shared<SegmentStatistics>(types[i]));
|
|
205745
206275
|
columns.push_back(move(column_data));
|
|
205746
206276
|
}
|
|
@@ -205802,7 +206332,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
|
|
|
205802
206332
|
Verify();
|
|
205803
206333
|
|
|
205804
206334
|
// construct a new column data for this type
|
|
205805
|
-
auto column_data = ColumnData::CreateColumn(GetTableInfo(), changed_idx, start, target_type);
|
|
206335
|
+
auto column_data = ColumnData::CreateColumn(block_manager, GetTableInfo(), changed_idx, start, target_type);
|
|
205806
206336
|
|
|
205807
206337
|
ColumnAppendState append_state;
|
|
205808
206338
|
column_data->InitializeAppend(append_state);
|
|
@@ -205825,7 +206355,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
|
|
|
205825
206355
|
}
|
|
205826
206356
|
|
|
205827
206357
|
// set up the row_group based on this row_group
|
|
205828
|
-
auto row_group = make_unique<RowGroup>(db, table_info, this->start, this->count);
|
|
206358
|
+
auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
|
|
205829
206359
|
row_group->version_info = version_info;
|
|
205830
206360
|
for (idx_t i = 0; i < columns.size(); i++) {
|
|
205831
206361
|
if (i == changed_idx) {
|
|
@@ -205847,7 +206377,8 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
|
|
|
205847
206377
|
Verify();
|
|
205848
206378
|
|
|
205849
206379
|
// construct a new column data for the new column
|
|
205850
|
-
auto added_column =
|
|
206380
|
+
auto added_column =
|
|
206381
|
+
ColumnData::CreateColumn(block_manager, GetTableInfo(), columns.size(), start, new_column.Type());
|
|
205851
206382
|
auto added_col_stats = make_shared<SegmentStatistics>(
|
|
205852
206383
|
new_column.Type(), BaseStatistics::CreateEmpty(new_column.Type(), StatisticsType::LOCAL_STATS));
|
|
205853
206384
|
|
|
@@ -205868,7 +206399,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
|
|
|
205868
206399
|
}
|
|
205869
206400
|
|
|
205870
206401
|
// set up the row_group based on this row_group
|
|
205871
|
-
auto row_group = make_unique<RowGroup>(db, table_info, this->start, this->count);
|
|
206402
|
+
auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
|
|
205872
206403
|
row_group->version_info = version_info;
|
|
205873
206404
|
row_group->columns = columns;
|
|
205874
206405
|
row_group->stats = stats;
|
|
@@ -205885,7 +206416,7 @@ unique_ptr<RowGroup> RowGroup::RemoveColumn(idx_t removed_column) {
|
|
|
205885
206416
|
|
|
205886
206417
|
D_ASSERT(removed_column < columns.size());
|
|
205887
206418
|
|
|
205888
|
-
auto row_group = make_unique<RowGroup>(db, table_info, this->start, this->count);
|
|
206419
|
+
auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
|
|
205889
206420
|
row_group->version_info = version_info;
|
|
205890
206421
|
row_group->columns = columns;
|
|
205891
206422
|
row_group->stats = stats;
|
|
@@ -206342,12 +206873,19 @@ void RowGroup::MergeIntoStatistics(idx_t column_idx, BaseStatistics &other) {
|
|
|
206342
206873
|
other.Merge(*stats[column_idx]->statistics);
|
|
206343
206874
|
}
|
|
206344
206875
|
|
|
206345
|
-
RowGroupPointer RowGroup::Checkpoint(
|
|
206876
|
+
RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats) {
|
|
206346
206877
|
RowGroupPointer row_group_pointer;
|
|
206347
206878
|
vector<unique_ptr<ColumnCheckpointState>> states;
|
|
206348
206879
|
states.reserve(columns.size());
|
|
206349
206880
|
|
|
206350
|
-
//
|
|
206881
|
+
// Checkpoint the individual columns of the row group
|
|
206882
|
+
// Here we're iterating over columns. Each column can have multiple segments.
|
|
206883
|
+
// (Some columns will be wider than others, and require different numbers
|
|
206884
|
+
// of blocks to encode.) Segments cannot span blocks.
|
|
206885
|
+
//
|
|
206886
|
+
// Some of these columns are composite (list, struct). The data is written
|
|
206887
|
+
// first sequentially, and the pointers are written later, so that the
|
|
206888
|
+
// pointers all end up densely packed, and thus more cache-friendly.
|
|
206351
206889
|
for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) {
|
|
206352
206890
|
auto &column = columns[column_idx];
|
|
206353
206891
|
ColumnCheckpointInfo checkpoint_info {writer.GetColumnCompressionType(column_idx)};
|
|
@@ -206367,15 +206905,18 @@ RowGroupPointer RowGroup::Checkpoint(TableDataWriter &writer, vector<unique_ptr<
|
|
|
206367
206905
|
row_group_pointer.row_start = start;
|
|
206368
206906
|
row_group_pointer.tuple_count = count;
|
|
206369
206907
|
for (auto &state : states) {
|
|
206370
|
-
// get the current position of the
|
|
206371
|
-
auto &
|
|
206372
|
-
auto pointer =
|
|
206908
|
+
// get the current position of the table data writer
|
|
206909
|
+
auto &data_writer = writer.GetPayloadWriter();
|
|
206910
|
+
auto pointer = data_writer.GetBlockPointer();
|
|
206373
206911
|
|
|
206374
206912
|
// store the stats and the data pointers in the row group pointers
|
|
206375
206913
|
row_group_pointer.data_pointers.push_back(pointer);
|
|
206376
206914
|
|
|
206377
|
-
//
|
|
206378
|
-
|
|
206915
|
+
// Write pointers to the column segments.
|
|
206916
|
+
//
|
|
206917
|
+
// Just as above, the state can refer to many other states, so this
|
|
206918
|
+
// can cascade recursively into more pointer writes.
|
|
206919
|
+
state->WriteDataPointers();
|
|
206379
206920
|
}
|
|
206380
206921
|
row_group_pointer.versions = version_info;
|
|
206381
206922
|
Verify();
|
|
@@ -206592,11 +207133,13 @@ void VersionDeleteState::Flush() {
|
|
|
206592
207133
|
|
|
206593
207134
|
|
|
206594
207135
|
|
|
207136
|
+
|
|
206595
207137
|
namespace duckdb {
|
|
206596
207138
|
|
|
206597
|
-
RowGroupCollection::RowGroupCollection(shared_ptr<DataTableInfo> info_p,
|
|
206598
|
-
idx_t total_rows_p)
|
|
206599
|
-
: total_rows(total_rows_p), info(move(info_p)), types(move(types_p)),
|
|
207139
|
+
RowGroupCollection::RowGroupCollection(shared_ptr<DataTableInfo> info_p, BlockManager &block_manager,
|
|
207140
|
+
vector<LogicalType> types_p, idx_t row_start_p, idx_t total_rows_p)
|
|
207141
|
+
: block_manager(block_manager), total_rows(total_rows_p), info(move(info_p)), types(move(types_p)),
|
|
207142
|
+
row_start(row_start_p) {
|
|
206600
207143
|
row_groups = make_shared<SegmentTree>();
|
|
206601
207144
|
}
|
|
206602
207145
|
|
|
@@ -206618,7 +207161,7 @@ Allocator &RowGroupCollection::GetAllocator() const {
|
|
|
206618
207161
|
void RowGroupCollection::Initialize(PersistentTableData &data) {
|
|
206619
207162
|
D_ASSERT(this->row_start == 0);
|
|
206620
207163
|
for (auto &row_group_pointer : data.row_groups) {
|
|
206621
|
-
auto new_row_group = make_unique<RowGroup>(info->db, *info, types, row_group_pointer);
|
|
207164
|
+
auto new_row_group = make_unique<RowGroup>(info->db, block_manager, *info, types, move(row_group_pointer));
|
|
206622
207165
|
auto row_group_count = new_row_group->start + new_row_group->count;
|
|
206623
207166
|
if (row_group_count > this->total_rows) {
|
|
206624
207167
|
this->total_rows = row_group_count;
|
|
@@ -206629,7 +207172,7 @@ void RowGroupCollection::Initialize(PersistentTableData &data) {
|
|
|
206629
207172
|
|
|
206630
207173
|
void RowGroupCollection::AppendRowGroup(idx_t start_row) {
|
|
206631
207174
|
D_ASSERT(start_row >= row_start);
|
|
206632
|
-
auto new_row_group = make_unique<RowGroup>(info->db, *info, start_row, 0);
|
|
207175
|
+
auto new_row_group = make_unique<RowGroup>(info->db, block_manager, *info, start_row, 0);
|
|
206633
207176
|
new_row_group->InitializeEmpty(types);
|
|
206634
207177
|
row_groups->AppendSegment(move(new_row_group));
|
|
206635
207178
|
}
|
|
@@ -206997,13 +207540,12 @@ void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_i
|
|
|
206997
207540
|
//===--------------------------------------------------------------------===//
|
|
206998
207541
|
// Checkpoint
|
|
206999
207542
|
//===--------------------------------------------------------------------===//
|
|
207000
|
-
void RowGroupCollection::Checkpoint(TableDataWriter &writer, vector<
|
|
207001
|
-
|
|
207002
|
-
|
|
207003
|
-
|
|
207004
|
-
auto pointer = row_group->Checkpoint(
|
|
207005
|
-
|
|
207006
|
-
row_group = (RowGroup *)row_group->next.get();
|
|
207543
|
+
void RowGroupCollection::Checkpoint(TableDataWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats) {
|
|
207544
|
+
for (auto row_group = (RowGroup *)row_groups->GetRootSegment(); row_group;
|
|
207545
|
+
row_group = (RowGroup *)row_group->next.get()) {
|
|
207546
|
+
auto rowg_writer = writer.GetRowGroupWriter(*row_group);
|
|
207547
|
+
auto pointer = row_group->Checkpoint(*rowg_writer, global_stats);
|
|
207548
|
+
writer.AddRowGroup(move(pointer), move(rowg_writer));
|
|
207007
207549
|
}
|
|
207008
207550
|
}
|
|
207009
207551
|
|
|
@@ -207052,7 +207594,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ColumnDefinition &n
|
|
|
207052
207594
|
idx_t new_column_idx = types.size();
|
|
207053
207595
|
auto new_types = types;
|
|
207054
207596
|
new_types.push_back(new_column.GetType());
|
|
207055
|
-
auto result = make_shared<RowGroupCollection>(info, move(new_types), row_start, total_rows.load());
|
|
207597
|
+
auto result = make_shared<RowGroupCollection>(info, block_manager, move(new_types), row_start, total_rows.load());
|
|
207056
207598
|
|
|
207057
207599
|
ExpressionExecutor executor(GetAllocator());
|
|
207058
207600
|
DataChunk dummy_chunk;
|
|
@@ -207082,7 +207624,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::RemoveColumn(idx_t col_idx) {
|
|
|
207082
207624
|
auto new_types = types;
|
|
207083
207625
|
new_types.erase(new_types.begin() + col_idx);
|
|
207084
207626
|
|
|
207085
|
-
auto result = make_shared<RowGroupCollection>(info, move(new_types), row_start, total_rows.load());
|
|
207627
|
+
auto result = make_shared<RowGroupCollection>(info, block_manager, move(new_types), row_start, total_rows.load());
|
|
207086
207628
|
|
|
207087
207629
|
auto current_row_group = (RowGroup *)row_groups->GetRootSegment();
|
|
207088
207630
|
while (current_row_group) {
|
|
@@ -207100,7 +207642,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(idx_t changed_idx,
|
|
|
207100
207642
|
auto new_types = types;
|
|
207101
207643
|
new_types[changed_idx] = target_type;
|
|
207102
207644
|
|
|
207103
|
-
auto result = make_shared<RowGroupCollection>(info, move(new_types), row_start, total_rows.load());
|
|
207645
|
+
auto result = make_shared<RowGroupCollection>(info, block_manager, move(new_types), row_start, total_rows.load());
|
|
207104
207646
|
|
|
207105
207647
|
vector<LogicalType> scan_types;
|
|
207106
207648
|
for (idx_t i = 0; i < bound_columns.size(); i++) {
|
|
@@ -207366,9 +207908,10 @@ void SegmentTree::Replace(SegmentTree &other) {
|
|
|
207366
207908
|
|
|
207367
207909
|
namespace duckdb {
|
|
207368
207910
|
|
|
207369
|
-
StandardColumnData::StandardColumnData(DataTableInfo &info, idx_t column_index,
|
|
207370
|
-
ColumnData *parent)
|
|
207371
|
-
: ColumnData(info, column_index, start_row, move(type), parent),
|
|
207911
|
+
StandardColumnData::StandardColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
|
|
207912
|
+
idx_t start_row, LogicalType type, ColumnData *parent)
|
|
207913
|
+
: ColumnData(block_manager, info, column_index, start_row, move(type), parent),
|
|
207914
|
+
validity(block_manager, info, 0, start_row, this) {
|
|
207372
207915
|
}
|
|
207373
207916
|
|
|
207374
207917
|
StandardColumnData::StandardColumnData(ColumnData &original, idx_t start_row, ColumnData *parent)
|
|
@@ -207516,7 +208059,7 @@ void StandardColumnData::CommitDropColumn() {
|
|
|
207516
208059
|
}
|
|
207517
208060
|
|
|
207518
208061
|
struct StandardColumnCheckpointState : public ColumnCheckpointState {
|
|
207519
|
-
StandardColumnCheckpointState(RowGroup &row_group, ColumnData &column_data,
|
|
208062
|
+
StandardColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, RowGroupWriter &writer)
|
|
207520
208063
|
: ColumnCheckpointState(row_group, column_data, writer) {
|
|
207521
208064
|
}
|
|
207522
208065
|
|
|
@@ -207529,18 +208072,18 @@ public:
|
|
|
207529
208072
|
return move(global_stats);
|
|
207530
208073
|
}
|
|
207531
208074
|
|
|
207532
|
-
void
|
|
207533
|
-
ColumnCheckpointState::
|
|
207534
|
-
validity_state->
|
|
208075
|
+
void WriteDataPointers() override {
|
|
208076
|
+
ColumnCheckpointState::WriteDataPointers();
|
|
208077
|
+
validity_state->WriteDataPointers();
|
|
207535
208078
|
}
|
|
207536
208079
|
};
|
|
207537
208080
|
|
|
207538
208081
|
unique_ptr<ColumnCheckpointState> StandardColumnData::CreateCheckpointState(RowGroup &row_group,
|
|
207539
|
-
|
|
208082
|
+
RowGroupWriter &writer) {
|
|
207540
208083
|
return make_unique<StandardColumnCheckpointState>(row_group, *this, writer);
|
|
207541
208084
|
}
|
|
207542
208085
|
|
|
207543
|
-
unique_ptr<ColumnCheckpointState> StandardColumnData::Checkpoint(RowGroup &row_group,
|
|
208086
|
+
unique_ptr<ColumnCheckpointState> StandardColumnData::Checkpoint(RowGroup &row_group, RowGroupWriter &writer,
|
|
207544
208087
|
ColumnCheckpointInfo &checkpoint_info) {
|
|
207545
208088
|
auto validity_state = validity.Checkpoint(row_group, writer, checkpoint_info);
|
|
207546
208089
|
auto base_state = ColumnData::Checkpoint(row_group, writer, checkpoint_info);
|
|
@@ -207582,9 +208125,10 @@ void StandardColumnData::Verify(RowGroup &parent) {
|
|
|
207582
208125
|
|
|
207583
208126
|
namespace duckdb {
|
|
207584
208127
|
|
|
207585
|
-
StructColumnData::StructColumnData(DataTableInfo &info, idx_t column_index,
|
|
207586
|
-
ColumnData *parent)
|
|
207587
|
-
: ColumnData(info, column_index, start_row, move(type_p), parent),
|
|
208128
|
+
StructColumnData::StructColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
|
|
208129
|
+
idx_t start_row, LogicalType type_p, ColumnData *parent)
|
|
208130
|
+
: ColumnData(block_manager, info, column_index, start_row, move(type_p), parent),
|
|
208131
|
+
validity(block_manager, info, 0, start_row, this) {
|
|
207588
208132
|
D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
|
|
207589
208133
|
auto &child_types = StructType::GetChildTypes(type);
|
|
207590
208134
|
D_ASSERT(child_types.size() > 0);
|
|
@@ -207592,7 +208136,7 @@ StructColumnData::StructColumnData(DataTableInfo &info, idx_t column_index, idx_
|
|
|
207592
208136
|
idx_t sub_column_index = 1;
|
|
207593
208137
|
for (auto &child_type : child_types) {
|
|
207594
208138
|
sub_columns.push_back(
|
|
207595
|
-
ColumnData::CreateColumnUnique(info, sub_column_index, start_row, child_type.second, this));
|
|
208139
|
+
ColumnData::CreateColumnUnique(block_manager, info, sub_column_index, start_row, child_type.second, this));
|
|
207596
208140
|
sub_column_index++;
|
|
207597
208141
|
}
|
|
207598
208142
|
}
|
|
@@ -207814,7 +208358,7 @@ void StructColumnData::CommitDropColumn() {
|
|
|
207814
208358
|
}
|
|
207815
208359
|
|
|
207816
208360
|
struct StructColumnCheckpointState : public ColumnCheckpointState {
|
|
207817
|
-
StructColumnCheckpointState(RowGroup &row_group, ColumnData &column_data,
|
|
208361
|
+
StructColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, RowGroupWriter &writer)
|
|
207818
208362
|
: ColumnCheckpointState(row_group, column_data, writer) {
|
|
207819
208363
|
global_stats = make_unique<StructStatistics>(column_data.type);
|
|
207820
208364
|
}
|
|
@@ -207834,20 +208378,19 @@ public:
|
|
|
207834
208378
|
return move(stats);
|
|
207835
208379
|
}
|
|
207836
208380
|
|
|
207837
|
-
void
|
|
207838
|
-
validity_state->
|
|
208381
|
+
void WriteDataPointers() override {
|
|
208382
|
+
validity_state->WriteDataPointers();
|
|
207839
208383
|
for (auto &state : child_states) {
|
|
207840
|
-
state->
|
|
208384
|
+
state->WriteDataPointers();
|
|
207841
208385
|
}
|
|
207842
208386
|
}
|
|
207843
208387
|
};
|
|
207844
208388
|
|
|
207845
|
-
unique_ptr<ColumnCheckpointState> StructColumnData::CreateCheckpointState(RowGroup &row_group,
|
|
207846
|
-
TableDataWriter &writer) {
|
|
208389
|
+
unique_ptr<ColumnCheckpointState> StructColumnData::CreateCheckpointState(RowGroup &row_group, RowGroupWriter &writer) {
|
|
207847
208390
|
return make_unique<StructColumnCheckpointState>(row_group, *this, writer);
|
|
207848
208391
|
}
|
|
207849
208392
|
|
|
207850
|
-
unique_ptr<ColumnCheckpointState> StructColumnData::Checkpoint(RowGroup &row_group,
|
|
208393
|
+
unique_ptr<ColumnCheckpointState> StructColumnData::Checkpoint(RowGroup &row_group, RowGroupWriter &writer,
|
|
207851
208394
|
ColumnCheckpointInfo &checkpoint_info) {
|
|
207852
208395
|
auto checkpoint_state = make_unique<StructColumnCheckpointState>(row_group, *this, writer);
|
|
207853
208396
|
checkpoint_state->validity_state = validity.Checkpoint(row_group, writer, checkpoint_info);
|
|
@@ -209279,8 +209822,9 @@ bool UpdateSegment::HasUpdates(idx_t start_row_index, idx_t end_row_index) {
|
|
|
209279
209822
|
|
|
209280
209823
|
namespace duckdb {
|
|
209281
209824
|
|
|
209282
|
-
ValidityColumnData::ValidityColumnData(DataTableInfo &info, idx_t column_index,
|
|
209283
|
-
|
|
209825
|
+
ValidityColumnData::ValidityColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
|
|
209826
|
+
idx_t start_row, ColumnData *parent)
|
|
209827
|
+
: ColumnData(block_manager, info, column_index, start_row, LogicalType(LogicalTypeId::VALIDITY), parent) {
|
|
209284
209828
|
}
|
|
209285
209829
|
|
|
209286
209830
|
ValidityColumnData::ValidityColumnData(ColumnData &original, idx_t start_row, ColumnData *parent)
|
|
@@ -209384,6 +209928,7 @@ vector<BlockPointer> TableIndexList::SerializeIndexes(duckdb::MetaBlockWriter &w
|
|
|
209384
209928
|
|
|
209385
209929
|
|
|
209386
209930
|
|
|
209931
|
+
|
|
209387
209932
|
namespace duckdb {
|
|
209388
209933
|
|
|
209389
209934
|
class ReplayState {
|
|
@@ -209472,8 +210017,8 @@ bool WriteAheadLog::Replay(DatabaseInstance &database, string &path) {
|
|
|
209472
210017
|
initial_reader.reset();
|
|
209473
210018
|
if (checkpoint_state.checkpoint_id != INVALID_BLOCK) {
|
|
209474
210019
|
// there is a checkpoint flag: check if we need to deserialize the WAL
|
|
209475
|
-
auto &manager =
|
|
209476
|
-
if (manager.
|
|
210020
|
+
auto &manager = StorageManager::GetStorageManager(database);
|
|
210021
|
+
if (manager.IsCheckpointClean(checkpoint_state.checkpoint_id)) {
|
|
209477
210022
|
// the contents of the WAL have already been checkpointed
|
|
209478
210023
|
// we can safely truncate the WAL and ignore its contents
|
|
209479
210024
|
return true;
|
|
@@ -209904,15 +210449,14 @@ void ReplayState::ReplayCheckpoint() {
|
|
|
209904
210449
|
|
|
209905
210450
|
namespace duckdb {
|
|
209906
210451
|
|
|
209907
|
-
WriteAheadLog::WriteAheadLog(DatabaseInstance &database) :
|
|
209908
|
-
}
|
|
209909
|
-
|
|
209910
|
-
void WriteAheadLog::Initialize(string &path) {
|
|
210452
|
+
WriteAheadLog::WriteAheadLog(DatabaseInstance &database, const string &path) : skip_writing(false), database(database) {
|
|
209911
210453
|
wal_path = path;
|
|
209912
210454
|
writer = make_unique<BufferedFileWriter>(database.GetFileSystem(), path.c_str(),
|
|
209913
210455
|
FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE |
|
|
209914
210456
|
FileFlags::FILE_FLAGS_APPEND);
|
|
209915
|
-
|
|
210457
|
+
}
|
|
210458
|
+
|
|
210459
|
+
WriteAheadLog::~WriteAheadLog() {
|
|
209916
210460
|
}
|
|
209917
210461
|
|
|
209918
210462
|
int64_t WriteAheadLog::GetWALSize() {
|
|
@@ -209930,10 +210474,9 @@ void WriteAheadLog::Truncate(int64_t size) {
|
|
|
209930
210474
|
}
|
|
209931
210475
|
|
|
209932
210476
|
void WriteAheadLog::Delete() {
|
|
209933
|
-
if (!
|
|
210477
|
+
if (!writer) {
|
|
209934
210478
|
return;
|
|
209935
210479
|
}
|
|
209936
|
-
initialized = false;
|
|
209937
210480
|
writer.reset();
|
|
209938
210481
|
|
|
209939
210482
|
auto &fs = FileSystem::GetFileSystem(database);
|
|
@@ -210869,41 +211412,23 @@ bool Transaction::ChangesMade() {
|
|
|
210869
211412
|
}
|
|
210870
211413
|
|
|
210871
211414
|
bool Transaction::AutomaticCheckpoint(DatabaseInstance &db) {
|
|
210872
|
-
auto &config = DBConfig::GetConfig(db);
|
|
210873
211415
|
auto &storage_manager = StorageManager::GetStorageManager(db);
|
|
210874
|
-
|
|
210875
|
-
if (!log) {
|
|
210876
|
-
return false;
|
|
210877
|
-
}
|
|
210878
|
-
|
|
210879
|
-
auto initial_size = log->GetWALSize();
|
|
210880
|
-
idx_t expected_wal_size = initial_size + storage.EstimatedSize() + undo_buffer.EstimatedSize();
|
|
210881
|
-
return expected_wal_size > config.options.checkpoint_wal_size;
|
|
211416
|
+
return storage_manager.AutomaticCheckpoint(storage.EstimatedSize() + undo_buffer.EstimatedSize());
|
|
210882
211417
|
}
|
|
210883
211418
|
|
|
210884
211419
|
string Transaction::Commit(DatabaseInstance &db, transaction_t commit_id, bool checkpoint) noexcept {
|
|
211420
|
+
// "checkpoint" parameter indicates if the caller will checkpoint. If checkpoint ==
|
|
211421
|
+
// true: Then this function will NOT write to the WAL or flush/persist.
|
|
211422
|
+
// This method only makes commit in memory, expecting caller to checkpoint/flush.
|
|
211423
|
+
// false: Then this function WILL write to the WAL and Flush/Persist it.
|
|
210885
211424
|
this->commit_id = commit_id;
|
|
210886
211425
|
auto &storage_manager = StorageManager::GetStorageManager(db);
|
|
210887
211426
|
auto log = storage_manager.GetWriteAheadLog();
|
|
210888
211427
|
|
|
210889
211428
|
UndoBuffer::IteratorState iterator_state;
|
|
210890
211429
|
LocalStorage::CommitState commit_state;
|
|
210891
|
-
|
|
210892
|
-
idx_t initial_written = 0;
|
|
210893
|
-
if (log) {
|
|
210894
|
-
auto initial_size = log->GetWALSize();
|
|
210895
|
-
initial_written = log->GetTotalWritten();
|
|
210896
|
-
initial_wal_size = initial_size < 0 ? 0 : idx_t(initial_size);
|
|
210897
|
-
} else {
|
|
210898
|
-
D_ASSERT(!checkpoint);
|
|
210899
|
-
}
|
|
211430
|
+
auto storage_commit_state = storage_manager.GenStorageCommitState(*this, checkpoint);
|
|
210900
211431
|
try {
|
|
210901
|
-
if (checkpoint) {
|
|
210902
|
-
// check if we are checkpointing after this commit
|
|
210903
|
-
// if we are checkpointing, we don't need to write anything to the WAL
|
|
210904
|
-
// this saves us a lot of unnecessary writes to disk in the case of large commits
|
|
210905
|
-
log->skip_writing = true;
|
|
210906
|
-
}
|
|
210907
211432
|
storage.Commit(commit_state, *this, log, commit_id);
|
|
210908
211433
|
undo_buffer.Commit(iterator_state, log, commit_id);
|
|
210909
211434
|
if (log) {
|
|
@@ -210911,25 +211436,11 @@ string Transaction::Commit(DatabaseInstance &db, transaction_t commit_id, bool c
|
|
|
210911
211436
|
for (auto &entry : sequence_usage) {
|
|
210912
211437
|
log->WriteSequenceValue(entry.first, entry.second);
|
|
210913
211438
|
}
|
|
210914
|
-
// flush the WAL if any changes were made
|
|
210915
|
-
if (log->GetTotalWritten() > initial_written) {
|
|
210916
|
-
D_ASSERT(!checkpoint);
|
|
210917
|
-
D_ASSERT(!log->skip_writing);
|
|
210918
|
-
log->Flush();
|
|
210919
|
-
}
|
|
210920
|
-
log->skip_writing = false;
|
|
210921
211439
|
}
|
|
211440
|
+
storage_commit_state->FlushCommit();
|
|
210922
211441
|
return string();
|
|
210923
211442
|
} catch (std::exception &ex) {
|
|
210924
211443
|
undo_buffer.RevertCommit(iterator_state, transaction_id);
|
|
210925
|
-
if (log) {
|
|
210926
|
-
log->skip_writing = false;
|
|
210927
|
-
if (log->GetTotalWritten() > initial_written) {
|
|
210928
|
-
// remove any entries written into the WAL by truncating it
|
|
210929
|
-
log->Truncate(initial_wal_size);
|
|
210930
|
-
}
|
|
210931
|
-
}
|
|
210932
|
-
D_ASSERT(!log || !log->skip_writing);
|
|
210933
211444
|
return ex.what();
|
|
210934
211445
|
}
|
|
210935
211446
|
}
|