duckdb 0.5.2-dev547.0 → 0.5.2-dev561.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "83aff54f1"
15
- #define DUCKDB_VERSION "v0.5.2-dev547"
14
+ #define DUCKDB_SOURCE_ID "c2e70c43d"
15
+ #define DUCKDB_VERSION "v0.5.2-dev561"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -17831,12 +17831,13 @@ public:
17831
17831
 
17832
17832
 
17833
17833
  namespace duckdb {
17834
+ class BlockManager;
17834
17835
  class ColumnData;
17835
17836
  class DatabaseInstance;
17836
17837
  class DataTable;
17837
17838
  struct DataTableInfo;
17838
17839
  class ExpressionExecutor;
17839
- class TableDataWriter;
17840
+ class RowGroupWriter;
17840
17841
  class UpdateSegment;
17841
17842
  class Vector;
17842
17843
  struct RowGroupPointer;
@@ -17853,15 +17854,17 @@ public:
17853
17854
  static constexpr const idx_t ROW_GROUP_SIZE = STANDARD_VECTOR_SIZE * ROW_GROUP_VECTOR_COUNT;
17854
17855
 
17855
17856
  public:
17856
- RowGroup(DatabaseInstance &db, DataTableInfo &table_info, idx_t start, idx_t count);
17857
- RowGroup(DatabaseInstance &db, DataTableInfo &table_info, const vector<LogicalType> &types,
17858
- RowGroupPointer &pointer);
17857
+ RowGroup(DatabaseInstance &db, BlockManager &block_manager, DataTableInfo &table_info, idx_t start, idx_t count);
17858
+ RowGroup(DatabaseInstance &db, BlockManager &block_manager, DataTableInfo &table_info,
17859
+ const vector<LogicalType> &types, RowGroupPointer &&pointer);
17859
17860
  RowGroup(RowGroup &row_group, idx_t start);
17860
17861
  ~RowGroup();
17861
17862
 
17862
17863
  private:
17863
17864
  //! The database instance
17864
17865
  DatabaseInstance &db;
17866
+ //! The block manager
17867
+ BlockManager &block_manager;
17865
17868
  //! The table info of this row_group
17866
17869
  DataTableInfo &table_info;
17867
17870
  //! The version info of the row_group (inserted and deleted tuple info)
@@ -17875,6 +17878,9 @@ public:
17875
17878
  DatabaseInstance &GetDatabase() {
17876
17879
  return db;
17877
17880
  }
17881
+ BlockManager &GetBlockManager() {
17882
+ return block_manager;
17883
+ }
17878
17884
  DataTableInfo &GetTableInfo() {
17879
17885
  return table_info;
17880
17886
  }
@@ -17930,7 +17936,7 @@ public:
17930
17936
  //! Delete the given set of rows in the version manager
17931
17937
  idx_t Delete(TransactionData transaction, DataTable *table, row_t *row_ids, idx_t count);
17932
17938
 
17933
- RowGroupPointer Checkpoint(TableDataWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats);
17939
+ RowGroupPointer Checkpoint(RowGroupWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats);
17934
17940
  static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
17935
17941
  static RowGroupPointer Deserialize(Deserializer &source, const vector<ColumnDefinition> &columns);
17936
17942
 
@@ -18060,13 +18066,14 @@ namespace duckdb {
18060
18066
  struct ParallelTableScanState;
18061
18067
 
18062
18068
  class PersistentTableData;
18069
+ class TableDataWriter;
18063
18070
  class TableIndexList;
18064
18071
  class TableStatistics;
18065
18072
 
18066
18073
  class RowGroupCollection {
18067
18074
  public:
18068
- RowGroupCollection(shared_ptr<DataTableInfo> info, vector<LogicalType> types, idx_t row_start,
18069
- idx_t total_rows = 0);
18075
+ RowGroupCollection(shared_ptr<DataTableInfo> info, BlockManager &block_manager, vector<LogicalType> types,
18076
+ idx_t row_start, idx_t total_rows = 0);
18070
18077
 
18071
18078
  public:
18072
18079
  idx_t GetTotalRows() const;
@@ -18106,8 +18113,7 @@ public:
18106
18113
  void UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,
18107
18114
  DataChunk &updates, TableStatistics &stats);
18108
18115
 
18109
- void Checkpoint(TableDataWriter &writer, vector<RowGroupPointer> &row_group_pointers,
18110
- vector<unique_ptr<BaseStatistics>> &global_stats);
18116
+ void Checkpoint(TableDataWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats);
18111
18117
 
18112
18118
  void CommitDropColumn(idx_t index);
18113
18119
  void CommitDropTable();
@@ -18124,6 +18130,8 @@ public:
18124
18130
  void VerifyNewConstraint(DataTable &parent, const BoundConstraint &constraint);
18125
18131
 
18126
18132
  private:
18133
+ //! BlockManager
18134
+ BlockManager &block_manager;
18127
18135
  //! The number of rows in the table
18128
18136
  atomic<idx_t> total_rows;
18129
18137
  shared_ptr<DataTableInfo> info;
@@ -19611,9 +19619,8 @@ public:
19611
19619
  BufferManager &buffer_manager;
19612
19620
 
19613
19621
  public:
19614
- virtual void StartCheckpoint() = 0;
19615
19622
  //! Creates a new block inside the block manager
19616
- virtual unique_ptr<Block> CreateBlock(block_id_t block_id) = 0;
19623
+ virtual unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) = 0;
19617
19624
  //! Return the next free block id
19618
19625
  virtual block_id_t GetFreeBlockId() = 0;
19619
19626
  //! Returns whether or not a specified block is the root block
@@ -20189,25 +20196,34 @@ private:
20189
20196
  namespace duckdb {
20190
20197
  class DatabaseInstance;
20191
20198
 
20192
- //! This struct is responsible for writing metadata to disk
20199
+ //! This class is responsible for writing data to disk in a stream of blocks.
20193
20200
  class MetaBlockWriter : public Serializer {
20194
20201
  public:
20195
- MetaBlockWriter(DatabaseInstance &db, block_id_t initial_block_id = INVALID_BLOCK);
20202
+ MetaBlockWriter(BlockManager &block_manager, block_id_t initial_block_id = INVALID_BLOCK);
20196
20203
  ~MetaBlockWriter() override;
20197
20204
 
20198
- DatabaseInstance &db;
20205
+ BlockManager &block_manager;
20206
+
20207
+ protected:
20199
20208
  unique_ptr<Block> block;
20200
20209
  set<block_id_t> written_blocks;
20201
20210
  idx_t offset;
20202
20211
 
20203
20212
  public:
20204
20213
  BlockPointer GetBlockPointer();
20205
- void Flush();
20214
+ virtual void Flush();
20206
20215
 
20207
20216
  void WriteData(const_data_ptr_t buffer, idx_t write_size) override;
20208
20217
 
20218
+ void MarkWrittenBlocks() {
20219
+ for (auto &block_id : written_blocks) {
20220
+ block_manager.MarkBlockAsModified(block_id);
20221
+ }
20222
+ }
20223
+
20209
20224
  protected:
20210
20225
  virtual block_id_t GetNextBlockId();
20226
+ void AdvanceBlock();
20211
20227
  };
20212
20228
 
20213
20229
  } // namespace duckdb
@@ -20217,6 +20233,7 @@ protected:
20217
20233
  namespace duckdb {
20218
20234
 
20219
20235
  class ClientContext;
20236
+ class TableIOManager;
20220
20237
  class Transaction;
20221
20238
 
20222
20239
  struct IndexLock;
@@ -20224,12 +20241,14 @@ struct IndexLock;
20224
20241
  //! The index is an abstract base class that serves as the basis for indexes
20225
20242
  class Index {
20226
20243
  public:
20227
- Index(IndexType type, const vector<column_t> &column_ids, const vector<unique_ptr<Expression>> &unbound_expressions,
20228
- IndexConstraintType constraint_type);
20244
+ Index(IndexType type, TableIOManager &table_io_manager, const vector<column_t> &column_ids,
20245
+ const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type);
20229
20246
  virtual ~Index() = default;
20230
20247
 
20231
20248
  //! The type of the index
20232
20249
  IndexType type;
20250
+ //! Associated table io manager
20251
+ TableIOManager &table_io_manager;
20233
20252
  //! Column identifiers to extract from the base table
20234
20253
  vector<column_t> column_ids;
20235
20254
  //! unordered_set of column_ids used by the index
@@ -25740,7 +25759,6 @@ public:
25740
25759
 
25741
25760
 
25742
25761
 
25743
-
25744
25762
  //===----------------------------------------------------------------------===//
25745
25763
  // DuckDB
25746
25764
  //
@@ -25989,9 +26007,9 @@ public:
25989
26007
  //! The block that this segment relates to
25990
26008
  shared_ptr<BlockHandle> block;
25991
26009
 
25992
- static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, block_id_t id, idx_t offset,
25993
- const LogicalType &type_p, idx_t start, idx_t count,
25994
- CompressionType compression_type,
26010
+ static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
26011
+ block_id_t id, idx_t offset, const LogicalType &type_p,
26012
+ idx_t start, idx_t count, CompressionType compression_type,
25995
26013
  unique_ptr<BaseStatistics> statistics);
25996
26014
  static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start);
25997
26015
  static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
@@ -26022,15 +26040,20 @@ public:
26022
26040
 
26023
26041
  //! Convert a transient in-memory segment into a persistent segment backed by an on-disk block.
26024
26042
  //! Only used during checkpointing.
26025
- void ConvertToPersistent(block_id_t block_id);
26026
- //! Convert a transient in-memory segment into a persistent segment blocked by an on-disk block.
26027
- void ConvertToPersistent(shared_ptr<BlockHandle> block, block_id_t block_id, uint32_t offset_in_block);
26043
+ void ConvertToPersistent(BlockManager *block_manager, block_id_t block_id);
26044
+ //! Updates pointers to refer to the given block and offset. This is only used
26045
+ //! when sharing a block among segments. This is invoked only AFTER the block is written.
26046
+ void MarkAsPersistent(shared_ptr<BlockHandle> block, uint32_t offset_in_block);
26028
26047
 
26029
26048
  block_id_t GetBlockId() {
26030
26049
  D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT);
26031
26050
  return block_id;
26032
26051
  }
26033
26052
 
26053
+ BlockManager &GetBlockManager() const {
26054
+ return block->block_manager;
26055
+ }
26056
+
26034
26057
  idx_t GetBlockOffset() {
26035
26058
  D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT || offset == 0);
26036
26059
  return offset;
@@ -26047,9 +26070,9 @@ public:
26047
26070
  }
26048
26071
 
26049
26072
  public:
26050
- ColumnSegment(DatabaseInstance &db, LogicalType type, ColumnSegmentType segment_type, idx_t start, idx_t count,
26051
- CompressionFunction *function, unique_ptr<BaseStatistics> statistics, block_id_t block_id,
26052
- idx_t offset);
26073
+ ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
26074
+ idx_t start, idx_t count, CompressionFunction *function, unique_ptr<BaseStatistics> statistics,
26075
+ block_id_t block_id, idx_t offset);
26053
26076
  ColumnSegment(ColumnSegment &other, idx_t start);
26054
26077
 
26055
26078
  private:
@@ -26071,26 +26094,33 @@ private:
26071
26094
 
26072
26095
 
26073
26096
 
26097
+ //===----------------------------------------------------------------------===//
26098
+ // DuckDB
26099
+ //
26100
+ // duckdb/storage/table/data_table_info.hpp
26101
+ //
26102
+ //
26103
+ //===----------------------------------------------------------------------===//
26104
+
26105
+
26106
+
26107
+
26108
+
26074
26109
 
26075
26110
 
26076
26111
  namespace duckdb {
26077
- class ClientContext;
26078
- class ColumnDefinition;
26079
- class DataTable;
26080
- class RowGroup;
26081
- class StorageManager;
26082
- class TableCatalogEntry;
26083
- class Transaction;
26084
- class WriteAheadLog;
26085
- class TableDataWriter;
26112
+ class DatabaseInstance;
26113
+ class TableIOManager;
26086
26114
 
26087
26115
  struct DataTableInfo {
26088
- DataTableInfo(DatabaseInstance &db, string schema, string table)
26089
- : db(db), cardinality(0), schema(move(schema)), table(move(table)) {
26116
+ DataTableInfo(DatabaseInstance &db, shared_ptr<TableIOManager> table_io_manager_p, string schema, string table)
26117
+ : db(db), table_io_manager(move(table_io_manager_p)), cardinality(0), schema(move(schema)), table(move(table)) {
26090
26118
  }
26091
26119
 
26092
26120
  //! The database instance of the table
26093
26121
  DatabaseInstance &db;
26122
+ //! The table IO manager
26123
+ shared_ptr<TableIOManager> table_io_manager;
26094
26124
  //! The amount of elements in the table. Note that this number signifies the amount of COMMITTED entries in the
26095
26125
  //! table. It can be inaccurate inside of transactions. More work is needed to properly support that.
26096
26126
  atomic<idx_t> cardinality;
@@ -26106,12 +26136,28 @@ struct DataTableInfo {
26106
26136
  }
26107
26137
  };
26108
26138
 
26139
+ } // namespace duckdb
26140
+
26141
+
26142
+ namespace duckdb {
26143
+ class ClientContext;
26144
+ class ColumnDefinition;
26145
+ class DataTable;
26146
+ class RowGroup;
26147
+ class StorageManager;
26148
+ class TableCatalogEntry;
26149
+ class TableIOManager;
26150
+ class Transaction;
26151
+ class WriteAheadLog;
26152
+ class TableDataWriter;
26153
+
26109
26154
  //! DataTable represents a physical table on disk
26110
26155
  class DataTable {
26111
26156
  public:
26112
26157
  //! Constructs a new data table from an (optional) set of persistent segments
26113
- DataTable(DatabaseInstance &db, const string &schema, const string &table,
26114
- vector<ColumnDefinition> column_definitions_p, unique_ptr<PersistentTableData> data = nullptr);
26158
+ DataTable(DatabaseInstance &db, shared_ptr<TableIOManager> table_io_manager, const string &schema,
26159
+ const string &table, vector<ColumnDefinition> column_definitions_p,
26160
+ unique_ptr<PersistentTableData> data = nullptr);
26115
26161
  //! Constructs a DataTable as a delta on an existing data table with a newly added column
26116
26162
  DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression *default_value);
26117
26163
  //! Constructs a DataTable as a delta on an existing data table but with one column removed
@@ -26122,10 +26168,10 @@ public:
26122
26168
  //! Constructs a DataTable as a delta on an existing data table but with one newly added constraint
26123
26169
  DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint);
26124
26170
 
26171
+ //! The table info
26125
26172
  shared_ptr<DataTableInfo> info;
26126
-
26173
+ //! The set of physical columns stored by this DataTable
26127
26174
  vector<ColumnDefinition> column_definitions;
26128
-
26129
26175
  //! A reference to the database instance
26130
26176
  DatabaseInstance &db;
26131
26177