lbug 0.12.3-dev.15 → 0.12.3-dev.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
2
 
3
- project(Lbug VERSION 0.12.3.15 LANGUAGES CXX C)
3
+ project(Lbug VERSION 0.12.3.16 LANGUAGES CXX C)
4
4
 
5
5
  option(SINGLE_THREADED "Single-threaded mode" FALSE)
6
6
  if(SINGLE_THREADED)
@@ -228,7 +228,8 @@ FunctionCollection* FunctionCollection::getFunctions() {
228
228
  TABLE_FUNCTION(StatsInfoFunction), TABLE_FUNCTION(StorageInfoFunction),
229
229
  TABLE_FUNCTION(ShowAttachedDatabasesFunction), TABLE_FUNCTION(ShowSequencesFunction),
230
230
  TABLE_FUNCTION(ShowFunctionsFunction), TABLE_FUNCTION(BMInfoFunction),
231
- TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(ShowLoadedExtensionsFunction),
231
+ TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(DiskSizeInfoFunction),
232
+ TABLE_FUNCTION(ShowLoadedExtensionsFunction),
232
233
  TABLE_FUNCTION(ShowOfficialExtensionsFunction), TABLE_FUNCTION(ShowIndexesFunction),
233
234
  TABLE_FUNCTION(ShowProjectedGraphsFunction), TABLE_FUNCTION(ProjectedGraphInfoFunction),
234
235
  TABLE_FUNCTION(ShowMacrosFunction),
@@ -8,6 +8,7 @@ add_library(lbug_table_function
8
8
  clear_warnings.cpp
9
9
  current_setting.cpp
10
10
  db_version.cpp
11
+ disk_size_info.cpp
11
12
  drop_project_graph.cpp
12
13
  file_info.cpp
13
14
  free_space_info.cpp
@@ -0,0 +1,322 @@
1
+ #include "binder/binder.h"
2
+ #include "catalog/catalog.h"
3
+ #include "catalog/catalog_entry/node_table_catalog_entry.h"
4
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
5
+ #include "common/exception/binder.h"
6
+ #include "function/table/bind_data.h"
7
+ #include "function/table/simple_table_function.h"
8
+ #include "main/client_context.h"
9
+ #include "storage/database_header.h"
10
+ #include "storage/index/hash_index.h"
11
+ #include "storage/page_manager.h"
12
+ #include "storage/storage_manager.h"
13
+ #include "storage/table/list_chunk_data.h"
14
+ #include "storage/table/node_table.h"
15
+ #include "storage/table/rel_table.h"
16
+ #include "storage/table/string_chunk_data.h"
17
+ #include "storage/table/struct_chunk_data.h"
18
+ #include "transaction/transaction.h"
19
+
20
+ using namespace lbug::common;
21
+ using namespace lbug::catalog;
22
+ using namespace lbug::storage;
23
+ using namespace lbug::main;
24
+
25
+ namespace lbug {
26
+ namespace function {
27
+
28
+ struct DiskSizeInfoBindData final : TableFuncBindData {
29
+ const ClientContext* ctx;
30
+ DiskSizeInfoBindData(binder::expression_vector columns, row_idx_t numRows,
31
+ const ClientContext* ctx)
32
+ : TableFuncBindData{std::move(columns), numRows}, ctx{ctx} {}
33
+
34
+ std::unique_ptr<TableFuncBindData> copy() const override {
35
+ return std::make_unique<DiskSizeInfoBindData>(columns, numRows, ctx);
36
+ }
37
+ };
38
+
39
+ static uint64_t countChunkDataPages(const ColumnChunkData& chunkData) {
40
+ uint64_t pages = 0;
41
+ auto metadata = chunkData.getResidencyState() == ResidencyState::ON_DISK ?
42
+ chunkData.getMetadata() :
43
+ chunkData.getMetadataToFlush();
44
+ pages += metadata.getNumPages();
45
+
46
+ if (chunkData.hasNullData()) {
47
+ pages += countChunkDataPages(*chunkData.getNullData());
48
+ }
49
+
50
+ auto physicalType = chunkData.getDataType().getPhysicalType();
51
+ switch (physicalType) {
52
+ case PhysicalTypeID::STRUCT: {
53
+ auto& structChunk = chunkData.cast<StructChunkData>();
54
+ for (auto i = 0u; i < structChunk.getNumChildren(); i++) {
55
+ pages += countChunkDataPages(structChunk.getChild(i));
56
+ }
57
+ } break;
58
+ case PhysicalTypeID::STRING: {
59
+ auto& stringChunk = chunkData.cast<StringChunkData>();
60
+ pages += countChunkDataPages(*stringChunk.getIndexColumnChunk());
61
+ auto& dictionaryChunk = stringChunk.getDictionaryChunk();
62
+ pages += countChunkDataPages(*dictionaryChunk.getStringDataChunk());
63
+ pages += countChunkDataPages(*dictionaryChunk.getOffsetChunk());
64
+ } break;
65
+ case PhysicalTypeID::ARRAY:
66
+ case PhysicalTypeID::LIST: {
67
+ auto& listChunk = chunkData.cast<ListChunkData>();
68
+ pages += countChunkDataPages(*listChunk.getOffsetColumnChunk());
69
+ pages += countChunkDataPages(*listChunk.getSizeColumnChunk());
70
+ pages += countChunkDataPages(*listChunk.getDataColumnChunk());
71
+ } break;
72
+ default:
73
+ break;
74
+ }
75
+ return pages;
76
+ }
77
+
78
+ static uint64_t countChunkedGroupPages(ChunkedNodeGroup* chunkedGroup) {
79
+ uint64_t pages = 0;
80
+ auto numColumns = chunkedGroup->getNumColumns();
81
+ for (auto i = 0u; i < numColumns; i++) {
82
+ for (auto* segment : chunkedGroup->getColumnChunk(i).getSegments()) {
83
+ pages += countChunkDataPages(*segment);
84
+ }
85
+ }
86
+ if (chunkedGroup->getFormat() == NodeGroupDataFormat::CSR) {
87
+ auto& chunkedCSRGroup = chunkedGroup->cast<ChunkedCSRNodeGroup>();
88
+ for (auto* segment : chunkedCSRGroup.getCSRHeader().offset->getSegments()) {
89
+ pages += countChunkDataPages(*segment);
90
+ }
91
+ for (auto* segment : chunkedCSRGroup.getCSRHeader().length->getSegments()) {
92
+ pages += countChunkDataPages(*segment);
93
+ }
94
+ }
95
+ return pages;
96
+ }
97
+
98
+ static uint64_t countNodeGroupPages(NodeGroup* nodeGroup) {
99
+ uint64_t pages = 0;
100
+ auto numChunks = nodeGroup->getNumChunkedGroups();
101
+ for (auto chunkIdx = 0ul; chunkIdx < numChunks; chunkIdx++) {
102
+ pages += countChunkedGroupPages(nodeGroup->getChunkedNodeGroup(chunkIdx));
103
+ }
104
+ if (nodeGroup->getFormat() == NodeGroupDataFormat::CSR) {
105
+ auto& csrNodeGroup = nodeGroup->cast<CSRNodeGroup>();
106
+ auto persistentChunk = csrNodeGroup.getPersistentChunkedGroup();
107
+ if (persistentChunk) {
108
+ pages += countChunkedGroupPages(persistentChunk);
109
+ }
110
+ }
111
+ return pages;
112
+ }
113
+
114
+ struct DiskSizeEntry { // One output row of DISK_SIZE_INFO; brace-initialized positionally, so field order matters.
115
+ std::string category; // Row kind, e.g. "header", "catalog", "node_table", "rel_table", "free_space", "total".
116
+ std::string name; // Label within the category, e.g. a table name or "file_total".
117
+ uint64_t numPages; // Number of pages attributed to this row.
118
+ uint64_t sizeBytes; // Size in bytes; producers fill it with numPages * LBUG_PAGE_SIZE.
119
+ };
120
+
121
+ // Estimate the number of pages used by a hash index based on the number of entries
122
+ // Hash index structure:
123
+ // - INDEX_HEADER_PAGES pages for HashIndexHeaderOnDisk (2 pages for 256 sub-indexes)
124
+ // - DiskArrayCollection header pages (1+ pages)
125
+ // - For each of 256 sub-indexes: pSlots and oSlots disk arrays
126
+ // - Each slot is SLOT_CAPACITY_BYTES (256 bytes), so 16 slots per page
127
+ // - Number of primary slots = 2^currentLevel + nextSplitSlotId
128
+ // - Overflow slots depend on collisions
129
+ static uint64_t estimateHashIndexPages(const PrimaryKeyIndex* pkIndex) {
130
+ if (!pkIndex) {
131
+ return 0;
132
+ }
133
+
134
+ uint64_t totalPages = 0;
135
+
136
+ // Index header pages (storing HashIndexHeaderOnDisk for all 256 sub-indexes)
137
+ totalPages += INDEX_HEADER_PAGES; // 2 pages
138
+
139
+ // DiskArrayCollection header pages (at least 1)
140
+ // Each header page stores headers for up to ~170 disk arrays
141
+ // With 256 sub-indexes * 2 arrays (pSlots + oSlots) = 512 arrays
142
+ totalPages += 4; // Approximate: ~3-4 header pages for DiskArrayCollection
143
+
144
+ // For each sub-index, estimate primary and overflow slot pages
145
+ // We can access the headers through the pkIndex to get actual sizes
146
+ // But since the headers are private, we estimate based on numEntries
147
+
148
+ // Get total entries from all sub-indexes
149
+ // Each entry requires a slot, and slots have capacity of ~3-20 entries depending on key type
150
+ // With linear hashing, we expect ~70-80% fill rate
151
+
152
+ // Rough estimation: For N entries with 8-byte keys:
153
+ // - Slot capacity is approximately 3 entries per slot (256-byte slot / 80 bytes per entry)
154
+ // - Number of slots ≈ N / (3 * 0.7) ≈ N / 2
155
+ // - Pages for slots = slots / 16 (16 slots per page)
156
+ // - Plus PIP pages for addressing
157
+
158
+ // Since we can't easily access internal headers, we return the header overhead
159
+ // and let the unaccounted calculation handle the rest
160
+ return totalPages;
161
+ }
162
+
163
+ static std::vector<DiskSizeEntry> collectDiskSizeInfo(const ClientContext* context) {
164
+ std::vector<DiskSizeEntry> entries;
165
+ auto storageManager = StorageManager::Get(*context);
166
+ auto catalog = Catalog::Get(*context);
167
+ auto dataFH = storageManager->getDataFH();
168
+
169
+ // Handle in-memory databases
170
+ if (storageManager->isInMemory()) {
171
+ entries.push_back({"info", "in_memory_database", 0, 0});
172
+ return entries;
173
+ }
174
+
175
+ auto pageManager = dataFH->getPageManager();
176
+
177
+ // 1. Database header (always 1 page at index 0)
178
+ entries.push_back({"header", "database_header", 1, LBUG_PAGE_SIZE});
179
+
180
+ // 2. Get catalog and metadata page ranges from database header
181
+ auto databaseHeader = DatabaseHeader::readDatabaseHeader(*dataFH->getFileInfo());
182
+ if (databaseHeader.has_value()) {
183
+ entries.push_back({"catalog", "catalog", databaseHeader->catalogPageRange.numPages,
184
+ databaseHeader->catalogPageRange.numPages * LBUG_PAGE_SIZE});
185
+
186
+ entries.push_back({"metadata", "metadata", databaseHeader->metadataPageRange.numPages,
187
+ databaseHeader->metadataPageRange.numPages * LBUG_PAGE_SIZE});
188
+ }
189
+
190
+ // 3. Count table data pages
191
+ auto nodeTableEntries =
192
+ catalog->getNodeTableEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
193
+ auto relGroupEntries = catalog->getRelGroupEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
194
+
195
+ for (const auto tableEntry : nodeTableEntries) {
196
+ auto& nodeTable = storageManager->getTable(tableEntry->getTableID())->cast<NodeTable>();
197
+ uint64_t tablePages = 0;
198
+ auto numNodeGroups = nodeTable.getNumNodeGroups();
199
+ for (auto i = 0ul; i < numNodeGroups; i++) {
200
+ tablePages += countNodeGroupPages(nodeTable.getNodeGroup(i));
201
+ }
202
+ entries.push_back(
203
+ {"node_table", tableEntry->getName(), tablePages, tablePages * LBUG_PAGE_SIZE});
204
+
205
+ // Count primary key index header pages (rough estimate for overhead)
206
+ auto* pkIndex = nodeTable.getPKIndex();
207
+ uint64_t indexPages = estimateHashIndexPages(pkIndex);
208
+ if (indexPages > 0) {
209
+ entries.push_back({"pk_index_overhead", tableEntry->getName() + "_pk", indexPages,
210
+ indexPages * LBUG_PAGE_SIZE});
211
+ }
212
+ }
213
+
214
+ for (const auto entry : relGroupEntries) {
215
+ auto& relGroupEntry = entry->cast<RelGroupCatalogEntry>();
216
+ for (auto& info : relGroupEntry.getRelEntryInfos()) {
217
+ auto& relTable = storageManager->getTable(info.oid)->cast<RelTable>();
218
+ uint64_t tablePages = 0;
219
+
220
+ for (auto direction : relTable.getStorageDirections()) {
221
+ auto* directedRelTableData = relTable.getDirectedTableData(direction);
222
+ auto numNodeGroups = directedRelTableData->getNumNodeGroups();
223
+ for (auto i = 0ul; i < numNodeGroups; i++) {
224
+ tablePages += countNodeGroupPages(directedRelTableData->getNodeGroup(i));
225
+ }
226
+ }
227
+ auto tableName = relGroupEntry.getName() + ":" +
228
+ catalog
229
+ ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
230
+ info.nodePair.srcTableID)
231
+ ->getName() +
232
+ "->" +
233
+ catalog
234
+ ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
235
+ info.nodePair.dstTableID)
236
+ ->getName();
237
+ entries.push_back({"rel_table", tableName, tablePages, tablePages * LBUG_PAGE_SIZE});
238
+ }
239
+ }
240
+
241
+ // 4. Free space (from FSM)
242
+ auto freeEntries = pageManager->getFreeEntries(0, pageManager->getNumFreeEntries());
243
+ uint64_t freePages = 0;
244
+ for (const auto& freeEntry : freeEntries) {
245
+ freePages += freeEntry.numPages;
246
+ }
247
+ entries.push_back({"free_space", "free_pages", freePages, freePages * LBUG_PAGE_SIZE});
248
+
249
+ // 5. Calculate unaccounted pages (index slot data)
250
+ auto totalFilePages = dataFH->getNumPages();
251
+ uint64_t accountedPages = 1; // header
252
+ if (databaseHeader.has_value()) {
253
+ accountedPages +=
254
+ databaseHeader->catalogPageRange.numPages + databaseHeader->metadataPageRange.numPages;
255
+ }
256
+ for (const auto& entry : entries) {
257
+ if (entry.category == "node_table" || entry.category == "rel_table" ||
258
+ entry.category == "pk_index_overhead") {
259
+ accountedPages += entry.numPages;
260
+ }
261
+ }
262
+ accountedPages += freePages;
263
+
264
+ if (totalFilePages > accountedPages) {
265
+ uint64_t unaccountedPages = totalFilePages - accountedPages;
266
+ entries.push_back({"index_data", "hash_index_slots", unaccountedPages,
267
+ unaccountedPages * LBUG_PAGE_SIZE});
268
+ }
269
+
270
+ // 6. Total file size (last row)
271
+ entries.push_back({"total", "file_total", totalFilePages, totalFilePages * LBUG_PAGE_SIZE});
272
+
273
+ return entries;
274
+ }
275
+
276
+ static offset_t internalTableFunc(const TableFuncMorsel& morsel, const TableFuncInput& input,
277
+ DataChunk& output) {
278
+ const auto bindData = input.bindData->constPtrCast<DiskSizeInfoBindData>();
279
+ auto entries = collectDiskSizeInfo(bindData->ctx);
280
+
281
+ auto numEntriesToOutput = std::min(static_cast<uint64_t>(entries.size()) - morsel.startOffset,
282
+ morsel.getMorselSize());
283
+
284
+ for (row_idx_t i = 0; i < numEntriesToOutput; ++i) {
285
+ const auto& entry = entries[morsel.startOffset + i];
286
+ output.getValueVectorMutable(0).setValue(i, entry.category);
287
+ output.getValueVectorMutable(1).setValue(i, entry.name);
288
+ output.getValueVectorMutable(2).setValue<uint64_t>(i, entry.numPages);
289
+ output.getValueVectorMutable(3).setValue<uint64_t>(i, entry.sizeBytes);
290
+ }
291
+ return numEntriesToOutput;
292
+ }
293
+
294
+ static std::unique_ptr<TableFuncBindData> bindFunc(const ClientContext* context,
295
+ const TableFuncBindInput* input) {
296
+ std::vector<std::string> columnNames = {"category", "name", "num_pages", "size_bytes"};
297
+ std::vector<LogicalType> columnTypes;
298
+ columnTypes.push_back(LogicalType::STRING());
299
+ columnTypes.push_back(LogicalType::STRING());
300
+ columnTypes.push_back(LogicalType::UINT64());
301
+ columnTypes.push_back(LogicalType::UINT64());
302
+
303
+ // Get number of entries to report
304
+ auto entries = collectDiskSizeInfo(context);
305
+
306
+ auto columns = input->binder->createVariables(columnNames, columnTypes);
307
+ return std::make_unique<DiskSizeInfoBindData>(columns, entries.size(), context);
308
+ }
309
+
310
+ function_set DiskSizeInfoFunction::getFunctionSet() {
311
+ function_set functionSet;
312
+ auto function = std::make_unique<TableFunction>(name, std::vector<LogicalTypeID>{});
313
+ function->tableFunc = SimpleTableFunc::getTableFunc(internalTableFunc);
314
+ function->bindFunc = bindFunc;
315
+ function->initSharedStateFunc = SimpleTableFunc::initSharedState;
316
+ function->initLocalStateFunc = TableFunction::initEmptyLocalState;
317
+ functionSet.push_back(std::move(function));
318
+ return functionSet;
319
+ }
320
+
321
+ } // namespace function
322
+ } // namespace lbug
@@ -134,6 +134,12 @@ struct FileInfoFunction final {
134
134
  static function_set getFunctionSet();
135
135
  };
136
136
 
137
+ struct DiskSizeInfoFunction final { // Table function reporting how the database file's pages are used.
138
+ static constexpr const char* name = "DISK_SIZE_INFO"; // Name under which the table function is created.
139
+
140
+ static function_set getFunctionSet(); // Returns the set holding the single DISK_SIZE_INFO table function.
141
+ };
142
+
137
143
  struct ShowAttachedDatabasesFunction final {
138
144
  static constexpr const char* name = "SHOW_ATTACHED_DATABASES";
139
145
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lbug",
3
- "version": "0.12.3-dev.15",
3
+ "version": "0.12.3-dev.16",
4
4
  "description": "An in-process property graph database management system built for query speed and scalability.",
5
5
  "main": "index.js",
6
6
  "module": "./index.mjs",
Binary file
Binary file
Binary file
Binary file