lbug 0.12.3-dev.14 → 0.12.3-dev.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lbug-source/CMakeLists.txt +1 -1
- package/lbug-source/src/function/function_collection.cpp +2 -1
- package/lbug-source/src/function/table/CMakeLists.txt +1 -0
- package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
- package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
- package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
- package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
- package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
- package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
- package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
- package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
- package/lbug-source/src/include/processor/plan_mapper.h +2 -0
- package/lbug-source/src/optimizer/CMakeLists.txt +1 -0
- package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
- package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
- package/lbug-source/src/optimizer/optimizer.cpp +6 -0
- package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
- package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
- package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
- package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
- package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
- package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
- package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
- package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
- package/lbug-source/tools/shell/embedded_shell.cpp +11 -0
- package/lbug-source/tools/shell/linenoise.cpp +3 -3
- package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0

package/lbug-source/src/function/function_collection.cpp

@@ -228,7 +228,8 @@ FunctionCollection* FunctionCollection::getFunctions() {
     TABLE_FUNCTION(StatsInfoFunction), TABLE_FUNCTION(StorageInfoFunction),
     TABLE_FUNCTION(ShowAttachedDatabasesFunction), TABLE_FUNCTION(ShowSequencesFunction),
     TABLE_FUNCTION(ShowFunctionsFunction), TABLE_FUNCTION(BMInfoFunction),
-    TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(ShowLoadedExtensionsFunction),
+    TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(DiskSizeInfoFunction),
+    TABLE_FUNCTION(ShowLoadedExtensionsFunction),
     TABLE_FUNCTION(ShowOfficialExtensionsFunction), TABLE_FUNCTION(ShowIndexesFunction),
     TABLE_FUNCTION(ShowProjectedGraphsFunction), TABLE_FUNCTION(ProjectedGraphInfoFunction),
     TABLE_FUNCTION(ShowMacrosFunction),

package/lbug-source/src/function/table/disk_size_info.cpp (new file)

@@ -0,0 +1,322 @@
+#include "binder/binder.h"
+#include "catalog/catalog.h"
+#include "catalog/catalog_entry/node_table_catalog_entry.h"
+#include "catalog/catalog_entry/rel_group_catalog_entry.h"
+#include "common/exception/binder.h"
+#include "function/table/bind_data.h"
+#include "function/table/simple_table_function.h"
+#include "main/client_context.h"
+#include "storage/database_header.h"
+#include "storage/index/hash_index.h"
+#include "storage/page_manager.h"
+#include "storage/storage_manager.h"
+#include "storage/table/list_chunk_data.h"
+#include "storage/table/node_table.h"
+#include "storage/table/rel_table.h"
+#include "storage/table/string_chunk_data.h"
+#include "storage/table/struct_chunk_data.h"
+#include "transaction/transaction.h"
+
+using namespace lbug::common;
+using namespace lbug::catalog;
+using namespace lbug::storage;
+using namespace lbug::main;
+
+namespace lbug {
+namespace function {
+
+struct DiskSizeInfoBindData final : TableFuncBindData {
+    const ClientContext* ctx;
+    DiskSizeInfoBindData(binder::expression_vector columns, row_idx_t numRows,
+        const ClientContext* ctx)
+        : TableFuncBindData{std::move(columns), numRows}, ctx{ctx} {}
+
+    std::unique_ptr<TableFuncBindData> copy() const override {
+        return std::make_unique<DiskSizeInfoBindData>(columns, numRows, ctx);
+    }
+};
+
+static uint64_t countChunkDataPages(const ColumnChunkData& chunkData) {
+    uint64_t pages = 0;
+    auto metadata = chunkData.getResidencyState() == ResidencyState::ON_DISK ?
+                        chunkData.getMetadata() :
+                        chunkData.getMetadataToFlush();
+    pages += metadata.getNumPages();
+
+    if (chunkData.hasNullData()) {
+        pages += countChunkDataPages(*chunkData.getNullData());
+    }
+
+    auto physicalType = chunkData.getDataType().getPhysicalType();
+    switch (physicalType) {
+    case PhysicalTypeID::STRUCT: {
+        auto& structChunk = chunkData.cast<StructChunkData>();
+        for (auto i = 0u; i < structChunk.getNumChildren(); i++) {
+            pages += countChunkDataPages(structChunk.getChild(i));
+        }
+    } break;
+    case PhysicalTypeID::STRING: {
+        auto& stringChunk = chunkData.cast<StringChunkData>();
+        pages += countChunkDataPages(*stringChunk.getIndexColumnChunk());
+        auto& dictionaryChunk = stringChunk.getDictionaryChunk();
+        pages += countChunkDataPages(*dictionaryChunk.getStringDataChunk());
+        pages += countChunkDataPages(*dictionaryChunk.getOffsetChunk());
+    } break;
+    case PhysicalTypeID::ARRAY:
+    case PhysicalTypeID::LIST: {
+        auto& listChunk = chunkData.cast<ListChunkData>();
+        pages += countChunkDataPages(*listChunk.getOffsetColumnChunk());
+        pages += countChunkDataPages(*listChunk.getSizeColumnChunk());
+        pages += countChunkDataPages(*listChunk.getDataColumnChunk());
+    } break;
+    default:
+        break;
+    }
+    return pages;
+}
+
+static uint64_t countChunkedGroupPages(ChunkedNodeGroup* chunkedGroup) {
+    uint64_t pages = 0;
+    auto numColumns = chunkedGroup->getNumColumns();
+    for (auto i = 0u; i < numColumns; i++) {
+        for (auto* segment : chunkedGroup->getColumnChunk(i).getSegments()) {
+            pages += countChunkDataPages(*segment);
+        }
+    }
+    if (chunkedGroup->getFormat() == NodeGroupDataFormat::CSR) {
+        auto& chunkedCSRGroup = chunkedGroup->cast<ChunkedCSRNodeGroup>();
+        for (auto* segment : chunkedCSRGroup.getCSRHeader().offset->getSegments()) {
+            pages += countChunkDataPages(*segment);
+        }
+        for (auto* segment : chunkedCSRGroup.getCSRHeader().length->getSegments()) {
+            pages += countChunkDataPages(*segment);
+        }
+    }
+    return pages;
+}
+
+static uint64_t countNodeGroupPages(NodeGroup* nodeGroup) {
+    uint64_t pages = 0;
+    auto numChunks = nodeGroup->getNumChunkedGroups();
+    for (auto chunkIdx = 0ul; chunkIdx < numChunks; chunkIdx++) {
+        pages += countChunkedGroupPages(nodeGroup->getChunkedNodeGroup(chunkIdx));
+    }
+    if (nodeGroup->getFormat() == NodeGroupDataFormat::CSR) {
+        auto& csrNodeGroup = nodeGroup->cast<CSRNodeGroup>();
+        auto persistentChunk = csrNodeGroup.getPersistentChunkedGroup();
+        if (persistentChunk) {
+            pages += countChunkedGroupPages(persistentChunk);
+        }
+    }
+    return pages;
+}
+
+struct DiskSizeEntry {
+    std::string category;
+    std::string name;
+    uint64_t numPages;
+    uint64_t sizeBytes;
+};
+
+// Estimate the number of pages used by a hash index based on the number of entries
+// Hash index structure:
+// - INDEX_HEADER_PAGES pages for HashIndexHeaderOnDisk (2 pages for 256 sub-indexes)
+// - DiskArrayCollection header pages (1+ pages)
+// - For each of 256 sub-indexes: pSlots and oSlots disk arrays
+//   - Each slot is SLOT_CAPACITY_BYTES (256 bytes), so 16 slots per page
+//   - Number of primary slots = 2^currentLevel + nextSplitSlotId
+//   - Overflow slots depend on collisions
+static uint64_t estimateHashIndexPages(const PrimaryKeyIndex* pkIndex) {
+    if (!pkIndex) {
+        return 0;
+    }
+
+    uint64_t totalPages = 0;
+
+    // Index header pages (storing HashIndexHeaderOnDisk for all 256 sub-indexes)
+    totalPages += INDEX_HEADER_PAGES; // 2 pages
+
+    // DiskArrayCollection header pages (at least 1)
+    // Each header page stores headers for up to ~170 disk arrays
+    // With 256 sub-indexes * 2 arrays (pSlots + oSlots) = 512 arrays
+    totalPages += 4; // Approximate: ~3-4 header pages for DiskArrayCollection
+
+    // For each sub-index, estimate primary and overflow slot pages
+    // We can access the headers through the pkIndex to get actual sizes
+    // But since the headers are private, we estimate based on numEntries
+
+    // Get total entries from all sub-indexes
+    // Each entry requires a slot, and slots have capacity of ~3-20 entries depending on key type
+    // With linear hashing, we expect ~70-80% fill rate
+
+    // Rough estimation: For N entries with 8-byte keys:
+    // - Slot capacity is approximately 3 entries per slot (256-byte slot / 80 bytes per entry)
+    // - Number of slots ≈ N / (3 * 0.7) ≈ N / 2
+    // - Pages for slots = slots / 16 (16 slots per page)
+    // - Plus PIP pages for addressing
+
+    // Since we can't easily access internal headers, we return the header overhead
+    // and let the unaccounted calculation handle the rest
+    return totalPages;
+}
+
+static std::vector<DiskSizeEntry> collectDiskSizeInfo(const ClientContext* context) {
+    std::vector<DiskSizeEntry> entries;
+    auto storageManager = StorageManager::Get(*context);
+    auto catalog = Catalog::Get(*context);
+    auto dataFH = storageManager->getDataFH();
+
+    // Handle in-memory databases
+    if (storageManager->isInMemory()) {
+        entries.push_back({"info", "in_memory_database", 0, 0});
+        return entries;
+    }
+
+    auto pageManager = dataFH->getPageManager();
+
+    // 1. Database header (always 1 page at index 0)
+    entries.push_back({"header", "database_header", 1, LBUG_PAGE_SIZE});
+
+    // 2. Get catalog and metadata page ranges from database header
+    auto databaseHeader = DatabaseHeader::readDatabaseHeader(*dataFH->getFileInfo());
+    if (databaseHeader.has_value()) {
+        entries.push_back({"catalog", "catalog", databaseHeader->catalogPageRange.numPages,
+            databaseHeader->catalogPageRange.numPages * LBUG_PAGE_SIZE});
+
+        entries.push_back({"metadata", "metadata", databaseHeader->metadataPageRange.numPages,
+            databaseHeader->metadataPageRange.numPages * LBUG_PAGE_SIZE});
+    }
+
+    // 3. Count table data pages
+    auto nodeTableEntries =
+        catalog->getNodeTableEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
+    auto relGroupEntries = catalog->getRelGroupEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
+
+    for (const auto tableEntry : nodeTableEntries) {
+        auto& nodeTable = storageManager->getTable(tableEntry->getTableID())->cast<NodeTable>();
+        uint64_t tablePages = 0;
+        auto numNodeGroups = nodeTable.getNumNodeGroups();
+        for (auto i = 0ul; i < numNodeGroups; i++) {
+            tablePages += countNodeGroupPages(nodeTable.getNodeGroup(i));
+        }
+        entries.push_back(
+            {"node_table", tableEntry->getName(), tablePages, tablePages * LBUG_PAGE_SIZE});
+
+        // Count primary key index header pages (rough estimate for overhead)
+        auto* pkIndex = nodeTable.getPKIndex();
+        uint64_t indexPages = estimateHashIndexPages(pkIndex);
+        if (indexPages > 0) {
+            entries.push_back({"pk_index_overhead", tableEntry->getName() + "_pk", indexPages,
+                indexPages * LBUG_PAGE_SIZE});
+        }
+    }
+
+    for (const auto entry : relGroupEntries) {
+        auto& relGroupEntry = entry->cast<RelGroupCatalogEntry>();
+        for (auto& info : relGroupEntry.getRelEntryInfos()) {
+            auto& relTable = storageManager->getTable(info.oid)->cast<RelTable>();
+            uint64_t tablePages = 0;
+
+            for (auto direction : relTable.getStorageDirections()) {
+                auto* directedRelTableData = relTable.getDirectedTableData(direction);
+                auto numNodeGroups = directedRelTableData->getNumNodeGroups();
+                for (auto i = 0ul; i < numNodeGroups; i++) {
+                    tablePages += countNodeGroupPages(directedRelTableData->getNodeGroup(i));
+                }
+            }
+            auto tableName = relGroupEntry.getName() + ":" +
+                             catalog
+                                 ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
+                                     info.nodePair.srcTableID)
+                                 ->getName() +
+                             "->" +
+                             catalog
+                                 ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
+                                     info.nodePair.dstTableID)
+                                 ->getName();
+            entries.push_back({"rel_table", tableName, tablePages, tablePages * LBUG_PAGE_SIZE});
+        }
+    }
+
+    // 4. Free space (from FSM)
+    auto freeEntries = pageManager->getFreeEntries(0, pageManager->getNumFreeEntries());
+    uint64_t freePages = 0;
+    for (const auto& freeEntry : freeEntries) {
+        freePages += freeEntry.numPages;
+    }
+    entries.push_back({"free_space", "free_pages", freePages, freePages * LBUG_PAGE_SIZE});
+
+    // 5. Calculate unaccounted pages (index slot data)
+    auto totalFilePages = dataFH->getNumPages();
+    uint64_t accountedPages = 1; // header
+    if (databaseHeader.has_value()) {
+        accountedPages +=
+            databaseHeader->catalogPageRange.numPages + databaseHeader->metadataPageRange.numPages;
+    }
+    for (const auto& entry : entries) {
+        if (entry.category == "node_table" || entry.category == "rel_table" ||
+            entry.category == "pk_index_overhead") {
+            accountedPages += entry.numPages;
+        }
+    }
+    accountedPages += freePages;
+
+    if (totalFilePages > accountedPages) {
+        uint64_t unaccountedPages = totalFilePages - accountedPages;
+        entries.push_back({"index_data", "hash_index_slots", unaccountedPages,
+            unaccountedPages * LBUG_PAGE_SIZE});
+    }
+
+    // 6. Total file size (last row)
+    entries.push_back({"total", "file_total", totalFilePages, totalFilePages * LBUG_PAGE_SIZE});
+
+    return entries;
+}
+
+static offset_t internalTableFunc(const TableFuncMorsel& morsel, const TableFuncInput& input,
+    DataChunk& output) {
+    const auto bindData = input.bindData->constPtrCast<DiskSizeInfoBindData>();
+    auto entries = collectDiskSizeInfo(bindData->ctx);
+
+    auto numEntriesToOutput = std::min(static_cast<uint64_t>(entries.size()) - morsel.startOffset,
+        morsel.getMorselSize());
+
+    for (row_idx_t i = 0; i < numEntriesToOutput; ++i) {
+        const auto& entry = entries[morsel.startOffset + i];
+        output.getValueVectorMutable(0).setValue(i, entry.category);
+        output.getValueVectorMutable(1).setValue(i, entry.name);
+        output.getValueVectorMutable(2).setValue<uint64_t>(i, entry.numPages);
+        output.getValueVectorMutable(3).setValue<uint64_t>(i, entry.sizeBytes);
+    }
+    return numEntriesToOutput;
+}
+
+static std::unique_ptr<TableFuncBindData> bindFunc(const ClientContext* context,
+    const TableFuncBindInput* input) {
+    std::vector<std::string> columnNames = {"category", "name", "num_pages", "size_bytes"};
+    std::vector<LogicalType> columnTypes;
+    columnTypes.push_back(LogicalType::STRING());
+    columnTypes.push_back(LogicalType::STRING());
+    columnTypes.push_back(LogicalType::UINT64());
+    columnTypes.push_back(LogicalType::UINT64());
+
+    // Get number of entries to report
+    auto entries = collectDiskSizeInfo(context);
+
+    auto columns = input->binder->createVariables(columnNames, columnTypes);
+    return std::make_unique<DiskSizeInfoBindData>(columns, entries.size(), context);
+}
+
+function_set DiskSizeInfoFunction::getFunctionSet() {
+    function_set functionSet;
+    auto function = std::make_unique<TableFunction>(name, std::vector<LogicalTypeID>{});
+    function->tableFunc = SimpleTableFunc::getTableFunc(internalTableFunc);
+    function->bindFunc = bindFunc;
+    function->initSharedStateFunc = SimpleTableFunc::initSharedState;
+    function->initLocalStateFunc = TableFunction::initEmptyLocalState;
+    functionSet.push_back(std::move(function));
+    return functionSet;
+}
+
+} // namespace function
+} // namespace lbug
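
Note: the estimateHashIndexPages() comments above express the slot arithmetic in prose. The standalone sketch below simply replays that arithmetic with concrete numbers so the orders of magnitude are visible; the constants (256-byte slots, roughly 80 bytes per entry, 70% fill, 16 slots per page) are the rough figures quoted in those comments, not the library's authoritative SLOT_CAPACITY_BYTES or page-size definitions.

    #include <cstdint>
    #include <cstdio>

    // Illustrative replay of the estimate described in estimateHashIndexPages();
    // all constants are the approximate figures from the comments above.
    int main() {
        const uint64_t numEntries = 1000000;        // hypothetical number of primary keys
        const double entriesPerSlot = 256.0 / 80.0; // ~3 entries per 256-byte slot
        const double fillFactor = 0.7;              // ~70% fill under linear hashing
        const uint64_t slotsPerPage = 16;           // 256-byte slots per 4 KiB page

        const auto numSlots =
            static_cast<uint64_t>(numEntries / (entriesPerSlot * fillFactor)); // ≈ N / 2
        const uint64_t slotPages = (numSlots + slotsPerPage - 1) / slotsPerPage;

        // For 1M entries this comes out to roughly 446k slots and ~28k slot pages,
        // which is why the table function only reports the fixed header overhead
        // and lets the "unaccounted" bucket absorb the slot pages themselves.
        std::printf("slots=%llu, slot pages=%llu\n",
            static_cast<unsigned long long>(numSlots),
            static_cast<unsigned long long>(slotPages));
        return 0;
    }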

package/lbug-source/src/include/function/table/simple_table_function.h

@@ -134,6 +134,12 @@ struct FileInfoFunction final {
     static function_set getFunctionSet();
 };
 
+struct DiskSizeInfoFunction final {
+    static constexpr const char* name = "DISK_SIZE_INFO";
+
+    static function_set getFunctionSet();
+};
+
 struct ShowAttachedDatabasesFunction final {
     static constexpr const char* name = "SHOW_ATTACHED_DATABASES";
 

package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h (new file)

@@ -0,0 +1,49 @@
+#pragma once
+
+#include "logical_operator_visitor.h"
+#include "planner/operator/logical_plan.h"
+
+namespace lbug {
+namespace main {
+class ClientContext;
+}
+
+namespace optimizer {
+
+/**
+ * This optimizer detects patterns where we're counting all rows from a single rel table
+ * without any filters, and replaces the scan + aggregate with a direct count from table metadata.
+ *
+ * Pattern detected:
+ *   AGGREGATE (COUNT_STAR only, no keys) →
+ *   PROJECTION (empty or pass-through) →
+ *   EXTEND (single rel table) →
+ *   SCAN_NODE_TABLE
+ *
+ * This pattern is replaced with:
+ *   COUNT_REL_TABLE (new operator that directly reads the count from table metadata)
+ */
+class CountRelTableOptimizer : public LogicalOperatorVisitor {
+public:
+    explicit CountRelTableOptimizer(main::ClientContext* context) : context{context} {}
+
+    void rewrite(planner::LogicalPlan* plan);
+
+private:
+    std::shared_ptr<planner::LogicalOperator> visitOperator(
+        const std::shared_ptr<planner::LogicalOperator>& op);
+
+    std::shared_ptr<planner::LogicalOperator> visitAggregateReplace(
+        std::shared_ptr<planner::LogicalOperator> op) override;
+
+    // Check if the aggregate is a simple COUNT(*) with no keys
+    bool isSimpleCountStar(planner::LogicalOperator* op) const;
+
+    // Check if the plan below aggregate matches the pattern for optimization
+    bool canOptimize(planner::LogicalOperator* aggregate) const;
+
+    main::ClientContext* context;
+};
+
+} // namespace optimizer
+} // namespace lbug
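
The doc comment above describes the rewrite only in prose; the actual logic lives in src/optimizer/count_rel_table_optimizer.cpp, which is not shown in this hunk. As a rough, self-contained illustration of the shape of such a bottom-up rewrite (toy stand-in types, not the package's planner classes):

    #include <memory>
    #include <string>
    #include <vector>

    // Toy stand-ins for the planner operators; only the rewrite shape matters here.
    struct Op {
        std::string kind; // "AGGREGATE", "PROJECTION", "EXTEND", "SCAN_NODE_TABLE", ...
        bool countStarOnlyNoKeys = false;
        std::vector<std::shared_ptr<Op>> children;
    };

    // AGGREGATE(COUNT_STAR, no keys) -> PROJECTION -> EXTEND -> SCAN_NODE_TABLE
    static bool matchesPattern(const Op& agg) {
        if (agg.kind != "AGGREGATE" || !agg.countStarOnlyNoKeys || agg.children.size() != 1) {
            return false;
        }
        const Op* proj = agg.children[0].get();
        if (proj->kind != "PROJECTION" || proj->children.size() != 1) return false;
        const Op* extend = proj->children[0].get();
        if (extend->kind != "EXTEND" || extend->children.size() != 1) return false;
        return extend->children[0]->kind == "SCAN_NODE_TABLE";
    }

    // Bottom-up rewrite: visit children first, then replace a matching aggregate
    // subtree with a leaf COUNT_REL_TABLE operator.
    static std::shared_ptr<Op> rewrite(std::shared_ptr<Op> op) {
        for (auto& child : op->children) {
            child = rewrite(child);
        }
        if (matchesPattern(*op)) {
            auto replacement = std::make_shared<Op>();
            replacement->kind = "COUNT_REL_TABLE";
            return replacement;
        }
        return op;
    }

    int main() {
        auto scan = std::make_shared<Op>(Op{"SCAN_NODE_TABLE"});
        auto extend = std::make_shared<Op>(Op{"EXTEND", false, {scan}});
        auto proj = std::make_shared<Op>(Op{"PROJECTION", false, {extend}});
        auto agg = std::make_shared<Op>(Op{"AGGREGATE", true, {proj}});
        auto root = rewrite(agg);
        return root->kind == "COUNT_REL_TABLE" ? 0 : 1; // the whole subtree collapses
    }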

package/lbug-source/src/include/optimizer/logical_operator_visitor.h

@@ -39,6 +39,12 @@ protected:
         return op;
     }
 
+    virtual void visitCountRelTable(planner::LogicalOperator* /*op*/) {}
+    virtual std::shared_ptr<planner::LogicalOperator> visitCountRelTableReplace(
+        std::shared_ptr<planner::LogicalOperator> op) {
+        return op;
+    }
+
     virtual void visitDelete(planner::LogicalOperator* /*op*/) {}
     virtual std::shared_ptr<planner::LogicalOperator> visitDeleteReplace(
         std::shared_ptr<planner::LogicalOperator> op) {

package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h (new file)

@@ -0,0 +1,84 @@
+#pragma once
+
+#include "binder/expression/expression.h"
+#include "binder/expression/node_expression.h"
+#include "catalog/catalog_entry/rel_group_catalog_entry.h"
+#include "common/enums/extend_direction.h"
+#include "planner/operator/logical_operator.h"
+
+namespace lbug {
+namespace planner {
+
+struct LogicalCountRelTablePrintInfo final : OPPrintInfo {
+    std::string relTableName;
+    std::shared_ptr<binder::Expression> countExpr;
+
+    LogicalCountRelTablePrintInfo(std::string relTableName,
+        std::shared_ptr<binder::Expression> countExpr)
+        : relTableName{std::move(relTableName)}, countExpr{std::move(countExpr)} {}
+
+    std::string toString() const override {
+        return "Table: " + relTableName + ", Count: " + countExpr->toString();
+    }
+
+    std::unique_ptr<OPPrintInfo> copy() const override {
+        return std::make_unique<LogicalCountRelTablePrintInfo>(relTableName, countExpr);
+    }
+};
+
+/**
+ * LogicalCountRelTable is an optimized operator that counts the number of rows
+ * in a rel table by scanning through bound nodes and counting edges.
+ *
+ * This operator is created by CountRelTableOptimizer when it detects:
+ *   COUNT(*) over a single rel table with no filters
+ */
+class LogicalCountRelTable final : public LogicalOperator {
+    static constexpr LogicalOperatorType type_ = LogicalOperatorType::COUNT_REL_TABLE;
+
+public:
+    LogicalCountRelTable(catalog::RelGroupCatalogEntry* relGroupEntry,
+        std::vector<common::table_id_t> relTableIDs,
+        std::vector<common::table_id_t> boundNodeTableIDs,
+        std::shared_ptr<binder::NodeExpression> boundNode, common::ExtendDirection direction,
+        std::shared_ptr<binder::Expression> countExpr)
+        : LogicalOperator{type_}, relGroupEntry{relGroupEntry}, relTableIDs{std::move(relTableIDs)},
+          boundNodeTableIDs{std::move(boundNodeTableIDs)}, boundNode{std::move(boundNode)},
+          direction{direction}, countExpr{std::move(countExpr)} {
+        cardinality = 1; // Always returns exactly one row
+    }
+
+    void computeFactorizedSchema() override;
+    void computeFlatSchema() override;
+
+    std::string getExpressionsForPrinting() const override { return countExpr->toString(); }
+
+    catalog::RelGroupCatalogEntry* getRelGroupEntry() const { return relGroupEntry; }
+    const std::vector<common::table_id_t>& getRelTableIDs() const { return relTableIDs; }
+    const std::vector<common::table_id_t>& getBoundNodeTableIDs() const {
+        return boundNodeTableIDs;
+    }
+    std::shared_ptr<binder::NodeExpression> getBoundNode() const { return boundNode; }
+    common::ExtendDirection getDirection() const { return direction; }
+    std::shared_ptr<binder::Expression> getCountExpr() const { return countExpr; }
+
+    std::unique_ptr<OPPrintInfo> getPrintInfo() const override {
+        return std::make_unique<LogicalCountRelTablePrintInfo>(relGroupEntry->getName(), countExpr);
+    }
+
+    std::unique_ptr<LogicalOperator> copy() override {
+        return std::make_unique<LogicalCountRelTable>(relGroupEntry, relTableIDs, boundNodeTableIDs,
+            boundNode, direction, countExpr);
+    }
+
+private:
+    catalog::RelGroupCatalogEntry* relGroupEntry;
+    std::vector<common::table_id_t> relTableIDs;
+    std::vector<common::table_id_t> boundNodeTableIDs;
+    std::shared_ptr<binder::NodeExpression> boundNode;
+    common::ExtendDirection direction;
+    std::shared_ptr<binder::Expression> countExpr;
+};
+
+} // namespace planner
+} // namespace lbug

package/lbug-source/src/include/processor/operator/scan/count_rel_table.h (new file)

@@ -0,0 +1,62 @@
+#pragma once
+
+#include "common/enums/rel_direction.h"
+#include "processor/operator/physical_operator.h"
+#include "storage/table/node_table.h"
+#include "storage/table/rel_table.h"
+
+namespace lbug {
+namespace processor {
+
+struct CountRelTablePrintInfo final : OPPrintInfo {
+    std::string relTableName;
+
+    explicit CountRelTablePrintInfo(std::string relTableName)
+        : relTableName{std::move(relTableName)} {}
+
+    std::string toString() const override { return "Table: " + relTableName; }
+
+    std::unique_ptr<OPPrintInfo> copy() const override {
+        return std::make_unique<CountRelTablePrintInfo>(relTableName);
+    }
+};
+
+/**
+ * CountRelTable is a source operator that counts edges in a rel table
+ * by scanning through all bound nodes and counting their edges.
+ * It creates its own internal vectors for node scanning (not exposed in ResultSet).
+ */
+class CountRelTable final : public PhysicalOperator {
+    static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::COUNT_REL_TABLE;
+
+public:
+    CountRelTable(std::vector<storage::NodeTable*> nodeTables,
+        std::vector<storage::RelTable*> relTables, common::RelDataDirection direction,
+        DataPos countOutputPos, physical_op_id id, std::unique_ptr<OPPrintInfo> printInfo)
+        : PhysicalOperator{type_, id, std::move(printInfo)}, nodeTables{std::move(nodeTables)},
+          relTables{std::move(relTables)}, direction{direction}, countOutputPos{countOutputPos} {}
+
+    bool isSource() const override { return true; }
+    bool isParallel() const override { return false; }
+
+    void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override;
+
+    bool getNextTuplesInternal(ExecutionContext* context) override;
+
+    std::unique_ptr<PhysicalOperator> copy() override {
+        return std::make_unique<CountRelTable>(nodeTables, relTables, direction, countOutputPos, id,
+            printInfo->copy());
+    }
+
+private:
+    std::vector<storage::NodeTable*> nodeTables;
+    std::vector<storage::RelTable*> relTables;
+    common::RelDataDirection direction;
+    DataPos countOutputPos;
+    common::ValueVector* countVector;
+    bool hasExecuted;
+    common::row_idx_t totalCount;
+};
+
+} // namespace processor
+} // namespace lbug
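
The header above declares hasExecuted and totalCount, but the implementation lives in src/processor/operator/scan/count_rel_table.cpp, which appears later in the diff. A minimal self-contained sketch of the one-shot source pattern those members suggest (toy types only, not the package's ExecutionContext/ValueVector API):

    #include <cstdint>
    #include <functional>

    // Toy illustration of a one-shot source operator: the first call does all the
    // counting and emits a single row; every later call reports exhaustion.
    class OneShotCountSource {
    public:
        explicit OneShotCountSource(std::function<uint64_t()> countAllEdges)
            : countAllEdges{std::move(countAllEdges)} {}

        // Returns true when it produced the single count row, false once exhausted.
        bool getNextTuples(uint64_t& countOut) {
            if (hasExecuted) {
                return false; // the single result row was already produced
            }
            hasExecuted = true;
            totalCount = countAllEdges(); // e.g. sum edge counts over all bound nodes
            countOut = totalCount;
            return true;
        }

    private:
        std::function<uint64_t()> countAllEdges;
        bool hasExecuted = false;
        uint64_t totalCount = 0;
    };

    int main() {
        OneShotCountSource src{[] { return uint64_t{42}; }};
        uint64_t count = 0;
        while (src.getNextTuples(count)) { /* count == 42 exactly once */ }
        return 0;
    }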

package/lbug-source/src/include/processor/plan_mapper.h

@@ -90,6 +90,8 @@ public:
     std::unique_ptr<PhysicalOperator> mapCopyRelFrom(
         const planner::LogicalOperator* logicalOperator);
     std::unique_ptr<PhysicalOperator> mapCopyTo(const planner::LogicalOperator* logicalOperator);
+    std::unique_ptr<PhysicalOperator> mapCountRelTable(
+        const planner::LogicalOperator* logicalOperator);
     std::unique_ptr<PhysicalOperator> mapCreateMacro(
         const planner::LogicalOperator* logicalOperator);
     std::unique_ptr<PhysicalOperator> mapCreateSequence(