lbug 0.12.3-dev.14 → 0.12.3-dev.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/lbug-source/CMakeLists.txt +1 -1
  2. package/lbug-source/src/function/function_collection.cpp +2 -1
  3. package/lbug-source/src/function/table/CMakeLists.txt +1 -0
  4. package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
  5. package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
  6. package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
  7. package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
  8. package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
  9. package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
  10. package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
  11. package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
  12. package/lbug-source/src/include/processor/plan_mapper.h +2 -0
  13. package/lbug-source/src/optimizer/CMakeLists.txt +1 -0
  14. package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
  15. package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
  16. package/lbug-source/src/optimizer/optimizer.cpp +6 -0
  17. package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
  18. package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
  19. package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
  20. package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
  21. package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
  22. package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
  23. package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
  24. package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
  25. package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
  26. package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
  27. package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
  28. package/lbug-source/tools/shell/embedded_shell.cpp +11 -0
  29. package/lbug-source/tools/shell/linenoise.cpp +3 -3
  30. package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
  31. package/package.json +1 -1
  32. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  33. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  34. package/prebuilt/lbugjs-linux-x64.node +0 -0
  35. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
2
 
3
- project(Lbug VERSION 0.12.3.14 LANGUAGES CXX C)
3
+ project(Lbug VERSION 0.12.3.16 LANGUAGES CXX C)
4
4
 
5
5
  option(SINGLE_THREADED "Single-threaded mode" FALSE)
6
6
  if(SINGLE_THREADED)
@@ -228,7 +228,8 @@ FunctionCollection* FunctionCollection::getFunctions() {
228
228
  TABLE_FUNCTION(StatsInfoFunction), TABLE_FUNCTION(StorageInfoFunction),
229
229
  TABLE_FUNCTION(ShowAttachedDatabasesFunction), TABLE_FUNCTION(ShowSequencesFunction),
230
230
  TABLE_FUNCTION(ShowFunctionsFunction), TABLE_FUNCTION(BMInfoFunction),
231
- TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(ShowLoadedExtensionsFunction),
231
+ TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(DiskSizeInfoFunction),
232
+ TABLE_FUNCTION(ShowLoadedExtensionsFunction),
232
233
  TABLE_FUNCTION(ShowOfficialExtensionsFunction), TABLE_FUNCTION(ShowIndexesFunction),
233
234
  TABLE_FUNCTION(ShowProjectedGraphsFunction), TABLE_FUNCTION(ProjectedGraphInfoFunction),
234
235
  TABLE_FUNCTION(ShowMacrosFunction),
@@ -8,6 +8,7 @@ add_library(lbug_table_function
8
8
  clear_warnings.cpp
9
9
  current_setting.cpp
10
10
  db_version.cpp
11
+ disk_size_info.cpp
11
12
  drop_project_graph.cpp
12
13
  file_info.cpp
13
14
  free_space_info.cpp
@@ -0,0 +1,322 @@
1
+ #include "binder/binder.h"
2
+ #include "catalog/catalog.h"
3
+ #include "catalog/catalog_entry/node_table_catalog_entry.h"
4
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
5
+ #include "common/exception/binder.h"
6
+ #include "function/table/bind_data.h"
7
+ #include "function/table/simple_table_function.h"
8
+ #include "main/client_context.h"
9
+ #include "storage/database_header.h"
10
+ #include "storage/index/hash_index.h"
11
+ #include "storage/page_manager.h"
12
+ #include "storage/storage_manager.h"
13
+ #include "storage/table/list_chunk_data.h"
14
+ #include "storage/table/node_table.h"
15
+ #include "storage/table/rel_table.h"
16
+ #include "storage/table/string_chunk_data.h"
17
+ #include "storage/table/struct_chunk_data.h"
18
+ #include "transaction/transaction.h"
19
+
20
+ using namespace lbug::common;
21
+ using namespace lbug::catalog;
22
+ using namespace lbug::storage;
23
+ using namespace lbug::main;
24
+
25
+ namespace lbug {
26
+ namespace function {
27
+
28
+ struct DiskSizeInfoBindData final : TableFuncBindData {
29
+ const ClientContext* ctx;
30
+ DiskSizeInfoBindData(binder::expression_vector columns, row_idx_t numRows,
31
+ const ClientContext* ctx)
32
+ : TableFuncBindData{std::move(columns), numRows}, ctx{ctx} {}
33
+
34
+ std::unique_ptr<TableFuncBindData> copy() const override {
35
+ return std::make_unique<DiskSizeInfoBindData>(columns, numRows, ctx);
36
+ }
37
+ };
38
+
39
+ static uint64_t countChunkDataPages(const ColumnChunkData& chunkData) {
40
+ uint64_t pages = 0;
41
+ auto metadata = chunkData.getResidencyState() == ResidencyState::ON_DISK ?
42
+ chunkData.getMetadata() :
43
+ chunkData.getMetadataToFlush();
44
+ pages += metadata.getNumPages();
45
+
46
+ if (chunkData.hasNullData()) {
47
+ pages += countChunkDataPages(*chunkData.getNullData());
48
+ }
49
+
50
+ auto physicalType = chunkData.getDataType().getPhysicalType();
51
+ switch (physicalType) {
52
+ case PhysicalTypeID::STRUCT: {
53
+ auto& structChunk = chunkData.cast<StructChunkData>();
54
+ for (auto i = 0u; i < structChunk.getNumChildren(); i++) {
55
+ pages += countChunkDataPages(structChunk.getChild(i));
56
+ }
57
+ } break;
58
+ case PhysicalTypeID::STRING: {
59
+ auto& stringChunk = chunkData.cast<StringChunkData>();
60
+ pages += countChunkDataPages(*stringChunk.getIndexColumnChunk());
61
+ auto& dictionaryChunk = stringChunk.getDictionaryChunk();
62
+ pages += countChunkDataPages(*dictionaryChunk.getStringDataChunk());
63
+ pages += countChunkDataPages(*dictionaryChunk.getOffsetChunk());
64
+ } break;
65
+ case PhysicalTypeID::ARRAY:
66
+ case PhysicalTypeID::LIST: {
67
+ auto& listChunk = chunkData.cast<ListChunkData>();
68
+ pages += countChunkDataPages(*listChunk.getOffsetColumnChunk());
69
+ pages += countChunkDataPages(*listChunk.getSizeColumnChunk());
70
+ pages += countChunkDataPages(*listChunk.getDataColumnChunk());
71
+ } break;
72
+ default:
73
+ break;
74
+ }
75
+ return pages;
76
+ }
77
+
78
+ static uint64_t countChunkedGroupPages(ChunkedNodeGroup* chunkedGroup) {
79
+ uint64_t pages = 0;
80
+ auto numColumns = chunkedGroup->getNumColumns();
81
+ for (auto i = 0u; i < numColumns; i++) {
82
+ for (auto* segment : chunkedGroup->getColumnChunk(i).getSegments()) {
83
+ pages += countChunkDataPages(*segment);
84
+ }
85
+ }
86
+ if (chunkedGroup->getFormat() == NodeGroupDataFormat::CSR) {
87
+ auto& chunkedCSRGroup = chunkedGroup->cast<ChunkedCSRNodeGroup>();
88
+ for (auto* segment : chunkedCSRGroup.getCSRHeader().offset->getSegments()) {
89
+ pages += countChunkDataPages(*segment);
90
+ }
91
+ for (auto* segment : chunkedCSRGroup.getCSRHeader().length->getSegments()) {
92
+ pages += countChunkDataPages(*segment);
93
+ }
94
+ }
95
+ return pages;
96
+ }
97
+
98
+ static uint64_t countNodeGroupPages(NodeGroup* nodeGroup) {
99
+ uint64_t pages = 0;
100
+ auto numChunks = nodeGroup->getNumChunkedGroups();
101
+ for (auto chunkIdx = 0ul; chunkIdx < numChunks; chunkIdx++) {
102
+ pages += countChunkedGroupPages(nodeGroup->getChunkedNodeGroup(chunkIdx));
103
+ }
104
+ if (nodeGroup->getFormat() == NodeGroupDataFormat::CSR) {
105
+ auto& csrNodeGroup = nodeGroup->cast<CSRNodeGroup>();
106
+ auto persistentChunk = csrNodeGroup.getPersistentChunkedGroup();
107
+ if (persistentChunk) {
108
+ pages += countChunkedGroupPages(persistentChunk);
109
+ }
110
+ }
111
+ return pages;
112
+ }
113
+
114
+ struct DiskSizeEntry {
115
+ std::string category;
116
+ std::string name;
117
+ uint64_t numPages;
118
+ uint64_t sizeBytes;
119
+ };
120
+
121
+ // Estimate the number of pages used by a hash index based on the number of entries
122
+ // Hash index structure:
123
+ // - INDEX_HEADER_PAGES pages for HashIndexHeaderOnDisk (2 pages for 256 sub-indexes)
124
+ // - DiskArrayCollection header pages (1+ pages)
125
+ // - For each of 256 sub-indexes: pSlots and oSlots disk arrays
126
+ // - Each slot is SLOT_CAPACITY_BYTES (256 bytes), so 16 slots per page
127
+ // - Number of primary slots = 2^currentLevel + nextSplitSlotId
128
+ // - Overflow slots depend on collisions
129
+ static uint64_t estimateHashIndexPages(const PrimaryKeyIndex* pkIndex) {
130
+ if (!pkIndex) {
131
+ return 0;
132
+ }
133
+
134
+ uint64_t totalPages = 0;
135
+
136
+ // Index header pages (storing HashIndexHeaderOnDisk for all 256 sub-indexes)
137
+ totalPages += INDEX_HEADER_PAGES; // 2 pages
138
+
139
+ // DiskArrayCollection header pages (at least 1)
140
+ // Each header page stores headers for up to ~170 disk arrays
141
+ // With 256 sub-indexes * 2 arrays (pSlots + oSlots) = 512 arrays
142
+ totalPages += 4; // Approximate: ~3-4 header pages for DiskArrayCollection
143
+
144
+ // For each sub-index, estimate primary and overflow slot pages
145
+ // We can access the headers through the pkIndex to get actual sizes
146
+ // But since the headers are private, we estimate based on numEntries
147
+
148
+ // Get total entries from all sub-indexes
149
+ // Each entry requires a slot, and slots have capacity of ~3-20 entries depending on key type
150
+ // With linear hashing, we expect ~70-80% fill rate
151
+
152
+ // Rough estimation: For N entries with 8-byte keys:
153
+ // - Slot capacity is approximately 3 entries per slot (256-byte slot / 80 bytes per entry)
154
+ // - Number of slots ≈ N / (3 * 0.7) ≈ N / 2
155
+ // - Pages for slots = slots / 16 (16 slots per page)
156
+ // - Plus PIP pages for addressing
157
+
158
+ // Since we can't easily access internal headers, we return the header overhead
159
+ // and let the unaccounted calculation handle the rest
160
+ return totalPages;
161
+ }
162
+
163
+ static std::vector<DiskSizeEntry> collectDiskSizeInfo(const ClientContext* context) {
164
+ std::vector<DiskSizeEntry> entries;
165
+ auto storageManager = StorageManager::Get(*context);
166
+ auto catalog = Catalog::Get(*context);
167
+ auto dataFH = storageManager->getDataFH();
168
+
169
+ // Handle in-memory databases
170
+ if (storageManager->isInMemory()) {
171
+ entries.push_back({"info", "in_memory_database", 0, 0});
172
+ return entries;
173
+ }
174
+
175
+ auto pageManager = dataFH->getPageManager();
176
+
177
+ // 1. Database header (always 1 page at index 0)
178
+ entries.push_back({"header", "database_header", 1, LBUG_PAGE_SIZE});
179
+
180
+ // 2. Get catalog and metadata page ranges from database header
181
+ auto databaseHeader = DatabaseHeader::readDatabaseHeader(*dataFH->getFileInfo());
182
+ if (databaseHeader.has_value()) {
183
+ entries.push_back({"catalog", "catalog", databaseHeader->catalogPageRange.numPages,
184
+ databaseHeader->catalogPageRange.numPages * LBUG_PAGE_SIZE});
185
+
186
+ entries.push_back({"metadata", "metadata", databaseHeader->metadataPageRange.numPages,
187
+ databaseHeader->metadataPageRange.numPages * LBUG_PAGE_SIZE});
188
+ }
189
+
190
+ // 3. Count table data pages
191
+ auto nodeTableEntries =
192
+ catalog->getNodeTableEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
193
+ auto relGroupEntries = catalog->getRelGroupEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
194
+
195
+ for (const auto tableEntry : nodeTableEntries) {
196
+ auto& nodeTable = storageManager->getTable(tableEntry->getTableID())->cast<NodeTable>();
197
+ uint64_t tablePages = 0;
198
+ auto numNodeGroups = nodeTable.getNumNodeGroups();
199
+ for (auto i = 0ul; i < numNodeGroups; i++) {
200
+ tablePages += countNodeGroupPages(nodeTable.getNodeGroup(i));
201
+ }
202
+ entries.push_back(
203
+ {"node_table", tableEntry->getName(), tablePages, tablePages * LBUG_PAGE_SIZE});
204
+
205
+ // Count primary key index header pages (rough estimate for overhead)
206
+ auto* pkIndex = nodeTable.getPKIndex();
207
+ uint64_t indexPages = estimateHashIndexPages(pkIndex);
208
+ if (indexPages > 0) {
209
+ entries.push_back({"pk_index_overhead", tableEntry->getName() + "_pk", indexPages,
210
+ indexPages * LBUG_PAGE_SIZE});
211
+ }
212
+ }
213
+
214
+ for (const auto entry : relGroupEntries) {
215
+ auto& relGroupEntry = entry->cast<RelGroupCatalogEntry>();
216
+ for (auto& info : relGroupEntry.getRelEntryInfos()) {
217
+ auto& relTable = storageManager->getTable(info.oid)->cast<RelTable>();
218
+ uint64_t tablePages = 0;
219
+
220
+ for (auto direction : relTable.getStorageDirections()) {
221
+ auto* directedRelTableData = relTable.getDirectedTableData(direction);
222
+ auto numNodeGroups = directedRelTableData->getNumNodeGroups();
223
+ for (auto i = 0ul; i < numNodeGroups; i++) {
224
+ tablePages += countNodeGroupPages(directedRelTableData->getNodeGroup(i));
225
+ }
226
+ }
227
+ auto tableName = relGroupEntry.getName() + ":" +
228
+ catalog
229
+ ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
230
+ info.nodePair.srcTableID)
231
+ ->getName() +
232
+ "->" +
233
+ catalog
234
+ ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
235
+ info.nodePair.dstTableID)
236
+ ->getName();
237
+ entries.push_back({"rel_table", tableName, tablePages, tablePages * LBUG_PAGE_SIZE});
238
+ }
239
+ }
240
+
241
+ // 4. Free space (from FSM)
242
+ auto freeEntries = pageManager->getFreeEntries(0, pageManager->getNumFreeEntries());
243
+ uint64_t freePages = 0;
244
+ for (const auto& freeEntry : freeEntries) {
245
+ freePages += freeEntry.numPages;
246
+ }
247
+ entries.push_back({"free_space", "free_pages", freePages, freePages * LBUG_PAGE_SIZE});
248
+
249
+ // 5. Calculate unaccounted pages (index slot data)
250
+ auto totalFilePages = dataFH->getNumPages();
251
+ uint64_t accountedPages = 1; // header
252
+ if (databaseHeader.has_value()) {
253
+ accountedPages +=
254
+ databaseHeader->catalogPageRange.numPages + databaseHeader->metadataPageRange.numPages;
255
+ }
256
+ for (const auto& entry : entries) {
257
+ if (entry.category == "node_table" || entry.category == "rel_table" ||
258
+ entry.category == "pk_index_overhead") {
259
+ accountedPages += entry.numPages;
260
+ }
261
+ }
262
+ accountedPages += freePages;
263
+
264
+ if (totalFilePages > accountedPages) {
265
+ uint64_t unaccountedPages = totalFilePages - accountedPages;
266
+ entries.push_back({"index_data", "hash_index_slots", unaccountedPages,
267
+ unaccountedPages * LBUG_PAGE_SIZE});
268
+ }
269
+
270
+ // 6. Total file size (last row)
271
+ entries.push_back({"total", "file_total", totalFilePages, totalFilePages * LBUG_PAGE_SIZE});
272
+
273
+ return entries;
274
+ }
275
+
276
+ static offset_t internalTableFunc(const TableFuncMorsel& morsel, const TableFuncInput& input,
277
+ DataChunk& output) {
278
+ const auto bindData = input.bindData->constPtrCast<DiskSizeInfoBindData>();
279
+ auto entries = collectDiskSizeInfo(bindData->ctx);
280
+
281
+ auto numEntriesToOutput = std::min(static_cast<uint64_t>(entries.size()) - morsel.startOffset,
282
+ morsel.getMorselSize());
283
+
284
+ for (row_idx_t i = 0; i < numEntriesToOutput; ++i) {
285
+ const auto& entry = entries[morsel.startOffset + i];
286
+ output.getValueVectorMutable(0).setValue(i, entry.category);
287
+ output.getValueVectorMutable(1).setValue(i, entry.name);
288
+ output.getValueVectorMutable(2).setValue<uint64_t>(i, entry.numPages);
289
+ output.getValueVectorMutable(3).setValue<uint64_t>(i, entry.sizeBytes);
290
+ }
291
+ return numEntriesToOutput;
292
+ }
293
+
294
+ static std::unique_ptr<TableFuncBindData> bindFunc(const ClientContext* context,
295
+ const TableFuncBindInput* input) {
296
+ std::vector<std::string> columnNames = {"category", "name", "num_pages", "size_bytes"};
297
+ std::vector<LogicalType> columnTypes;
298
+ columnTypes.push_back(LogicalType::STRING());
299
+ columnTypes.push_back(LogicalType::STRING());
300
+ columnTypes.push_back(LogicalType::UINT64());
301
+ columnTypes.push_back(LogicalType::UINT64());
302
+
303
+ // Get number of entries to report
304
+ auto entries = collectDiskSizeInfo(context);
305
+
306
+ auto columns = input->binder->createVariables(columnNames, columnTypes);
307
+ return std::make_unique<DiskSizeInfoBindData>(columns, entries.size(), context);
308
+ }
309
+
310
+ function_set DiskSizeInfoFunction::getFunctionSet() {
311
+ function_set functionSet;
312
+ auto function = std::make_unique<TableFunction>(name, std::vector<LogicalTypeID>{});
313
+ function->tableFunc = SimpleTableFunc::getTableFunc(internalTableFunc);
314
+ function->bindFunc = bindFunc;
315
+ function->initSharedStateFunc = SimpleTableFunc::initSharedState;
316
+ function->initLocalStateFunc = TableFunction::initEmptyLocalState;
317
+ functionSet.push_back(std::move(function));
318
+ return functionSet;
319
+ }
320
+
321
+ } // namespace function
322
+ } // namespace lbug
@@ -134,6 +134,12 @@ struct FileInfoFunction final {
134
134
  static function_set getFunctionSet();
135
135
  };
136
136
 
137
+ struct DiskSizeInfoFunction final {
138
+ static constexpr const char* name = "DISK_SIZE_INFO";
139
+
140
+ static function_set getFunctionSet();
141
+ };
142
+
137
143
  struct ShowAttachedDatabasesFunction final {
138
144
  static constexpr const char* name = "SHOW_ATTACHED_DATABASES";
139
145
 
@@ -0,0 +1,49 @@
1
+ #pragma once
2
+
3
+ #include "logical_operator_visitor.h"
4
+ #include "planner/operator/logical_plan.h"
5
+
6
+ namespace lbug {
7
+ namespace main {
8
+ class ClientContext;
9
+ }
10
+
11
+ namespace optimizer {
12
+
13
+ /**
14
+ * This optimizer detects patterns where we're counting all rows from a single rel table
15
+ * without any filters, and replaces the scan + aggregate with a direct count from table metadata.
16
+ *
17
+ * Pattern detected:
18
+ * AGGREGATE (COUNT_STAR only, no keys) →
19
+ * PROJECTION (empty or pass-through) →
20
+ * EXTEND (single rel table) →
21
+ * SCAN_NODE_TABLE
22
+ *
23
+ * This pattern is replaced with:
24
+ * COUNT_REL_TABLE (new operator that directly reads the count from table metadata)
25
+ */
26
+ class CountRelTableOptimizer : public LogicalOperatorVisitor {
27
+ public:
28
+ explicit CountRelTableOptimizer(main::ClientContext* context) : context{context} {}
29
+
30
+ void rewrite(planner::LogicalPlan* plan);
31
+
32
+ private:
33
+ std::shared_ptr<planner::LogicalOperator> visitOperator(
34
+ const std::shared_ptr<planner::LogicalOperator>& op);
35
+
36
+ std::shared_ptr<planner::LogicalOperator> visitAggregateReplace(
37
+ std::shared_ptr<planner::LogicalOperator> op) override;
38
+
39
+ // Check if the aggregate is a simple COUNT(*) with no keys
40
+ bool isSimpleCountStar(planner::LogicalOperator* op) const;
41
+
42
+ // Check if the plan below aggregate matches the pattern for optimization
43
+ bool canOptimize(planner::LogicalOperator* aggregate) const;
44
+
45
+ main::ClientContext* context;
46
+ };
47
+
48
+ } // namespace optimizer
49
+ } // namespace lbug
@@ -39,6 +39,12 @@ protected:
39
39
  return op;
40
40
  }
41
41
 
42
+ virtual void visitCountRelTable(planner::LogicalOperator* /*op*/) {}
43
+ virtual std::shared_ptr<planner::LogicalOperator> visitCountRelTableReplace(
44
+ std::shared_ptr<planner::LogicalOperator> op) {
45
+ return op;
46
+ }
47
+
42
48
  virtual void visitDelete(planner::LogicalOperator* /*op*/) {}
43
49
  virtual std::shared_ptr<planner::LogicalOperator> visitDeleteReplace(
44
50
  std::shared_ptr<planner::LogicalOperator> op) {
@@ -17,6 +17,7 @@ enum class LogicalOperatorType : uint8_t {
17
17
  ATTACH_DATABASE,
18
18
  COPY_FROM,
19
19
  COPY_TO,
20
+ COUNT_REL_TABLE,
20
21
  CREATE_MACRO,
21
22
  CREATE_SEQUENCE,
22
23
  CREATE_TABLE,
@@ -0,0 +1,84 @@
1
+ #pragma once
2
+
3
+ #include "binder/expression/expression.h"
4
+ #include "binder/expression/node_expression.h"
5
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
6
+ #include "common/enums/extend_direction.h"
7
+ #include "planner/operator/logical_operator.h"
8
+
9
+ namespace lbug {
10
+ namespace planner {
11
+
12
+ struct LogicalCountRelTablePrintInfo final : OPPrintInfo {
13
+ std::string relTableName;
14
+ std::shared_ptr<binder::Expression> countExpr;
15
+
16
+ LogicalCountRelTablePrintInfo(std::string relTableName,
17
+ std::shared_ptr<binder::Expression> countExpr)
18
+ : relTableName{std::move(relTableName)}, countExpr{std::move(countExpr)} {}
19
+
20
+ std::string toString() const override {
21
+ return "Table: " + relTableName + ", Count: " + countExpr->toString();
22
+ }
23
+
24
+ std::unique_ptr<OPPrintInfo> copy() const override {
25
+ return std::make_unique<LogicalCountRelTablePrintInfo>(relTableName, countExpr);
26
+ }
27
+ };
28
+
29
+ /**
30
+ * LogicalCountRelTable is an optimized operator that counts the number of rows
31
+ * in a rel table by scanning through bound nodes and counting edges.
32
+ *
33
+ * This operator is created by CountRelTableOptimizer when it detects:
34
+ * COUNT(*) over a single rel table with no filters
35
+ */
36
+ class LogicalCountRelTable final : public LogicalOperator {
37
+ static constexpr LogicalOperatorType type_ = LogicalOperatorType::COUNT_REL_TABLE;
38
+
39
+ public:
40
+ LogicalCountRelTable(catalog::RelGroupCatalogEntry* relGroupEntry,
41
+ std::vector<common::table_id_t> relTableIDs,
42
+ std::vector<common::table_id_t> boundNodeTableIDs,
43
+ std::shared_ptr<binder::NodeExpression> boundNode, common::ExtendDirection direction,
44
+ std::shared_ptr<binder::Expression> countExpr)
45
+ : LogicalOperator{type_}, relGroupEntry{relGroupEntry}, relTableIDs{std::move(relTableIDs)},
46
+ boundNodeTableIDs{std::move(boundNodeTableIDs)}, boundNode{std::move(boundNode)},
47
+ direction{direction}, countExpr{std::move(countExpr)} {
48
+ cardinality = 1; // Always returns exactly one row
49
+ }
50
+
51
+ void computeFactorizedSchema() override;
52
+ void computeFlatSchema() override;
53
+
54
+ std::string getExpressionsForPrinting() const override { return countExpr->toString(); }
55
+
56
+ catalog::RelGroupCatalogEntry* getRelGroupEntry() const { return relGroupEntry; }
57
+ const std::vector<common::table_id_t>& getRelTableIDs() const { return relTableIDs; }
58
+ const std::vector<common::table_id_t>& getBoundNodeTableIDs() const {
59
+ return boundNodeTableIDs;
60
+ }
61
+ std::shared_ptr<binder::NodeExpression> getBoundNode() const { return boundNode; }
62
+ common::ExtendDirection getDirection() const { return direction; }
63
+ std::shared_ptr<binder::Expression> getCountExpr() const { return countExpr; }
64
+
65
+ std::unique_ptr<OPPrintInfo> getPrintInfo() const override {
66
+ return std::make_unique<LogicalCountRelTablePrintInfo>(relGroupEntry->getName(), countExpr);
67
+ }
68
+
69
+ std::unique_ptr<LogicalOperator> copy() override {
70
+ return std::make_unique<LogicalCountRelTable>(relGroupEntry, relTableIDs, boundNodeTableIDs,
71
+ boundNode, direction, countExpr);
72
+ }
73
+
74
+ private:
75
+ catalog::RelGroupCatalogEntry* relGroupEntry;
76
+ std::vector<common::table_id_t> relTableIDs;
77
+ std::vector<common::table_id_t> boundNodeTableIDs;
78
+ std::shared_ptr<binder::NodeExpression> boundNode;
79
+ common::ExtendDirection direction;
80
+ std::shared_ptr<binder::Expression> countExpr;
81
+ };
82
+
83
+ } // namespace planner
84
+ } // namespace lbug
@@ -22,6 +22,7 @@ enum class PhysicalOperatorType : uint8_t {
22
22
  ATTACH_DATABASE,
23
23
  BATCH_INSERT,
24
24
  COPY_TO,
25
+ COUNT_REL_TABLE,
25
26
  CREATE_MACRO,
26
27
  CREATE_SEQUENCE,
27
28
  CREATE_TABLE,
@@ -0,0 +1,62 @@
1
+ #pragma once
2
+
3
+ #include "common/enums/rel_direction.h"
4
+ #include "processor/operator/physical_operator.h"
5
+ #include "storage/table/node_table.h"
6
+ #include "storage/table/rel_table.h"
7
+
8
+ namespace lbug {
9
+ namespace processor {
10
+
11
+ struct CountRelTablePrintInfo final : OPPrintInfo {
12
+ std::string relTableName;
13
+
14
+ explicit CountRelTablePrintInfo(std::string relTableName)
15
+ : relTableName{std::move(relTableName)} {}
16
+
17
+ std::string toString() const override { return "Table: " + relTableName; }
18
+
19
+ std::unique_ptr<OPPrintInfo> copy() const override {
20
+ return std::make_unique<CountRelTablePrintInfo>(relTableName);
21
+ }
22
+ };
23
+
24
+ /**
25
+ * CountRelTable is a source operator that counts edges in a rel table
26
+ * by scanning through all bound nodes and counting their edges.
27
+ * It creates its own internal vectors for node scanning (not exposed in ResultSet).
28
+ */
29
+ class CountRelTable final : public PhysicalOperator {
30
+ static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::COUNT_REL_TABLE;
31
+
32
+ public:
33
+ CountRelTable(std::vector<storage::NodeTable*> nodeTables,
34
+ std::vector<storage::RelTable*> relTables, common::RelDataDirection direction,
35
+ DataPos countOutputPos, physical_op_id id, std::unique_ptr<OPPrintInfo> printInfo)
36
+ : PhysicalOperator{type_, id, std::move(printInfo)}, nodeTables{std::move(nodeTables)},
37
+ relTables{std::move(relTables)}, direction{direction}, countOutputPos{countOutputPos} {}
38
+
39
+ bool isSource() const override { return true; }
40
+ bool isParallel() const override { return false; }
41
+
42
+ void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override;
43
+
44
+ bool getNextTuplesInternal(ExecutionContext* context) override;
45
+
46
+ std::unique_ptr<PhysicalOperator> copy() override {
47
+ return std::make_unique<CountRelTable>(nodeTables, relTables, direction, countOutputPos, id,
48
+ printInfo->copy());
49
+ }
50
+
51
+ private:
52
+ std::vector<storage::NodeTable*> nodeTables;
53
+ std::vector<storage::RelTable*> relTables;
54
+ common::RelDataDirection direction;
55
+ DataPos countOutputPos;
56
+ common::ValueVector* countVector;
57
+ bool hasExecuted;
58
+ common::row_idx_t totalCount;
59
+ };
60
+
61
+ } // namespace processor
62
+ } // namespace lbug
@@ -90,6 +90,8 @@ public:
90
90
  std::unique_ptr<PhysicalOperator> mapCopyRelFrom(
91
91
  const planner::LogicalOperator* logicalOperator);
92
92
  std::unique_ptr<PhysicalOperator> mapCopyTo(const planner::LogicalOperator* logicalOperator);
93
+ std::unique_ptr<PhysicalOperator> mapCountRelTable(
94
+ const planner::LogicalOperator* logicalOperator);
93
95
  std::unique_ptr<PhysicalOperator> mapCreateMacro(
94
96
  const planner::LogicalOperator* logicalOperator);
95
97
  std::unique_ptr<PhysicalOperator> mapCreateSequence(
@@ -4,6 +4,7 @@ add_library(lbug_optimizer
4
4
  agg_key_dependency_optimizer.cpp
5
5
  cardinality_updater.cpp
6
6
  correlated_subquery_unnest_solver.cpp
7
+ count_rel_table_optimizer.cpp
7
8
  factorization_rewriter.cpp
8
9
  filter_push_down_optimizer.cpp
9
10
  logical_operator_collector.cpp