lbug 0.12.3-dev.9 → 0.13.1-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
  2. package/lbug-source/CMakeLists.txt +15 -6
  3. package/lbug-source/Makefile +15 -4
  4. package/lbug-source/benchmark/serializer.py +24 -3
  5. package/lbug-source/dataset/demo-db/csv/copy.cypher +4 -4
  6. package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
  7. package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
  8. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
  9. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
  10. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
  11. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
  12. package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
  13. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
  14. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
  15. package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
  16. package/lbug-source/dataset/demo-db/parquet/copy.cypher +4 -4
  17. package/lbug-source/extension/duckdb/src/catalog/duckdb_catalog.cpp +1 -1
  18. package/lbug-source/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp +43 -4
  19. package/lbug-source/extension/duckdb/src/connector/duckdb_result_converter.cpp +6 -0
  20. package/lbug-source/extension/duckdb/src/connector/duckdb_secret_manager.cpp +1 -1
  21. package/lbug-source/extension/duckdb/src/function/duckdb_scan.cpp +49 -4
  22. package/lbug-source/extension/duckdb/src/include/catalog/duckdb_table_catalog_entry.h +6 -1
  23. package/lbug-source/extension/duckdb/src/include/function/duckdb_scan.h +2 -0
  24. package/lbug-source/extension/duckdb/test/test_files/duckdb.test +28 -0
  25. package/lbug-source/extension/extension_config.cmake +3 -2
  26. package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
  27. package/lbug-source/scripts/antlr4/Cypher.g4 +4 -4
  28. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  29. package/lbug-source/scripts/extension/PRODUCTION_RELEASES +1 -0
  30. package/lbug-source/scripts/generate_binary_demo.sh +1 -1
  31. package/lbug-source/src/antlr4/Cypher.g4 +4 -4
  32. package/lbug-source/src/binder/bind/bind_ddl.cpp +97 -15
  33. package/lbug-source/src/binder/bind/bind_graph_pattern.cpp +30 -3
  34. package/lbug-source/src/catalog/catalog.cpp +6 -4
  35. package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
  36. package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +46 -7
  37. package/lbug-source/src/catalog/catalog_set.cpp +1 -0
  38. package/lbug-source/src/function/function_collection.cpp +2 -1
  39. package/lbug-source/src/function/table/CMakeLists.txt +1 -0
  40. package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
  41. package/lbug-source/src/function/table/show_connection.cpp +6 -1
  42. package/lbug-source/src/function/table/show_tables.cpp +10 -2
  43. package/lbug-source/src/function/table/table_function.cpp +11 -2
  44. package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +23 -6
  45. package/lbug-source/src/include/binder/expression/variable_expression.h +1 -1
  46. package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
  47. package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +21 -2
  48. package/lbug-source/src/include/catalog/catalog_entry/table_catalog_entry.h +7 -0
  49. package/lbug-source/src/include/common/constants.h +1 -0
  50. package/lbug-source/src/include/common/string_format.h +2 -2
  51. package/lbug-source/src/include/common/types/types.h +1 -0
  52. package/lbug-source/src/include/function/table/bind_data.h +12 -1
  53. package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
  54. package/lbug-source/src/include/function/table/table_function.h +2 -0
  55. package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
  56. package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
  57. package/lbug-source/src/include/optimizer/order_by_push_down_optimizer.h +21 -0
  58. package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
  59. package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
  60. package/lbug-source/src/include/planner/operator/logical_table_function_call.h +14 -1
  61. package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
  62. package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
  63. package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
  64. package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
  65. package/lbug-source/src/include/processor/plan_mapper.h +2 -0
  66. package/lbug-source/src/include/storage/storage_manager.h +1 -0
  67. package/lbug-source/src/include/storage/storage_version_info.h +1 -1
  68. package/lbug-source/src/include/storage/table/foreign_rel_table.h +56 -0
  69. package/lbug-source/src/include/storage/table/node_table.h +6 -1
  70. package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
  71. package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
  72. package/lbug-source/src/include/storage/table/rel_table.h +2 -2
  73. package/lbug-source/src/include/transaction/transaction.h +2 -0
  74. package/lbug-source/src/optimizer/CMakeLists.txt +3 -1
  75. package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
  76. package/lbug-source/src/optimizer/limit_push_down_optimizer.cpp +12 -0
  77. package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
  78. package/lbug-source/src/optimizer/optimizer.cpp +10 -0
  79. package/lbug-source/src/optimizer/order_by_push_down_optimizer.cpp +123 -0
  80. package/lbug-source/src/optimizer/projection_push_down_optimizer.cpp +5 -1
  81. package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
  82. package/lbug-source/src/parser/transform/transform_expression.cpp +1 -1
  83. package/lbug-source/src/parser/transform/transform_graph_pattern.cpp +6 -1
  84. package/lbug-source/src/parser/transformer.cpp +7 -1
  85. package/lbug-source/src/planner/join_order/cardinality_estimator.cpp +11 -2
  86. package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
  87. package/lbug-source/src/planner/operator/logical_table_function_call.cpp +4 -0
  88. package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
  89. package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
  90. package/lbug-source/src/planner/plan/plan_join_order.cpp +16 -1
  91. package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
  92. package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
  93. package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
  94. package/lbug-source/src/processor/operator/index_lookup.cpp +31 -23
  95. package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
  96. package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
  97. package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
  98. package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
  99. package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
  100. package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
  101. package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +18 -2
  102. package/lbug-source/src/storage/storage_manager.cpp +43 -6
  103. package/lbug-source/src/storage/table/CMakeLists.txt +3 -0
  104. package/lbug-source/src/storage/table/foreign_rel_table.cpp +63 -0
  105. package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
  106. package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
  107. package/lbug-source/test/common/string_format.cpp +9 -1
  108. package/lbug-source/test/copy/copy_test.cpp +4 -4
  109. package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
  110. package/lbug-source/test/include/test_runner/test_group.h +11 -1
  111. package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
  112. package/lbug-source/test/runner/e2e_test.cpp +7 -1
  113. package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
  114. package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
  115. package/lbug-source/test/test_helper/test_helper.cpp +33 -1
  116. package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
  117. package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
  118. package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
  119. package/lbug-source/test/test_runner/test_parser.cpp +3 -0
  120. package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
  121. package/lbug-source/test/transaction/transaction_test.cpp +19 -15
  122. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2805 -2708
  123. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +7 -3
  124. package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
  125. package/lbug-source/tools/nodejs_api/package.json +4 -2
  126. package/lbug-source/tools/shell/embedded_shell.cpp +78 -3
  127. package/lbug-source/tools/shell/include/embedded_shell.h +2 -0
  128. package/lbug-source/tools/shell/linenoise.cpp +3 -3
  129. package/lbug-source/tools/shell/test/test_helper.py +1 -1
  130. package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
  131. package/lbug-source/tools/shell/test/test_shell_commands.py +19 -0
  132. package/package.json +9 -2
  133. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  134. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  135. package/prebuilt/lbugjs-linux-x64.node +0 -0
  136. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -0,0 +1,322 @@
1
+ #include "binder/binder.h"
2
+ #include "catalog/catalog.h"
3
+ #include "catalog/catalog_entry/node_table_catalog_entry.h"
4
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
5
+ #include "common/exception/binder.h"
6
+ #include "function/table/bind_data.h"
7
+ #include "function/table/simple_table_function.h"
8
+ #include "main/client_context.h"
9
+ #include "storage/database_header.h"
10
+ #include "storage/index/hash_index.h"
11
+ #include "storage/page_manager.h"
12
+ #include "storage/storage_manager.h"
13
+ #include "storage/table/list_chunk_data.h"
14
+ #include "storage/table/node_table.h"
15
+ #include "storage/table/rel_table.h"
16
+ #include "storage/table/string_chunk_data.h"
17
+ #include "storage/table/struct_chunk_data.h"
18
+ #include "transaction/transaction.h"
19
+
20
+ using namespace lbug::common;
21
+ using namespace lbug::catalog;
22
+ using namespace lbug::storage;
23
+ using namespace lbug::main;
24
+
25
+ namespace lbug {
26
+ namespace function {
27
+
28
+ struct DiskSizeInfoBindData final : TableFuncBindData {
29
+ const ClientContext* ctx;
30
+ DiskSizeInfoBindData(binder::expression_vector columns, row_idx_t numRows,
31
+ const ClientContext* ctx)
32
+ : TableFuncBindData{std::move(columns), numRows}, ctx{ctx} {}
33
+
34
+ std::unique_ptr<TableFuncBindData> copy() const override {
35
+ return std::make_unique<DiskSizeInfoBindData>(columns, numRows, ctx);
36
+ }
37
+ };
38
+
39
+ static uint64_t countChunkDataPages(const ColumnChunkData& chunkData) {
40
+ uint64_t pages = 0;
41
+ auto metadata = chunkData.getResidencyState() == ResidencyState::ON_DISK ?
42
+ chunkData.getMetadata() :
43
+ chunkData.getMetadataToFlush();
44
+ pages += metadata.getNumPages();
45
+
46
+ if (chunkData.hasNullData()) {
47
+ pages += countChunkDataPages(*chunkData.getNullData());
48
+ }
49
+
50
+ auto physicalType = chunkData.getDataType().getPhysicalType();
51
+ switch (physicalType) {
52
+ case PhysicalTypeID::STRUCT: {
53
+ auto& structChunk = chunkData.cast<StructChunkData>();
54
+ for (auto i = 0u; i < structChunk.getNumChildren(); i++) {
55
+ pages += countChunkDataPages(structChunk.getChild(i));
56
+ }
57
+ } break;
58
+ case PhysicalTypeID::STRING: {
59
+ auto& stringChunk = chunkData.cast<StringChunkData>();
60
+ pages += countChunkDataPages(*stringChunk.getIndexColumnChunk());
61
+ auto& dictionaryChunk = stringChunk.getDictionaryChunk();
62
+ pages += countChunkDataPages(*dictionaryChunk.getStringDataChunk());
63
+ pages += countChunkDataPages(*dictionaryChunk.getOffsetChunk());
64
+ } break;
65
+ case PhysicalTypeID::ARRAY:
66
+ case PhysicalTypeID::LIST: {
67
+ auto& listChunk = chunkData.cast<ListChunkData>();
68
+ pages += countChunkDataPages(*listChunk.getOffsetColumnChunk());
69
+ pages += countChunkDataPages(*listChunk.getSizeColumnChunk());
70
+ pages += countChunkDataPages(*listChunk.getDataColumnChunk());
71
+ } break;
72
+ default:
73
+ break;
74
+ }
75
+ return pages;
76
+ }
77
+
78
+ static uint64_t countChunkedGroupPages(ChunkedNodeGroup* chunkedGroup) {
79
+ uint64_t pages = 0;
80
+ auto numColumns = chunkedGroup->getNumColumns();
81
+ for (auto i = 0u; i < numColumns; i++) {
82
+ for (auto* segment : chunkedGroup->getColumnChunk(i).getSegments()) {
83
+ pages += countChunkDataPages(*segment);
84
+ }
85
+ }
86
+ if (chunkedGroup->getFormat() == NodeGroupDataFormat::CSR) {
87
+ auto& chunkedCSRGroup = chunkedGroup->cast<ChunkedCSRNodeGroup>();
88
+ for (auto* segment : chunkedCSRGroup.getCSRHeader().offset->getSegments()) {
89
+ pages += countChunkDataPages(*segment);
90
+ }
91
+ for (auto* segment : chunkedCSRGroup.getCSRHeader().length->getSegments()) {
92
+ pages += countChunkDataPages(*segment);
93
+ }
94
+ }
95
+ return pages;
96
+ }
97
+
98
+ static uint64_t countNodeGroupPages(NodeGroup* nodeGroup) {
99
+ uint64_t pages = 0;
100
+ auto numChunks = nodeGroup->getNumChunkedGroups();
101
+ for (auto chunkIdx = 0ul; chunkIdx < numChunks; chunkIdx++) {
102
+ pages += countChunkedGroupPages(nodeGroup->getChunkedNodeGroup(chunkIdx));
103
+ }
104
+ if (nodeGroup->getFormat() == NodeGroupDataFormat::CSR) {
105
+ auto& csrNodeGroup = nodeGroup->cast<CSRNodeGroup>();
106
+ auto persistentChunk = csrNodeGroup.getPersistentChunkedGroup();
107
+ if (persistentChunk) {
108
+ pages += countChunkedGroupPages(persistentChunk);
109
+ }
110
+ }
111
+ return pages;
112
+ }
113
+
114
// One output row of the disk-size-info table function. The field order matches the output
// column order (category, name, num_pages, size_bytes) and is relied upon by the
// brace-initialised entries built in collectDiskSizeInfo().
struct DiskSizeEntry {
    // Kind of storage the row describes, e.g. "header", "node_table", "free_space", "total".
    std::string category;
    // Name of the item within its category (for tables, derived from the table name).
    std::string name;
    // Number of pages attributed to the item.
    uint64_t numPages;
    // Size in bytes; every producer in this file sets it to numPages * LBUG_PAGE_SIZE
    // (or 0 for the in-memory info row).
    uint64_t sizeBytes;
};
120
+
121
+ // Estimate the number of pages used by a hash index based on the number of entries
122
+ // Hash index structure:
123
+ // - INDEX_HEADER_PAGES pages for HashIndexHeaderOnDisk (2 pages for 256 sub-indexes)
124
+ // - DiskArrayCollection header pages (1+ pages)
125
+ // - For each of 256 sub-indexes: pSlots and oSlots disk arrays
126
+ // - Each slot is SLOT_CAPACITY_BYTES (256 bytes), so 16 slots per page
127
+ // - Number of primary slots = 2^currentLevel + nextSplitSlotId
128
+ // - Overflow slots depend on collisions
129
+ static uint64_t estimateHashIndexPages(const PrimaryKeyIndex* pkIndex) {
130
+ if (!pkIndex) {
131
+ return 0;
132
+ }
133
+
134
+ uint64_t totalPages = 0;
135
+
136
+ // Index header pages (storing HashIndexHeaderOnDisk for all 256 sub-indexes)
137
+ totalPages += INDEX_HEADER_PAGES; // 2 pages
138
+
139
+ // DiskArrayCollection header pages (at least 1)
140
+ // Each header page stores headers for up to ~170 disk arrays
141
+ // With 256 sub-indexes * 2 arrays (pSlots + oSlots) = 512 arrays
142
+ totalPages += 4; // Approximate: ~3-4 header pages for DiskArrayCollection
143
+
144
+ // For each sub-index, estimate primary and overflow slot pages
145
+ // We can access the headers through the pkIndex to get actual sizes
146
+ // But since the headers are private, we estimate based on numEntries
147
+
148
+ // Get total entries from all sub-indexes
149
+ // Each entry requires a slot, and slots have capacity of ~3-20 entries depending on key type
150
+ // With linear hashing, we expect ~70-80% fill rate
151
+
152
+ // Rough estimation: For N entries with 8-byte keys:
153
+ // - Slot capacity is approximately 3 entries per slot (256-byte slot / 80 bytes per entry)
154
+ // - Number of slots ≈ N / (3 * 0.7) ≈ N / 2
155
+ // - Pages for slots = slots / 16 (16 slots per page)
156
+ // - Plus PIP pages for addressing
157
+
158
+ // Since we can't easily access internal headers, we return the header overhead
159
+ // and let the unaccounted calculation handle the rest
160
+ return totalPages;
161
+ }
162
+
163
+ static std::vector<DiskSizeEntry> collectDiskSizeInfo(const ClientContext* context) {
164
+ std::vector<DiskSizeEntry> entries;
165
+ auto storageManager = StorageManager::Get(*context);
166
+ auto catalog = Catalog::Get(*context);
167
+ auto dataFH = storageManager->getDataFH();
168
+
169
+ // Handle in-memory databases
170
+ if (storageManager->isInMemory()) {
171
+ entries.push_back({"info", "in_memory_database", 0, 0});
172
+ return entries;
173
+ }
174
+
175
+ auto pageManager = dataFH->getPageManager();
176
+
177
+ // 1. Database header (always 1 page at index 0)
178
+ entries.push_back({"header", "database_header", 1, LBUG_PAGE_SIZE});
179
+
180
+ // 2. Get catalog and metadata page ranges from database header
181
+ auto databaseHeader = DatabaseHeader::readDatabaseHeader(*dataFH->getFileInfo());
182
+ if (databaseHeader.has_value()) {
183
+ entries.push_back({"catalog", "catalog", databaseHeader->catalogPageRange.numPages,
184
+ databaseHeader->catalogPageRange.numPages * LBUG_PAGE_SIZE});
185
+
186
+ entries.push_back({"metadata", "metadata", databaseHeader->metadataPageRange.numPages,
187
+ databaseHeader->metadataPageRange.numPages * LBUG_PAGE_SIZE});
188
+ }
189
+
190
+ // 3. Count table data pages
191
+ auto nodeTableEntries =
192
+ catalog->getNodeTableEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
193
+ auto relGroupEntries = catalog->getRelGroupEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
194
+
195
+ for (const auto tableEntry : nodeTableEntries) {
196
+ auto& nodeTable = storageManager->getTable(tableEntry->getTableID())->cast<NodeTable>();
197
+ uint64_t tablePages = 0;
198
+ auto numNodeGroups = nodeTable.getNumNodeGroups();
199
+ for (auto i = 0ul; i < numNodeGroups; i++) {
200
+ tablePages += countNodeGroupPages(nodeTable.getNodeGroup(i));
201
+ }
202
+ entries.push_back(
203
+ {"node_table", tableEntry->getName(), tablePages, tablePages * LBUG_PAGE_SIZE});
204
+
205
+ // Count primary key index header pages (rough estimate for overhead)
206
+ auto* pkIndex = nodeTable.getPKIndex();
207
+ uint64_t indexPages = estimateHashIndexPages(pkIndex);
208
+ if (indexPages > 0) {
209
+ entries.push_back({"pk_index_overhead", tableEntry->getName() + "_pk", indexPages,
210
+ indexPages * LBUG_PAGE_SIZE});
211
+ }
212
+ }
213
+
214
+ for (const auto entry : relGroupEntries) {
215
+ auto& relGroupEntry = entry->cast<RelGroupCatalogEntry>();
216
+ for (auto& info : relGroupEntry.getRelEntryInfos()) {
217
+ auto& relTable = storageManager->getTable(info.oid)->cast<RelTable>();
218
+ uint64_t tablePages = 0;
219
+
220
+ for (auto direction : relTable.getStorageDirections()) {
221
+ auto* directedRelTableData = relTable.getDirectedTableData(direction);
222
+ auto numNodeGroups = directedRelTableData->getNumNodeGroups();
223
+ for (auto i = 0ul; i < numNodeGroups; i++) {
224
+ tablePages += countNodeGroupPages(directedRelTableData->getNodeGroup(i));
225
+ }
226
+ }
227
+ auto tableName = relGroupEntry.getName() + ":" +
228
+ catalog
229
+ ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
230
+ info.nodePair.srcTableID)
231
+ ->getName() +
232
+ "->" +
233
+ catalog
234
+ ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
235
+ info.nodePair.dstTableID)
236
+ ->getName();
237
+ entries.push_back({"rel_table", tableName, tablePages, tablePages * LBUG_PAGE_SIZE});
238
+ }
239
+ }
240
+
241
+ // 4. Free space (from FSM)
242
+ auto freeEntries = pageManager->getFreeEntries(0, pageManager->getNumFreeEntries());
243
+ uint64_t freePages = 0;
244
+ for (const auto& freeEntry : freeEntries) {
245
+ freePages += freeEntry.numPages;
246
+ }
247
+ entries.push_back({"free_space", "free_pages", freePages, freePages * LBUG_PAGE_SIZE});
248
+
249
+ // 5. Calculate unaccounted pages (index slot data)
250
+ auto totalFilePages = dataFH->getNumPages();
251
+ uint64_t accountedPages = 1; // header
252
+ if (databaseHeader.has_value()) {
253
+ accountedPages +=
254
+ databaseHeader->catalogPageRange.numPages + databaseHeader->metadataPageRange.numPages;
255
+ }
256
+ for (const auto& entry : entries) {
257
+ if (entry.category == "node_table" || entry.category == "rel_table" ||
258
+ entry.category == "pk_index_overhead") {
259
+ accountedPages += entry.numPages;
260
+ }
261
+ }
262
+ accountedPages += freePages;
263
+
264
+ if (totalFilePages > accountedPages) {
265
+ uint64_t unaccountedPages = totalFilePages - accountedPages;
266
+ entries.push_back({"index_data", "hash_index_slots", unaccountedPages,
267
+ unaccountedPages * LBUG_PAGE_SIZE});
268
+ }
269
+
270
+ // 6. Total file size (last row)
271
+ entries.push_back({"total", "file_total", totalFilePages, totalFilePages * LBUG_PAGE_SIZE});
272
+
273
+ return entries;
274
+ }
275
+
276
+ static offset_t internalTableFunc(const TableFuncMorsel& morsel, const TableFuncInput& input,
277
+ DataChunk& output) {
278
+ const auto bindData = input.bindData->constPtrCast<DiskSizeInfoBindData>();
279
+ auto entries = collectDiskSizeInfo(bindData->ctx);
280
+
281
+ auto numEntriesToOutput = std::min(static_cast<uint64_t>(entries.size()) - morsel.startOffset,
282
+ morsel.getMorselSize());
283
+
284
+ for (row_idx_t i = 0; i < numEntriesToOutput; ++i) {
285
+ const auto& entry = entries[morsel.startOffset + i];
286
+ output.getValueVectorMutable(0).setValue(i, entry.category);
287
+ output.getValueVectorMutable(1).setValue(i, entry.name);
288
+ output.getValueVectorMutable(2).setValue<uint64_t>(i, entry.numPages);
289
+ output.getValueVectorMutable(3).setValue<uint64_t>(i, entry.sizeBytes);
290
+ }
291
+ return numEntriesToOutput;
292
+ }
293
+
294
+ static std::unique_ptr<TableFuncBindData> bindFunc(const ClientContext* context,
295
+ const TableFuncBindInput* input) {
296
+ std::vector<std::string> columnNames = {"category", "name", "num_pages", "size_bytes"};
297
+ std::vector<LogicalType> columnTypes;
298
+ columnTypes.push_back(LogicalType::STRING());
299
+ columnTypes.push_back(LogicalType::STRING());
300
+ columnTypes.push_back(LogicalType::UINT64());
301
+ columnTypes.push_back(LogicalType::UINT64());
302
+
303
+ // Get number of entries to report
304
+ auto entries = collectDiskSizeInfo(context);
305
+
306
+ auto columns = input->binder->createVariables(columnNames, columnTypes);
307
+ return std::make_unique<DiskSizeInfoBindData>(columns, entries.size(), context);
308
+ }
309
+
310
+ function_set DiskSizeInfoFunction::getFunctionSet() {
311
+ function_set functionSet;
312
+ auto function = std::make_unique<TableFunction>(name, std::vector<LogicalTypeID>{});
313
+ function->tableFunc = SimpleTableFunc::getTableFunc(internalTableFunc);
314
+ function->bindFunc = bindFunc;
315
+ function->initSharedStateFunc = SimpleTableFunc::initSharedState;
316
+ function->initLocalStateFunc = TableFunction::initEmptyLocalState;
317
+ functionSet.push_back(std::move(function));
318
+ return functionSet;
319
+ }
320
+
321
+ } // namespace function
322
+ } // namespace lbug
@@ -71,7 +71,12 @@ static std::unique_ptr<TableFuncBindData> bindFunc(const ClientContext* context,
71
71
  if (entry->getType() != catalog::CatalogEntryType::REL_GROUP_ENTRY) {
72
72
  throw BinderException{"Show connection can only be called on a rel table!"};
73
73
  }
74
- for (auto& info : entry->ptrCast<RelGroupCatalogEntry>()->getRelEntryInfos()) {
74
+ for (auto& info : entry->ptrCast<RelGroupCatalogEntry>()
75
+ ->getRelEntryInfos()) { // Skip foreign-backed rel tables (they have
76
+ // FOREIGN_TABLE_ID)
77
+ if (info.nodePair.srcTableID == common::FOREIGN_TABLE_ID) {
78
+ continue;
79
+ }
75
80
  auto srcEntry = catalog->getTableCatalogEntry(transaction, info.nodePair.srcTableID)
76
81
  ->ptrCast<NodeTableCatalogEntry>();
77
82
  auto dstEntry = catalog->getTableCatalogEntry(transaction, info.nodePair.dstTableID)
@@ -1,5 +1,6 @@
1
1
  #include "binder/binder.h"
2
2
  #include "catalog/catalog.h"
3
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
3
4
  #include "catalog/catalog_entry/table_catalog_entry.h"
4
5
  #include "function/table/bind_data.h"
5
6
  #include "function/table/simple_table_function.h"
@@ -72,9 +73,16 @@ static std::unique_ptr<TableFuncBindData> bindFunc(const main::ClientContext* co
72
73
  auto catalog = Catalog::Get(*context);
73
74
  for (auto& entry :
74
75
  catalog->getTableEntries(transaction, context->useInternalCatalogEntry())) {
76
+ std::string dbName = LOCAL_DB_NAME;
77
+ // For foreign-backed rel tables, use the foreign database name
78
+ if (entry->getType() == CatalogEntryType::REL_GROUP_ENTRY) {
79
+ auto relEntry = entry->constPtrCast<RelGroupCatalogEntry>();
80
+ if (!relEntry->getForeignDatabaseName().empty()) {
81
+ dbName = relEntry->getForeignDatabaseName();
82
+ }
83
+ }
75
84
  tableInfos.emplace_back(entry->getName(), entry->getTableID(),
76
- TableTypeUtils::toString(entry->getTableType()), LOCAL_DB_NAME,
77
- entry->getComment());
85
+ TableTypeUtils::toString(entry->getTableType()), dbName, entry->getComment());
78
86
  }
79
87
  }
80
88
 
@@ -100,8 +100,17 @@ std::unique_ptr<PhysicalOperator> TableFunction::getPhysicalPlan(PlanMapper* pla
100
100
  auto initInput =
101
101
  TableFuncInitSharedStateInput(info.bindData.get(), planMapper->executionContext);
102
102
  auto sharedState = info.function.initSharedStateFunc(initInput);
103
- auto printInfo = std::make_unique<TableFunctionCallPrintInfo>(call.getTableFunc().name,
104
- call.getBindData()->columns);
103
+ // Filter columns for print info based on column skips
104
+ binder::expression_vector printExprs;
105
+ auto columnSkips = call.getBindData()->getColumnSkips();
106
+ for (auto i = 0u; i < call.getBindData()->columns.size(); ++i) {
107
+ if (columnSkips.empty() || !columnSkips[i]) {
108
+ printExprs.push_back(call.getBindData()->columns[i]);
109
+ }
110
+ }
111
+ auto desc = call.getBindData()->getDescription();
112
+ auto printInfo = std::make_unique<TableFunctionCallPrintInfo>(
113
+ desc.empty() ? call.getTableFunc().name : desc, printExprs);
105
114
  return std::make_unique<TableFunctionCall>(std::move(info), sharedState,
106
115
  planMapper->getOperatorID(), std::move(printInfo));
107
116
  }
@@ -1,10 +1,14 @@
1
1
  #pragma once
2
2
 
3
+ #include <optional>
4
+
3
5
  #include "catalog/catalog_entry/catalog_entry_type.h"
4
6
  #include "catalog/catalog_entry/node_table_id_pair.h"
5
7
  #include "common/enums/conflict_action.h"
6
8
  #include "common/enums/extend_direction.h"
7
9
  #include "common/enums/rel_multiplicity.h"
10
+ #include "function/table/bind_data.h"
11
+ #include "function/table/table_function.h"
8
12
  #include "property_definition.h"
9
13
 
10
14
  namespace lbug {
@@ -71,14 +75,15 @@ struct LBUG_API BoundExtraCreateTableInfo : BoundExtraCreateCatalogEntryInfo {
71
75
 
72
76
  struct BoundExtraCreateNodeTableInfo final : BoundExtraCreateTableInfo {
73
77
  std::string primaryKeyName;
78
+ std::string storage;
74
79
 
75
80
  BoundExtraCreateNodeTableInfo(std::string primaryKeyName,
76
- std::vector<PropertyDefinition> definitions)
81
+ std::vector<PropertyDefinition> definitions, std::string storage = "")
77
82
  : BoundExtraCreateTableInfo{std::move(definitions)},
78
- primaryKeyName{std::move(primaryKeyName)} {}
83
+ primaryKeyName{std::move(primaryKeyName)}, storage{std::move(storage)} {}
79
84
  BoundExtraCreateNodeTableInfo(const BoundExtraCreateNodeTableInfo& other)
80
85
  : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
81
- primaryKeyName{other.primaryKeyName} {}
86
+ primaryKeyName{other.primaryKeyName}, storage{other.storage} {}
82
87
 
83
88
  std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
84
89
  return std::make_unique<BoundExtraCreateNodeTableInfo>(*this);
@@ -90,18 +95,30 @@ struct BoundExtraCreateRelTableGroupInfo final : BoundExtraCreateTableInfo {
90
95
  common::RelMultiplicity dstMultiplicity;
91
96
  common::ExtendDirection storageDirection;
92
97
  std::vector<catalog::NodeTableIDPair> nodePairs;
98
+ std::string storage;
99
+ std::optional<function::TableFunction> scanFunction;
100
+ std::optional<std::shared_ptr<function::TableFuncBindData>> scanBindData;
101
+ std::string foreignDatabaseName;
93
102
 
94
103
  explicit BoundExtraCreateRelTableGroupInfo(std::vector<PropertyDefinition> definitions,
95
104
  common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity,
96
- common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs)
105
+ common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs,
106
+ std::string storage = "",
107
+ std::optional<function::TableFunction> scanFunction = std::nullopt,
108
+ std::optional<std::shared_ptr<function::TableFuncBindData>> scanBindData = std::nullopt,
109
+ std::string foreignDatabaseName = "")
97
110
  : BoundExtraCreateTableInfo{std::move(definitions)}, srcMultiplicity{srcMultiplicity},
98
111
  dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection},
99
- nodePairs{std::move(nodePairs)} {}
112
+ nodePairs{std::move(nodePairs)}, storage{std::move(storage)},
113
+ scanFunction{std::move(scanFunction)}, scanBindData{std::move(scanBindData)},
114
+ foreignDatabaseName{std::move(foreignDatabaseName)} {}
100
115
 
101
116
  BoundExtraCreateRelTableGroupInfo(const BoundExtraCreateRelTableGroupInfo& other)
102
117
  : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
103
118
  srcMultiplicity{other.srcMultiplicity}, dstMultiplicity{other.dstMultiplicity},
104
- storageDirection{other.storageDirection}, nodePairs{other.nodePairs} {}
119
+ storageDirection{other.storageDirection}, nodePairs{other.nodePairs},
120
+ storage{other.storage}, scanFunction{other.scanFunction},
121
+ scanBindData{other.scanBindData}, foreignDatabaseName{other.foreignDatabaseName} {}
105
122
 
106
123
  std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
107
124
  return std::make_unique<BoundExtraCreateRelTableGroupInfo>(*this);
@@ -5,7 +5,7 @@
5
5
  namespace lbug {
6
6
  namespace binder {
7
7
 
8
- class VariableExpression final : public Expression {
8
+ class LBUG_API VariableExpression final : public Expression {
9
9
  static constexpr common::ExpressionType expressionType_ = common::ExpressionType::VARIABLE;
10
10
 
11
11
  public:
@@ -15,9 +15,9 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry {
15
15
 
16
16
  public:
17
17
  NodeTableCatalogEntry() = default;
18
- NodeTableCatalogEntry(std::string name, std::string primaryKeyName)
19
- : TableCatalogEntry{entryType_, std::move(name)},
20
- primaryKeyName{std::move(primaryKeyName)} {}
18
+ NodeTableCatalogEntry(std::string name, std::string primaryKeyName, std::string storage = "")
19
+ : TableCatalogEntry{entryType_, std::move(name)}, primaryKeyName{std::move(primaryKeyName)},
20
+ storage{std::move(storage)} {}
21
21
 
22
22
  bool isParent(common::table_id_t /*tableID*/) override { return false; }
23
23
  common::TableType getTableType() const override { return common::TableType::NODE; }
@@ -29,6 +29,7 @@ public:
29
29
  // Returns the property definition obtained by passing the stored primaryKeyName to getProperty().
  const binder::PropertyDefinition& getPrimaryKeyDefinition() const {
30
30
  return getProperty(primaryKeyName);
31
31
  }
32
+ const std::string& getStorage() const { return storage; } // Reference to the `storage` member; empty unless one was passed to the constructor.
32
33
 
33
34
  void renameProperty(const std::string& propertyName, const std::string& newName) override;
34
35
 
@@ -44,6 +45,7 @@ private:
44
45
 
45
46
  private:
46
47
  std::string primaryKeyName;
48
+ std::string storage;
47
49
  };
48
50
 
49
51
  } // namespace catalog
@@ -1,9 +1,13 @@
1
1
  #pragma once
2
2
 
3
+ #include <optional>
4
+
3
5
  #include "catalog/catalog_entry/table_catalog_entry.h"
4
6
  #include "common/enums/extend_direction.h"
5
7
  #include "common/enums/rel_direction.h"
6
8
  #include "common/enums/rel_multiplicity.h"
9
+ #include "function/table/bind_data.h"
10
+ #include "function/table/table_function.h"
7
11
  #include "node_table_id_pair.h"
8
12
 
9
13
  namespace lbug {
@@ -34,10 +38,15 @@ public:
34
38
  RelGroupCatalogEntry() = default;
35
39
  // Builds a rel group entry from its name, multiplicities, storage direction and rel table infos.
  // The trailing parameters (storage, scanFunction, scanBindData, foreignDatabaseName) all have
  // defaults, so call sites that pass only the first five arguments still compile.
  // NOTE(review): scanBindData is an optional wrapping a shared_ptr, so "absent" can be spelled
  // either as std::nullopt or as an engaged optional holding a null pointer — callers should
  // check both; confirm which form producers actually use.
  RelGroupCatalogEntry(std::string tableName, common::RelMultiplicity srcMultiplicity,
36
40
  common::RelMultiplicity dstMultiplicity, common::ExtendDirection storageDirection,
37
- std::vector<RelTableCatalogInfo> relTableInfos)
41
+ std::vector<RelTableCatalogInfo> relTableInfos, std::string storage = "",
42
+ std::optional<function::TableFunction> scanFunction = std::nullopt,
43
+ std::optional<std::shared_ptr<function::TableFuncBindData>> scanBindData = std::nullopt,
44
+ std::string foreignDatabaseName = "")
38
45
  : TableCatalogEntry{type_, std::move(tableName)}, srcMultiplicity{srcMultiplicity},
39
46
  dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection},
40
- relTableInfos{std::move(relTableInfos)} {
47
+ relTableInfos{std::move(relTableInfos)}, storage{std::move(storage)},
48
+ scanFunction{std::move(scanFunction)}, scanBindData{std::move(scanBindData)},
49
+ foreignDatabaseName{std::move(foreignDatabaseName)} {
41
50
  propertyCollection =
42
51
  PropertyDefinitionCollection{1}; // Skip NBR_NODE_ID column as the first one.
43
52
  }
@@ -53,6 +62,12 @@ public:
53
62
  }
54
63
 
55
64
  common::ExtendDirection getStorageDirection() const { return storageDirection; } // Returns the storageDirection member by value.
65
+ const std::string& getStorage() const { return storage; } // Reference to the `storage` member; empty unless one was passed to the constructor.
66
+ const std::optional<function::TableFunction>& getScanFunction() const { return scanFunction; } // std::nullopt when no scan function was supplied.
67
+ const std::optional<std::shared_ptr<function::TableFuncBindData>>& getScanBindData() const {
68
+ return scanBindData; // May be std::nullopt; NOTE(review): an engaged value can still hold a null shared_ptr — confirm callers check both.
69
+ }
70
+ const std::string& getForeignDatabaseName() const { return foreignDatabaseName; } // Empty unless a name was passed to the constructor.
56
71
 
57
72
  common::idx_t getNumRelTables() const { return relTableInfos.size(); } // Number of entries in relTableInfos.
58
73
  const std::vector<RelTableCatalogInfo>& getRelEntryInfos() const { return relTableInfos; } // Read-only reference to relTableInfos; no copy is made.
@@ -97,6 +112,10 @@ private:
97
112
  // TODO(Guodong): Avoid using extend direction for storage direction
98
113
  common::ExtendDirection storageDirection = common::ExtendDirection::BOTH;
99
114
  std::vector<RelTableCatalogInfo> relTableInfos;
115
+ std::string storage;
116
+ std::optional<function::TableFunction> scanFunction;
117
+ std::optional<std::shared_ptr<function::TableFuncBindData>> scanBindData;
118
+ std::string foreignDatabaseName; // Database name for foreign-backed rel tables
100
119
  };
101
120
 
102
121
  } // namespace catalog
@@ -2,6 +2,7 @@
2
2
 
3
3
  #include <vector>
4
4
 
5
+ #include "binder/bound_scan_source.h"
5
6
  #include "binder/ddl/bound_alter_info.h"
6
7
  #include "binder/ddl/bound_create_table_info.h"
7
8
  #include "catalog/catalog_entry/catalog_entry.h"
@@ -43,6 +44,12 @@ public:
43
44
 
44
45
  virtual function::TableFunction getScanFunction() { KU_UNREACHABLE; } // Base body is marked unreachable; NOTE(review): presumably only overriding entries may be asked for a scan function — confirm.
45
46
 
47
+ virtual std::unique_ptr<binder::BoundTableScanInfo> getBoundScanInfo(
48
+ [[maybe_unused]] main::ClientContext* context,
49
+ [[maybe_unused]] const std::string& nodeUniqueName = "") { // NOTE(review): default arguments on a virtual are chosen by the static type at the call site; overriders should repeat the same default.
50
+ return nullptr; // Base implementation supplies no scan info and ignores both arguments.
51
+ }
52
+
46
53
  common::column_id_t getMaxColumnID() const;
47
54
  void vacuumColumnIDs(common::column_id_t nextColumnID);
48
55
  std::vector<binder::PropertyDefinition> getProperties() const {
@@ -83,6 +83,7 @@ struct StorageConstants {
83
83
 
84
84
  // String keys for table options.
  struct TableOptionConstants {
85
85
  static constexpr char REL_STORAGE_DIRECTION_OPTION[] = "STORAGE_DIRECTION";
86
+ static constexpr char REL_STORAGE_OPTION[] = "STORAGE"; // Key string "STORAGE"; NOTE(review): presumably matched against the option name in CREATE statements — confirm.
86
87
  };
87
88
 
88
89
  // Hash Index Configurations
@@ -14,8 +14,8 @@ namespace common {
14
14
  #if USE_STD_FORMAT
15
15
 
16
16
  // Formats `args` according to `format` and returns the result as a std::string.
  // The new version takes the format string as a runtime std::string_view and forwards to
  // std::vformat, so the format string is no longer validated at compile time the way a
  // std::format_string parameter is; std::vformat reports a malformed format string by throwing
  // std::format_error. `args` are passed to std::make_format_args as lvalues.
  template<typename... Args>
17
- inline std::string stringFormat(std::format_string<Args...> format, Args&&... args) {
18
- return std::format(format, std::forward<Args>(args)...);
17
+ inline std::string stringFormat(std::string_view format, Args&&... args) {
18
+ return std::vformat(format, std::make_format_args(args...));
19
19
  }
20
20
 
21
21
  #else
@@ -75,6 +75,7 @@ using table_id_set_t = std::unordered_set<table_id_t>;
75
75
  template<typename T>
76
76
  using table_id_map_t = std::unordered_map<table_id_t, T>;
77
77
  constexpr table_id_t INVALID_TABLE_ID = INVALID_OID;
78
+ constexpr table_id_t FOREIGN_TABLE_ID = INVALID_OID - 1; // One below INVALID_TABLE_ID; NOTE(review): presumably a sentinel for foreign-backed tables — confirm no real table can reach this ID.
78
79
  // offset type alias
79
80
  using offset_t = uint64_t;
80
81
  constexpr offset_t INVALID_OFFSET = UINT64_MAX;
@@ -25,7 +25,8 @@ struct LBUG_API TableFuncBindData {
25
25
  // Copy constructor: copies columns, numRows, columnSkips, limitNum and orderBy member-wise,
  // duplicates optionalParams through its copy() when it is non-null (otherwise keeps nullptr),
  // and copies columnPredicates through copyVector.
  TableFuncBindData(const TableFuncBindData& other)
26
26
  : columns{other.columns}, numRows{other.numRows},
27
27
  optionalParams{other.optionalParams == nullptr ? nullptr : other.optionalParams->copy()},
28
- columnSkips{other.columnSkips}, columnPredicates{copyVector(other.columnPredicates)} {}
28
+ columnSkips{other.columnSkips}, columnPredicates{copyVector(other.columnPredicates)},
29
+ limitNum{other.limitNum}, orderBy{other.orderBy} {}
29
30
  TableFuncBindData& operator=(const TableFuncBindData& other) = delete;
30
31
  virtual ~TableFuncBindData() = default;
31
32
 
@@ -46,10 +47,18 @@ struct LBUG_API TableFuncBindData {
46
47
  return columnPredicates;
47
48
  }
48
49
 
50
+ void setLimitNum(common::row_idx_t limit) { limitNum = limit; } // Overwrites limitNum (initially common::INVALID_ROW_IDX).
51
+ common::row_idx_t getLimitNum() const { return limitNum; } // Current limitNum; common::INVALID_ROW_IDX is the member's initial value.
52
+
53
+ void setOrderBy(std::string orderBy) { this->orderBy = std::move(orderBy); } // Sink parameter: moved (not copied) into the orderBy member.
54
+ std::string getOrderBy() const { return orderBy; } // Returns the stored orderBy string by value (a copy); empty if never set.
55
+
49
56
  virtual bool getIgnoreErrorsOption() const;
50
57
 
51
58
  virtual std::unique_ptr<TableFuncBindData> copy() const;
52
59
 
60
+ virtual std::string getDescription() const { return ""; } // Base implementation returns an empty string; overridable.
61
+
53
62
  template<class TARGET>
54
63
  const TARGET* constPtrCast() const {
55
64
  return common::ku_dynamic_cast<const TARGET*>(this);
@@ -63,6 +72,8 @@ struct LBUG_API TableFuncBindData {
63
72
  protected:
64
73
  std::vector<bool> columnSkips;
65
74
  std::vector<storage::ColumnPredicateSet> columnPredicates;
75
+ common::row_idx_t limitNum = common::INVALID_ROW_IDX;
76
+ std::string orderBy;
66
77
  };
67
78
 
68
79
  } // namespace function
@@ -134,6 +134,12 @@ struct FileInfoFunction final {
134
134
  static function_set getFunctionSet();
135
135
  };
136
136
 
137
+ struct DiskSizeInfoFunction final {
138
+ static constexpr const char* name = "DISK_SIZE_INFO"; // Function name string; NOTE(review): presumably the name it is invoked by — confirm.
139
+
140
+ static function_set getFunctionSet(); // Declared only; defined outside this header section.
141
+ };
142
+
137
143
  struct ShowAttachedDatabasesFunction final {
138
144
  static constexpr const char* name = "SHOW_ATTACHED_DATABASES";
139
145
 
@@ -133,6 +133,7 @@ using table_func_init_local_t =
133
133
  using table_func_init_output_t =
134
134
  std::function<std::unique_ptr<TableFuncOutput>(const TableFuncInitOutputInput&)>;
135
135
  using table_func_can_parallel_t = std::function<bool()>;
136
+ using table_func_supports_push_down_t = std::function<bool()>; // Nullary predicate type; used for TableFunction::supportsPushDownFunc.
136
137
  using table_func_progress_t = std::function<double(TableFuncSharedState* sharedState)>;
137
138
  using table_func_finalize_t =
138
139
  std::function<void(const processor::ExecutionContext*, TableFuncSharedState*)>;
@@ -153,6 +154,7 @@ struct LBUG_API TableFunction final : Function {
153
154
  table_func_init_local_t initLocalStateFunc = nullptr;
154
155
  table_func_init_output_t initOutputFunc = nullptr;
155
156
  table_func_can_parallel_t canParallelFunc = [] { return true; };
157
+ table_func_supports_push_down_t supportsPushDownFunc = [] { return false; }; // Defaults to reporting no push-down support.
156
158
  table_func_progress_t progressFunc = [](TableFuncSharedState*) { return 0.0; };
157
159
  table_func_finalize_t finalizeFunc = [](auto, auto) {};
158
160
  table_func_rewrite_t rewriteFunc = nullptr;