lbug 0.12.3-dev.15 → 0.12.3-dev.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/lbug-source/CMakeLists.txt +1 -1
  2. package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
  3. package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
  4. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
  5. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
  6. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
  7. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
  8. package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
  9. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
  10. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
  11. package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
  12. package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
  13. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  14. package/lbug-source/src/antlr4/Cypher.g4 +1 -1
  15. package/lbug-source/src/binder/bind/bind_ddl.cpp +23 -13
  16. package/lbug-source/src/catalog/catalog.cpp +5 -4
  17. package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
  18. package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +7 -0
  19. package/lbug-source/src/function/function_collection.cpp +2 -1
  20. package/lbug-source/src/function/table/CMakeLists.txt +1 -0
  21. package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
  22. package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +10 -6
  23. package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
  24. package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +4 -2
  25. package/lbug-source/src/include/common/constants.h +1 -0
  26. package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
  27. package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
  28. package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
  29. package/lbug-source/src/include/storage/storage_manager.h +1 -0
  30. package/lbug-source/src/include/storage/table/node_table.h +6 -1
  31. package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
  32. package/lbug-source/src/include/storage/table/parquet_rel_table.h +99 -0
  33. package/lbug-source/src/include/storage/table/rel_table.h +2 -2
  34. package/lbug-source/src/include/transaction/transaction.h +2 -0
  35. package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
  36. package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
  37. package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
  38. package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
  39. package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +12 -2
  40. package/lbug-source/src/storage/storage_manager.cpp +40 -6
  41. package/lbug-source/src/storage/table/CMakeLists.txt +2 -0
  42. package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
  43. package/lbug-source/src/storage/table/parquet_rel_table.cpp +470 -0
  44. package/lbug-source/test/include/test_runner/test_group.h +11 -1
  45. package/lbug-source/test/runner/e2e_test.cpp +7 -1
  46. package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +43 -0
  47. package/lbug-source/test/test_helper/test_helper.cpp +24 -0
  48. package/lbug-source/test/test_runner/test_parser.cpp +3 -0
  49. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2761 -2701
  50. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -0
  51. package/package.json +1 -1
  52. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  53. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  54. package/prebuilt/lbugjs-linux-x64.node +0 -0
  55. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
2
 
3
- project(Lbug VERSION 0.12.3.15 LANGUAGES CXX C)
3
+ project(Lbug VERSION 0.12.3.17 LANGUAGES CXX C)
4
4
 
5
5
  option(SINGLE_THREADED "Single-threaded mode" FALSE)
6
6
  if(SINGLE_THREADED)
@@ -0,0 +1,4 @@
1
+ CREATE NODE TABLE city(id INT32, name STRING, population INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
2
+ CREATE NODE TABLE user(id INT32, name STRING, age INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
3
+ CREATE REL TABLE follows(FROM user TO user, since INT32) WITH (storage = 'dataset/demo-db/graph-std/demo');
4
+ CREATE REL TABLE livesin(FROM user TO city) WITH (storage = 'dataset/demo-db/graph-std/demo');
@@ -342,7 +342,7 @@ kU_IfNotExists
342
342
  : IF SP NOT SP EXISTS ;
343
343
 
344
344
  kU_CreateNodeTable
345
- : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
345
+ : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
346
346
 
347
347
  kU_CreateRelTable
348
348
  : CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
@@ -1 +1 @@
1
- 8334a684be17e562250acf07ae2bbca0
1
+ 52606d4848c2f224b8e480fec2923081
@@ -95,7 +95,7 @@ kU_IfNotExists
95
95
  : IF SP NOT SP EXISTS ;
96
96
 
97
97
  kU_CreateNodeTable
98
- : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
98
+ : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
99
99
 
100
100
  kU_CreateRelTable
101
101
  : CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
@@ -136,16 +136,6 @@ BoundCreateTableInfo Binder::bindCreateTableInfo(const CreateTableInfo* info) {
136
136
  }
137
137
  }
138
138
 
139
- BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
140
- auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
141
- auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>();
142
- validatePrimaryKey(extraInfo.pKName, propertyDefinitions);
143
- auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
144
- std::move(propertyDefinitions));
145
- return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName,
146
- info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
147
- }
148
-
149
139
  void Binder::validateNodeTableType(const TableCatalogEntry* entry) {
150
140
  if (entry->getType() != CatalogEntryType::NODE_TABLE_ENTRY) {
151
141
  throw BinderException(stringFormat("{} is not of type NODE.", entry->getName()));
@@ -168,6 +158,13 @@ void Binder::validateColumnExistence(const TableCatalogEntry* entry,
168
158
  }
169
159
  }
170
160
 
161
+ static std::string getStorage(const case_insensitive_map_t<Value>& options) {
162
+ if (options.contains(TableOptionConstants::REL_STORAGE_OPTION)) {
163
+ return options.at(TableOptionConstants::REL_STORAGE_OPTION).toString();
164
+ }
165
+ return "";
166
+ }
167
+
171
168
  static ExtendDirection getStorageDirection(const case_insensitive_map_t<Value>& options) {
172
169
  if (options.contains(TableOptionConstants::REL_STORAGE_DIRECTION_OPTION)) {
173
170
  return ExtendDirectionUtil::fromString(
@@ -176,6 +173,18 @@ static ExtendDirection getStorageDirection(const case_insensitive_map_t<Value>&
176
173
  return DEFAULT_EXTEND_DIRECTION;
177
174
  }
178
175
 
176
+ BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
177
+ auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
178
+ auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>();
179
+ validatePrimaryKey(extraInfo.pKName, propertyDefinitions);
180
+ auto boundOptions = bindParsingOptions(extraInfo.options);
181
+ auto storage = getStorage(boundOptions);
182
+ auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
183
+ std::move(propertyDefinitions), std::move(storage));
184
+ return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName,
185
+ info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
186
+ }
187
+
179
188
  std::vector<PropertyDefinition> Binder::bindRelPropertyDefinitions(const CreateTableInfo& info) {
180
189
  std::vector<PropertyDefinition> propertyDefinitions;
181
190
  propertyDefinitions.emplace_back(
@@ -193,6 +202,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo*
193
202
  auto dstMultiplicity = RelMultiplicityUtils::getBwd(extraInfo.relMultiplicity);
194
203
  auto boundOptions = bindParsingOptions(extraInfo.options);
195
204
  auto storageDirection = getStorageDirection(boundOptions);
205
+ auto storage = getStorage(boundOptions);
196
206
  // Bind from to pairs
197
207
  node_table_id_pair_set_t nodePairsSet;
198
208
  std::vector<NodeTableIDPair> nodePairs;
@@ -209,9 +219,9 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo*
209
219
  nodePairsSet.insert(pair);
210
220
  nodePairs.emplace_back(pair);
211
221
  }
212
- auto boundExtraInfo =
213
- std::make_unique<BoundExtraCreateRelTableGroupInfo>(std::move(propertyDefinitions),
214
- srcMultiplicity, dstMultiplicity, storageDirection, std::move(nodePairs));
222
+ auto boundExtraInfo = std::make_unique<BoundExtraCreateRelTableGroupInfo>(
223
+ std::move(propertyDefinitions), srcMultiplicity, dstMultiplicity, storageDirection,
224
+ std::move(nodePairs), std::move(storage));
215
225
  return BoundCreateTableInfo(CatalogEntryType::REL_GROUP_ENTRY, info->tableName,
216
226
  info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
217
227
  }
@@ -190,9 +190,9 @@ CatalogEntry* Catalog::createRelGroupEntry(Transaction* transaction,
190
190
  for (auto& nodePair : extraInfo->nodePairs) {
191
191
  relTableInfos.emplace_back(nodePair, tables->getNextOID());
192
192
  }
193
- auto relGroupEntry =
194
- std::make_unique<RelGroupCatalogEntry>(info.tableName, extraInfo->srcMultiplicity,
195
- extraInfo->dstMultiplicity, extraInfo->storageDirection, std::move(relTableInfos));
193
+ auto relGroupEntry = std::make_unique<RelGroupCatalogEntry>(info.tableName,
194
+ extraInfo->srcMultiplicity, extraInfo->dstMultiplicity, extraInfo->storageDirection,
195
+ std::move(relTableInfos), extraInfo->storage);
196
196
  for (auto& definition : extraInfo->propertyDefinitions) {
197
197
  relGroupEntry->addProperty(definition);
198
198
  }
@@ -541,7 +541,8 @@ CatalogEntry* Catalog::createTableEntry(Transaction* transaction,
541
541
  CatalogEntry* Catalog::createNodeTableEntry(Transaction* transaction,
542
542
  const BoundCreateTableInfo& info) {
543
543
  const auto extraInfo = info.extraInfo->constPtrCast<BoundExtraCreateNodeTableInfo>();
544
- auto entry = std::make_unique<NodeTableCatalogEntry>(info.tableName, extraInfo->primaryKeyName);
544
+ auto entry = std::make_unique<NodeTableCatalogEntry>(info.tableName, extraInfo->primaryKeyName,
545
+ extraInfo->storage);
545
546
  for (auto& definition : extraInfo->propertyDefinitions) {
546
547
  entry->addProperty(definition);
547
548
  }
@@ -21,16 +21,22 @@ void NodeTableCatalogEntry::serialize(common::Serializer& serializer) const {
21
21
  TableCatalogEntry::serialize(serializer);
22
22
  serializer.writeDebuggingInfo("primaryKeyName");
23
23
  serializer.write(primaryKeyName);
24
+ serializer.writeDebuggingInfo("storage");
25
+ serializer.write(storage);
24
26
  }
25
27
 
26
28
  std::unique_ptr<NodeTableCatalogEntry> NodeTableCatalogEntry::deserialize(
27
29
  common::Deserializer& deserializer) {
28
30
  std::string debuggingInfo;
29
31
  std::string primaryKeyName;
32
+ std::string storage;
30
33
  deserializer.validateDebuggingInfo(debuggingInfo, "primaryKeyName");
31
34
  deserializer.deserializeValue(primaryKeyName);
35
+ deserializer.validateDebuggingInfo(debuggingInfo, "storage");
36
+ deserializer.deserializeValue(storage);
32
37
  auto nodeTableEntry = std::make_unique<NodeTableCatalogEntry>();
33
38
  nodeTableEntry->primaryKeyName = primaryKeyName;
39
+ nodeTableEntry->storage = storage;
34
40
  return nodeTableEntry;
35
41
  }
36
42
 
@@ -42,6 +48,7 @@ std::string NodeTableCatalogEntry::toCypher(const ToCypherInfo& /*info*/) const
42
48
  std::unique_ptr<TableCatalogEntry> NodeTableCatalogEntry::copy() const {
43
49
  auto other = std::make_unique<NodeTableCatalogEntry>();
44
50
  other->primaryKeyName = primaryKeyName;
51
+ other->storage = storage;
45
52
  other->copyFrom(*this);
46
53
  return other;
47
54
  }
@@ -49,7 +56,7 @@ std::unique_ptr<TableCatalogEntry> NodeTableCatalogEntry::copy() const {
49
56
  std::unique_ptr<BoundExtraCreateCatalogEntryInfo> NodeTableCatalogEntry::getBoundExtraCreateInfo(
50
57
  transaction::Transaction*) const {
51
58
  return std::make_unique<BoundExtraCreateNodeTableInfo>(primaryKeyName,
52
- copyVector(getProperties()));
59
+ copyVector(getProperties()), storage);
53
60
  }
54
61
 
55
62
  } // namespace catalog
@@ -95,6 +95,8 @@ void RelGroupCatalogEntry::serialize(Serializer& serializer) const {
95
95
  serializer.serializeValue(dstMultiplicity);
96
96
  serializer.writeDebuggingInfo("storageDirection");
97
97
  serializer.serializeValue(storageDirection);
98
+ serializer.writeDebuggingInfo("storage");
99
+ serializer.serializeValue(storage);
98
100
  serializer.writeDebuggingInfo("relTableInfos");
99
101
  serializer.serializeVector(relTableInfos);
100
102
  }
@@ -105,6 +107,7 @@ std::unique_ptr<RelGroupCatalogEntry> RelGroupCatalogEntry::deserialize(
105
107
  auto srcMultiplicity = RelMultiplicity::MANY;
106
108
  auto dstMultiplicity = RelMultiplicity::MANY;
107
109
  auto storageDirection = ExtendDirection::BOTH;
110
+ std::string storage;
108
111
  std::vector<RelTableCatalogInfo> relTableInfos;
109
112
  deserializer.validateDebuggingInfo(debuggingInfo, "srcMultiplicity");
110
113
  deserializer.deserializeValue(srcMultiplicity);
@@ -112,12 +115,15 @@ std::unique_ptr<RelGroupCatalogEntry> RelGroupCatalogEntry::deserialize(
112
115
  deserializer.deserializeValue(dstMultiplicity);
113
116
  deserializer.validateDebuggingInfo(debuggingInfo, "storageDirection");
114
117
  deserializer.deserializeValue(storageDirection);
118
+ deserializer.validateDebuggingInfo(debuggingInfo, "storage");
119
+ deserializer.deserializeValue(storage);
115
120
  deserializer.validateDebuggingInfo(debuggingInfo, "relTableInfos");
116
121
  deserializer.deserializeVector(relTableInfos);
117
122
  auto relGroupEntry = std::make_unique<RelGroupCatalogEntry>();
118
123
  relGroupEntry->srcMultiplicity = srcMultiplicity;
119
124
  relGroupEntry->dstMultiplicity = dstMultiplicity;
120
125
  relGroupEntry->storageDirection = storageDirection;
126
+ relGroupEntry->storage = storage;
121
127
  relGroupEntry->relTableInfos = relTableInfos;
122
128
  return relGroupEntry;
123
129
  }
@@ -167,6 +173,7 @@ std::unique_ptr<TableCatalogEntry> RelGroupCatalogEntry::copy() const {
167
173
  other->srcMultiplicity = srcMultiplicity;
168
174
  other->dstMultiplicity = dstMultiplicity;
169
175
  other->storageDirection = storageDirection;
176
+ other->storage = storage;
170
177
  other->relTableInfos = relTableInfos;
171
178
  other->copyFrom(*this);
172
179
  return other;
@@ -228,7 +228,8 @@ FunctionCollection* FunctionCollection::getFunctions() {
228
228
  TABLE_FUNCTION(StatsInfoFunction), TABLE_FUNCTION(StorageInfoFunction),
229
229
  TABLE_FUNCTION(ShowAttachedDatabasesFunction), TABLE_FUNCTION(ShowSequencesFunction),
230
230
  TABLE_FUNCTION(ShowFunctionsFunction), TABLE_FUNCTION(BMInfoFunction),
231
- TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(ShowLoadedExtensionsFunction),
231
+ TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(DiskSizeInfoFunction),
232
+ TABLE_FUNCTION(ShowLoadedExtensionsFunction),
232
233
  TABLE_FUNCTION(ShowOfficialExtensionsFunction), TABLE_FUNCTION(ShowIndexesFunction),
233
234
  TABLE_FUNCTION(ShowProjectedGraphsFunction), TABLE_FUNCTION(ProjectedGraphInfoFunction),
234
235
  TABLE_FUNCTION(ShowMacrosFunction),
@@ -8,6 +8,7 @@ add_library(lbug_table_function
8
8
  clear_warnings.cpp
9
9
  current_setting.cpp
10
10
  db_version.cpp
11
+ disk_size_info.cpp
11
12
  drop_project_graph.cpp
12
13
  file_info.cpp
13
14
  free_space_info.cpp
@@ -0,0 +1,322 @@
1
+ #include "binder/binder.h"
2
+ #include "catalog/catalog.h"
3
+ #include "catalog/catalog_entry/node_table_catalog_entry.h"
4
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
5
+ #include "common/exception/binder.h"
6
+ #include "function/table/bind_data.h"
7
+ #include "function/table/simple_table_function.h"
8
+ #include "main/client_context.h"
9
+ #include "storage/database_header.h"
10
+ #include "storage/index/hash_index.h"
11
+ #include "storage/page_manager.h"
12
+ #include "storage/storage_manager.h"
13
+ #include "storage/table/list_chunk_data.h"
14
+ #include "storage/table/node_table.h"
15
+ #include "storage/table/rel_table.h"
16
+ #include "storage/table/string_chunk_data.h"
17
+ #include "storage/table/struct_chunk_data.h"
18
+ #include "transaction/transaction.h"
19
+
20
+ using namespace lbug::common;
21
+ using namespace lbug::catalog;
22
+ using namespace lbug::storage;
23
+ using namespace lbug::main;
24
+
25
+ namespace lbug {
26
+ namespace function {
27
+
28
+ struct DiskSizeInfoBindData final : TableFuncBindData {
29
+ const ClientContext* ctx;
30
+ DiskSizeInfoBindData(binder::expression_vector columns, row_idx_t numRows,
31
+ const ClientContext* ctx)
32
+ : TableFuncBindData{std::move(columns), numRows}, ctx{ctx} {}
33
+
34
+ std::unique_ptr<TableFuncBindData> copy() const override {
35
+ return std::make_unique<DiskSizeInfoBindData>(columns, numRows, ctx);
36
+ }
37
+ };
38
+
39
+ static uint64_t countChunkDataPages(const ColumnChunkData& chunkData) {
40
+ uint64_t pages = 0;
41
+ auto metadata = chunkData.getResidencyState() == ResidencyState::ON_DISK ?
42
+ chunkData.getMetadata() :
43
+ chunkData.getMetadataToFlush();
44
+ pages += metadata.getNumPages();
45
+
46
+ if (chunkData.hasNullData()) {
47
+ pages += countChunkDataPages(*chunkData.getNullData());
48
+ }
49
+
50
+ auto physicalType = chunkData.getDataType().getPhysicalType();
51
+ switch (physicalType) {
52
+ case PhysicalTypeID::STRUCT: {
53
+ auto& structChunk = chunkData.cast<StructChunkData>();
54
+ for (auto i = 0u; i < structChunk.getNumChildren(); i++) {
55
+ pages += countChunkDataPages(structChunk.getChild(i));
56
+ }
57
+ } break;
58
+ case PhysicalTypeID::STRING: {
59
+ auto& stringChunk = chunkData.cast<StringChunkData>();
60
+ pages += countChunkDataPages(*stringChunk.getIndexColumnChunk());
61
+ auto& dictionaryChunk = stringChunk.getDictionaryChunk();
62
+ pages += countChunkDataPages(*dictionaryChunk.getStringDataChunk());
63
+ pages += countChunkDataPages(*dictionaryChunk.getOffsetChunk());
64
+ } break;
65
+ case PhysicalTypeID::ARRAY:
66
+ case PhysicalTypeID::LIST: {
67
+ auto& listChunk = chunkData.cast<ListChunkData>();
68
+ pages += countChunkDataPages(*listChunk.getOffsetColumnChunk());
69
+ pages += countChunkDataPages(*listChunk.getSizeColumnChunk());
70
+ pages += countChunkDataPages(*listChunk.getDataColumnChunk());
71
+ } break;
72
+ default:
73
+ break;
74
+ }
75
+ return pages;
76
+ }
77
+
78
+ static uint64_t countChunkedGroupPages(ChunkedNodeGroup* chunkedGroup) {
79
+ uint64_t pages = 0;
80
+ auto numColumns = chunkedGroup->getNumColumns();
81
+ for (auto i = 0u; i < numColumns; i++) {
82
+ for (auto* segment : chunkedGroup->getColumnChunk(i).getSegments()) {
83
+ pages += countChunkDataPages(*segment);
84
+ }
85
+ }
86
+ if (chunkedGroup->getFormat() == NodeGroupDataFormat::CSR) {
87
+ auto& chunkedCSRGroup = chunkedGroup->cast<ChunkedCSRNodeGroup>();
88
+ for (auto* segment : chunkedCSRGroup.getCSRHeader().offset->getSegments()) {
89
+ pages += countChunkDataPages(*segment);
90
+ }
91
+ for (auto* segment : chunkedCSRGroup.getCSRHeader().length->getSegments()) {
92
+ pages += countChunkDataPages(*segment);
93
+ }
94
+ }
95
+ return pages;
96
+ }
97
+
98
+ static uint64_t countNodeGroupPages(NodeGroup* nodeGroup) {
99
+ uint64_t pages = 0;
100
+ auto numChunks = nodeGroup->getNumChunkedGroups();
101
+ for (auto chunkIdx = 0ul; chunkIdx < numChunks; chunkIdx++) {
102
+ pages += countChunkedGroupPages(nodeGroup->getChunkedNodeGroup(chunkIdx));
103
+ }
104
+ if (nodeGroup->getFormat() == NodeGroupDataFormat::CSR) {
105
+ auto& csrNodeGroup = nodeGroup->cast<CSRNodeGroup>();
106
+ auto persistentChunk = csrNodeGroup.getPersistentChunkedGroup();
107
+ if (persistentChunk) {
108
+ pages += countChunkedGroupPages(persistentChunk);
109
+ }
110
+ }
111
+ return pages;
112
+ }
113
+
114
+ struct DiskSizeEntry {
115
+ std::string category;
116
+ std::string name;
117
+ uint64_t numPages;
118
+ uint64_t sizeBytes;
119
+ };
120
+
121
+ // Estimate the number of pages used by a hash index based on the number of entries
122
+ // Hash index structure:
123
+ // - INDEX_HEADER_PAGES pages for HashIndexHeaderOnDisk (2 pages for 256 sub-indexes)
124
+ // - DiskArrayCollection header pages (1+ pages)
125
+ // - For each of 256 sub-indexes: pSlots and oSlots disk arrays
126
+ // - Each slot is SLOT_CAPACITY_BYTES (256 bytes), so 16 slots per page
127
+ // - Number of primary slots = 2^currentLevel + nextSplitSlotId
128
+ // - Overflow slots depend on collisions
129
+ static uint64_t estimateHashIndexPages(const PrimaryKeyIndex* pkIndex) {
130
+ if (!pkIndex) {
131
+ return 0;
132
+ }
133
+
134
+ uint64_t totalPages = 0;
135
+
136
+ // Index header pages (storing HashIndexHeaderOnDisk for all 256 sub-indexes)
137
+ totalPages += INDEX_HEADER_PAGES; // 2 pages
138
+
139
+ // DiskArrayCollection header pages (at least 1)
140
+ // Each header page stores headers for up to ~170 disk arrays
141
+ // With 256 sub-indexes * 2 arrays (pSlots + oSlots) = 512 arrays
142
+ totalPages += 4; // Approximate: ~3-4 header pages for DiskArrayCollection
143
+
144
+ // For each sub-index, estimate primary and overflow slot pages
145
+ // We can access the headers through the pkIndex to get actual sizes
146
+ // But since the headers are private, we estimate based on numEntries
147
+
148
+ // Get total entries from all sub-indexes
149
+ // Each entry requires a slot, and slots have capacity of ~3-20 entries depending on key type
150
+ // With linear hashing, we expect ~70-80% fill rate
151
+
152
+ // Rough estimation: For N entries with 8-byte keys:
153
+ // - Slot capacity is approximately 3 entries per slot (256-byte slot / 80 bytes per entry)
154
+ // - Number of slots ≈ N / (3 * 0.7) ≈ N / 2
155
+ // - Pages for slots = slots / 16 (16 slots per page)
156
+ // - Plus PIP pages for addressing
157
+
158
+ // Since we can't easily access internal headers, we return the header overhead
159
+ // and let the unaccounted calculation handle the rest
160
+ return totalPages;
161
+ }
162
+
163
+ static std::vector<DiskSizeEntry> collectDiskSizeInfo(const ClientContext* context) {
164
+ std::vector<DiskSizeEntry> entries;
165
+ auto storageManager = StorageManager::Get(*context);
166
+ auto catalog = Catalog::Get(*context);
167
+ auto dataFH = storageManager->getDataFH();
168
+
169
+ // Handle in-memory databases
170
+ if (storageManager->isInMemory()) {
171
+ entries.push_back({"info", "in_memory_database", 0, 0});
172
+ return entries;
173
+ }
174
+
175
+ auto pageManager = dataFH->getPageManager();
176
+
177
+ // 1. Database header (always 1 page at index 0)
178
+ entries.push_back({"header", "database_header", 1, LBUG_PAGE_SIZE});
179
+
180
+ // 2. Get catalog and metadata page ranges from database header
181
+ auto databaseHeader = DatabaseHeader::readDatabaseHeader(*dataFH->getFileInfo());
182
+ if (databaseHeader.has_value()) {
183
+ entries.push_back({"catalog", "catalog", databaseHeader->catalogPageRange.numPages,
184
+ databaseHeader->catalogPageRange.numPages * LBUG_PAGE_SIZE});
185
+
186
+ entries.push_back({"metadata", "metadata", databaseHeader->metadataPageRange.numPages,
187
+ databaseHeader->metadataPageRange.numPages * LBUG_PAGE_SIZE});
188
+ }
189
+
190
+ // 3. Count table data pages
191
+ auto nodeTableEntries =
192
+ catalog->getNodeTableEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
193
+ auto relGroupEntries = catalog->getRelGroupEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
194
+
195
+ for (const auto tableEntry : nodeTableEntries) {
196
+ auto& nodeTable = storageManager->getTable(tableEntry->getTableID())->cast<NodeTable>();
197
+ uint64_t tablePages = 0;
198
+ auto numNodeGroups = nodeTable.getNumNodeGroups();
199
+ for (auto i = 0ul; i < numNodeGroups; i++) {
200
+ tablePages += countNodeGroupPages(nodeTable.getNodeGroup(i));
201
+ }
202
+ entries.push_back(
203
+ {"node_table", tableEntry->getName(), tablePages, tablePages * LBUG_PAGE_SIZE});
204
+
205
+ // Count primary key index header pages (rough estimate for overhead)
206
+ auto* pkIndex = nodeTable.getPKIndex();
207
+ uint64_t indexPages = estimateHashIndexPages(pkIndex);
208
+ if (indexPages > 0) {
209
+ entries.push_back({"pk_index_overhead", tableEntry->getName() + "_pk", indexPages,
210
+ indexPages * LBUG_PAGE_SIZE});
211
+ }
212
+ }
213
+
214
+ for (const auto entry : relGroupEntries) {
215
+ auto& relGroupEntry = entry->cast<RelGroupCatalogEntry>();
216
+ for (auto& info : relGroupEntry.getRelEntryInfos()) {
217
+ auto& relTable = storageManager->getTable(info.oid)->cast<RelTable>();
218
+ uint64_t tablePages = 0;
219
+
220
+ for (auto direction : relTable.getStorageDirections()) {
221
+ auto* directedRelTableData = relTable.getDirectedTableData(direction);
222
+ auto numNodeGroups = directedRelTableData->getNumNodeGroups();
223
+ for (auto i = 0ul; i < numNodeGroups; i++) {
224
+ tablePages += countNodeGroupPages(directedRelTableData->getNodeGroup(i));
225
+ }
226
+ }
227
+ auto tableName = relGroupEntry.getName() + ":" +
228
+ catalog
229
+ ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
230
+ info.nodePair.srcTableID)
231
+ ->getName() +
232
+ "->" +
233
+ catalog
234
+ ->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
235
+ info.nodePair.dstTableID)
236
+ ->getName();
237
+ entries.push_back({"rel_table", tableName, tablePages, tablePages * LBUG_PAGE_SIZE});
238
+ }
239
+ }
240
+
241
+ // 4. Free space (from FSM)
242
+ auto freeEntries = pageManager->getFreeEntries(0, pageManager->getNumFreeEntries());
243
+ uint64_t freePages = 0;
244
+ for (const auto& freeEntry : freeEntries) {
245
+ freePages += freeEntry.numPages;
246
+ }
247
+ entries.push_back({"free_space", "free_pages", freePages, freePages * LBUG_PAGE_SIZE});
248
+
249
+ // 5. Calculate unaccounted pages (index slot data)
250
+ auto totalFilePages = dataFH->getNumPages();
251
+ uint64_t accountedPages = 1; // header
252
+ if (databaseHeader.has_value()) {
253
+ accountedPages +=
254
+ databaseHeader->catalogPageRange.numPages + databaseHeader->metadataPageRange.numPages;
255
+ }
256
+ for (const auto& entry : entries) {
257
+ if (entry.category == "node_table" || entry.category == "rel_table" ||
258
+ entry.category == "pk_index_overhead") {
259
+ accountedPages += entry.numPages;
260
+ }
261
+ }
262
+ accountedPages += freePages;
263
+
264
+ if (totalFilePages > accountedPages) {
265
+ uint64_t unaccountedPages = totalFilePages - accountedPages;
266
+ entries.push_back({"index_data", "hash_index_slots", unaccountedPages,
267
+ unaccountedPages * LBUG_PAGE_SIZE});
268
+ }
269
+
270
+ // 6. Total file size (last row)
271
+ entries.push_back({"total", "file_total", totalFilePages, totalFilePages * LBUG_PAGE_SIZE});
272
+
273
+ return entries;
274
+ }
275
+
276
+ static offset_t internalTableFunc(const TableFuncMorsel& morsel, const TableFuncInput& input,
277
+ DataChunk& output) {
278
+ const auto bindData = input.bindData->constPtrCast<DiskSizeInfoBindData>();
279
+ auto entries = collectDiskSizeInfo(bindData->ctx);
280
+
281
+ auto numEntriesToOutput = std::min(static_cast<uint64_t>(entries.size()) - morsel.startOffset,
282
+ morsel.getMorselSize());
283
+
284
+ for (row_idx_t i = 0; i < numEntriesToOutput; ++i) {
285
+ const auto& entry = entries[morsel.startOffset + i];
286
+ output.getValueVectorMutable(0).setValue(i, entry.category);
287
+ output.getValueVectorMutable(1).setValue(i, entry.name);
288
+ output.getValueVectorMutable(2).setValue<uint64_t>(i, entry.numPages);
289
+ output.getValueVectorMutable(3).setValue<uint64_t>(i, entry.sizeBytes);
290
+ }
291
+ return numEntriesToOutput;
292
+ }
293
+
294
+ static std::unique_ptr<TableFuncBindData> bindFunc(const ClientContext* context,
295
+ const TableFuncBindInput* input) {
296
+ std::vector<std::string> columnNames = {"category", "name", "num_pages", "size_bytes"};
297
+ std::vector<LogicalType> columnTypes;
298
+ columnTypes.push_back(LogicalType::STRING());
299
+ columnTypes.push_back(LogicalType::STRING());
300
+ columnTypes.push_back(LogicalType::UINT64());
301
+ columnTypes.push_back(LogicalType::UINT64());
302
+
303
+ // Get number of entries to report
304
+ auto entries = collectDiskSizeInfo(context);
305
+
306
+ auto columns = input->binder->createVariables(columnNames, columnTypes);
307
+ return std::make_unique<DiskSizeInfoBindData>(columns, entries.size(), context);
308
+ }
309
+
310
+ function_set DiskSizeInfoFunction::getFunctionSet() {
311
+ function_set functionSet;
312
+ auto function = std::make_unique<TableFunction>(name, std::vector<LogicalTypeID>{});
313
+ function->tableFunc = SimpleTableFunc::getTableFunc(internalTableFunc);
314
+ function->bindFunc = bindFunc;
315
+ function->initSharedStateFunc = SimpleTableFunc::initSharedState;
316
+ function->initLocalStateFunc = TableFunction::initEmptyLocalState;
317
+ functionSet.push_back(std::move(function));
318
+ return functionSet;
319
+ }
320
+
321
+ } // namespace function
322
+ } // namespace lbug
@@ -71,14 +71,15 @@ struct LBUG_API BoundExtraCreateTableInfo : BoundExtraCreateCatalogEntryInfo {
71
71
 
72
72
  struct BoundExtraCreateNodeTableInfo final : BoundExtraCreateTableInfo {
73
73
  std::string primaryKeyName;
74
+ std::string storage;
74
75
 
75
76
  BoundExtraCreateNodeTableInfo(std::string primaryKeyName,
76
- std::vector<PropertyDefinition> definitions)
77
+ std::vector<PropertyDefinition> definitions, std::string storage = "")
77
78
  : BoundExtraCreateTableInfo{std::move(definitions)},
78
- primaryKeyName{std::move(primaryKeyName)} {}
79
+ primaryKeyName{std::move(primaryKeyName)}, storage{std::move(storage)} {}
79
80
  BoundExtraCreateNodeTableInfo(const BoundExtraCreateNodeTableInfo& other)
80
81
  : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
81
- primaryKeyName{other.primaryKeyName} {}
82
+ primaryKeyName{other.primaryKeyName}, storage{other.storage} {}
82
83
 
83
84
  std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
84
85
  return std::make_unique<BoundExtraCreateNodeTableInfo>(*this);
@@ -90,18 +91,21 @@ struct BoundExtraCreateRelTableGroupInfo final : BoundExtraCreateTableInfo {
90
91
  common::RelMultiplicity dstMultiplicity;
91
92
  common::ExtendDirection storageDirection;
92
93
  std::vector<catalog::NodeTableIDPair> nodePairs;
94
+ std::string storage;
93
95
 
94
96
  explicit BoundExtraCreateRelTableGroupInfo(std::vector<PropertyDefinition> definitions,
95
97
  common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity,
96
- common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs)
98
+ common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs,
99
+ std::string storage = "")
97
100
  : BoundExtraCreateTableInfo{std::move(definitions)}, srcMultiplicity{srcMultiplicity},
98
101
  dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection},
99
- nodePairs{std::move(nodePairs)} {}
102
+ nodePairs{std::move(nodePairs)}, storage{std::move(storage)} {}
100
103
 
101
104
  BoundExtraCreateRelTableGroupInfo(const BoundExtraCreateRelTableGroupInfo& other)
102
105
  : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
103
106
  srcMultiplicity{other.srcMultiplicity}, dstMultiplicity{other.dstMultiplicity},
104
- storageDirection{other.storageDirection}, nodePairs{other.nodePairs} {}
107
+ storageDirection{other.storageDirection}, nodePairs{other.nodePairs},
108
+ storage{other.storage} {}
105
109
 
106
110
  std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
107
111
  return std::make_unique<BoundExtraCreateRelTableGroupInfo>(*this);
@@ -15,9 +15,9 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry {
15
15
 
16
16
  public:
17
17
  NodeTableCatalogEntry() = default;
18
- NodeTableCatalogEntry(std::string name, std::string primaryKeyName)
19
- : TableCatalogEntry{entryType_, std::move(name)},
20
- primaryKeyName{std::move(primaryKeyName)} {}
18
+ NodeTableCatalogEntry(std::string name, std::string primaryKeyName, std::string storage = "")
19
+ : TableCatalogEntry{entryType_, std::move(name)}, primaryKeyName{std::move(primaryKeyName)},
20
+ storage{std::move(storage)} {}
21
21
 
22
22
  bool isParent(common::table_id_t /*tableID*/) override { return false; }
23
23
  common::TableType getTableType() const override { return common::TableType::NODE; }
@@ -29,6 +29,7 @@ public:
29
29
  const binder::PropertyDefinition& getPrimaryKeyDefinition() const {
30
30
  return getProperty(primaryKeyName);
31
31
  }
32
+ const std::string& getStorage() const { return storage; }
32
33
 
33
34
  void renameProperty(const std::string& propertyName, const std::string& newName) override;
34
35
 
@@ -44,6 +45,7 @@ private:
44
45
 
45
46
  private:
46
47
  std::string primaryKeyName;
48
+ std::string storage;
47
49
  };
48
50
 
49
51
  } // namespace catalog