lbug 0.12.3-dev.15 → 0.12.3-dev.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lbug-source/CMakeLists.txt +1 -1
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
- package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
- package/lbug-source/scripts/antlr4/hash.md5 +1 -1
- package/lbug-source/src/antlr4/Cypher.g4 +1 -1
- package/lbug-source/src/binder/bind/bind_ddl.cpp +23 -13
- package/lbug-source/src/catalog/catalog.cpp +5 -4
- package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
- package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +7 -0
- package/lbug-source/src/function/function_collection.cpp +2 -1
- package/lbug-source/src/function/table/CMakeLists.txt +1 -0
- package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
- package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +10 -6
- package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
- package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +4 -2
- package/lbug-source/src/include/common/constants.h +1 -0
- package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
- package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
- package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
- package/lbug-source/src/include/storage/storage_manager.h +1 -0
- package/lbug-source/src/include/storage/table/node_table.h +6 -1
- package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
- package/lbug-source/src/include/storage/table/parquet_rel_table.h +99 -0
- package/lbug-source/src/include/storage/table/rel_table.h +2 -2
- package/lbug-source/src/include/transaction/transaction.h +2 -0
- package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
- package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
- package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
- package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
- package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +12 -2
- package/lbug-source/src/storage/storage_manager.cpp +40 -6
- package/lbug-source/src/storage/table/CMakeLists.txt +2 -0
- package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
- package/lbug-source/src/storage/table/parquet_rel_table.cpp +470 -0
- package/lbug-source/test/include/test_runner/test_group.h +11 -1
- package/lbug-source/test/runner/e2e_test.cpp +7 -1
- package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +43 -0
- package/lbug-source/test/test_helper/test_helper.cpp +24 -0
- package/lbug-source/test/test_runner/test_parser.cpp +3 -0
- package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2761 -2701
- package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -0
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
CREATE NODE TABLE city(id INT32, name STRING, population INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
|
|
2
|
+
CREATE NODE TABLE user(id INT32, name STRING, age INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
|
|
3
|
+
CREATE REL TABLE follows(FROM user TO user, since INT32) WITH (storage = 'dataset/demo-db/graph-std/demo');
|
|
4
|
+
CREATE REL TABLE livesin(FROM user TO city) WITH (storage = 'dataset/demo-db/graph-std/demo');
|
|
@@ -342,7 +342,7 @@ kU_IfNotExists
|
|
|
342
342
|
: IF SP NOT SP EXISTS ;
|
|
343
343
|
|
|
344
344
|
kU_CreateNodeTable
|
|
345
|
-
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
|
|
345
|
+
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
|
|
346
346
|
|
|
347
347
|
kU_CreateRelTable
|
|
348
348
|
: CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
52606d4848c2f224b8e480fec2923081
|
|
@@ -95,7 +95,7 @@ kU_IfNotExists
|
|
|
95
95
|
: IF SP NOT SP EXISTS ;
|
|
96
96
|
|
|
97
97
|
kU_CreateNodeTable
|
|
98
|
-
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
|
|
98
|
+
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
|
|
99
99
|
|
|
100
100
|
kU_CreateRelTable
|
|
101
101
|
: CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
|
|
@@ -136,16 +136,6 @@ BoundCreateTableInfo Binder::bindCreateTableInfo(const CreateTableInfo* info) {
|
|
|
136
136
|
}
|
|
137
137
|
}
|
|
138
138
|
|
|
139
|
-
BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
|
|
140
|
-
auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
|
|
141
|
-
auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>();
|
|
142
|
-
validatePrimaryKey(extraInfo.pKName, propertyDefinitions);
|
|
143
|
-
auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
|
|
144
|
-
std::move(propertyDefinitions));
|
|
145
|
-
return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName,
|
|
146
|
-
info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
|
|
147
|
-
}
|
|
148
|
-
|
|
149
139
|
void Binder::validateNodeTableType(const TableCatalogEntry* entry) {
|
|
150
140
|
if (entry->getType() != CatalogEntryType::NODE_TABLE_ENTRY) {
|
|
151
141
|
throw BinderException(stringFormat("{} is not of type NODE.", entry->getName()));
|
|
@@ -168,6 +158,13 @@ void Binder::validateColumnExistence(const TableCatalogEntry* entry,
|
|
|
168
158
|
}
|
|
169
159
|
}
|
|
170
160
|
|
|
161
|
+
static std::string getStorage(const case_insensitive_map_t<Value>& options) {
|
|
162
|
+
if (options.contains(TableOptionConstants::REL_STORAGE_OPTION)) {
|
|
163
|
+
return options.at(TableOptionConstants::REL_STORAGE_OPTION).toString();
|
|
164
|
+
}
|
|
165
|
+
return "";
|
|
166
|
+
}
|
|
167
|
+
|
|
171
168
|
static ExtendDirection getStorageDirection(const case_insensitive_map_t<Value>& options) {
|
|
172
169
|
if (options.contains(TableOptionConstants::REL_STORAGE_DIRECTION_OPTION)) {
|
|
173
170
|
return ExtendDirectionUtil::fromString(
|
|
@@ -176,6 +173,18 @@ static ExtendDirection getStorageDirection(const case_insensitive_map_t<Value>&
|
|
|
176
173
|
return DEFAULT_EXTEND_DIRECTION;
|
|
177
174
|
}
|
|
178
175
|
|
|
176
|
+
BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
|
|
177
|
+
auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
|
|
178
|
+
auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>();
|
|
179
|
+
validatePrimaryKey(extraInfo.pKName, propertyDefinitions);
|
|
180
|
+
auto boundOptions = bindParsingOptions(extraInfo.options);
|
|
181
|
+
auto storage = getStorage(boundOptions);
|
|
182
|
+
auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
|
|
183
|
+
std::move(propertyDefinitions), std::move(storage));
|
|
184
|
+
return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName,
|
|
185
|
+
info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
|
|
186
|
+
}
|
|
187
|
+
|
|
179
188
|
std::vector<PropertyDefinition> Binder::bindRelPropertyDefinitions(const CreateTableInfo& info) {
|
|
180
189
|
std::vector<PropertyDefinition> propertyDefinitions;
|
|
181
190
|
propertyDefinitions.emplace_back(
|
|
@@ -193,6 +202,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo*
|
|
|
193
202
|
auto dstMultiplicity = RelMultiplicityUtils::getBwd(extraInfo.relMultiplicity);
|
|
194
203
|
auto boundOptions = bindParsingOptions(extraInfo.options);
|
|
195
204
|
auto storageDirection = getStorageDirection(boundOptions);
|
|
205
|
+
auto storage = getStorage(boundOptions);
|
|
196
206
|
// Bind from to pairs
|
|
197
207
|
node_table_id_pair_set_t nodePairsSet;
|
|
198
208
|
std::vector<NodeTableIDPair> nodePairs;
|
|
@@ -209,9 +219,9 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo*
|
|
|
209
219
|
nodePairsSet.insert(pair);
|
|
210
220
|
nodePairs.emplace_back(pair);
|
|
211
221
|
}
|
|
212
|
-
auto boundExtraInfo =
|
|
213
|
-
std::
|
|
214
|
-
|
|
222
|
+
auto boundExtraInfo = std::make_unique<BoundExtraCreateRelTableGroupInfo>(
|
|
223
|
+
std::move(propertyDefinitions), srcMultiplicity, dstMultiplicity, storageDirection,
|
|
224
|
+
std::move(nodePairs), std::move(storage));
|
|
215
225
|
return BoundCreateTableInfo(CatalogEntryType::REL_GROUP_ENTRY, info->tableName,
|
|
216
226
|
info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
|
|
217
227
|
}
|
|
@@ -190,9 +190,9 @@ CatalogEntry* Catalog::createRelGroupEntry(Transaction* transaction,
|
|
|
190
190
|
for (auto& nodePair : extraInfo->nodePairs) {
|
|
191
191
|
relTableInfos.emplace_back(nodePair, tables->getNextOID());
|
|
192
192
|
}
|
|
193
|
-
auto relGroupEntry =
|
|
194
|
-
|
|
195
|
-
|
|
193
|
+
auto relGroupEntry = std::make_unique<RelGroupCatalogEntry>(info.tableName,
|
|
194
|
+
extraInfo->srcMultiplicity, extraInfo->dstMultiplicity, extraInfo->storageDirection,
|
|
195
|
+
std::move(relTableInfos), extraInfo->storage);
|
|
196
196
|
for (auto& definition : extraInfo->propertyDefinitions) {
|
|
197
197
|
relGroupEntry->addProperty(definition);
|
|
198
198
|
}
|
|
@@ -541,7 +541,8 @@ CatalogEntry* Catalog::createTableEntry(Transaction* transaction,
|
|
|
541
541
|
CatalogEntry* Catalog::createNodeTableEntry(Transaction* transaction,
|
|
542
542
|
const BoundCreateTableInfo& info) {
|
|
543
543
|
const auto extraInfo = info.extraInfo->constPtrCast<BoundExtraCreateNodeTableInfo>();
|
|
544
|
-
auto entry = std::make_unique<NodeTableCatalogEntry>(info.tableName, extraInfo->primaryKeyName
|
|
544
|
+
auto entry = std::make_unique<NodeTableCatalogEntry>(info.tableName, extraInfo->primaryKeyName,
|
|
545
|
+
extraInfo->storage);
|
|
545
546
|
for (auto& definition : extraInfo->propertyDefinitions) {
|
|
546
547
|
entry->addProperty(definition);
|
|
547
548
|
}
|
|
@@ -21,16 +21,22 @@ void NodeTableCatalogEntry::serialize(common::Serializer& serializer) const {
|
|
|
21
21
|
TableCatalogEntry::serialize(serializer);
|
|
22
22
|
serializer.writeDebuggingInfo("primaryKeyName");
|
|
23
23
|
serializer.write(primaryKeyName);
|
|
24
|
+
serializer.writeDebuggingInfo("storage");
|
|
25
|
+
serializer.write(storage);
|
|
24
26
|
}
|
|
25
27
|
|
|
26
28
|
std::unique_ptr<NodeTableCatalogEntry> NodeTableCatalogEntry::deserialize(
|
|
27
29
|
common::Deserializer& deserializer) {
|
|
28
30
|
std::string debuggingInfo;
|
|
29
31
|
std::string primaryKeyName;
|
|
32
|
+
std::string storage;
|
|
30
33
|
deserializer.validateDebuggingInfo(debuggingInfo, "primaryKeyName");
|
|
31
34
|
deserializer.deserializeValue(primaryKeyName);
|
|
35
|
+
deserializer.validateDebuggingInfo(debuggingInfo, "storage");
|
|
36
|
+
deserializer.deserializeValue(storage);
|
|
32
37
|
auto nodeTableEntry = std::make_unique<NodeTableCatalogEntry>();
|
|
33
38
|
nodeTableEntry->primaryKeyName = primaryKeyName;
|
|
39
|
+
nodeTableEntry->storage = storage;
|
|
34
40
|
return nodeTableEntry;
|
|
35
41
|
}
|
|
36
42
|
|
|
@@ -42,6 +48,7 @@ std::string NodeTableCatalogEntry::toCypher(const ToCypherInfo& /*info*/) const
|
|
|
42
48
|
std::unique_ptr<TableCatalogEntry> NodeTableCatalogEntry::copy() const {
|
|
43
49
|
auto other = std::make_unique<NodeTableCatalogEntry>();
|
|
44
50
|
other->primaryKeyName = primaryKeyName;
|
|
51
|
+
other->storage = storage;
|
|
45
52
|
other->copyFrom(*this);
|
|
46
53
|
return other;
|
|
47
54
|
}
|
|
@@ -49,7 +56,7 @@ std::unique_ptr<TableCatalogEntry> NodeTableCatalogEntry::copy() const {
|
|
|
49
56
|
std::unique_ptr<BoundExtraCreateCatalogEntryInfo> NodeTableCatalogEntry::getBoundExtraCreateInfo(
|
|
50
57
|
transaction::Transaction*) const {
|
|
51
58
|
return std::make_unique<BoundExtraCreateNodeTableInfo>(primaryKeyName,
|
|
52
|
-
copyVector(getProperties()));
|
|
59
|
+
copyVector(getProperties()), storage);
|
|
53
60
|
}
|
|
54
61
|
|
|
55
62
|
} // namespace catalog
|
|
@@ -95,6 +95,8 @@ void RelGroupCatalogEntry::serialize(Serializer& serializer) const {
|
|
|
95
95
|
serializer.serializeValue(dstMultiplicity);
|
|
96
96
|
serializer.writeDebuggingInfo("storageDirection");
|
|
97
97
|
serializer.serializeValue(storageDirection);
|
|
98
|
+
serializer.writeDebuggingInfo("storage");
|
|
99
|
+
serializer.serializeValue(storage);
|
|
98
100
|
serializer.writeDebuggingInfo("relTableInfos");
|
|
99
101
|
serializer.serializeVector(relTableInfos);
|
|
100
102
|
}
|
|
@@ -105,6 +107,7 @@ std::unique_ptr<RelGroupCatalogEntry> RelGroupCatalogEntry::deserialize(
|
|
|
105
107
|
auto srcMultiplicity = RelMultiplicity::MANY;
|
|
106
108
|
auto dstMultiplicity = RelMultiplicity::MANY;
|
|
107
109
|
auto storageDirection = ExtendDirection::BOTH;
|
|
110
|
+
std::string storage;
|
|
108
111
|
std::vector<RelTableCatalogInfo> relTableInfos;
|
|
109
112
|
deserializer.validateDebuggingInfo(debuggingInfo, "srcMultiplicity");
|
|
110
113
|
deserializer.deserializeValue(srcMultiplicity);
|
|
@@ -112,12 +115,15 @@ std::unique_ptr<RelGroupCatalogEntry> RelGroupCatalogEntry::deserialize(
|
|
|
112
115
|
deserializer.deserializeValue(dstMultiplicity);
|
|
113
116
|
deserializer.validateDebuggingInfo(debuggingInfo, "storageDirection");
|
|
114
117
|
deserializer.deserializeValue(storageDirection);
|
|
118
|
+
deserializer.validateDebuggingInfo(debuggingInfo, "storage");
|
|
119
|
+
deserializer.deserializeValue(storage);
|
|
115
120
|
deserializer.validateDebuggingInfo(debuggingInfo, "relTableInfos");
|
|
116
121
|
deserializer.deserializeVector(relTableInfos);
|
|
117
122
|
auto relGroupEntry = std::make_unique<RelGroupCatalogEntry>();
|
|
118
123
|
relGroupEntry->srcMultiplicity = srcMultiplicity;
|
|
119
124
|
relGroupEntry->dstMultiplicity = dstMultiplicity;
|
|
120
125
|
relGroupEntry->storageDirection = storageDirection;
|
|
126
|
+
relGroupEntry->storage = storage;
|
|
121
127
|
relGroupEntry->relTableInfos = relTableInfos;
|
|
122
128
|
return relGroupEntry;
|
|
123
129
|
}
|
|
@@ -167,6 +173,7 @@ std::unique_ptr<TableCatalogEntry> RelGroupCatalogEntry::copy() const {
|
|
|
167
173
|
other->srcMultiplicity = srcMultiplicity;
|
|
168
174
|
other->dstMultiplicity = dstMultiplicity;
|
|
169
175
|
other->storageDirection = storageDirection;
|
|
176
|
+
other->storage = storage;
|
|
170
177
|
other->relTableInfos = relTableInfos;
|
|
171
178
|
other->copyFrom(*this);
|
|
172
179
|
return other;
|
|
@@ -228,7 +228,8 @@ FunctionCollection* FunctionCollection::getFunctions() {
|
|
|
228
228
|
TABLE_FUNCTION(StatsInfoFunction), TABLE_FUNCTION(StorageInfoFunction),
|
|
229
229
|
TABLE_FUNCTION(ShowAttachedDatabasesFunction), TABLE_FUNCTION(ShowSequencesFunction),
|
|
230
230
|
TABLE_FUNCTION(ShowFunctionsFunction), TABLE_FUNCTION(BMInfoFunction),
|
|
231
|
-
TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(
|
|
231
|
+
TABLE_FUNCTION(FileInfoFunction), TABLE_FUNCTION(DiskSizeInfoFunction),
|
|
232
|
+
TABLE_FUNCTION(ShowLoadedExtensionsFunction),
|
|
232
233
|
TABLE_FUNCTION(ShowOfficialExtensionsFunction), TABLE_FUNCTION(ShowIndexesFunction),
|
|
233
234
|
TABLE_FUNCTION(ShowProjectedGraphsFunction), TABLE_FUNCTION(ProjectedGraphInfoFunction),
|
|
234
235
|
TABLE_FUNCTION(ShowMacrosFunction),
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
#include "binder/binder.h"
|
|
2
|
+
#include "catalog/catalog.h"
|
|
3
|
+
#include "catalog/catalog_entry/node_table_catalog_entry.h"
|
|
4
|
+
#include "catalog/catalog_entry/rel_group_catalog_entry.h"
|
|
5
|
+
#include "common/exception/binder.h"
|
|
6
|
+
#include "function/table/bind_data.h"
|
|
7
|
+
#include "function/table/simple_table_function.h"
|
|
8
|
+
#include "main/client_context.h"
|
|
9
|
+
#include "storage/database_header.h"
|
|
10
|
+
#include "storage/index/hash_index.h"
|
|
11
|
+
#include "storage/page_manager.h"
|
|
12
|
+
#include "storage/storage_manager.h"
|
|
13
|
+
#include "storage/table/list_chunk_data.h"
|
|
14
|
+
#include "storage/table/node_table.h"
|
|
15
|
+
#include "storage/table/rel_table.h"
|
|
16
|
+
#include "storage/table/string_chunk_data.h"
|
|
17
|
+
#include "storage/table/struct_chunk_data.h"
|
|
18
|
+
#include "transaction/transaction.h"
|
|
19
|
+
|
|
20
|
+
using namespace lbug::common;
|
|
21
|
+
using namespace lbug::catalog;
|
|
22
|
+
using namespace lbug::storage;
|
|
23
|
+
using namespace lbug::main;
|
|
24
|
+
|
|
25
|
+
namespace lbug {
|
|
26
|
+
namespace function {
|
|
27
|
+
|
|
28
|
+
struct DiskSizeInfoBindData final : TableFuncBindData {
|
|
29
|
+
const ClientContext* ctx;
|
|
30
|
+
DiskSizeInfoBindData(binder::expression_vector columns, row_idx_t numRows,
|
|
31
|
+
const ClientContext* ctx)
|
|
32
|
+
: TableFuncBindData{std::move(columns), numRows}, ctx{ctx} {}
|
|
33
|
+
|
|
34
|
+
std::unique_ptr<TableFuncBindData> copy() const override {
|
|
35
|
+
return std::make_unique<DiskSizeInfoBindData>(columns, numRows, ctx);
|
|
36
|
+
}
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
static uint64_t countChunkDataPages(const ColumnChunkData& chunkData) {
|
|
40
|
+
uint64_t pages = 0;
|
|
41
|
+
auto metadata = chunkData.getResidencyState() == ResidencyState::ON_DISK ?
|
|
42
|
+
chunkData.getMetadata() :
|
|
43
|
+
chunkData.getMetadataToFlush();
|
|
44
|
+
pages += metadata.getNumPages();
|
|
45
|
+
|
|
46
|
+
if (chunkData.hasNullData()) {
|
|
47
|
+
pages += countChunkDataPages(*chunkData.getNullData());
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
auto physicalType = chunkData.getDataType().getPhysicalType();
|
|
51
|
+
switch (physicalType) {
|
|
52
|
+
case PhysicalTypeID::STRUCT: {
|
|
53
|
+
auto& structChunk = chunkData.cast<StructChunkData>();
|
|
54
|
+
for (auto i = 0u; i < structChunk.getNumChildren(); i++) {
|
|
55
|
+
pages += countChunkDataPages(structChunk.getChild(i));
|
|
56
|
+
}
|
|
57
|
+
} break;
|
|
58
|
+
case PhysicalTypeID::STRING: {
|
|
59
|
+
auto& stringChunk = chunkData.cast<StringChunkData>();
|
|
60
|
+
pages += countChunkDataPages(*stringChunk.getIndexColumnChunk());
|
|
61
|
+
auto& dictionaryChunk = stringChunk.getDictionaryChunk();
|
|
62
|
+
pages += countChunkDataPages(*dictionaryChunk.getStringDataChunk());
|
|
63
|
+
pages += countChunkDataPages(*dictionaryChunk.getOffsetChunk());
|
|
64
|
+
} break;
|
|
65
|
+
case PhysicalTypeID::ARRAY:
|
|
66
|
+
case PhysicalTypeID::LIST: {
|
|
67
|
+
auto& listChunk = chunkData.cast<ListChunkData>();
|
|
68
|
+
pages += countChunkDataPages(*listChunk.getOffsetColumnChunk());
|
|
69
|
+
pages += countChunkDataPages(*listChunk.getSizeColumnChunk());
|
|
70
|
+
pages += countChunkDataPages(*listChunk.getDataColumnChunk());
|
|
71
|
+
} break;
|
|
72
|
+
default:
|
|
73
|
+
break;
|
|
74
|
+
}
|
|
75
|
+
return pages;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
static uint64_t countChunkedGroupPages(ChunkedNodeGroup* chunkedGroup) {
|
|
79
|
+
uint64_t pages = 0;
|
|
80
|
+
auto numColumns = chunkedGroup->getNumColumns();
|
|
81
|
+
for (auto i = 0u; i < numColumns; i++) {
|
|
82
|
+
for (auto* segment : chunkedGroup->getColumnChunk(i).getSegments()) {
|
|
83
|
+
pages += countChunkDataPages(*segment);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
if (chunkedGroup->getFormat() == NodeGroupDataFormat::CSR) {
|
|
87
|
+
auto& chunkedCSRGroup = chunkedGroup->cast<ChunkedCSRNodeGroup>();
|
|
88
|
+
for (auto* segment : chunkedCSRGroup.getCSRHeader().offset->getSegments()) {
|
|
89
|
+
pages += countChunkDataPages(*segment);
|
|
90
|
+
}
|
|
91
|
+
for (auto* segment : chunkedCSRGroup.getCSRHeader().length->getSegments()) {
|
|
92
|
+
pages += countChunkDataPages(*segment);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
return pages;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
static uint64_t countNodeGroupPages(NodeGroup* nodeGroup) {
|
|
99
|
+
uint64_t pages = 0;
|
|
100
|
+
auto numChunks = nodeGroup->getNumChunkedGroups();
|
|
101
|
+
for (auto chunkIdx = 0ul; chunkIdx < numChunks; chunkIdx++) {
|
|
102
|
+
pages += countChunkedGroupPages(nodeGroup->getChunkedNodeGroup(chunkIdx));
|
|
103
|
+
}
|
|
104
|
+
if (nodeGroup->getFormat() == NodeGroupDataFormat::CSR) {
|
|
105
|
+
auto& csrNodeGroup = nodeGroup->cast<CSRNodeGroup>();
|
|
106
|
+
auto persistentChunk = csrNodeGroup.getPersistentChunkedGroup();
|
|
107
|
+
if (persistentChunk) {
|
|
108
|
+
pages += countChunkedGroupPages(persistentChunk);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
return pages;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
struct DiskSizeEntry {
|
|
115
|
+
std::string category;
|
|
116
|
+
std::string name;
|
|
117
|
+
uint64_t numPages;
|
|
118
|
+
uint64_t sizeBytes;
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
// Estimate the number of pages used by a hash index based on the number of entries
|
|
122
|
+
// Hash index structure:
|
|
123
|
+
// - INDEX_HEADER_PAGES pages for HashIndexHeaderOnDisk (2 pages for 256 sub-indexes)
|
|
124
|
+
// - DiskArrayCollection header pages (1+ pages)
|
|
125
|
+
// - For each of 256 sub-indexes: pSlots and oSlots disk arrays
|
|
126
|
+
// - Each slot is SLOT_CAPACITY_BYTES (256 bytes), so 16 slots per page
|
|
127
|
+
// - Number of primary slots = 2^currentLevel + nextSplitSlotId
|
|
128
|
+
// - Overflow slots depend on collisions
|
|
129
|
+
static uint64_t estimateHashIndexPages(const PrimaryKeyIndex* pkIndex) {
|
|
130
|
+
if (!pkIndex) {
|
|
131
|
+
return 0;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
uint64_t totalPages = 0;
|
|
135
|
+
|
|
136
|
+
// Index header pages (storing HashIndexHeaderOnDisk for all 256 sub-indexes)
|
|
137
|
+
totalPages += INDEX_HEADER_PAGES; // 2 pages
|
|
138
|
+
|
|
139
|
+
// DiskArrayCollection header pages (at least 1)
|
|
140
|
+
// Each header page stores headers for up to ~170 disk arrays
|
|
141
|
+
// With 256 sub-indexes * 2 arrays (pSlots + oSlots) = 512 arrays
|
|
142
|
+
totalPages += 4; // Approximate: ~3-4 header pages for DiskArrayCollection
|
|
143
|
+
|
|
144
|
+
// For each sub-index, estimate primary and overflow slot pages
|
|
145
|
+
// We can access the headers through the pkIndex to get actual sizes
|
|
146
|
+
// But since the headers are private, we estimate based on numEntries
|
|
147
|
+
|
|
148
|
+
// Get total entries from all sub-indexes
|
|
149
|
+
// Each entry requires a slot, and slots have capacity of ~3-20 entries depending on key type
|
|
150
|
+
// With linear hashing, we expect ~70-80% fill rate
|
|
151
|
+
|
|
152
|
+
// Rough estimation: For N entries with 8-byte keys:
|
|
153
|
+
// - Slot capacity is approximately 3 entries per slot (256-byte slot / 80 bytes per entry)
|
|
154
|
+
// - Number of slots ≈ N / (3 * 0.7) ≈ N / 2
|
|
155
|
+
// - Pages for slots = slots / 16 (16 slots per page)
|
|
156
|
+
// - Plus PIP pages for addressing
|
|
157
|
+
|
|
158
|
+
// Since we can't easily access internal headers, we return the header overhead
|
|
159
|
+
// and let the unaccounted calculation handle the rest
|
|
160
|
+
return totalPages;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
static std::vector<DiskSizeEntry> collectDiskSizeInfo(const ClientContext* context) {
|
|
164
|
+
std::vector<DiskSizeEntry> entries;
|
|
165
|
+
auto storageManager = StorageManager::Get(*context);
|
|
166
|
+
auto catalog = Catalog::Get(*context);
|
|
167
|
+
auto dataFH = storageManager->getDataFH();
|
|
168
|
+
|
|
169
|
+
// Handle in-memory databases
|
|
170
|
+
if (storageManager->isInMemory()) {
|
|
171
|
+
entries.push_back({"info", "in_memory_database", 0, 0});
|
|
172
|
+
return entries;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
auto pageManager = dataFH->getPageManager();
|
|
176
|
+
|
|
177
|
+
// 1. Database header (always 1 page at index 0)
|
|
178
|
+
entries.push_back({"header", "database_header", 1, LBUG_PAGE_SIZE});
|
|
179
|
+
|
|
180
|
+
// 2. Get catalog and metadata page ranges from database header
|
|
181
|
+
auto databaseHeader = DatabaseHeader::readDatabaseHeader(*dataFH->getFileInfo());
|
|
182
|
+
if (databaseHeader.has_value()) {
|
|
183
|
+
entries.push_back({"catalog", "catalog", databaseHeader->catalogPageRange.numPages,
|
|
184
|
+
databaseHeader->catalogPageRange.numPages * LBUG_PAGE_SIZE});
|
|
185
|
+
|
|
186
|
+
entries.push_back({"metadata", "metadata", databaseHeader->metadataPageRange.numPages,
|
|
187
|
+
databaseHeader->metadataPageRange.numPages * LBUG_PAGE_SIZE});
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
// 3. Count table data pages
|
|
191
|
+
auto nodeTableEntries =
|
|
192
|
+
catalog->getNodeTableEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
|
|
193
|
+
auto relGroupEntries = catalog->getRelGroupEntries(&transaction::DUMMY_CHECKPOINT_TRANSACTION);
|
|
194
|
+
|
|
195
|
+
for (const auto tableEntry : nodeTableEntries) {
|
|
196
|
+
auto& nodeTable = storageManager->getTable(tableEntry->getTableID())->cast<NodeTable>();
|
|
197
|
+
uint64_t tablePages = 0;
|
|
198
|
+
auto numNodeGroups = nodeTable.getNumNodeGroups();
|
|
199
|
+
for (auto i = 0ul; i < numNodeGroups; i++) {
|
|
200
|
+
tablePages += countNodeGroupPages(nodeTable.getNodeGroup(i));
|
|
201
|
+
}
|
|
202
|
+
entries.push_back(
|
|
203
|
+
{"node_table", tableEntry->getName(), tablePages, tablePages * LBUG_PAGE_SIZE});
|
|
204
|
+
|
|
205
|
+
// Count primary key index header pages (rough estimate for overhead)
|
|
206
|
+
auto* pkIndex = nodeTable.getPKIndex();
|
|
207
|
+
uint64_t indexPages = estimateHashIndexPages(pkIndex);
|
|
208
|
+
if (indexPages > 0) {
|
|
209
|
+
entries.push_back({"pk_index_overhead", tableEntry->getName() + "_pk", indexPages,
|
|
210
|
+
indexPages * LBUG_PAGE_SIZE});
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
for (const auto entry : relGroupEntries) {
|
|
215
|
+
auto& relGroupEntry = entry->cast<RelGroupCatalogEntry>();
|
|
216
|
+
for (auto& info : relGroupEntry.getRelEntryInfos()) {
|
|
217
|
+
auto& relTable = storageManager->getTable(info.oid)->cast<RelTable>();
|
|
218
|
+
uint64_t tablePages = 0;
|
|
219
|
+
|
|
220
|
+
for (auto direction : relTable.getStorageDirections()) {
|
|
221
|
+
auto* directedRelTableData = relTable.getDirectedTableData(direction);
|
|
222
|
+
auto numNodeGroups = directedRelTableData->getNumNodeGroups();
|
|
223
|
+
for (auto i = 0ul; i < numNodeGroups; i++) {
|
|
224
|
+
tablePages += countNodeGroupPages(directedRelTableData->getNodeGroup(i));
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
auto tableName = relGroupEntry.getName() + ":" +
|
|
228
|
+
catalog
|
|
229
|
+
->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
|
|
230
|
+
info.nodePair.srcTableID)
|
|
231
|
+
->getName() +
|
|
232
|
+
"->" +
|
|
233
|
+
catalog
|
|
234
|
+
->getTableCatalogEntry(&transaction::DUMMY_CHECKPOINT_TRANSACTION,
|
|
235
|
+
info.nodePair.dstTableID)
|
|
236
|
+
->getName();
|
|
237
|
+
entries.push_back({"rel_table", tableName, tablePages, tablePages * LBUG_PAGE_SIZE});
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
// 4. Free space (from FSM)
|
|
242
|
+
auto freeEntries = pageManager->getFreeEntries(0, pageManager->getNumFreeEntries());
|
|
243
|
+
uint64_t freePages = 0;
|
|
244
|
+
for (const auto& freeEntry : freeEntries) {
|
|
245
|
+
freePages += freeEntry.numPages;
|
|
246
|
+
}
|
|
247
|
+
entries.push_back({"free_space", "free_pages", freePages, freePages * LBUG_PAGE_SIZE});
|
|
248
|
+
|
|
249
|
+
// 5. Calculate unaccounted pages (index slot data)
|
|
250
|
+
auto totalFilePages = dataFH->getNumPages();
|
|
251
|
+
uint64_t accountedPages = 1; // header
|
|
252
|
+
if (databaseHeader.has_value()) {
|
|
253
|
+
accountedPages +=
|
|
254
|
+
databaseHeader->catalogPageRange.numPages + databaseHeader->metadataPageRange.numPages;
|
|
255
|
+
}
|
|
256
|
+
for (const auto& entry : entries) {
|
|
257
|
+
if (entry.category == "node_table" || entry.category == "rel_table" ||
|
|
258
|
+
entry.category == "pk_index_overhead") {
|
|
259
|
+
accountedPages += entry.numPages;
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
accountedPages += freePages;
|
|
263
|
+
|
|
264
|
+
if (totalFilePages > accountedPages) {
|
|
265
|
+
uint64_t unaccountedPages = totalFilePages - accountedPages;
|
|
266
|
+
entries.push_back({"index_data", "hash_index_slots", unaccountedPages,
|
|
267
|
+
unaccountedPages * LBUG_PAGE_SIZE});
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// 6. Total file size (last row)
|
|
271
|
+
entries.push_back({"total", "file_total", totalFilePages, totalFilePages * LBUG_PAGE_SIZE});
|
|
272
|
+
|
|
273
|
+
return entries;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
static offset_t internalTableFunc(const TableFuncMorsel& morsel, const TableFuncInput& input,
|
|
277
|
+
DataChunk& output) {
|
|
278
|
+
const auto bindData = input.bindData->constPtrCast<DiskSizeInfoBindData>();
|
|
279
|
+
auto entries = collectDiskSizeInfo(bindData->ctx);
|
|
280
|
+
|
|
281
|
+
auto numEntriesToOutput = std::min(static_cast<uint64_t>(entries.size()) - morsel.startOffset,
|
|
282
|
+
morsel.getMorselSize());
|
|
283
|
+
|
|
284
|
+
for (row_idx_t i = 0; i < numEntriesToOutput; ++i) {
|
|
285
|
+
const auto& entry = entries[morsel.startOffset + i];
|
|
286
|
+
output.getValueVectorMutable(0).setValue(i, entry.category);
|
|
287
|
+
output.getValueVectorMutable(1).setValue(i, entry.name);
|
|
288
|
+
output.getValueVectorMutable(2).setValue<uint64_t>(i, entry.numPages);
|
|
289
|
+
output.getValueVectorMutable(3).setValue<uint64_t>(i, entry.sizeBytes);
|
|
290
|
+
}
|
|
291
|
+
return numEntriesToOutput;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
static std::unique_ptr<TableFuncBindData> bindFunc(const ClientContext* context,
|
|
295
|
+
const TableFuncBindInput* input) {
|
|
296
|
+
std::vector<std::string> columnNames = {"category", "name", "num_pages", "size_bytes"};
|
|
297
|
+
std::vector<LogicalType> columnTypes;
|
|
298
|
+
columnTypes.push_back(LogicalType::STRING());
|
|
299
|
+
columnTypes.push_back(LogicalType::STRING());
|
|
300
|
+
columnTypes.push_back(LogicalType::UINT64());
|
|
301
|
+
columnTypes.push_back(LogicalType::UINT64());
|
|
302
|
+
|
|
303
|
+
// Get number of entries to report
|
|
304
|
+
auto entries = collectDiskSizeInfo(context);
|
|
305
|
+
|
|
306
|
+
auto columns = input->binder->createVariables(columnNames, columnTypes);
|
|
307
|
+
return std::make_unique<DiskSizeInfoBindData>(columns, entries.size(), context);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
/**
 * Builds the function set for this table function.
 *
 * The set holds a single TableFunction registered under `name` with an empty
 * parameter type list. Its callbacks are bindFunc for binding, internalTableFunc
 * (wrapped by SimpleTableFunc::getTableFunc) for execution,
 * SimpleTableFunc::initSharedState for shared state and
 * TableFunction::initEmptyLocalState for local state.
 *
 * @return a function_set containing that one table function.
 */
function_set DiskSizeInfoFunction::getFunctionSet() {
    auto tableFunction = std::make_unique<TableFunction>(name, std::vector<LogicalTypeID>{});

    // Callback wiring.
    tableFunction->bindFunc = bindFunc;
    tableFunction->tableFunc = SimpleTableFunc::getTableFunc(internalTableFunc);
    tableFunction->initSharedStateFunc = SimpleTableFunc::initSharedState;
    tableFunction->initLocalStateFunc = TableFunction::initEmptyLocalState;

    function_set result;
    result.push_back(std::move(tableFunction));
    return result;
}
|
|
320
|
+
|
|
321
|
+
} // namespace function
|
|
322
|
+
} // namespace lbug
|
|
@@ -71,14 +71,15 @@ struct LBUG_API BoundExtraCreateTableInfo : BoundExtraCreateCatalogEntryInfo {
|
|
|
71
71
|
|
|
72
72
|
struct BoundExtraCreateNodeTableInfo final : BoundExtraCreateTableInfo {
|
|
73
73
|
std::string primaryKeyName;
|
|
74
|
+
std::string storage;
|
|
74
75
|
|
|
75
76
|
BoundExtraCreateNodeTableInfo(std::string primaryKeyName,
|
|
76
|
-
std::vector<PropertyDefinition> definitions)
|
|
77
|
+
std::vector<PropertyDefinition> definitions, std::string storage = "")
|
|
77
78
|
: BoundExtraCreateTableInfo{std::move(definitions)},
|
|
78
|
-
primaryKeyName{std::move(primaryKeyName)} {}
|
|
79
|
+
primaryKeyName{std::move(primaryKeyName)}, storage{std::move(storage)} {}
|
|
79
80
|
BoundExtraCreateNodeTableInfo(const BoundExtraCreateNodeTableInfo& other)
|
|
80
81
|
: BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
|
|
81
|
-
primaryKeyName{other.primaryKeyName} {}
|
|
82
|
+
primaryKeyName{other.primaryKeyName}, storage{other.storage} {}
|
|
82
83
|
|
|
83
84
|
std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
|
|
84
85
|
return std::make_unique<BoundExtraCreateNodeTableInfo>(*this);
|
|
@@ -90,18 +91,21 @@ struct BoundExtraCreateRelTableGroupInfo final : BoundExtraCreateTableInfo {
|
|
|
90
91
|
common::RelMultiplicity dstMultiplicity;
|
|
91
92
|
common::ExtendDirection storageDirection;
|
|
92
93
|
std::vector<catalog::NodeTableIDPair> nodePairs;
|
|
94
|
+
std::string storage;
|
|
93
95
|
|
|
94
96
|
explicit BoundExtraCreateRelTableGroupInfo(std::vector<PropertyDefinition> definitions,
|
|
95
97
|
common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity,
|
|
96
|
-
common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs
|
|
98
|
+
common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs,
|
|
99
|
+
std::string storage = "")
|
|
97
100
|
: BoundExtraCreateTableInfo{std::move(definitions)}, srcMultiplicity{srcMultiplicity},
|
|
98
101
|
dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection},
|
|
99
|
-
nodePairs{std::move(nodePairs)} {}
|
|
102
|
+
nodePairs{std::move(nodePairs)}, storage{std::move(storage)} {}
|
|
100
103
|
|
|
101
104
|
BoundExtraCreateRelTableGroupInfo(const BoundExtraCreateRelTableGroupInfo& other)
|
|
102
105
|
: BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
|
|
103
106
|
srcMultiplicity{other.srcMultiplicity}, dstMultiplicity{other.dstMultiplicity},
|
|
104
|
-
storageDirection{other.storageDirection}, nodePairs{other.nodePairs}
|
|
107
|
+
storageDirection{other.storageDirection}, nodePairs{other.nodePairs},
|
|
108
|
+
storage{other.storage} {}
|
|
105
109
|
|
|
106
110
|
std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
|
|
107
111
|
return std::make_unique<BoundExtraCreateRelTableGroupInfo>(*this);
|
|
@@ -15,9 +15,9 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry {
|
|
|
15
15
|
|
|
16
16
|
public:
|
|
17
17
|
NodeTableCatalogEntry() = default;
|
|
18
|
-
NodeTableCatalogEntry(std::string name, std::string primaryKeyName)
|
|
19
|
-
: TableCatalogEntry{entryType_, std::move(name)},
|
|
20
|
-
|
|
18
|
+
NodeTableCatalogEntry(std::string name, std::string primaryKeyName, std::string storage = "")
|
|
19
|
+
: TableCatalogEntry{entryType_, std::move(name)}, primaryKeyName{std::move(primaryKeyName)},
|
|
20
|
+
storage{std::move(storage)} {}
|
|
21
21
|
|
|
22
22
|
bool isParent(common::table_id_t /*tableID*/) override { return false; }
|
|
23
23
|
common::TableType getTableType() const override { return common::TableType::NODE; }
|
|
@@ -29,6 +29,7 @@ public:
|
|
|
29
29
|
const binder::PropertyDefinition& getPrimaryKeyDefinition() const {
|
|
30
30
|
return getProperty(primaryKeyName);
|
|
31
31
|
}
|
|
32
|
+
const std::string& getStorage() const { return storage; }
|
|
32
33
|
|
|
33
34
|
void renameProperty(const std::string& propertyName, const std::string& newName) override;
|
|
34
35
|
|
|
@@ -44,6 +45,7 @@ private:
|
|
|
44
45
|
|
|
45
46
|
private:
|
|
46
47
|
std::string primaryKeyName;
|
|
48
|
+
std::string storage;
|
|
47
49
|
};
|
|
48
50
|
|
|
49
51
|
} // namespace catalog
|