lbug 0.12.3-dev.16 → 0.12.3-dev.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lbug-source/CMakeLists.txt +1 -1
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
- package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
- package/lbug-source/scripts/antlr4/hash.md5 +1 -1
- package/lbug-source/src/antlr4/Cypher.g4 +1 -1
- package/lbug-source/src/binder/bind/bind_ddl.cpp +23 -13
- package/lbug-source/src/catalog/catalog.cpp +5 -4
- package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
- package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +7 -0
- package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +10 -6
- package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
- package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +4 -2
- package/lbug-source/src/include/common/constants.h +1 -0
- package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
- package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
- package/lbug-source/src/include/storage/storage_manager.h +1 -0
- package/lbug-source/src/include/storage/table/node_table.h +6 -1
- package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
- package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
- package/lbug-source/src/include/storage/table/rel_table.h +2 -2
- package/lbug-source/src/include/transaction/transaction.h +2 -0
- package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
- package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
- package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
- package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
- package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +12 -2
- package/lbug-source/src/storage/storage_manager.cpp +37 -6
- package/lbug-source/src/storage/table/CMakeLists.txt +2 -0
- package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
- package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
- package/lbug-source/test/include/test_runner/test_group.h +11 -1
- package/lbug-source/test/runner/e2e_test.cpp +7 -1
- package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
- package/lbug-source/test/test_helper/test_helper.cpp +24 -0
- package/lbug-source/test/test_runner/test_parser.cpp +3 -0
- package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2761 -2701
- package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -0
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
CREATE NODE TABLE city(id INT32, name STRING, population INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
|
|
2
|
+
CREATE NODE TABLE user(id INT32, name STRING, age INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
|
|
3
|
+
CREATE REL TABLE follows(FROM user TO user, since INT32) WITH (storage = 'dataset/demo-db/graph-std/demo');
|
|
4
|
+
CREATE REL TABLE livesin(FROM user TO city) WITH (storage = 'dataset/demo-db/graph-std/demo');
|
|
@@ -342,7 +342,7 @@ kU_IfNotExists
|
|
|
342
342
|
: IF SP NOT SP EXISTS ;
|
|
343
343
|
|
|
344
344
|
kU_CreateNodeTable
|
|
345
|
-
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
|
|
345
|
+
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
|
|
346
346
|
|
|
347
347
|
kU_CreateRelTable
|
|
348
348
|
: CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
52606d4848c2f224b8e480fec2923081
|
|
@@ -95,7 +95,7 @@ kU_IfNotExists
|
|
|
95
95
|
: IF SP NOT SP EXISTS ;
|
|
96
96
|
|
|
97
97
|
kU_CreateNodeTable
|
|
98
|
-
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
|
|
98
|
+
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
|
|
99
99
|
|
|
100
100
|
kU_CreateRelTable
|
|
101
101
|
: CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
|
|
@@ -136,16 +136,6 @@ BoundCreateTableInfo Binder::bindCreateTableInfo(const CreateTableInfo* info) {
|
|
|
136
136
|
}
|
|
137
137
|
}
|
|
138
138
|
|
|
139
|
-
BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
|
|
140
|
-
auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
|
|
141
|
-
auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>();
|
|
142
|
-
validatePrimaryKey(extraInfo.pKName, propertyDefinitions);
|
|
143
|
-
auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
|
|
144
|
-
std::move(propertyDefinitions));
|
|
145
|
-
return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName,
|
|
146
|
-
info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
|
|
147
|
-
}
|
|
148
|
-
|
|
149
139
|
void Binder::validateNodeTableType(const TableCatalogEntry* entry) {
|
|
150
140
|
if (entry->getType() != CatalogEntryType::NODE_TABLE_ENTRY) {
|
|
151
141
|
throw BinderException(stringFormat("{} is not of type NODE.", entry->getName()));
|
|
@@ -168,6 +158,13 @@ void Binder::validateColumnExistence(const TableCatalogEntry* entry,
|
|
|
168
158
|
}
|
|
169
159
|
}
|
|
170
160
|
|
|
161
|
+
static std::string getStorage(const case_insensitive_map_t<Value>& options) {
|
|
162
|
+
if (options.contains(TableOptionConstants::REL_STORAGE_OPTION)) {
|
|
163
|
+
return options.at(TableOptionConstants::REL_STORAGE_OPTION).toString();
|
|
164
|
+
}
|
|
165
|
+
return "";
|
|
166
|
+
}
|
|
167
|
+
|
|
171
168
|
static ExtendDirection getStorageDirection(const case_insensitive_map_t<Value>& options) {
|
|
172
169
|
if (options.contains(TableOptionConstants::REL_STORAGE_DIRECTION_OPTION)) {
|
|
173
170
|
return ExtendDirectionUtil::fromString(
|
|
@@ -176,6 +173,18 @@ static ExtendDirection getStorageDirection(const case_insensitive_map_t<Value>&
|
|
|
176
173
|
return DEFAULT_EXTEND_DIRECTION;
|
|
177
174
|
}
|
|
178
175
|
|
|
176
|
+
BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
|
|
177
|
+
auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
|
|
178
|
+
auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>();
|
|
179
|
+
validatePrimaryKey(extraInfo.pKName, propertyDefinitions);
|
|
180
|
+
auto boundOptions = bindParsingOptions(extraInfo.options);
|
|
181
|
+
auto storage = getStorage(boundOptions);
|
|
182
|
+
auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
|
|
183
|
+
std::move(propertyDefinitions), std::move(storage));
|
|
184
|
+
return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName,
|
|
185
|
+
info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
|
|
186
|
+
}
|
|
187
|
+
|
|
179
188
|
std::vector<PropertyDefinition> Binder::bindRelPropertyDefinitions(const CreateTableInfo& info) {
|
|
180
189
|
std::vector<PropertyDefinition> propertyDefinitions;
|
|
181
190
|
propertyDefinitions.emplace_back(
|
|
@@ -193,6 +202,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo*
|
|
|
193
202
|
auto dstMultiplicity = RelMultiplicityUtils::getBwd(extraInfo.relMultiplicity);
|
|
194
203
|
auto boundOptions = bindParsingOptions(extraInfo.options);
|
|
195
204
|
auto storageDirection = getStorageDirection(boundOptions);
|
|
205
|
+
auto storage = getStorage(boundOptions);
|
|
196
206
|
// Bind from to pairs
|
|
197
207
|
node_table_id_pair_set_t nodePairsSet;
|
|
198
208
|
std::vector<NodeTableIDPair> nodePairs;
|
|
@@ -209,9 +219,9 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo*
|
|
|
209
219
|
nodePairsSet.insert(pair);
|
|
210
220
|
nodePairs.emplace_back(pair);
|
|
211
221
|
}
|
|
212
|
-
auto boundExtraInfo =
|
|
213
|
-
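std::make_unique<BoundExtraCreateRelTableGroupInfo>(std::move(propertyDefinitions),
|
|
214
|
-
srcMultiplicity, dstMultiplicity, storageDirection, std::move(nodePairs));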
|
|
222
|
+
auto boundExtraInfo = std::make_unique<BoundExtraCreateRelTableGroupInfo>(
|
|
223
|
+
std::move(propertyDefinitions), srcMultiplicity, dstMultiplicity, storageDirection,
|
|
224
|
+
std::move(nodePairs), std::move(storage));
|
|
215
225
|
return BoundCreateTableInfo(CatalogEntryType::REL_GROUP_ENTRY, info->tableName,
|
|
216
226
|
info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
|
|
217
227
|
}
|
|
@@ -190,9 +190,9 @@ CatalogEntry* Catalog::createRelGroupEntry(Transaction* transaction,
|
|
|
190
190
|
for (auto& nodePair : extraInfo->nodePairs) {
|
|
191
191
|
relTableInfos.emplace_back(nodePair, tables->getNextOID());
|
|
192
192
|
}
|
|
193
|
-
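auto relGroupEntry =
|
|
194
|
-
std::make_unique<RelGroupCatalogEntry>(info.tableName, extraInfo->srcMultiplicity,
|
|
195
|
-
extraInfo->dstMultiplicity, extraInfo->storageDirection, std::move(relTableInfos));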
|
|
193
|
+
auto relGroupEntry = std::make_unique<RelGroupCatalogEntry>(info.tableName,
|
|
194
|
+
extraInfo->srcMultiplicity, extraInfo->dstMultiplicity, extraInfo->storageDirection,
|
|
195
|
+
std::move(relTableInfos), extraInfo->storage);
|
|
196
196
|
for (auto& definition : extraInfo->propertyDefinitions) {
|
|
197
197
|
relGroupEntry->addProperty(definition);
|
|
198
198
|
}
|
|
@@ -541,7 +541,8 @@ CatalogEntry* Catalog::createTableEntry(Transaction* transaction,
|
|
|
541
541
|
CatalogEntry* Catalog::createNodeTableEntry(Transaction* transaction,
|
|
542
542
|
const BoundCreateTableInfo& info) {
|
|
543
543
|
const auto extraInfo = info.extraInfo->constPtrCast<BoundExtraCreateNodeTableInfo>();
|
|
544
|
-
auto entry = std::make_unique<NodeTableCatalogEntry>(info.tableName, extraInfo->primaryKeyName);
|
|
544
|
+
auto entry = std::make_unique<NodeTableCatalogEntry>(info.tableName, extraInfo->primaryKeyName,
|
|
545
|
+
extraInfo->storage);
|
|
545
546
|
for (auto& definition : extraInfo->propertyDefinitions) {
|
|
546
547
|
entry->addProperty(definition);
|
|
547
548
|
}
|
|
@@ -21,16 +21,22 @@ void NodeTableCatalogEntry::serialize(common::Serializer& serializer) const {
|
|
|
21
21
|
TableCatalogEntry::serialize(serializer);
|
|
22
22
|
serializer.writeDebuggingInfo("primaryKeyName");
|
|
23
23
|
serializer.write(primaryKeyName);
|
|
24
|
+
serializer.writeDebuggingInfo("storage");
|
|
25
|
+
serializer.write(storage);
|
|
24
26
|
}
|
|
25
27
|
|
|
26
28
|
std::unique_ptr<NodeTableCatalogEntry> NodeTableCatalogEntry::deserialize(
|
|
27
29
|
common::Deserializer& deserializer) {
|
|
28
30
|
std::string debuggingInfo;
|
|
29
31
|
std::string primaryKeyName;
|
|
32
|
+
std::string storage;
|
|
30
33
|
deserializer.validateDebuggingInfo(debuggingInfo, "primaryKeyName");
|
|
31
34
|
deserializer.deserializeValue(primaryKeyName);
|
|
35
|
+
deserializer.validateDebuggingInfo(debuggingInfo, "storage");
|
|
36
|
+
deserializer.deserializeValue(storage);
|
|
32
37
|
auto nodeTableEntry = std::make_unique<NodeTableCatalogEntry>();
|
|
33
38
|
nodeTableEntry->primaryKeyName = primaryKeyName;
|
|
39
|
+
nodeTableEntry->storage = storage;
|
|
34
40
|
return nodeTableEntry;
|
|
35
41
|
}
|
|
36
42
|
|
|
@@ -42,6 +48,7 @@ std::string NodeTableCatalogEntry::toCypher(const ToCypherInfo& /*info*/) const
|
|
|
42
48
|
std::unique_ptr<TableCatalogEntry> NodeTableCatalogEntry::copy() const {
|
|
43
49
|
auto other = std::make_unique<NodeTableCatalogEntry>();
|
|
44
50
|
other->primaryKeyName = primaryKeyName;
|
|
51
|
+
other->storage = storage;
|
|
45
52
|
other->copyFrom(*this);
|
|
46
53
|
return other;
|
|
47
54
|
}
|
|
@@ -49,7 +56,7 @@ std::unique_ptr<TableCatalogEntry> NodeTableCatalogEntry::copy() const {
|
|
|
49
56
|
std::unique_ptr<BoundExtraCreateCatalogEntryInfo> NodeTableCatalogEntry::getBoundExtraCreateInfo(
|
|
50
57
|
transaction::Transaction*) const {
|
|
51
58
|
return std::make_unique<BoundExtraCreateNodeTableInfo>(primaryKeyName,
|
|
52
|
-
copyVector(getProperties()));
|
|
59
|
+
copyVector(getProperties()), storage);
|
|
53
60
|
}
|
|
54
61
|
|
|
55
62
|
} // namespace catalog
|
|
@@ -95,6 +95,8 @@ void RelGroupCatalogEntry::serialize(Serializer& serializer) const {
|
|
|
95
95
|
serializer.serializeValue(dstMultiplicity);
|
|
96
96
|
serializer.writeDebuggingInfo("storageDirection");
|
|
97
97
|
serializer.serializeValue(storageDirection);
|
|
98
|
+
serializer.writeDebuggingInfo("storage");
|
|
99
|
+
serializer.serializeValue(storage);
|
|
98
100
|
serializer.writeDebuggingInfo("relTableInfos");
|
|
99
101
|
serializer.serializeVector(relTableInfos);
|
|
100
102
|
}
|
|
@@ -105,6 +107,7 @@ std::unique_ptr<RelGroupCatalogEntry> RelGroupCatalogEntry::deserialize(
|
|
|
105
107
|
auto srcMultiplicity = RelMultiplicity::MANY;
|
|
106
108
|
auto dstMultiplicity = RelMultiplicity::MANY;
|
|
107
109
|
auto storageDirection = ExtendDirection::BOTH;
|
|
110
|
+
std::string storage;
|
|
108
111
|
std::vector<RelTableCatalogInfo> relTableInfos;
|
|
109
112
|
deserializer.validateDebuggingInfo(debuggingInfo, "srcMultiplicity");
|
|
110
113
|
deserializer.deserializeValue(srcMultiplicity);
|
|
@@ -112,12 +115,15 @@ std::unique_ptr<RelGroupCatalogEntry> RelGroupCatalogEntry::deserialize(
|
|
|
112
115
|
deserializer.deserializeValue(dstMultiplicity);
|
|
113
116
|
deserializer.validateDebuggingInfo(debuggingInfo, "storageDirection");
|
|
114
117
|
deserializer.deserializeValue(storageDirection);
|
|
118
|
+
deserializer.validateDebuggingInfo(debuggingInfo, "storage");
|
|
119
|
+
deserializer.deserializeValue(storage);
|
|
115
120
|
deserializer.validateDebuggingInfo(debuggingInfo, "relTableInfos");
|
|
116
121
|
deserializer.deserializeVector(relTableInfos);
|
|
117
122
|
auto relGroupEntry = std::make_unique<RelGroupCatalogEntry>();
|
|
118
123
|
relGroupEntry->srcMultiplicity = srcMultiplicity;
|
|
119
124
|
relGroupEntry->dstMultiplicity = dstMultiplicity;
|
|
120
125
|
relGroupEntry->storageDirection = storageDirection;
|
|
126
|
+
relGroupEntry->storage = storage;
|
|
121
127
|
relGroupEntry->relTableInfos = relTableInfos;
|
|
122
128
|
return relGroupEntry;
|
|
123
129
|
}
|
|
@@ -167,6 +173,7 @@ std::unique_ptr<TableCatalogEntry> RelGroupCatalogEntry::copy() const {
|
|
|
167
173
|
other->srcMultiplicity = srcMultiplicity;
|
|
168
174
|
other->dstMultiplicity = dstMultiplicity;
|
|
169
175
|
other->storageDirection = storageDirection;
|
|
176
|
+
other->storage = storage;
|
|
170
177
|
other->relTableInfos = relTableInfos;
|
|
171
178
|
other->copyFrom(*this);
|
|
172
179
|
return other;
|
|
@@ -71,14 +71,15 @@ struct LBUG_API BoundExtraCreateTableInfo : BoundExtraCreateCatalogEntryInfo {
|
|
|
71
71
|
|
|
72
72
|
struct BoundExtraCreateNodeTableInfo final : BoundExtraCreateTableInfo {
|
|
73
73
|
std::string primaryKeyName;
|
|
74
|
+
std::string storage;
|
|
74
75
|
|
|
75
76
|
BoundExtraCreateNodeTableInfo(std::string primaryKeyName,
|
|
76
|
-
std::vector<PropertyDefinition> definitions)
|
|
77
|
+
std::vector<PropertyDefinition> definitions, std::string storage = "")
|
|
77
78
|
: BoundExtraCreateTableInfo{std::move(definitions)},
|
|
78
|
-
primaryKeyName{std::move(primaryKeyName)} {}
|
|
79
|
+
primaryKeyName{std::move(primaryKeyName)}, storage{std::move(storage)} {}
|
|
79
80
|
BoundExtraCreateNodeTableInfo(const BoundExtraCreateNodeTableInfo& other)
|
|
80
81
|
: BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
|
|
81
|
-
primaryKeyName{other.primaryKeyName} {}
|
|
82
|
+
primaryKeyName{other.primaryKeyName}, storage{other.storage} {}
|
|
82
83
|
|
|
83
84
|
std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
|
|
84
85
|
return std::make_unique<BoundExtraCreateNodeTableInfo>(*this);
|
|
@@ -90,18 +91,21 @@ struct BoundExtraCreateRelTableGroupInfo final : BoundExtraCreateTableInfo {
|
|
|
90
91
|
common::RelMultiplicity dstMultiplicity;
|
|
91
92
|
common::ExtendDirection storageDirection;
|
|
92
93
|
std::vector<catalog::NodeTableIDPair> nodePairs;
|
|
94
|
+
std::string storage;
|
|
93
95
|
|
|
94
96
|
explicit BoundExtraCreateRelTableGroupInfo(std::vector<PropertyDefinition> definitions,
|
|
95
97
|
common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity,
|
|
96
|
-
common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs)
|
|
98
|
+
common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs,
|
|
99
|
+
std::string storage = "")
|
|
97
100
|
: BoundExtraCreateTableInfo{std::move(definitions)}, srcMultiplicity{srcMultiplicity},
|
|
98
101
|
dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection},
|
|
99
|
-
nodePairs{std::move(nodePairs)} {}
|
|
102
|
+
nodePairs{std::move(nodePairs)}, storage{std::move(storage)} {}
|
|
100
103
|
|
|
101
104
|
BoundExtraCreateRelTableGroupInfo(const BoundExtraCreateRelTableGroupInfo& other)
|
|
102
105
|
: BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
|
|
103
106
|
srcMultiplicity{other.srcMultiplicity}, dstMultiplicity{other.dstMultiplicity},
|
|
104
|
-
storageDirection{other.storageDirection}, nodePairs{other.nodePairs} {}
|
|
107
|
+
storageDirection{other.storageDirection}, nodePairs{other.nodePairs},
|
|
108
|
+
storage{other.storage} {}
|
|
105
109
|
|
|
106
110
|
std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
|
|
107
111
|
return std::make_unique<BoundExtraCreateRelTableGroupInfo>(*this);
|
|
@@ -15,9 +15,9 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry {
|
|
|
15
15
|
|
|
16
16
|
public:
|
|
17
17
|
NodeTableCatalogEntry() = default;
|
|
18
|
-
NodeTableCatalogEntry(std::string name, std::string primaryKeyName)
|
|
19
|
-
: TableCatalogEntry{entryType_, std::move(name)},
|
|
20
|
-
primaryKeyName{std::move(primaryKeyName)} {}
|
|
18
|
+
NodeTableCatalogEntry(std::string name, std::string primaryKeyName, std::string storage = "")
|
|
19
|
+
: TableCatalogEntry{entryType_, std::move(name)}, primaryKeyName{std::move(primaryKeyName)},
|
|
20
|
+
storage{std::move(storage)} {}
|
|
21
21
|
|
|
22
22
|
bool isParent(common::table_id_t /*tableID*/) override { return false; }
|
|
23
23
|
common::TableType getTableType() const override { return common::TableType::NODE; }
|
|
@@ -29,6 +29,7 @@ public:
|
|
|
29
29
|
const binder::PropertyDefinition& getPrimaryKeyDefinition() const {
|
|
30
30
|
return getProperty(primaryKeyName);
|
|
31
31
|
}
|
|
32
|
+
const std::string& getStorage() const { return storage; }
|
|
32
33
|
|
|
33
34
|
void renameProperty(const std::string& propertyName, const std::string& newName) override;
|
|
34
35
|
|
|
@@ -44,6 +45,7 @@ private:
|
|
|
44
45
|
|
|
45
46
|
private:
|
|
46
47
|
std::string primaryKeyName;
|
|
48
|
+
std::string storage;
|
|
47
49
|
};
|
|
48
50
|
|
|
49
51
|
} // namespace catalog
|
|
@@ -34,10 +34,10 @@ public:
|
|
|
34
34
|
RelGroupCatalogEntry() = default;
|
|
35
35
|
RelGroupCatalogEntry(std::string tableName, common::RelMultiplicity srcMultiplicity,
|
|
36
36
|
common::RelMultiplicity dstMultiplicity, common::ExtendDirection storageDirection,
|
|
37
|
-
std::vector<RelTableCatalogInfo> relTableInfos)
|
|
37
|
+
std::vector<RelTableCatalogInfo> relTableInfos, std::string storage = "")
|
|
38
38
|
: TableCatalogEntry{type_, std::move(tableName)}, srcMultiplicity{srcMultiplicity},
|
|
39
39
|
dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection},
|
|
40
|
-
relTableInfos{std::move(relTableInfos)} {
|
|
40
|
+
relTableInfos{std::move(relTableInfos)}, storage{std::move(storage)} {
|
|
41
41
|
propertyCollection =
|
|
42
42
|
PropertyDefinitionCollection{1}; // Skip NBR_NODE_ID column as the first one.
|
|
43
43
|
}
|
|
@@ -53,6 +53,7 @@ public:
|
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
common::ExtendDirection getStorageDirection() const { return storageDirection; }
|
|
56
|
+
const std::string& getStorage() const { return storage; }
|
|
56
57
|
|
|
57
58
|
common::idx_t getNumRelTables() const { return relTableInfos.size(); }
|
|
58
59
|
const std::vector<RelTableCatalogInfo>& getRelEntryInfos() const { return relTableInfos; }
|
|
@@ -97,6 +98,7 @@ private:
|
|
|
97
98
|
// TODO(Guodong): Avoid using extend direction for storage direction
|
|
98
99
|
common::ExtendDirection storageDirection = common::ExtendDirection::BOTH;
|
|
99
100
|
std::vector<RelTableCatalogInfo> relTableInfos;
|
|
101
|
+
std::string storage;
|
|
100
102
|
};
|
|
101
103
|
|
|
102
104
|
} // namespace catalog
|
|
@@ -36,8 +36,10 @@ struct CreateTableInfo {
|
|
|
36
36
|
|
|
37
37
|
struct ExtraCreateNodeTableInfo final : ExtraCreateTableInfo {
|
|
38
38
|
std::string pKName;
|
|
39
|
+
options_t options;
|
|
39
40
|
|
|
40
|
-
explicit ExtraCreateNodeTableInfo(std::string pKName) : pKName{std::move(pKName)} {}
|
|
41
|
+
explicit ExtraCreateNodeTableInfo(std::string pKName, options_t options = {})
|
|
42
|
+
: pKName{std::move(pKName)}, options{std::move(options)} {}
|
|
41
43
|
};
|
|
42
44
|
|
|
43
45
|
struct ExtraCreateRelTableGroupInfo final : ExtraCreateTableInfo {
|
|
@@ -24,7 +24,7 @@ public:
|
|
|
24
24
|
void initialize(const transaction::Transaction* transaction, storage::NodeTable* table,
|
|
25
25
|
ScanNodeTableProgressSharedState& progressSharedState);
|
|
26
26
|
|
|
27
|
-
void nextMorsel(storage::NodeTableScanState& scanState,
|
|
27
|
+
void nextMorsel(storage::TableScanState& scanState,
|
|
28
28
|
ScanNodeTableProgressSharedState& progressSharedState);
|
|
29
29
|
|
|
30
30
|
common::SemiMask* getSemiMask() const { return semiMask.get(); }
|
|
@@ -116,7 +116,7 @@ private:
|
|
|
116
116
|
|
|
117
117
|
private:
|
|
118
118
|
common::idx_t currentTableIdx;
|
|
119
|
-
std::unique_ptr<storage::NodeTableScanState> scanState;
|
|
119
|
+
std::unique_ptr<storage::TableScanState> scanState;
|
|
120
120
|
std::vector<ScanNodeTableInfo> tableInfos;
|
|
121
121
|
std::vector<std::shared_ptr<ScanNodeTableSharedState>> sharedStates;
|
|
122
122
|
std::shared_ptr<ScanNodeTableProgressSharedState> progressSharedState;
|
|
@@ -107,7 +107,7 @@ private:
|
|
|
107
107
|
|
|
108
108
|
class StorageManager;
|
|
109
109
|
|
|
110
|
-
class LBUG_API NodeTable final : public Table {
|
|
110
|
+
class LBUG_API NodeTable : public Table {
|
|
111
111
|
public:
|
|
112
112
|
NodeTable(const StorageManager* storageManager,
|
|
113
113
|
const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* mm);
|
|
@@ -119,6 +119,11 @@ public:
|
|
|
119
119
|
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
120
120
|
common::table_id_t tableID, common::offset_t startOffset) const;
|
|
121
121
|
|
|
122
|
+
// Virtual method for operator-level scan coordination initialization
|
|
123
|
+
// Called once per scan operation (not per scan state)
|
|
124
|
+
virtual void initializeScanCoordination(
|
|
125
|
+
[[maybe_unused]] const transaction::Transaction* transaction) {}
|
|
126
|
+
|
|
122
127
|
bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
|
|
123
128
|
template<bool lock = true>
|
|
124
129
|
bool lookup(const transaction::Transaction* transaction, const TableScanState& scanState) const;
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <mutex>
|
|
4
|
+
#include <vector>
|
|
5
|
+
|
|
6
|
+
#include "catalog/catalog_entry/node_table_catalog_entry.h"
|
|
7
|
+
#include "common/exception/runtime.h"
|
|
8
|
+
#include "common/types/internal_id_util.h"
|
|
9
|
+
#include "common/types/value/value.h"
|
|
10
|
+
#include "processor/operator/persistent/reader/parquet/parquet_reader.h"
|
|
11
|
+
#include "storage/table/node_table.h"
|
|
12
|
+
|
|
13
|
+
namespace lbug {
|
|
14
|
+
namespace storage {
|
|
15
|
+
|
|
16
|
+
struct ParquetNodeTableScanState final : NodeTableScanState {
|
|
17
|
+
std::unique_ptr<processor::ParquetReader> parquetReader;
|
|
18
|
+
std::unique_ptr<processor::ParquetReaderScanState> parquetScanState;
|
|
19
|
+
bool initialized = false;
|
|
20
|
+
bool scanCompleted = false; // Track if this scan state has finished reading
|
|
21
|
+
bool dataRead = false;
|
|
22
|
+
std::vector<std::vector<std::unique_ptr<common::Value>>> allData;
|
|
23
|
+
size_t totalRows = 0;
|
|
24
|
+
size_t nextRowToDistribute = 0;
|
|
25
|
+
uint64_t lastQueryId = 0; // Track the last query ID to detect new queries
|
|
26
|
+
|
|
27
|
+
ParquetNodeTableScanState([[maybe_unused]] MemoryManager& mm, common::ValueVector* nodeIDVector,
|
|
28
|
+
std::vector<common::ValueVector*> outputVectors,
|
|
29
|
+
std::shared_ptr<common::DataChunkState> outChunkState)
|
|
30
|
+
: NodeTableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} {
|
|
31
|
+
parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
|
|
32
|
+
}
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
// Shared state to coordinate row group assignment across parallel scan states
|
|
36
|
+
struct ParquetNodeTableSharedState {
|
|
37
|
+
std::mutex mtx;
|
|
38
|
+
common::node_group_idx_t currentRowGroupIdx = 0;
|
|
39
|
+
common::node_group_idx_t numRowGroups = 0;
|
|
40
|
+
|
|
41
|
+
void reset(common::node_group_idx_t totalRowGroups) {
|
|
42
|
+
std::lock_guard<std::mutex> lock(mtx);
|
|
43
|
+
currentRowGroupIdx = 0;
|
|
44
|
+
numRowGroups = totalRowGroups;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
bool getNextRowGroup(common::node_group_idx_t& assignedRowGroupIdx) {
|
|
48
|
+
std::lock_guard<std::mutex> lock(mtx);
|
|
49
|
+
if (currentRowGroupIdx < numRowGroups) {
|
|
50
|
+
assignedRowGroupIdx = currentRowGroupIdx++;
|
|
51
|
+
return true;
|
|
52
|
+
}
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
class ParquetNodeTable final : public NodeTable {
|
|
58
|
+
public:
|
|
59
|
+
ParquetNodeTable(const StorageManager* storageManager,
|
|
60
|
+
const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager);
|
|
61
|
+
|
|
62
|
+
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
63
|
+
bool resetCachedBoundNodeSelVec = true) const override;
|
|
64
|
+
|
|
65
|
+
// Override to reset shared state for row group coordination at the start of each scan operation
|
|
66
|
+
void initializeScanCoordination(const transaction::Transaction* transaction) override;
|
|
67
|
+
|
|
68
|
+
bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
|
|
69
|
+
|
|
70
|
+
// For parquet-backed tables, we don't support modifications
|
|
71
|
+
void insert([[maybe_unused]] transaction::Transaction* transaction,
|
|
72
|
+
[[maybe_unused]] TableInsertState& insertState) override {
|
|
73
|
+
throw common::RuntimeException("Cannot insert into parquet-backed node table");
|
|
74
|
+
}
|
|
75
|
+
void update([[maybe_unused]] transaction::Transaction* transaction,
|
|
76
|
+
[[maybe_unused]] TableUpdateState& updateState) override {
|
|
77
|
+
throw common::RuntimeException("Cannot update parquet-backed node table");
|
|
78
|
+
}
|
|
79
|
+
bool delete_([[maybe_unused]] transaction::Transaction* transaction,
|
|
80
|
+
[[maybe_unused]] TableDeleteState& deleteState) override {
|
|
81
|
+
throw common::RuntimeException("Cannot delete from parquet-backed node table");
|
|
82
|
+
return false;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
|
|
86
|
+
|
|
87
|
+
const std::string& getParquetFilePath() const { return parquetFilePath; }
|
|
88
|
+
|
|
89
|
+
// Note: Cannot override getNumCommittedNodeGroups since it's not virtual in base class
|
|
90
|
+
// Will need a different approach
|
|
91
|
+
|
|
92
|
+
private:
|
|
93
|
+
std::string parquetFilePath;
|
|
94
|
+
const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry;
|
|
95
|
+
mutable std::unique_ptr<ParquetNodeTableSharedState> sharedState;
|
|
96
|
+
|
|
97
|
+
void initializeParquetReader(transaction::Transaction* transaction) const;
|
|
98
|
+
void initParquetScanForRowGroup(transaction::Transaction* transaction,
|
|
99
|
+
ParquetNodeTableScanState& scanState) const;
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
} // namespace storage
|
|
103
|
+
} // namespace lbug
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "catalog/catalog_entry/rel_group_catalog_entry.h"
|
|
4
|
+
#include "common/exception/runtime.h"
|
|
5
|
+
#include "common/types/internal_id_util.h"
|
|
6
|
+
#include "processor/operator/persistent/reader/parquet/parquet_reader.h"
|
|
7
|
+
#include "storage/table/rel_table.h"
|
|
8
|
+
#include "transaction/transaction.h"
|
|
9
|
+
|
|
10
|
+
namespace lbug {
|
|
11
|
+
namespace storage {
|
|
12
|
+
|
|
13
|
+
struct ParquetRelTableScanState final : RelTableScanState {
|
|
14
|
+
std::unique_ptr<processor::ParquetReaderScanState> parquetScanState;
|
|
15
|
+
// For CSR format: store matching rows for current bound node
|
|
16
|
+
size_t nextRowToProcess = 0;
|
|
17
|
+
|
|
18
|
+
// Row group range for morsel-driven parallelism
|
|
19
|
+
uint64_t startRowGroup = 0;
|
|
20
|
+
uint64_t endRowGroup = 0;
|
|
21
|
+
uint64_t currentRowGroup = 0;
|
|
22
|
+
|
|
23
|
+
// Per-scan-state readers for thread safety
|
|
24
|
+
std::unique_ptr<processor::ParquetReader> indicesReader;
|
|
25
|
+
std::unique_ptr<processor::ParquetReader> indptrReader;
|
|
26
|
+
|
|
27
|
+
ParquetRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector,
|
|
28
|
+
std::vector<common::ValueVector*> outputVectors,
|
|
29
|
+
std::shared_ptr<common::DataChunkState> outChunkState)
|
|
30
|
+
: RelTableScanState{mm, nodeIDVector, std::move(outputVectors), std::move(outChunkState)} {
|
|
31
|
+
parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
void setToTable(const transaction::Transaction* transaction, Table* table_,
|
|
35
|
+
std::vector<common::column_id_t> columnIDs_,
|
|
36
|
+
std::vector<ColumnPredicateSet> columnPredicateSets_,
|
|
37
|
+
common::RelDataDirection direction_) override;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
class ParquetRelTable final : public RelTable {
|
|
41
|
+
public:
|
|
42
|
+
ParquetRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID,
|
|
43
|
+
common::table_id_t toTableID, const StorageManager* storageManager,
|
|
44
|
+
MemoryManager* memoryManager);
|
|
45
|
+
|
|
46
|
+
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
47
|
+
bool resetCachedBoundNodeSelVec = true) const override;
|
|
48
|
+
|
|
49
|
+
bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
|
|
50
|
+
|
|
51
|
+
// For parquet-backed tables, we don't support modifications
|
|
52
|
+
void insert([[maybe_unused]] transaction::Transaction* transaction,
|
|
53
|
+
[[maybe_unused]] TableInsertState& insertState) override {
|
|
54
|
+
throw common::RuntimeException("Cannot insert into parquet-backed rel table");
|
|
55
|
+
}
|
|
56
|
+
void update([[maybe_unused]] transaction::Transaction* transaction,
|
|
57
|
+
[[maybe_unused]] TableUpdateState& updateState) override {
|
|
58
|
+
throw common::RuntimeException("Cannot update parquet-backed rel table");
|
|
59
|
+
}
|
|
60
|
+
bool delete_([[maybe_unused]] transaction::Transaction* transaction,
|
|
61
|
+
[[maybe_unused]] TableDeleteState& deleteState) override {
|
|
62
|
+
throw common::RuntimeException("Cannot delete from parquet-backed rel table");
|
|
63
|
+
return false;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
|
|
67
|
+
|
|
68
|
+
private:
|
|
69
|
+
catalog::RelGroupCatalogEntry* relGroupEntry; // Store reference to table schema
|
|
70
|
+
std::string indicesFilePath;
|
|
71
|
+
std::string indptrFilePath;
|
|
72
|
+
mutable std::unique_ptr<processor::ParquetReader> indicesReader;
|
|
73
|
+
mutable std::unique_ptr<processor::ParquetReader> indptrReader;
|
|
74
|
+
mutable std::mutex parquetReaderMutex;
|
|
75
|
+
mutable std::mutex indptrDataMutex;
|
|
76
|
+
mutable std::vector<common::offset_t> indptrData; // Cached indptr data for CSR format
|
|
77
|
+
|
|
78
|
+
void initializeParquetReaders(transaction::Transaction* transaction) const;
|
|
79
|
+
void initializeIndptrReader(transaction::Transaction* transaction) const;
|
|
80
|
+
void loadIndptrData(transaction::Transaction* transaction) const;
|
|
81
|
+
bool scanInternalByRowGroups(transaction::Transaction* transaction,
|
|
82
|
+
ParquetRelTableScanState& parquetRelScanState);
|
|
83
|
+
bool scanRowGroupForBoundNodes(transaction::Transaction* transaction,
|
|
84
|
+
ParquetRelTableScanState& parquetRelScanState,
|
|
85
|
+
const std::vector<uint64_t>& rowGroupsToProcess,
|
|
86
|
+
const std::unordered_set<common::offset_t>& boundNodeOffsets);
|
|
87
|
+
common::offset_t findSourceNodeForRow(common::offset_t globalRowIdx) const;
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
} // namespace storage
|
|
91
|
+
} // namespace lbug
|
|
@@ -48,7 +48,7 @@ struct RelTableScanState : TableScanState {
|
|
|
48
48
|
nodeGroupScanState = std::make_unique<CSRNodeGroupScanState>();
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
-
void setToTable(const transaction::Transaction* transaction, Table* table_,
|
|
51
|
+
virtual void setToTable(const transaction::Transaction* transaction, Table* table_,
|
|
52
52
|
std::vector<common::column_id_t> columnIDs_,
|
|
53
53
|
std::vector<ColumnPredicateSet> columnPredicateSets_,
|
|
54
54
|
common::RelDataDirection direction_) override;
|
|
@@ -138,7 +138,7 @@ struct LBUG_API RelTableDeleteState final : TableDeleteState {
|
|
|
138
138
|
relIDVector{relIDVector}, detachDeleteDirection{detachDeleteDirection} {}
|
|
139
139
|
};
|
|
140
140
|
|
|
141
|
-
class LBUG_API RelTable
|
|
141
|
+
class LBUG_API RelTable : public Table {
|
|
142
142
|
public:
|
|
143
143
|
using rel_multiplicity_constraint_throw_func_t =
|
|
144
144
|
std::function<void(const std::string&, common::offset_t, common::RelDataDirection)>;
|
|
@@ -130,6 +130,8 @@ public:
|
|
|
130
130
|
return getMinUncommittedNodeOffset(tableID) + localRowIdx;
|
|
131
131
|
}
|
|
132
132
|
|
|
133
|
+
main::ClientContext* getClientContext() const { return clientContext; }
|
|
134
|
+
|
|
133
135
|
void pushCreateDropCatalogEntry(catalog::CatalogSet& catalogSet,
|
|
134
136
|
catalog::CatalogEntry& catalogEntry, bool isInternal, bool skipLoggingToWAL = false);
|
|
135
137
|
void pushAlterCatalogEntry(catalog::CatalogSet& catalogSet, catalog::CatalogEntry& catalogEntry,
|