lbug 0.12.3-dev.16 → 0.12.3-dev.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/lbug-source/CMakeLists.txt +1 -1
  2. package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
  3. package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
  4. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
  5. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
  6. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
  7. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
  8. package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
  9. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
  10. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
  11. package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
  12. package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
  13. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  14. package/lbug-source/src/antlr4/Cypher.g4 +1 -1
  15. package/lbug-source/src/binder/bind/bind_ddl.cpp +23 -13
  16. package/lbug-source/src/catalog/catalog.cpp +5 -4
  17. package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
  18. package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +7 -0
  19. package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +10 -6
  20. package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
  21. package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +4 -2
  22. package/lbug-source/src/include/common/constants.h +1 -0
  23. package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
  24. package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
  25. package/lbug-source/src/include/storage/storage_manager.h +1 -0
  26. package/lbug-source/src/include/storage/table/node_table.h +6 -1
  27. package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
  28. package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
  29. package/lbug-source/src/include/storage/table/rel_table.h +2 -2
  30. package/lbug-source/src/include/transaction/transaction.h +2 -0
  31. package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
  32. package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
  33. package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
  34. package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
  35. package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +12 -2
  36. package/lbug-source/src/storage/storage_manager.cpp +37 -6
  37. package/lbug-source/src/storage/table/CMakeLists.txt +2 -0
  38. package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
  39. package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
  40. package/lbug-source/test/include/test_runner/test_group.h +11 -1
  41. package/lbug-source/test/runner/e2e_test.cpp +7 -1
  42. package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
  43. package/lbug-source/test/test_helper/test_helper.cpp +24 -0
  44. package/lbug-source/test/test_runner/test_parser.cpp +3 -0
  45. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2761 -2701
  46. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -0
  47. package/package.json +1 -1
  48. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  49. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  50. package/prebuilt/lbugjs-linux-x64.node +0 -0
  51. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
2
 
3
- project(Lbug VERSION 0.12.3.16 LANGUAGES CXX C)
3
+ project(Lbug VERSION 0.12.3.18 LANGUAGES CXX C)
4
4
 
5
5
  option(SINGLE_THREADED "Single-threaded mode" FALSE)
6
6
  if(SINGLE_THREADED)
@@ -0,0 +1,4 @@
1
+ CREATE NODE TABLE city(id INT32, name STRING, population INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
2
+ CREATE NODE TABLE user(id INT32, name STRING, age INT64, PRIMARY KEY(id)) WITH (storage = 'dataset/demo-db/graph-std/demo');
3
+ CREATE REL TABLE follows(FROM user TO user, since INT32) WITH (storage = 'dataset/demo-db/graph-std/demo');
4
+ CREATE REL TABLE livesin(FROM user TO city) WITH (storage = 'dataset/demo-db/graph-std/demo');
@@ -342,7 +342,7 @@ kU_IfNotExists
342
342
  : IF SP NOT SP EXISTS ;
343
343
 
344
344
  kU_CreateNodeTable
345
- : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
345
+ : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
346
346
 
347
347
  kU_CreateRelTable
348
348
  : CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
@@ -1 +1 @@
1
- 8334a684be17e562250acf07ae2bbca0
1
+ 52606d4848c2f224b8e480fec2923081
@@ -95,7 +95,7 @@ kU_IfNotExists
95
95
  : IF SP NOT SP EXISTS ;
96
96
 
97
97
  kU_CreateNodeTable
98
- : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ;
98
+ : CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName ( SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint )? SP? ')' | SP AS SP oC_Query ) ( SP WITH SP? '(' SP? kU_Options SP? ')')? ;
99
99
 
100
100
  kU_CreateRelTable
101
101
  : CREATE SP REL SP TABLE ( SP GROUP )? ( SP kU_IfNotExists )? SP oC_SchemaName
@@ -136,16 +136,6 @@ BoundCreateTableInfo Binder::bindCreateTableInfo(const CreateTableInfo* info) {
136
136
  }
137
137
  }
138
138
 
139
- BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
140
- auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
141
- auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>();
142
- validatePrimaryKey(extraInfo.pKName, propertyDefinitions);
143
- auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
144
- std::move(propertyDefinitions));
145
- return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName,
146
- info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
147
- }
148
-
149
139
  void Binder::validateNodeTableType(const TableCatalogEntry* entry) {
150
140
  if (entry->getType() != CatalogEntryType::NODE_TABLE_ENTRY) {
151
141
  throw BinderException(stringFormat("{} is not of type NODE.", entry->getName()));
@@ -168,6 +158,13 @@ void Binder::validateColumnExistence(const TableCatalogEntry* entry,
168
158
  }
169
159
  }
170
160
 
161
+ static std::string getStorage(const case_insensitive_map_t<Value>& options) { // Extracts the storage setting from already-bound table options.
162
+ if (options.contains(TableOptionConstants::REL_STORAGE_OPTION)) { // option key is STORAGE; despite the REL_ prefix this helper is also called when binding node tables
163
+ return options.at(TableOptionConstants::REL_STORAGE_OPTION).toString(); // the option value rendered through Value::toString()
164
+ }
165
+ return ""; // option absent: callers receive an empty string
166
+ }
167
+
171
168
  static ExtendDirection getStorageDirection(const case_insensitive_map_t<Value>& options) {
172
169
  if (options.contains(TableOptionConstants::REL_STORAGE_DIRECTION_OPTION)) {
173
170
  return ExtendDirectionUtil::fromString(
@@ -176,6 +173,18 @@ static ExtendDirection getStorageDirection(const case_insensitive_map_t<Value>&
176
173
  return DEFAULT_EXTEND_DIRECTION;
177
174
  }
178
175
 
176
+ BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) { // Binds a parsed node-table creation request into a BoundCreateTableInfo tagged NODE_TABLE_ENTRY.
177
+ auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
178
+ auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>(); // node-table specific parse info: primary key name and options
179
+ validatePrimaryKey(extraInfo.pKName, propertyDefinitions); // NOTE(review): presumably throws on an invalid key; validatePrimaryKey is defined outside this view
180
+ auto boundOptions = bindParsingOptions(extraInfo.options); // NOTE(review): presumably the options of the WITH (...) clause added to kU_CreateNodeTable; confirm in transform_ddl.cpp
181
+ auto storage = getStorage(boundOptions); // empty string when no STORAGE option was supplied
182
+ auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
183
+ std::move(propertyDefinitions), std::move(storage)); // both locals are moved from here and are not used again below
184
+ return BoundCreateTableInfo(CatalogEntryType::NODE_TABLE_ENTRY, info->tableName,
185
+ info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
186
+ }
187
+
179
188
  std::vector<PropertyDefinition> Binder::bindRelPropertyDefinitions(const CreateTableInfo& info) {
180
189
  std::vector<PropertyDefinition> propertyDefinitions;
181
190
  propertyDefinitions.emplace_back(
@@ -193,6 +202,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo*
193
202
  auto dstMultiplicity = RelMultiplicityUtils::getBwd(extraInfo.relMultiplicity);
194
203
  auto boundOptions = bindParsingOptions(extraInfo.options);
195
204
  auto storageDirection = getStorageDirection(boundOptions);
205
+ auto storage = getStorage(boundOptions);
196
206
  // Bind from to pairs
197
207
  node_table_id_pair_set_t nodePairsSet;
198
208
  std::vector<NodeTableIDPair> nodePairs;
@@ -209,9 +219,9 @@ BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo*
209
219
  nodePairsSet.insert(pair);
210
220
  nodePairs.emplace_back(pair);
211
221
  }
212
- auto boundExtraInfo =
213
- std::make_unique<BoundExtraCreateRelTableGroupInfo>(std::move(propertyDefinitions),
214
- srcMultiplicity, dstMultiplicity, storageDirection, std::move(nodePairs));
222
+ auto boundExtraInfo = std::make_unique<BoundExtraCreateRelTableGroupInfo>(
223
+ std::move(propertyDefinitions), srcMultiplicity, dstMultiplicity, storageDirection,
224
+ std::move(nodePairs), std::move(storage));
215
225
  return BoundCreateTableInfo(CatalogEntryType::REL_GROUP_ENTRY, info->tableName,
216
226
  info->onConflict, std::move(boundExtraInfo), clientContext->useInternalCatalogEntry());
217
227
  }
@@ -190,9 +190,9 @@ CatalogEntry* Catalog::createRelGroupEntry(Transaction* transaction,
190
190
  for (auto& nodePair : extraInfo->nodePairs) {
191
191
  relTableInfos.emplace_back(nodePair, tables->getNextOID());
192
192
  }
193
- auto relGroupEntry =
194
- std::make_unique<RelGroupCatalogEntry>(info.tableName, extraInfo->srcMultiplicity,
195
- extraInfo->dstMultiplicity, extraInfo->storageDirection, std::move(relTableInfos));
193
+ auto relGroupEntry = std::make_unique<RelGroupCatalogEntry>(info.tableName,
194
+ extraInfo->srcMultiplicity, extraInfo->dstMultiplicity, extraInfo->storageDirection,
195
+ std::move(relTableInfos), extraInfo->storage);
196
196
  for (auto& definition : extraInfo->propertyDefinitions) {
197
197
  relGroupEntry->addProperty(definition);
198
198
  }
@@ -541,7 +541,8 @@ CatalogEntry* Catalog::createTableEntry(Transaction* transaction,
541
541
  CatalogEntry* Catalog::createNodeTableEntry(Transaction* transaction,
542
542
  const BoundCreateTableInfo& info) {
543
543
  const auto extraInfo = info.extraInfo->constPtrCast<BoundExtraCreateNodeTableInfo>();
544
- auto entry = std::make_unique<NodeTableCatalogEntry>(info.tableName, extraInfo->primaryKeyName);
544
+ auto entry = std::make_unique<NodeTableCatalogEntry>(info.tableName, extraInfo->primaryKeyName,
545
+ extraInfo->storage);
545
546
  for (auto& definition : extraInfo->propertyDefinitions) {
546
547
  entry->addProperty(definition);
547
548
  }
@@ -21,16 +21,22 @@ void NodeTableCatalogEntry::serialize(common::Serializer& serializer) const {
21
21
  TableCatalogEntry::serialize(serializer);
22
22
  serializer.writeDebuggingInfo("primaryKeyName");
23
23
  serializer.write(primaryKeyName);
24
+ serializer.writeDebuggingInfo("storage");
25
+ serializer.write(storage);
24
26
  }
25
27
 
26
28
  std::unique_ptr<NodeTableCatalogEntry> NodeTableCatalogEntry::deserialize(
27
29
  common::Deserializer& deserializer) {
28
30
  std::string debuggingInfo;
29
31
  std::string primaryKeyName;
32
+ std::string storage;
30
33
  deserializer.validateDebuggingInfo(debuggingInfo, "primaryKeyName");
31
34
  deserializer.deserializeValue(primaryKeyName);
35
+ deserializer.validateDebuggingInfo(debuggingInfo, "storage");
36
+ deserializer.deserializeValue(storage);
32
37
  auto nodeTableEntry = std::make_unique<NodeTableCatalogEntry>();
33
38
  nodeTableEntry->primaryKeyName = primaryKeyName;
39
+ nodeTableEntry->storage = storage;
34
40
  return nodeTableEntry;
35
41
  }
36
42
 
@@ -42,6 +48,7 @@ std::string NodeTableCatalogEntry::toCypher(const ToCypherInfo& /*info*/) const
42
48
  std::unique_ptr<TableCatalogEntry> NodeTableCatalogEntry::copy() const {
43
49
  auto other = std::make_unique<NodeTableCatalogEntry>();
44
50
  other->primaryKeyName = primaryKeyName;
51
+ other->storage = storage;
45
52
  other->copyFrom(*this);
46
53
  return other;
47
54
  }
@@ -49,7 +56,7 @@ std::unique_ptr<TableCatalogEntry> NodeTableCatalogEntry::copy() const {
49
56
  std::unique_ptr<BoundExtraCreateCatalogEntryInfo> NodeTableCatalogEntry::getBoundExtraCreateInfo(
50
57
  transaction::Transaction*) const {
51
58
  return std::make_unique<BoundExtraCreateNodeTableInfo>(primaryKeyName,
52
- copyVector(getProperties()));
59
+ copyVector(getProperties()), storage);
53
60
  }
54
61
 
55
62
  } // namespace catalog
@@ -95,6 +95,8 @@ void RelGroupCatalogEntry::serialize(Serializer& serializer) const {
95
95
  serializer.serializeValue(dstMultiplicity);
96
96
  serializer.writeDebuggingInfo("storageDirection");
97
97
  serializer.serializeValue(storageDirection);
98
+ serializer.writeDebuggingInfo("storage");
99
+ serializer.serializeValue(storage);
98
100
  serializer.writeDebuggingInfo("relTableInfos");
99
101
  serializer.serializeVector(relTableInfos);
100
102
  }
@@ -105,6 +107,7 @@ std::unique_ptr<RelGroupCatalogEntry> RelGroupCatalogEntry::deserialize(
105
107
  auto srcMultiplicity = RelMultiplicity::MANY;
106
108
  auto dstMultiplicity = RelMultiplicity::MANY;
107
109
  auto storageDirection = ExtendDirection::BOTH;
110
+ std::string storage;
108
111
  std::vector<RelTableCatalogInfo> relTableInfos;
109
112
  deserializer.validateDebuggingInfo(debuggingInfo, "srcMultiplicity");
110
113
  deserializer.deserializeValue(srcMultiplicity);
@@ -112,12 +115,15 @@ std::unique_ptr<RelGroupCatalogEntry> RelGroupCatalogEntry::deserialize(
112
115
  deserializer.deserializeValue(dstMultiplicity);
113
116
  deserializer.validateDebuggingInfo(debuggingInfo, "storageDirection");
114
117
  deserializer.deserializeValue(storageDirection);
118
+ deserializer.validateDebuggingInfo(debuggingInfo, "storage");
119
+ deserializer.deserializeValue(storage);
115
120
  deserializer.validateDebuggingInfo(debuggingInfo, "relTableInfos");
116
121
  deserializer.deserializeVector(relTableInfos);
117
122
  auto relGroupEntry = std::make_unique<RelGroupCatalogEntry>();
118
123
  relGroupEntry->srcMultiplicity = srcMultiplicity;
119
124
  relGroupEntry->dstMultiplicity = dstMultiplicity;
120
125
  relGroupEntry->storageDirection = storageDirection;
126
+ relGroupEntry->storage = storage;
121
127
  relGroupEntry->relTableInfos = relTableInfos;
122
128
  return relGroupEntry;
123
129
  }
@@ -167,6 +173,7 @@ std::unique_ptr<TableCatalogEntry> RelGroupCatalogEntry::copy() const {
167
173
  other->srcMultiplicity = srcMultiplicity;
168
174
  other->dstMultiplicity = dstMultiplicity;
169
175
  other->storageDirection = storageDirection;
176
+ other->storage = storage;
170
177
  other->relTableInfos = relTableInfos;
171
178
  other->copyFrom(*this);
172
179
  return other;
@@ -71,14 +71,15 @@ struct LBUG_API BoundExtraCreateTableInfo : BoundExtraCreateCatalogEntryInfo {
71
71
 
72
72
  struct BoundExtraCreateNodeTableInfo final : BoundExtraCreateTableInfo {
73
73
  std::string primaryKeyName;
74
+ std::string storage;
74
75
 
75
76
  BoundExtraCreateNodeTableInfo(std::string primaryKeyName,
76
- std::vector<PropertyDefinition> definitions)
77
+ std::vector<PropertyDefinition> definitions, std::string storage = "")
77
78
  : BoundExtraCreateTableInfo{std::move(definitions)},
78
- primaryKeyName{std::move(primaryKeyName)} {}
79
+ primaryKeyName{std::move(primaryKeyName)}, storage{std::move(storage)} {}
79
80
  BoundExtraCreateNodeTableInfo(const BoundExtraCreateNodeTableInfo& other)
80
81
  : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
81
- primaryKeyName{other.primaryKeyName} {}
82
+ primaryKeyName{other.primaryKeyName}, storage{other.storage} {}
82
83
 
83
84
  std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
84
85
  return std::make_unique<BoundExtraCreateNodeTableInfo>(*this);
@@ -90,18 +91,21 @@ struct BoundExtraCreateRelTableGroupInfo final : BoundExtraCreateTableInfo {
90
91
  common::RelMultiplicity dstMultiplicity;
91
92
  common::ExtendDirection storageDirection;
92
93
  std::vector<catalog::NodeTableIDPair> nodePairs;
94
+ std::string storage;
93
95
 
94
96
  explicit BoundExtraCreateRelTableGroupInfo(std::vector<PropertyDefinition> definitions,
95
97
  common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity,
96
- common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs)
98
+ common::ExtendDirection storageDirection, std::vector<catalog::NodeTableIDPair> nodePairs,
99
+ std::string storage = "")
97
100
  : BoundExtraCreateTableInfo{std::move(definitions)}, srcMultiplicity{srcMultiplicity},
98
101
  dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection},
99
- nodePairs{std::move(nodePairs)} {}
102
+ nodePairs{std::move(nodePairs)}, storage{std::move(storage)} {}
100
103
 
101
104
  BoundExtraCreateRelTableGroupInfo(const BoundExtraCreateRelTableGroupInfo& other)
102
105
  : BoundExtraCreateTableInfo{copyVector(other.propertyDefinitions)},
103
106
  srcMultiplicity{other.srcMultiplicity}, dstMultiplicity{other.dstMultiplicity},
104
- storageDirection{other.storageDirection}, nodePairs{other.nodePairs} {}
107
+ storageDirection{other.storageDirection}, nodePairs{other.nodePairs},
108
+ storage{other.storage} {}
105
109
 
106
110
  std::unique_ptr<BoundExtraCreateCatalogEntryInfo> copy() const override {
107
111
  return std::make_unique<BoundExtraCreateRelTableGroupInfo>(*this);
@@ -15,9 +15,9 @@ class LBUG_API NodeTableCatalogEntry final : public TableCatalogEntry {
15
15
 
16
16
  public:
17
17
  NodeTableCatalogEntry() = default;
18
- NodeTableCatalogEntry(std::string name, std::string primaryKeyName)
19
- : TableCatalogEntry{entryType_, std::move(name)},
20
- primaryKeyName{std::move(primaryKeyName)} {}
18
+ NodeTableCatalogEntry(std::string name, std::string primaryKeyName, std::string storage = "")
19
+ : TableCatalogEntry{entryType_, std::move(name)}, primaryKeyName{std::move(primaryKeyName)},
20
+ storage{std::move(storage)} {}
21
21
 
22
22
  bool isParent(common::table_id_t /*tableID*/) override { return false; }
23
23
  common::TableType getTableType() const override { return common::TableType::NODE; }
@@ -29,6 +29,7 @@ public:
29
29
  const binder::PropertyDefinition& getPrimaryKeyDefinition() const {
30
30
  return getProperty(primaryKeyName);
31
31
  }
32
+ const std::string& getStorage() const { return storage; }
32
33
 
33
34
  void renameProperty(const std::string& propertyName, const std::string& newName) override;
34
35
 
@@ -44,6 +45,7 @@ private:
44
45
 
45
46
  private:
46
47
  std::string primaryKeyName;
48
+ std::string storage;
47
49
  };
48
50
 
49
51
  } // namespace catalog
@@ -34,10 +34,10 @@ public:
34
34
  RelGroupCatalogEntry() = default;
35
35
  RelGroupCatalogEntry(std::string tableName, common::RelMultiplicity srcMultiplicity,
36
36
  common::RelMultiplicity dstMultiplicity, common::ExtendDirection storageDirection,
37
- std::vector<RelTableCatalogInfo> relTableInfos)
37
+ std::vector<RelTableCatalogInfo> relTableInfos, std::string storage = "")
38
38
  : TableCatalogEntry{type_, std::move(tableName)}, srcMultiplicity{srcMultiplicity},
39
39
  dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection},
40
- relTableInfos{std::move(relTableInfos)} {
40
+ relTableInfos{std::move(relTableInfos)}, storage{std::move(storage)} {
41
41
  propertyCollection =
42
42
  PropertyDefinitionCollection{1}; // Skip NBR_NODE_ID column as the first one.
43
43
  }
@@ -53,6 +53,7 @@ public:
53
53
  }
54
54
 
55
55
  common::ExtendDirection getStorageDirection() const { return storageDirection; }
56
+ const std::string& getStorage() const { return storage; }
56
57
 
57
58
  common::idx_t getNumRelTables() const { return relTableInfos.size(); }
58
59
  const std::vector<RelTableCatalogInfo>& getRelEntryInfos() const { return relTableInfos; }
@@ -97,6 +98,7 @@ private:
97
98
  // TODO(Guodong): Avoid using extend direction for storage direction
98
99
  common::ExtendDirection storageDirection = common::ExtendDirection::BOTH;
99
100
  std::vector<RelTableCatalogInfo> relTableInfos;
101
+ std::string storage;
100
102
  };
101
103
 
102
104
  } // namespace catalog
@@ -83,6 +83,7 @@ struct StorageConstants {
83
83
 
84
84
  struct TableOptionConstants {
85
85
  static constexpr char REL_STORAGE_DIRECTION_OPTION[] = "STORAGE_DIRECTION";
86
+ static constexpr char REL_STORAGE_OPTION[] = "STORAGE";
86
87
  };
87
88
 
88
89
  // Hash Index Configurations
@@ -36,8 +36,10 @@ struct CreateTableInfo {
36
36
 
37
37
  struct ExtraCreateNodeTableInfo final : ExtraCreateTableInfo {
38
38
  std::string pKName;
39
+ options_t options;
39
40
 
40
- explicit ExtraCreateNodeTableInfo(std::string pKName) : pKName{std::move(pKName)} {}
41
+ explicit ExtraCreateNodeTableInfo(std::string pKName, options_t options = {})
42
+ : pKName{std::move(pKName)}, options{std::move(options)} {}
41
43
  };
42
44
 
43
45
  struct ExtraCreateRelTableGroupInfo final : ExtraCreateTableInfo {
@@ -24,7 +24,7 @@ public:
24
24
  void initialize(const transaction::Transaction* transaction, storage::NodeTable* table,
25
25
  ScanNodeTableProgressSharedState& progressSharedState);
26
26
 
27
- void nextMorsel(storage::NodeTableScanState& scanState,
27
+ void nextMorsel(storage::TableScanState& scanState,
28
28
  ScanNodeTableProgressSharedState& progressSharedState);
29
29
 
30
30
  common::SemiMask* getSemiMask() const { return semiMask.get(); }
@@ -116,7 +116,7 @@ private:
116
116
 
117
117
  private:
118
118
  common::idx_t currentTableIdx;
119
- std::unique_ptr<storage::NodeTableScanState> scanState;
119
+ std::unique_ptr<storage::TableScanState> scanState;
120
120
  std::vector<ScanNodeTableInfo> tableInfos;
121
121
  std::vector<std::shared_ptr<ScanNodeTableSharedState>> sharedStates;
122
122
  std::shared_ptr<ScanNodeTableProgressSharedState> progressSharedState;
@@ -97,6 +97,7 @@ private:
97
97
  bool enableCompression;
98
98
  bool inMemory;
99
99
  std::vector<IndexType> registeredIndexTypes;
100
+ std::unordered_map<common::table_id_t, std::string> tableNameCache;
100
101
  };
101
102
 
102
103
  } // namespace storage
@@ -107,7 +107,7 @@ private:
107
107
 
108
108
  class StorageManager;
109
109
 
110
- class LBUG_API NodeTable final : public Table {
110
+ class LBUG_API NodeTable : public Table {
111
111
  public:
112
112
  NodeTable(const StorageManager* storageManager,
113
113
  const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* mm);
@@ -119,6 +119,11 @@ public:
119
119
  void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
120
120
  common::table_id_t tableID, common::offset_t startOffset) const;
121
121
 
122
+ // Virtual method for operator-level scan coordination initialization
123
+ // Called once per scan operation (not per scan state)
124
+ virtual void initializeScanCoordination(
125
+ [[maybe_unused]] const transaction::Transaction* transaction) {}
126
+
122
127
  bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
123
128
  template<bool lock = true>
124
129
  bool lookup(const transaction::Transaction* transaction, const TableScanState& scanState) const;
@@ -0,0 +1,103 @@
1
+ #pragma once
2
+
3
+ #include <mutex>
4
+ #include <vector>
5
+
6
+ #include "catalog/catalog_entry/node_table_catalog_entry.h"
7
+ #include "common/exception/runtime.h"
8
+ #include "common/types/internal_id_util.h"
9
+ #include "common/types/value/value.h"
10
+ #include "processor/operator/persistent/reader/parquet/parquet_reader.h"
11
+ #include "storage/table/node_table.h"
12
+
13
+ namespace lbug {
14
+ namespace storage {
15
+
16
+ struct ParquetNodeTableScanState final : NodeTableScanState { // Scan state for a parquet-backed node table: NodeTableScanState plus parquet reader handles and bookkeeping fields.
17
+ std::unique_ptr<processor::ParquetReader> parquetReader; // not created by the constructor below, so it starts out null
18
+ std::unique_ptr<processor::ParquetReaderScanState> parquetScanState; // allocated in the constructor
19
+ bool initialized = false; // NOTE(review): presumably set once the reader has been opened; the writer is not in this header
20
+ bool scanCompleted = false; // Track if this scan state has finished reading
21
+ bool dataRead = false; // NOTE(review): presumably marks that allData has been filled; confirm in parquet_node_table.cpp
22
+ std::vector<std::vector<std::unique_ptr<common::Value>>> allData; // NOTE(review): looks like a materialized value buffer; row/column orientation is not shown here
23
+ size_t totalRows = 0;
24
+ size_t nextRowToDistribute = 0;
25
+ uint64_t lastQueryId = 0; // Track the last query ID to detect new queries
26
+
27
+ ParquetNodeTableScanState([[maybe_unused]] MemoryManager& mm, common::ValueVector* nodeIDVector, // mm is accepted but not used by this constructor
28
+ std::vector<common::ValueVector*> outputVectors,
29
+ std::shared_ptr<common::DataChunkState> outChunkState)
30
+ : NodeTableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { // the base is built without the memory manager
31
+ parquetScanState = std::make_unique<processor::ParquetReaderScanState>(); // only the reader-side scan state is created eagerly
32
+ }
33
+ };
34
+
35
+ // Shared state to coordinate row group assignment across parallel scan states
36
+ struct ParquetNodeTableSharedState { // Mutex-guarded cursor that hands out row-group indices one at a time.
37
+ std::mutex mtx; // locked by both member functions below
38
+ common::node_group_idx_t currentRowGroupIdx = 0; // next index to hand out
39
+ common::node_group_idx_t numRowGroups = 0; // exclusive upper bound for currentRowGroupIdx
40
+
41
+ void reset(common::node_group_idx_t totalRowGroups) { // Restarts distribution at index 0 with a new total.
42
+ std::lock_guard<std::mutex> lock(mtx);
43
+ currentRowGroupIdx = 0;
44
+ numRowGroups = totalRowGroups;
45
+ }
46
+
47
+ bool getNextRowGroup(common::node_group_idx_t& assignedRowGroupIdx) { // Claims the next index into the out-parameter; returns false once every index has been handed out.
48
+ std::lock_guard<std::mutex> lock(mtx);
49
+ if (currentRowGroupIdx < numRowGroups) {
50
+ assignedRowGroupIdx = currentRowGroupIdx++; // post-increment: the caller receives the value before the bump
51
+ return true;
52
+ }
53
+ return false; // exhausted: assignedRowGroupIdx is left unmodified
54
+ }
55
+ };
56
+
57
+ class ParquetNodeTable final : public NodeTable { // NodeTable subclass for parquet-backed node tables; every write entry point declared here throws.
58
+ public:
59
+ ParquetNodeTable(const StorageManager* storageManager,
60
+ const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager);
61
+
62
+ void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
63
+ bool resetCachedBoundNodeSelVec = true) const override;
64
+
65
+ // Override to reset shared state for row group coordination at the start of each scan operation
66
+ void initializeScanCoordination(const transaction::Transaction* transaction) override; // replaces the empty default body that NodeTable declares for this hook
67
+
68
+ bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
69
+
70
+ // For parquet-backed tables, we don't support modifications
71
+ void insert([[maybe_unused]] transaction::Transaction* transaction,
72
+ [[maybe_unused]] TableInsertState& insertState) override {
73
+ throw common::RuntimeException("Cannot insert into parquet-backed node table"); // always throws; both arguments are ignored
74
+ }
75
+ void update([[maybe_unused]] transaction::Transaction* transaction,
76
+ [[maybe_unused]] TableUpdateState& updateState) override {
77
+ throw common::RuntimeException("Cannot update parquet-backed node table"); // always throws; both arguments are ignored
78
+ }
79
+ bool delete_([[maybe_unused]] transaction::Transaction* transaction,
80
+ [[maybe_unused]] TableDeleteState& deleteState) override {
81
+ throw common::RuntimeException("Cannot delete from parquet-backed node table"); // always throws; both arguments are ignored
82
+ return false; // NOTE(review): unreachable after the unconditional throw above
83
+ }
84
+
85
+ common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
86
+
87
+ const std::string& getParquetFilePath() const { return parquetFilePath; } // returns a reference to the member, valid only while this table object lives
88
+
89
+ // Note: Cannot override getNumCommittedNodeGroups since it's not virtual in base class
90
+ // Will need a different approach
91
+
92
+ private:
93
+ std::string parquetFilePath; // NOTE(review): presumably derived from the catalog entry's storage string; the derivation lives in the .cpp
94
+ const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry; // raw pointer to the catalog entry; ownership is not expressed by this header
95
+ mutable std::unique_ptr<ParquetNodeTableSharedState> sharedState; // mutable, so const member functions are able to assign or reset it
96
+
97
+ void initializeParquetReader(transaction::Transaction* transaction) const;
98
+ void initParquetScanForRowGroup(transaction::Transaction* transaction,
99
+ ParquetNodeTableScanState& scanState) const;
100
+ };
101
+
102
+ } // namespace storage
103
+ } // namespace lbug
@@ -0,0 +1,91 @@
1
+ #pragma once
2
+
3
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
4
+ #include "common/exception/runtime.h"
5
+ #include "common/types/internal_id_util.h"
6
+ #include "processor/operator/persistent/reader/parquet/parquet_reader.h"
7
+ #include "storage/table/rel_table.h"
8
+ #include "transaction/transaction.h"
9
+
10
+ namespace lbug {
11
+ namespace storage {
12
+
13
+ struct ParquetRelTableScanState final : RelTableScanState { // Scan state for a parquet-backed rel table: RelTableScanState plus parquet readers and a row-group cursor.
14
+ std::unique_ptr<processor::ParquetReaderScanState> parquetScanState; // allocated in the constructor
15
+ // For CSR format: store matching rows for current bound node
16
+ size_t nextRowToProcess = 0; // NOTE(review): this is a counter, not a store of rows; the comment above may be stale
17
+
18
+ // Row group range for morsel-driven parallelism
19
+ uint64_t startRowGroup = 0;
20
+ uint64_t endRowGroup = 0; // NOTE(review): inclusive vs exclusive is not visible in this header; confirm in parquet_rel_table.cpp
21
+ uint64_t currentRowGroup = 0;
22
+
23
+ // Per-scan-state readers for thread safety
24
+ std::unique_ptr<processor::ParquetReader> indicesReader; // not created by the constructor below, so it starts out null
25
+ std::unique_ptr<processor::ParquetReader> indptrReader; // not created by the constructor below, so it starts out null
26
+
27
+ ParquetRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector,
28
+ std::vector<common::ValueVector*> outputVectors,
29
+ std::shared_ptr<common::DataChunkState> outChunkState)
30
+ : RelTableScanState{mm, nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { // all four arguments are forwarded to the base
31
+ parquetScanState = std::make_unique<processor::ParquetReaderScanState>(); // only the reader-side scan state is created eagerly
32
+ }
33
+
34
+ void setToTable(const transaction::Transaction* transaction, Table* table_, // overrides RelTableScanState::setToTable; defined out of line
35
+ std::vector<common::column_id_t> columnIDs_,
36
+ std::vector<ColumnPredicateSet> columnPredicateSets_,
37
+ common::RelDataDirection direction_) override;
38
+ };
39
+
40
+ class ParquetRelTable final : public RelTable { // RelTable subclass for parquet-backed rel tables; every write entry point declared here throws.
41
+ public:
42
+ ParquetRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID,
43
+ common::table_id_t toTableID, const StorageManager* storageManager,
44
+ MemoryManager* memoryManager);
45
+
46
+ void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
47
+ bool resetCachedBoundNodeSelVec = true) const override;
48
+
49
+ bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
50
+
51
+ // For parquet-backed tables, we don't support modifications
52
+ void insert([[maybe_unused]] transaction::Transaction* transaction,
53
+ [[maybe_unused]] TableInsertState& insertState) override {
54
+ throw common::RuntimeException("Cannot insert into parquet-backed rel table"); // always throws; both arguments are ignored
55
+ }
56
+ void update([[maybe_unused]] transaction::Transaction* transaction,
57
+ [[maybe_unused]] TableUpdateState& updateState) override {
58
+ throw common::RuntimeException("Cannot update parquet-backed rel table"); // always throws; both arguments are ignored
59
+ }
60
+ bool delete_([[maybe_unused]] transaction::Transaction* transaction,
61
+ [[maybe_unused]] TableDeleteState& deleteState) override {
62
+ throw common::RuntimeException("Cannot delete from parquet-backed rel table"); // always throws; both arguments are ignored
63
+ return false; // NOTE(review): unreachable after the unconditional throw above
64
+ }
65
+
66
+ common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
67
+
68
+ private:
69
+ catalog::RelGroupCatalogEntry* relGroupEntry; // Raw pointer (not a reference) to the rel group catalog entry describing this table's schema
70
+ std::string indicesFilePath;
71
+ std::string indptrFilePath;
72
+ mutable std::unique_ptr<processor::ParquetReader> indicesReader; // table-level reader; ParquetRelTableScanState also carries its own indicesReader
73
+ mutable std::unique_ptr<processor::ParquetReader> indptrReader; // table-level reader; ParquetRelTableScanState also carries its own indptrReader
74
+ mutable std::mutex parquetReaderMutex; // NOTE(review): this header does not include <mutex> directly; confirm it arrives transitively
75
+ mutable std::mutex indptrDataMutex; // NOTE(review): presumably guards indptrData below; confirm in parquet_rel_table.cpp
76
+ mutable std::vector<common::offset_t> indptrData; // Cached indptr data for CSR format
77
+
78
+ void initializeParquetReaders(transaction::Transaction* transaction) const;
79
+ void initializeIndptrReader(transaction::Transaction* transaction) const;
80
+ void loadIndptrData(transaction::Transaction* transaction) const;
81
+ bool scanInternalByRowGroups(transaction::Transaction* transaction,
82
+ ParquetRelTableScanState& parquetRelScanState);
83
+ bool scanRowGroupForBoundNodes(transaction::Transaction* transaction,
84
+ ParquetRelTableScanState& parquetRelScanState,
85
+ const std::vector<uint64_t>& rowGroupsToProcess,
86
+ const std::unordered_set<common::offset_t>& boundNodeOffsets); // NOTE(review): <unordered_set> is not included directly by this header either
87
+ common::offset_t findSourceNodeForRow(common::offset_t globalRowIdx) const;
88
+ };
89
+
90
+ } // namespace storage
91
+ } // namespace lbug
@@ -48,7 +48,7 @@ struct RelTableScanState : TableScanState {
48
48
  nodeGroupScanState = std::make_unique<CSRNodeGroupScanState>();
49
49
  }
50
50
 
51
- void setToTable(const transaction::Transaction* transaction, Table* table_,
51
+ virtual void setToTable(const transaction::Transaction* transaction, Table* table_,
52
52
  std::vector<common::column_id_t> columnIDs_,
53
53
  std::vector<ColumnPredicateSet> columnPredicateSets_,
54
54
  common::RelDataDirection direction_) override;
@@ -138,7 +138,7 @@ struct LBUG_API RelTableDeleteState final : TableDeleteState {
138
138
  relIDVector{relIDVector}, detachDeleteDirection{detachDeleteDirection} {}
139
139
  };
140
140
 
141
- class LBUG_API RelTable final : public Table {
141
+ class LBUG_API RelTable : public Table {
142
142
  public:
143
143
  using rel_multiplicity_constraint_throw_func_t =
144
144
  std::function<void(const std::string&, common::offset_t, common::RelDataDirection)>;
@@ -130,6 +130,8 @@ public:
130
130
  return getMinUncommittedNodeOffset(tableID) + localRowIdx;
131
131
  }
132
132
 
133
+ main::ClientContext* getClientContext() const { return clientContext; }
134
+
133
135
  void pushCreateDropCatalogEntry(catalog::CatalogSet& catalogSet,
134
136
  catalog::CatalogEntry& catalogEntry, bool isInternal, bool skipLoggingToWAL = false);
135
137
  void pushAlterCatalogEntry(catalog::CatalogSet& catalogSet, catalog::CatalogEntry& catalogEntry,