lbug 0.12.3-dev.2 → 0.12.3-dev.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/README.md +2 -6
  2. package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
  3. package/lbug-source/CMakeLists.txt +15 -6
  4. package/lbug-source/Makefile +1 -2
  5. package/lbug-source/README.md +2 -6
  6. package/lbug-source/benchmark/serializer.py +24 -3
  7. package/lbug-source/dataset/demo-db/csv/copy.cypher +4 -4
  8. package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
  9. package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
  10. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
  11. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
  12. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
  13. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
  14. package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
  15. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
  16. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
  17. package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
  18. package/lbug-source/dataset/demo-db/parquet/copy.cypher +4 -4
  19. package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
  20. package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
  21. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  22. package/lbug-source/scripts/generate_binary_demo.sh +1 -1
  23. package/lbug-source/src/antlr4/Cypher.g4 +1 -1
  24. package/lbug-source/src/binder/bind/bind_ddl.cpp +23 -13
  25. package/lbug-source/src/catalog/catalog.cpp +5 -4
  26. package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
  27. package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +7 -0
  28. package/lbug-source/src/function/function_collection.cpp +2 -1
  29. package/lbug-source/src/function/table/CMakeLists.txt +1 -0
  30. package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
  31. package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +10 -6
  32. package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
  33. package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +4 -2
  34. package/lbug-source/src/include/common/constants.h +1 -0
  35. package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
  36. package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
  37. package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
  38. package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
  39. package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
  40. package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
  41. package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
  42. package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
  43. package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
  44. package/lbug-source/src/include/processor/plan_mapper.h +2 -0
  45. package/lbug-source/src/include/storage/storage_manager.h +1 -0
  46. package/lbug-source/src/include/storage/storage_version_info.h +1 -7
  47. package/lbug-source/src/include/storage/table/node_table.h +6 -1
  48. package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
  49. package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
  50. package/lbug-source/src/include/storage/table/rel_table.h +2 -2
  51. package/lbug-source/src/include/transaction/transaction.h +2 -0
  52. package/lbug-source/src/main/query_result/materialized_query_result.cpp +2 -2
  53. package/lbug-source/src/optimizer/CMakeLists.txt +1 -0
  54. package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
  55. package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
  56. package/lbug-source/src/optimizer/optimizer.cpp +6 -0
  57. package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
  58. package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
  59. package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
  60. package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
  61. package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
  62. package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
  63. package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
  64. package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
  65. package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
  66. package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
  67. package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
  68. package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
  69. package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
  70. package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +12 -2
  71. package/lbug-source/src/storage/storage_manager.cpp +37 -6
  72. package/lbug-source/src/storage/table/CMakeLists.txt +2 -0
  73. package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
  74. package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
  75. package/lbug-source/test/api/api_test.cpp +18 -0
  76. package/lbug-source/test/common/string_format.cpp +9 -1
  77. package/lbug-source/test/copy/copy_test.cpp +4 -4
  78. package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
  79. package/lbug-source/test/include/test_runner/test_group.h +11 -1
  80. package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
  81. package/lbug-source/test/runner/e2e_test.cpp +7 -1
  82. package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
  83. package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
  84. package/lbug-source/test/test_helper/test_helper.cpp +33 -1
  85. package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
  86. package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
  87. package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
  88. package/lbug-source/test/test_runner/test_parser.cpp +3 -0
  89. package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
  90. package/lbug-source/test/transaction/transaction_test.cpp +19 -15
  91. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2761 -2701
  92. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -0
  93. package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
  94. package/lbug-source/tools/shell/embedded_shell.cpp +78 -3
  95. package/lbug-source/tools/shell/include/embedded_shell.h +2 -0
  96. package/lbug-source/tools/shell/linenoise.cpp +3 -3
  97. package/lbug-source/tools/shell/test/test_helper.py +1 -1
  98. package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
  99. package/lbug-source/tools/shell/test/test_shell_commands.py +19 -0
  100. package/package.json +1 -1
  101. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  102. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  103. package/prebuilt/lbugjs-linux-x64.node +0 -0
  104. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -0,0 +1,62 @@
+ #pragma once
+
+ #include "common/enums/rel_direction.h"
+ #include "processor/operator/physical_operator.h"
+ #include "storage/table/node_table.h"
+ #include "storage/table/rel_table.h"
+
+ namespace lbug {
+ namespace processor {
+
+ struct CountRelTablePrintInfo final : OPPrintInfo {
+ std::string relTableName;
+
+ explicit CountRelTablePrintInfo(std::string relTableName)
+ : relTableName{std::move(relTableName)} {}
+
+ std::string toString() const override { return "Table: " + relTableName; }
+
+ std::unique_ptr<OPPrintInfo> copy() const override {
+ return std::make_unique<CountRelTablePrintInfo>(relTableName);
+ }
+ };
+
+ /**
+ * CountRelTable is a source operator that counts edges in a rel table
+ * by scanning through all bound nodes and counting their edges.
+ * It creates its own internal vectors for node scanning (not exposed in ResultSet).
+ */
+ class CountRelTable final : public PhysicalOperator {
+ static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::COUNT_REL_TABLE;
+
+ public:
+ CountRelTable(std::vector<storage::NodeTable*> nodeTables,
+ std::vector<storage::RelTable*> relTables, common::RelDataDirection direction,
+ DataPos countOutputPos, physical_op_id id, std::unique_ptr<OPPrintInfo> printInfo)
+ : PhysicalOperator{type_, id, std::move(printInfo)}, nodeTables{std::move(nodeTables)},
+ relTables{std::move(relTables)}, direction{direction}, countOutputPos{countOutputPos} {}
+
+ bool isSource() const override { return true; }
+ bool isParallel() const override { return false; }
+
+ void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override;
+
+ bool getNextTuplesInternal(ExecutionContext* context) override;
+
+ std::unique_ptr<PhysicalOperator> copy() override {
+ return std::make_unique<CountRelTable>(nodeTables, relTables, direction, countOutputPos, id,
+ printInfo->copy());
+ }
+
+ private:
+ std::vector<storage::NodeTable*> nodeTables;
+ std::vector<storage::RelTable*> relTables;
+ common::RelDataDirection direction;
+ DataPos countOutputPos;
+ common::ValueVector* countVector;
+ bool hasExecuted;
+ common::row_idx_t totalCount;
+ };
+
+ } // namespace processor
+ } // namespace lbug
@@ -24,7 +24,7 @@ public:
  void initialize(const transaction::Transaction* transaction, storage::NodeTable* table,
  ScanNodeTableProgressSharedState& progressSharedState);

- void nextMorsel(storage::NodeTableScanState& scanState,
+ void nextMorsel(storage::TableScanState& scanState,
  ScanNodeTableProgressSharedState& progressSharedState);

  common::SemiMask* getSemiMask() const { return semiMask.get(); }
@@ -116,7 +116,7 @@ private:

  private:
  common::idx_t currentTableIdx;
- std::unique_ptr<storage::NodeTableScanState> scanState;
+ std::unique_ptr<storage::TableScanState> scanState;
  std::vector<ScanNodeTableInfo> tableInfos;
  std::vector<std::shared_ptr<ScanNodeTableSharedState>> sharedStates;
  std::shared_ptr<ScanNodeTableProgressSharedState> progressSharedState;
@@ -90,6 +90,8 @@ public:
  std::unique_ptr<PhysicalOperator> mapCopyRelFrom(
  const planner::LogicalOperator* logicalOperator);
  std::unique_ptr<PhysicalOperator> mapCopyTo(const planner::LogicalOperator* logicalOperator);
+ std::unique_ptr<PhysicalOperator> mapCountRelTable(
+ const planner::LogicalOperator* logicalOperator);
  std::unique_ptr<PhysicalOperator> mapCreateMacro(
  const planner::LogicalOperator* logicalOperator);
  std::unique_ptr<PhysicalOperator> mapCreateSequence(
@@ -97,6 +97,7 @@ private:
  bool enableCompression;
  bool inMemory;
  std::vector<IndexType> registeredIndexTypes;
+ std::unordered_map<common::table_id_t, std::string> tableNameCache;
  };

  } // namespace storage
@@ -13,13 +13,7 @@ using storage_version_t = uint64_t;

  struct StorageVersionInfo {
  static std::unordered_map<std::string, storage_version_t> getStorageVersionInfo() {
- return {{"0.11.1", 39}, {"0.11.0", 39}, {"0.10.0", 38}, {"0.9.0", 37}, {"0.8.0", 36},
- {"0.7.1.1", 35}, {"0.7.0", 34}, {"0.6.0.6", 33}, {"0.6.0.5", 32}, {"0.6.0.2", 31},
- {"0.6.0.1", 31}, {"0.6.0", 28}, {"0.5.0", 28}, {"0.4.2", 27}, {"0.4.1", 27},
- {"0.4.0", 27}, {"0.3.2", 26}, {"0.3.1", 26}, {"0.3.0", 26}, {"0.2.1", 25},
- {"0.2.0", 25}, {"0.1.0", 24}, {"0.0.12.3", 24}, {"0.0.12.2", 24}, {"0.0.12.1", 24},
- {"0.0.12", 23}, {"0.0.11", 23}, {"0.0.10", 23}, {"0.0.9", 23}, {"0.0.8", 17},
- {"0.0.7", 15}, {"0.0.6", 9}, {"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3", 1}};
+ return {{"0.12.0", 40}, {"0.12.2", 40}};
  }

  static LBUG_API storage_version_t getStorageVersion();
@@ -107,7 +107,7 @@ private:

  class StorageManager;

- class LBUG_API NodeTable final : public Table {
+ class LBUG_API NodeTable : public Table {
  public:
  NodeTable(const StorageManager* storageManager,
  const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* mm);
@@ -119,6 +119,11 @@ public:
  void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
  common::table_id_t tableID, common::offset_t startOffset) const;

+ // Virtual method for operator-level scan coordination initialization
+ // Called once per scan operation (not per scan state)
+ virtual void initializeScanCoordination(
+ [[maybe_unused]] const transaction::Transaction* transaction) {}
+
  bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
  template<bool lock = true>
  bool lookup(const transaction::Transaction* transaction, const TableScanState& scanState) const;
@@ -0,0 +1,103 @@
+ #pragma once
+
+ #include <mutex>
+ #include <vector>
+
+ #include "catalog/catalog_entry/node_table_catalog_entry.h"
+ #include "common/exception/runtime.h"
+ #include "common/types/internal_id_util.h"
+ #include "common/types/value/value.h"
+ #include "processor/operator/persistent/reader/parquet/parquet_reader.h"
+ #include "storage/table/node_table.h"
+
+ namespace lbug {
+ namespace storage {
+
+ struct ParquetNodeTableScanState final : NodeTableScanState {
+ std::unique_ptr<processor::ParquetReader> parquetReader;
+ std::unique_ptr<processor::ParquetReaderScanState> parquetScanState;
+ bool initialized = false;
+ bool scanCompleted = false; // Track if this scan state has finished reading
+ bool dataRead = false;
+ std::vector<std::vector<std::unique_ptr<common::Value>>> allData;
+ size_t totalRows = 0;
+ size_t nextRowToDistribute = 0;
+ uint64_t lastQueryId = 0; // Track the last query ID to detect new queries
+
+ ParquetNodeTableScanState([[maybe_unused]] MemoryManager& mm, common::ValueVector* nodeIDVector,
+ std::vector<common::ValueVector*> outputVectors,
+ std::shared_ptr<common::DataChunkState> outChunkState)
+ : NodeTableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} {
+ parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
+ }
+ };
+
+ // Shared state to coordinate row group assignment across parallel scan states
+ struct ParquetNodeTableSharedState {
+ std::mutex mtx;
+ common::node_group_idx_t currentRowGroupIdx = 0;
+ common::node_group_idx_t numRowGroups = 0;
+
+ void reset(common::node_group_idx_t totalRowGroups) {
+ std::lock_guard<std::mutex> lock(mtx);
+ currentRowGroupIdx = 0;
+ numRowGroups = totalRowGroups;
+ }
+
+ bool getNextRowGroup(common::node_group_idx_t& assignedRowGroupIdx) {
+ std::lock_guard<std::mutex> lock(mtx);
+ if (currentRowGroupIdx < numRowGroups) {
+ assignedRowGroupIdx = currentRowGroupIdx++;
+ return true;
+ }
+ return false;
+ }
+ };
+
+ class ParquetNodeTable final : public NodeTable {
+ public:
+ ParquetNodeTable(const StorageManager* storageManager,
+ const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager);
+
+ void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
+ bool resetCachedBoundNodeSelVec = true) const override;
+
+ // Override to reset shared state for row group coordination at the start of each scan operation
+ void initializeScanCoordination(const transaction::Transaction* transaction) override;
+
+ bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
+
+ // For parquet-backed tables, we don't support modifications
+ void insert([[maybe_unused]] transaction::Transaction* transaction,
+ [[maybe_unused]] TableInsertState& insertState) override {
+ throw common::RuntimeException("Cannot insert into parquet-backed node table");
+ }
+ void update([[maybe_unused]] transaction::Transaction* transaction,
+ [[maybe_unused]] TableUpdateState& updateState) override {
+ throw common::RuntimeException("Cannot update parquet-backed node table");
+ }
+ bool delete_([[maybe_unused]] transaction::Transaction* transaction,
+ [[maybe_unused]] TableDeleteState& deleteState) override {
+ throw common::RuntimeException("Cannot delete from parquet-backed node table");
+ return false;
+ }
+
+ common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
+
+ const std::string& getParquetFilePath() const { return parquetFilePath; }
+
+ // Note: Cannot override getNumCommittedNodeGroups since it's not virtual in base class
+ // Will need a different approach
+
+ private:
+ std::string parquetFilePath;
+ const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry;
+ mutable std::unique_ptr<ParquetNodeTableSharedState> sharedState;
+
+ void initializeParquetReader(transaction::Transaction* transaction) const;
+ void initParquetScanForRowGroup(transaction::Transaction* transaction,
+ ParquetNodeTableScanState& scanState) const;
+ };
+
+ } // namespace storage
+ } // namespace lbug
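Note (illustrative, not part of this diff): the insert/update/delete overrides above make a parquet-backed node table effectively read-only, so ordinary Cypher mutations against such a table are expected to surface the RuntimeExceptions shown. A rough sketch, using the demo dataset's User label; the name property is hypothetical:

// Assuming User is backed by the graph-std parquet files:
CREATE (:User {name: 'Alice'});     // "Cannot insert into parquet-backed node table"
MATCH (u:User) SET u.name = 'Bob';  // "Cannot update parquet-backed node table"
MATCH (u:User) DELETE u;            // "Cannot delete from parquet-backed node table"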
@@ -0,0 +1,91 @@
+ #pragma once
+
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
+ #include "common/exception/runtime.h"
+ #include "common/types/internal_id_util.h"
+ #include "processor/operator/persistent/reader/parquet/parquet_reader.h"
+ #include "storage/table/rel_table.h"
+ #include "transaction/transaction.h"
+
+ namespace lbug {
+ namespace storage {
+
+ struct ParquetRelTableScanState final : RelTableScanState {
+ std::unique_ptr<processor::ParquetReaderScanState> parquetScanState;
+ // For CSR format: store matching rows for current bound node
+ size_t nextRowToProcess = 0;
+
+ // Row group range for morsel-driven parallelism
+ uint64_t startRowGroup = 0;
+ uint64_t endRowGroup = 0;
+ uint64_t currentRowGroup = 0;
+
+ // Per-scan-state readers for thread safety
+ std::unique_ptr<processor::ParquetReader> indicesReader;
+ std::unique_ptr<processor::ParquetReader> indptrReader;
+
+ ParquetRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector,
+ std::vector<common::ValueVector*> outputVectors,
+ std::shared_ptr<common::DataChunkState> outChunkState)
+ : RelTableScanState{mm, nodeIDVector, std::move(outputVectors), std::move(outChunkState)} {
+ parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
+ }
+
+ void setToTable(const transaction::Transaction* transaction, Table* table_,
+ std::vector<common::column_id_t> columnIDs_,
+ std::vector<ColumnPredicateSet> columnPredicateSets_,
+ common::RelDataDirection direction_) override;
+ };
+
+ class ParquetRelTable final : public RelTable {
+ public:
+ ParquetRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID,
+ common::table_id_t toTableID, const StorageManager* storageManager,
+ MemoryManager* memoryManager);
+
+ void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
+ bool resetCachedBoundNodeSelVec = true) const override;
+
+ bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
+
+ // For parquet-backed tables, we don't support modifications
+ void insert([[maybe_unused]] transaction::Transaction* transaction,
+ [[maybe_unused]] TableInsertState& insertState) override {
+ throw common::RuntimeException("Cannot insert into parquet-backed rel table");
+ }
+ void update([[maybe_unused]] transaction::Transaction* transaction,
+ [[maybe_unused]] TableUpdateState& updateState) override {
+ throw common::RuntimeException("Cannot update parquet-backed rel table");
+ }
+ bool delete_([[maybe_unused]] transaction::Transaction* transaction,
+ [[maybe_unused]] TableDeleteState& deleteState) override {
+ throw common::RuntimeException("Cannot delete from parquet-backed rel table");
+ return false;
+ }
+
+ common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
+
+ private:
+ catalog::RelGroupCatalogEntry* relGroupEntry; // Store reference to table schema
+ std::string indicesFilePath;
+ std::string indptrFilePath;
+ mutable std::unique_ptr<processor::ParquetReader> indicesReader;
+ mutable std::unique_ptr<processor::ParquetReader> indptrReader;
+ mutable std::mutex parquetReaderMutex;
+ mutable std::mutex indptrDataMutex;
+ mutable std::vector<common::offset_t> indptrData; // Cached indptr data for CSR format
+
+ void initializeParquetReaders(transaction::Transaction* transaction) const;
+ void initializeIndptrReader(transaction::Transaction* transaction) const;
+ void loadIndptrData(transaction::Transaction* transaction) const;
+ bool scanInternalByRowGroups(transaction::Transaction* transaction,
+ ParquetRelTableScanState& parquetRelScanState);
+ bool scanRowGroupForBoundNodes(transaction::Transaction* transaction,
+ ParquetRelTableScanState& parquetRelScanState,
+ const std::vector<uint64_t>& rowGroupsToProcess,
+ const std::unordered_set<common::offset_t>& boundNodeOffsets);
+ common::offset_t findSourceNodeForRow(common::offset_t globalRowIdx) const;
+ };
+
+ } // namespace storage
+ } // namespace lbug
@@ -48,7 +48,7 @@ struct RelTableScanState : TableScanState {
  nodeGroupScanState = std::make_unique<CSRNodeGroupScanState>();
  }

- void setToTable(const transaction::Transaction* transaction, Table* table_,
+ virtual void setToTable(const transaction::Transaction* transaction, Table* table_,
  std::vector<common::column_id_t> columnIDs_,
  std::vector<ColumnPredicateSet> columnPredicateSets_,
  common::RelDataDirection direction_) override;
@@ -138,7 +138,7 @@ struct LBUG_API RelTableDeleteState final : TableDeleteState {
  relIDVector{relIDVector}, detachDeleteDirection{detachDeleteDirection} {}
  };

- class LBUG_API RelTable final : public Table {
+ class LBUG_API RelTable : public Table {
  public:
  using rel_multiplicity_constraint_throw_func_t =
  std::function<void(const std::string&, common::offset_t, common::RelDataDirection)>;
@@ -130,6 +130,8 @@ public:
  return getMinUncommittedNodeOffset(tableID) + localRowIdx;
  }

+ main::ClientContext* getClientContext() const { return clientContext; }
+
  void pushCreateDropCatalogEntry(catalog::CatalogSet& catalogSet,
  catalog::CatalogEntry& catalogEntry, bool isInternal, bool skipLoggingToWAL = false);
  void pushAlterCatalogEntry(catalog::CatalogSet& catalogSet, catalog::CatalogEntry& catalogEntry,
@@ -79,8 +79,8 @@ std::string MaterializedQueryResult::toString() const {
  result += "\n";
  auto tuple_ = FlatTuple(this->columnTypes);
  auto iterator_ = FactorizedTableIterator(*table);
- while (iterator->hasNext()) {
- iterator->getNext(tuple_);
+ while (iterator_.hasNext()) {
+ iterator_.getNext(tuple_);
  result += tuple_.toString();
  }
  return result;
@@ -4,6 +4,7 @@ add_library(lbug_optimizer
  agg_key_dependency_optimizer.cpp
  cardinality_updater.cpp
  correlated_subquery_unnest_solver.cpp
+ count_rel_table_optimizer.cpp
  factorization_rewriter.cpp
  filter_push_down_optimizer.cpp
  logical_operator_collector.cpp
@@ -0,0 +1,217 @@
+ #include "optimizer/count_rel_table_optimizer.h"
+
+ #include "binder/expression/aggregate_function_expression.h"
+ #include "binder/expression/node_expression.h"
+ #include "catalog/catalog_entry/node_table_id_pair.h"
+ #include "function/aggregate/count_star.h"
+ #include "main/client_context.h"
+ #include "planner/operator/extend/logical_extend.h"
+ #include "planner/operator/logical_aggregate.h"
+ #include "planner/operator/logical_projection.h"
+ #include "planner/operator/scan/logical_count_rel_table.h"
+ #include "planner/operator/scan/logical_scan_node_table.h"
+
+ using namespace lbug::common;
+ using namespace lbug::planner;
+ using namespace lbug::binder;
+ using namespace lbug::catalog;
+
+ namespace lbug {
+ namespace optimizer {
+
+ void CountRelTableOptimizer::rewrite(LogicalPlan* plan) {
+ visitOperator(plan->getLastOperator());
+ }
+
+ std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitOperator(
+ const std::shared_ptr<LogicalOperator>& op) {
+ // bottom-up traversal
+ for (auto i = 0u; i < op->getNumChildren(); ++i) {
+ op->setChild(i, visitOperator(op->getChild(i)));
+ }
+ auto result = visitOperatorReplaceSwitch(op);
+ result->computeFlatSchema();
+ return result;
+ }
+
+ bool CountRelTableOptimizer::isSimpleCountStar(LogicalOperator* op) const {
+ if (op->getOperatorType() != LogicalOperatorType::AGGREGATE) {
+ return false;
+ }
+ auto& aggregate = op->constCast<LogicalAggregate>();
+
+ // Must have no keys (i.e., a simple aggregate without GROUP BY)
+ if (aggregate.hasKeys()) {
+ return false;
+ }
+
+ // Must have exactly one aggregate expression
+ auto aggregates = aggregate.getAggregates();
+ if (aggregates.size() != 1) {
+ return false;
+ }
+
+ // Must be COUNT_STAR
+ auto& aggExpr = aggregates[0];
+ if (aggExpr->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
+ return false;
+ }
+ auto& aggFuncExpr = aggExpr->constCast<AggregateFunctionExpression>();
+ if (aggFuncExpr.getFunction().name != function::CountStarFunction::name) {
+ return false;
+ }
+
+ // COUNT_STAR should not be DISTINCT (conceptually it doesn't make sense)
+ if (aggFuncExpr.isDistinct()) {
+ return false;
+ }
+
+ return true;
+ }
+
+ bool CountRelTableOptimizer::canOptimize(LogicalOperator* aggregate) const {
+ // Pattern we're looking for:
+ // AGGREGATE (COUNT_STAR, no keys)
+ // -> PROJECTION (empty expressions or pass-through)
+ // -> EXTEND (single rel table, no properties scanned)
+ // -> SCAN_NODE_TABLE (no properties scanned)
+ //
+ // Note: The projection between aggregate and extend might be empty or
+ // just projecting the count expression.
+
+ auto* current = aggregate->getChild(0).get();
+
+ // Skip any projections (they should be empty or just for count)
+ while (current->getOperatorType() == LogicalOperatorType::PROJECTION) {
+ auto& proj = current->constCast<LogicalProjection>();
+ // Empty projection is okay, it's just a passthrough
+ if (!proj.getExpressionsToProject().empty()) {
+ // If projection has expressions, they should all be aggregate expressions
+ // (which means they're just passing through the count)
+ for (auto& expr : proj.getExpressionsToProject()) {
+ if (expr->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
+ return false;
+ }
+ }
+ }
+ current = current->getChild(0).get();
+ }
+
+ // Now we should have EXTEND
+ if (current->getOperatorType() != LogicalOperatorType::EXTEND) {
+ return false;
+ }
+ auto& extend = current->constCast<LogicalExtend>();
+
+ // Don't optimize for undirected edges (BOTH direction) - the query pattern
+ // (a)-[e]-(b) generates a plan that scans both directions, and optimizing
+ // this would require special handling to avoid double counting.
+ if (extend.getDirection() == ExtendDirection::BOTH) {
+ return false;
+ }
+
+ // The rel should be a single table (not multi-labeled)
+ auto rel = extend.getRel();
+ if (rel->isMultiLabeled()) {
+ return false;
+ }
+
+ // Check if we're scanning any properties (we can only optimize when no properties needed)
+ if (!extend.getProperties().empty()) {
+ return false;
+ }
+
+ // The child of extend should be SCAN_NODE_TABLE
+ auto* extendChild = current->getChild(0).get();
+ if (extendChild->getOperatorType() != LogicalOperatorType::SCAN_NODE_TABLE) {
+ return false;
+ }
+ auto& scanNode = extendChild->constCast<LogicalScanNodeTable>();
+
+ // Check if node scan has any properties (we can only optimize when no properties needed)
+ if (!scanNode.getProperties().empty()) {
+ return false;
+ }
+
+ return true;
+ }
+
+ std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitAggregateReplace(
+ std::shared_ptr<LogicalOperator> op) {
+ if (!isSimpleCountStar(op.get())) {
+ return op;
+ }
+
+ if (!canOptimize(op.get())) {
+ return op;
+ }
+
+ // Find the EXTEND operator
+ auto* current = op->getChild(0).get();
+ while (current->getOperatorType() == LogicalOperatorType::PROJECTION) {
+ current = current->getChild(0).get();
+ }
+
+ KU_ASSERT(current->getOperatorType() == LogicalOperatorType::EXTEND);
+ auto& extend = current->constCast<LogicalExtend>();
+ auto rel = extend.getRel();
+ auto boundNode = extend.getBoundNode();
+ auto nbrNode = extend.getNbrNode();
+
+ // Get the rel group entry
+ KU_ASSERT(rel->getNumEntries() == 1);
+ auto* relGroupEntry = rel->getEntry(0)->ptrCast<RelGroupCatalogEntry>();
+
+ // Determine the source and destination node table IDs based on extend direction.
+ // If extendFromSource is true, then boundNode is the source and nbrNode is the destination.
+ // If extendFromSource is false, then boundNode is the destination and nbrNode is the source.
+ auto boundNodeTableIDs = boundNode->getTableIDsSet();
+ auto nbrNodeTableIDs = nbrNode->getTableIDsSet();
+
+ // Get only the rel table IDs that match the specific node table ID pairs in the query.
+ // A rel table connects a specific (srcTableID, dstTableID) pair.
+ std::vector<table_id_t> relTableIDs;
+ for (auto& info : relGroupEntry->getRelEntryInfos()) {
+ table_id_t srcTableID = info.nodePair.srcTableID;
+ table_id_t dstTableID = info.nodePair.dstTableID;
+
+ bool matches = false;
+ if (extend.extendFromSourceNode()) {
+ // boundNode is src, nbrNode is dst
+ matches =
+ boundNodeTableIDs.contains(srcTableID) && nbrNodeTableIDs.contains(dstTableID);
+ } else {
+ // boundNode is dst, nbrNode is src
+ matches =
+ boundNodeTableIDs.contains(dstTableID) && nbrNodeTableIDs.contains(srcTableID);
+ }
+
+ if (matches) {
+ relTableIDs.push_back(info.oid);
+ }
+ }
+
+ // If no matching rel tables, don't optimize (shouldn't happen for valid queries)
+ if (relTableIDs.empty()) {
+ return op;
+ }
+
+ // Get the count expression from the original aggregate
+ auto& aggregate = op->constCast<LogicalAggregate>();
+ auto countExpr = aggregate.getAggregates()[0];
+
+ // Get the bound node table IDs as a vector
+ std::vector<table_id_t> boundNodeTableIDsVec(boundNodeTableIDs.begin(),
+ boundNodeTableIDs.end());
+
+ // Create the new COUNT_REL_TABLE operator with all necessary information for scanning
+ auto countRelTable =
+ std::make_shared<LogicalCountRelTable>(relGroupEntry, std::move(relTableIDs),
+ std::move(boundNodeTableIDsVec), boundNode, extend.getDirection(), countExpr);
+ countRelTable->computeFlatSchema();
+
+ return countRelTable;
+ }
+
+ } // namespace optimizer
+ } // namespace lbug
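To make the pattern guard above concrete (illustrative only; labels follow the demo dataset and the since property is hypothetical), the rewrite targets a directed COUNT(*) over a single rel table with no property access, and leaves other shapes untouched:

// Rewritten to COUNT_REL_TABLE: directed, single rel table, no properties scanned
MATCH (a:User)-[f:Follows]->(b:User) RETURN COUNT(*);
// Not rewritten: an undirected pattern (BOTH direction) would risk double counting
MATCH (a:User)-[f:Follows]-(b:User) RETURN COUNT(*);
// Not rewritten: the predicate forces a property scan, so the plan no longer matches
MATCH (a:User)-[f:Follows]->(b:User) WHERE f.since > 2020 RETURN COUNT(*);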
@@ -19,6 +19,9 @@ void LogicalOperatorVisitor::visitOperatorSwitch(LogicalOperator* op) {
  case LogicalOperatorType::COPY_TO: {
  visitCopyTo(op);
  } break;
+ case LogicalOperatorType::COUNT_REL_TABLE: {
+ visitCountRelTable(op);
+ } break;
  case LogicalOperatorType::DELETE: {
  visitDelete(op);
  } break;
@@ -108,6 +111,9 @@ std::shared_ptr<LogicalOperator> LogicalOperatorVisitor::visitOperatorReplaceSwi
  case LogicalOperatorType::COPY_TO: {
  return visitCopyToReplace(op);
  }
+ case LogicalOperatorType::COUNT_REL_TABLE: {
+ return visitCountRelTableReplace(op);
+ }
  case LogicalOperatorType::DELETE: {
  return visitDeleteReplace(op);
  }
@@ -5,6 +5,7 @@
  #include "optimizer/agg_key_dependency_optimizer.h"
  #include "optimizer/cardinality_updater.h"
  #include "optimizer/correlated_subquery_unnest_solver.h"
+ #include "optimizer/count_rel_table_optimizer.h"
  #include "optimizer/factorization_rewriter.h"
  #include "optimizer/filter_push_down_optimizer.h"
  #include "optimizer/limit_push_down_optimizer.h"
@@ -32,6 +33,11 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
  auto removeUnnecessaryJoinOptimizer = RemoveUnnecessaryJoinOptimizer();
  removeUnnecessaryJoinOptimizer.rewrite(plan);

+ // CountRelTableOptimizer should be applied early before other optimizations
+ // that might change the plan structure.
+ auto countRelTableOptimizer = CountRelTableOptimizer(context);
+ countRelTableOptimizer.rewrite(plan);
+
  auto filterPushDownOptimizer = FilterPushDownOptimizer(context);
  filterPushDownOptimizer.rewrite(plan);

@@ -81,7 +81,12 @@ std::unique_ptr<Statement> Transformer::transformCreateNodeTable(
  } else {
  createTableInfo.propertyDefinitions =
  transformPropertyDefinitions(*ctx.kU_PropertyDefinitions());
- createTableInfo.extraInfo = std::make_unique<ExtraCreateNodeTableInfo>(getPKName(ctx));
+ options_t options;
+ if (ctx.kU_Options()) {
+ options = transformOptions(*ctx.kU_Options());
+ }
+ createTableInfo.extraInfo =
+ std::make_unique<ExtraCreateNodeTableInfo>(getPKName(ctx), std::move(options));
  return std::make_unique<CreateTable>(std::move(createTableInfo));
  }
  }
@@ -22,6 +22,8 @@ std::string LogicalOperatorUtils::logicalOperatorTypeToString(LogicalOperatorTyp
  return "COPY_FROM";
  case LogicalOperatorType::COPY_TO:
  return "COPY_TO";
+ case LogicalOperatorType::COUNT_REL_TABLE:
+ return "COUNT_REL_TABLE";
  case LogicalOperatorType::CREATE_MACRO:
  return "CREATE_MACRO";
  case LogicalOperatorType::CREATE_SEQUENCE: