lbug 0.12.3-dev.3 → 0.12.3-dev.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/README.md +2 -6
  2. package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
  3. package/lbug-source/CMakeLists.txt +15 -6
  4. package/lbug-source/Makefile +15 -4
  5. package/lbug-source/README.md +2 -6
  6. package/lbug-source/benchmark/serializer.py +24 -3
  7. package/lbug-source/dataset/demo-db/csv/copy.cypher +4 -4
  8. package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
  9. package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
  10. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
  11. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
  12. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
  13. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
  14. package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
  15. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
  16. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
  17. package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
  18. package/lbug-source/dataset/demo-db/parquet/copy.cypher +4 -4
  19. package/lbug-source/extension/duckdb/src/catalog/duckdb_catalog.cpp +1 -1
  20. package/lbug-source/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp +43 -4
  21. package/lbug-source/extension/duckdb/src/connector/duckdb_result_converter.cpp +6 -0
  22. package/lbug-source/extension/duckdb/src/connector/duckdb_secret_manager.cpp +1 -1
  23. package/lbug-source/extension/duckdb/src/function/duckdb_scan.cpp +49 -4
  24. package/lbug-source/extension/duckdb/src/include/catalog/duckdb_table_catalog_entry.h +6 -1
  25. package/lbug-source/extension/duckdb/src/include/function/duckdb_scan.h +2 -0
  26. package/lbug-source/extension/duckdb/test/test_files/duckdb.test +28 -0
  27. package/lbug-source/extension/extension_config.cmake +3 -2
  28. package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
  29. package/lbug-source/scripts/antlr4/Cypher.g4 +4 -4
  30. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  31. package/lbug-source/scripts/generate_binary_demo.sh +1 -1
  32. package/lbug-source/src/antlr4/Cypher.g4 +4 -4
  33. package/lbug-source/src/binder/bind/bind_ddl.cpp +97 -15
  34. package/lbug-source/src/binder/bind/bind_graph_pattern.cpp +30 -3
  35. package/lbug-source/src/catalog/catalog.cpp +6 -4
  36. package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
  37. package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +46 -7
  38. package/lbug-source/src/catalog/catalog_set.cpp +1 -0
  39. package/lbug-source/src/function/function_collection.cpp +2 -1
  40. package/lbug-source/src/function/table/CMakeLists.txt +1 -0
  41. package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
  42. package/lbug-source/src/function/table/show_connection.cpp +6 -1
  43. package/lbug-source/src/function/table/show_tables.cpp +10 -2
  44. package/lbug-source/src/function/table/table_function.cpp +11 -2
  45. package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +23 -6
  46. package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
  47. package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +21 -2
  48. package/lbug-source/src/include/catalog/catalog_entry/table_catalog_entry.h +7 -0
  49. package/lbug-source/src/include/common/constants.h +1 -0
  50. package/lbug-source/src/include/common/string_format.h +2 -2
  51. package/lbug-source/src/include/common/types/types.h +1 -0
  52. package/lbug-source/src/include/function/table/bind_data.h +12 -1
  53. package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
  54. package/lbug-source/src/include/function/table/table_function.h +2 -0
  55. package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
  56. package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
  57. package/lbug-source/src/include/optimizer/order_by_push_down_optimizer.h +21 -0
  58. package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
  59. package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
  60. package/lbug-source/src/include/planner/operator/logical_table_function_call.h +14 -1
  61. package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
  62. package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
  63. package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
  64. package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
  65. package/lbug-source/src/include/processor/plan_mapper.h +2 -0
  66. package/lbug-source/src/include/storage/storage_manager.h +1 -0
  67. package/lbug-source/src/include/storage/storage_version_info.h +1 -7
  68. package/lbug-source/src/include/storage/table/foreign_rel_table.h +56 -0
  69. package/lbug-source/src/include/storage/table/node_table.h +6 -1
  70. package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
  71. package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
  72. package/lbug-source/src/include/storage/table/rel_table.h +2 -2
  73. package/lbug-source/src/include/transaction/transaction.h +2 -0
  74. package/lbug-source/src/main/query_result/materialized_query_result.cpp +2 -2
  75. package/lbug-source/src/optimizer/CMakeLists.txt +3 -1
  76. package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
  77. package/lbug-source/src/optimizer/limit_push_down_optimizer.cpp +12 -0
  78. package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
  79. package/lbug-source/src/optimizer/optimizer.cpp +10 -0
  80. package/lbug-source/src/optimizer/order_by_push_down_optimizer.cpp +123 -0
  81. package/lbug-source/src/optimizer/projection_push_down_optimizer.cpp +5 -1
  82. package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
  83. package/lbug-source/src/parser/transform/transform_expression.cpp +1 -1
  84. package/lbug-source/src/parser/transform/transform_graph_pattern.cpp +6 -1
  85. package/lbug-source/src/parser/transformer.cpp +7 -1
  86. package/lbug-source/src/planner/join_order/cardinality_estimator.cpp +11 -2
  87. package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
  88. package/lbug-source/src/planner/operator/logical_table_function_call.cpp +4 -0
  89. package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
  90. package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
  91. package/lbug-source/src/planner/plan/plan_join_order.cpp +16 -1
  92. package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
  93. package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
  94. package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
  95. package/lbug-source/src/processor/operator/index_lookup.cpp +31 -23
  96. package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
  97. package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
  98. package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
  99. package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
  100. package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
  101. package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
  102. package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +18 -2
  103. package/lbug-source/src/storage/storage_manager.cpp +43 -6
  104. package/lbug-source/src/storage/table/CMakeLists.txt +3 -0
  105. package/lbug-source/src/storage/table/foreign_rel_table.cpp +63 -0
  106. package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
  107. package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
  108. package/lbug-source/test/api/api_test.cpp +18 -0
  109. package/lbug-source/test/common/string_format.cpp +9 -1
  110. package/lbug-source/test/copy/copy_test.cpp +4 -4
  111. package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
  112. package/lbug-source/test/include/test_runner/test_group.h +11 -1
  113. package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
  114. package/lbug-source/test/runner/e2e_test.cpp +7 -1
  115. package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
  116. package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
  117. package/lbug-source/test/test_helper/test_helper.cpp +33 -1
  118. package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
  119. package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
  120. package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
  121. package/lbug-source/test/test_runner/test_parser.cpp +3 -0
  122. package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
  123. package/lbug-source/test/transaction/transaction_test.cpp +19 -15
  124. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2805 -2708
  125. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +7 -3
  126. package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
  127. package/lbug-source/tools/nodejs_api/package.json +4 -2
  128. package/lbug-source/tools/shell/embedded_shell.cpp +78 -3
  129. package/lbug-source/tools/shell/include/embedded_shell.h +2 -0
  130. package/lbug-source/tools/shell/linenoise.cpp +3 -3
  131. package/lbug-source/tools/shell/test/test_helper.py +1 -1
  132. package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
  133. package/lbug-source/tools/shell/test/test_shell_commands.py +19 -0
  134. package/package.json +9 -2
  135. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  136. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  137. package/prebuilt/lbugjs-linux-x64.node +0 -0
  138. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -0,0 +1,49 @@
1
+ #pragma once
2
+
3
+ #include "logical_operator_visitor.h"
4
+ #include "planner/operator/logical_plan.h"
5
+
6
+ namespace lbug {
7
+ namespace main {
8
+ class ClientContext;
9
+ }
10
+
11
+ namespace optimizer {
12
+
13
+ /**
14
+ * This optimizer detects patterns where we're counting all rows from a single rel table
15
+ * without any filters, and replaces the scan + aggregate with a direct count from table metadata.
16
+ *
17
+ * Pattern detected:
18
+ * AGGREGATE (COUNT_STAR only, no keys) →
19
+ * PROJECTION (empty or pass-through) →
20
+ * EXTEND (single rel table) →
21
+ * SCAN_NODE_TABLE
22
+ *
23
+ * This pattern is replaced with:
24
+ * COUNT_REL_TABLE (new operator that directly reads the count from table metadata)
25
+ */
26
+ class CountRelTableOptimizer : public LogicalOperatorVisitor {
27
+ public:
28
+ explicit CountRelTableOptimizer(main::ClientContext* context) : context{context} {}
29
+
30
+ void rewrite(planner::LogicalPlan* plan);
31
+
32
+ private:
33
+ std::shared_ptr<planner::LogicalOperator> visitOperator(
34
+ const std::shared_ptr<planner::LogicalOperator>& op);
35
+
36
+ std::shared_ptr<planner::LogicalOperator> visitAggregateReplace(
37
+ std::shared_ptr<planner::LogicalOperator> op) override;
38
+
39
+ // Check if the aggregate is a simple COUNT(*) with no keys
40
+ bool isSimpleCountStar(planner::LogicalOperator* op) const;
41
+
42
+ // Check if the plan below aggregate matches the pattern for optimization
43
+ bool canOptimize(planner::LogicalOperator* aggregate) const;
44
+
45
+ main::ClientContext* context;
46
+ };
47
+
48
+ } // namespace optimizer
49
+ } // namespace lbug
@@ -39,6 +39,12 @@ protected:
39
39
  return op;
40
40
  }
41
41
 
42
+ virtual void visitCountRelTable(planner::LogicalOperator* /*op*/) {}
43
+ virtual std::shared_ptr<planner::LogicalOperator> visitCountRelTableReplace(
44
+ std::shared_ptr<planner::LogicalOperator> op) {
45
+ return op;
46
+ }
47
+
42
48
  virtual void visitDelete(planner::LogicalOperator* /*op*/) {}
43
49
  virtual std::shared_ptr<planner::LogicalOperator> visitDeleteReplace(
44
50
  std::shared_ptr<planner::LogicalOperator> op) {
@@ -0,0 +1,21 @@
1
+ #pragma once
2
+
3
+ #include "planner/operator/logical_plan.h"
4
+
5
+ namespace lbug {
6
+ namespace optimizer {
7
+
8
+ class OrderByPushDownOptimizer {
9
+ public:
10
+ void rewrite(planner::LogicalPlan* plan);
11
+
12
+ private:
13
+ std::shared_ptr<planner::LogicalOperator> visitOperator(
14
+ std::shared_ptr<planner::LogicalOperator> op, std::string currentOrderBy = "");
15
+
16
+ static std::string buildOrderByString(const binder::expression_vector& expressions,
17
+ const std::vector<bool>& isAscOrders);
18
+ };
19
+
20
+ } // namespace optimizer
21
+ } // namespace lbug
@@ -36,8 +36,10 @@ struct CreateTableInfo {
36
36
 
37
37
  struct ExtraCreateNodeTableInfo final : ExtraCreateTableInfo {
38
38
  std::string pKName;
39
+ options_t options;
39
40
 
40
- explicit ExtraCreateNodeTableInfo(std::string pKName) : pKName{std::move(pKName)} {}
41
+ explicit ExtraCreateNodeTableInfo(std::string pKName, options_t options = {})
42
+ : pKName{std::move(pKName)}, options{std::move(options)} {}
41
43
  };
42
44
 
43
45
  struct ExtraCreateRelTableGroupInfo final : ExtraCreateTableInfo {
@@ -17,6 +17,7 @@ enum class LogicalOperatorType : uint8_t {
17
17
  ATTACH_DATABASE,
18
18
  COPY_FROM,
19
19
  COPY_TO,
20
+ COUNT_REL_TABLE,
20
21
  CREATE_MACRO,
21
22
  CREATE_SEQUENCE,
22
23
  CREATE_TABLE,
@@ -27,11 +27,18 @@ public:
27
27
  void setColumnPredicates(std::vector<storage::ColumnPredicateSet> predicates) {
28
28
  bindData->setColumnPredicates(std::move(predicates));
29
29
  }
30
+ void setLimitNum(common::row_idx_t limit) { bindData->setLimitNum(limit); }
31
+ void setOrderBy(std::string orderBy) { bindData->setOrderBy(orderBy); }
30
32
 
31
33
  void computeFlatSchema() override;
32
34
  void computeFactorizedSchema() override;
33
35
 
34
- std::string getExpressionsForPrinting() const override { return tableFunc.name; }
36
+ std::string getExpressionsForPrinting() const override {
37
+ auto desc = bindData->getDescription();
38
+ return desc.empty() ? tableFunc.name : desc;
39
+ }
40
+
41
+ std::unique_ptr<OPPrintInfo> getPrintInfo() const override;
35
42
 
36
43
  std::unique_ptr<LogicalOperator> copy() override {
37
44
  return std::make_unique<LogicalTableFunctionCall>(tableFunc, bindData->copy());
@@ -42,5 +49,11 @@ private:
42
49
  std::unique_ptr<function::TableFuncBindData> bindData;
43
50
  };
44
51
 
52
+ struct LogicalTableFunctionCallPrintInfo final : OPPrintInfo {
53
+ std::string desc;
54
+ explicit LogicalTableFunctionCallPrintInfo(std::string desc) : desc{std::move(desc)} {}
55
+ std::string toString() const override { return desc; }
56
+ };
57
+
45
58
  } // namespace planner
46
59
  } // namespace lbug
@@ -0,0 +1,84 @@
1
+ #pragma once
2
+
3
+ #include "binder/expression/expression.h"
4
+ #include "binder/expression/node_expression.h"
5
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
6
+ #include "common/enums/extend_direction.h"
7
+ #include "planner/operator/logical_operator.h"
8
+
9
+ namespace lbug {
10
+ namespace planner {
11
+
12
+ struct LogicalCountRelTablePrintInfo final : OPPrintInfo {
13
+ std::string relTableName;
14
+ std::shared_ptr<binder::Expression> countExpr;
15
+
16
+ LogicalCountRelTablePrintInfo(std::string relTableName,
17
+ std::shared_ptr<binder::Expression> countExpr)
18
+ : relTableName{std::move(relTableName)}, countExpr{std::move(countExpr)} {}
19
+
20
+ std::string toString() const override {
21
+ return "Table: " + relTableName + ", Count: " + countExpr->toString();
22
+ }
23
+
24
+ std::unique_ptr<OPPrintInfo> copy() const override {
25
+ return std::make_unique<LogicalCountRelTablePrintInfo>(relTableName, countExpr);
26
+ }
27
+ };
28
+
29
+ /**
30
+ * LogicalCountRelTable is an optimized operator that counts the number of rows
31
+ * in a rel table by scanning through bound nodes and counting edges.
32
+ *
33
+ * This operator is created by CountRelTableOptimizer when it detects:
34
+ * COUNT(*) over a single rel table with no filters
35
+ */
36
+ class LogicalCountRelTable final : public LogicalOperator {
37
+ static constexpr LogicalOperatorType type_ = LogicalOperatorType::COUNT_REL_TABLE;
38
+
39
+ public:
40
+ LogicalCountRelTable(catalog::RelGroupCatalogEntry* relGroupEntry,
41
+ std::vector<common::table_id_t> relTableIDs,
42
+ std::vector<common::table_id_t> boundNodeTableIDs,
43
+ std::shared_ptr<binder::NodeExpression> boundNode, common::ExtendDirection direction,
44
+ std::shared_ptr<binder::Expression> countExpr)
45
+ : LogicalOperator{type_}, relGroupEntry{relGroupEntry}, relTableIDs{std::move(relTableIDs)},
46
+ boundNodeTableIDs{std::move(boundNodeTableIDs)}, boundNode{std::move(boundNode)},
47
+ direction{direction}, countExpr{std::move(countExpr)} {
48
+ cardinality = 1; // Always returns exactly one row
49
+ }
50
+
51
+ void computeFactorizedSchema() override;
52
+ void computeFlatSchema() override;
53
+
54
+ std::string getExpressionsForPrinting() const override { return countExpr->toString(); }
55
+
56
+ catalog::RelGroupCatalogEntry* getRelGroupEntry() const { return relGroupEntry; }
57
+ const std::vector<common::table_id_t>& getRelTableIDs() const { return relTableIDs; }
58
+ const std::vector<common::table_id_t>& getBoundNodeTableIDs() const {
59
+ return boundNodeTableIDs;
60
+ }
61
+ std::shared_ptr<binder::NodeExpression> getBoundNode() const { return boundNode; }
62
+ common::ExtendDirection getDirection() const { return direction; }
63
+ std::shared_ptr<binder::Expression> getCountExpr() const { return countExpr; }
64
+
65
+ std::unique_ptr<OPPrintInfo> getPrintInfo() const override {
66
+ return std::make_unique<LogicalCountRelTablePrintInfo>(relGroupEntry->getName(), countExpr);
67
+ }
68
+
69
+ std::unique_ptr<LogicalOperator> copy() override {
70
+ return std::make_unique<LogicalCountRelTable>(relGroupEntry, relTableIDs, boundNodeTableIDs,
71
+ boundNode, direction, countExpr);
72
+ }
73
+
74
+ private:
75
+ catalog::RelGroupCatalogEntry* relGroupEntry;
76
+ std::vector<common::table_id_t> relTableIDs;
77
+ std::vector<common::table_id_t> boundNodeTableIDs;
78
+ std::shared_ptr<binder::NodeExpression> boundNode;
79
+ common::ExtendDirection direction;
80
+ std::shared_ptr<binder::Expression> countExpr;
81
+ };
82
+
83
+ } // namespace planner
84
+ } // namespace lbug
@@ -22,6 +22,7 @@ enum class PhysicalOperatorType : uint8_t {
22
22
  ATTACH_DATABASE,
23
23
  BATCH_INSERT,
24
24
  COPY_TO,
25
+ COUNT_REL_TABLE,
25
26
  CREATE_MACRO,
26
27
  CREATE_SEQUENCE,
27
28
  CREATE_TABLE,
@@ -0,0 +1,62 @@
1
+ #pragma once
2
+
3
+ #include "common/enums/rel_direction.h"
4
+ #include "processor/operator/physical_operator.h"
5
+ #include "storage/table/node_table.h"
6
+ #include "storage/table/rel_table.h"
7
+
8
+ namespace lbug {
9
+ namespace processor {
10
+
11
+ struct CountRelTablePrintInfo final : OPPrintInfo {
12
+ std::string relTableName;
13
+
14
+ explicit CountRelTablePrintInfo(std::string relTableName)
15
+ : relTableName{std::move(relTableName)} {}
16
+
17
+ std::string toString() const override { return "Table: " + relTableName; }
18
+
19
+ std::unique_ptr<OPPrintInfo> copy() const override {
20
+ return std::make_unique<CountRelTablePrintInfo>(relTableName);
21
+ }
22
+ };
23
+
24
+ /**
25
+ * CountRelTable is a source operator that counts edges in a rel table
26
+ * by scanning through all bound nodes and counting their edges.
27
+ * It creates its own internal vectors for node scanning (not exposed in ResultSet).
28
+ */
29
+ class CountRelTable final : public PhysicalOperator {
30
+ static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::COUNT_REL_TABLE;
31
+
32
+ public:
33
+ CountRelTable(std::vector<storage::NodeTable*> nodeTables,
34
+ std::vector<storage::RelTable*> relTables, common::RelDataDirection direction,
35
+ DataPos countOutputPos, physical_op_id id, std::unique_ptr<OPPrintInfo> printInfo)
36
+ : PhysicalOperator{type_, id, std::move(printInfo)}, nodeTables{std::move(nodeTables)},
37
+ relTables{std::move(relTables)}, direction{direction}, countOutputPos{countOutputPos} {}
38
+
39
+ bool isSource() const override { return true; }
40
+ bool isParallel() const override { return false; }
41
+
42
+ void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override;
43
+
44
+ bool getNextTuplesInternal(ExecutionContext* context) override;
45
+
46
+ std::unique_ptr<PhysicalOperator> copy() override {
47
+ return std::make_unique<CountRelTable>(nodeTables, relTables, direction, countOutputPos, id,
48
+ printInfo->copy());
49
+ }
50
+
51
+ private:
52
+ std::vector<storage::NodeTable*> nodeTables;
53
+ std::vector<storage::RelTable*> relTables;
54
+ common::RelDataDirection direction;
55
+ DataPos countOutputPos;
56
+ common::ValueVector* countVector;
57
+ bool hasExecuted;
58
+ common::row_idx_t totalCount;
59
+ };
60
+
61
+ } // namespace processor
62
+ } // namespace lbug
@@ -24,7 +24,7 @@ public:
24
24
  void initialize(const transaction::Transaction* transaction, storage::NodeTable* table,
25
25
  ScanNodeTableProgressSharedState& progressSharedState);
26
26
 
27
- void nextMorsel(storage::NodeTableScanState& scanState,
27
+ void nextMorsel(storage::TableScanState& scanState,
28
28
  ScanNodeTableProgressSharedState& progressSharedState);
29
29
 
30
30
  common::SemiMask* getSemiMask() const { return semiMask.get(); }
@@ -116,7 +116,7 @@ private:
116
116
 
117
117
  private:
118
118
  common::idx_t currentTableIdx;
119
- std::unique_ptr<storage::NodeTableScanState> scanState;
119
+ std::unique_ptr<storage::TableScanState> scanState;
120
120
  std::vector<ScanNodeTableInfo> tableInfos;
121
121
  std::vector<std::shared_ptr<ScanNodeTableSharedState>> sharedStates;
122
122
  std::shared_ptr<ScanNodeTableProgressSharedState> progressSharedState;
@@ -90,6 +90,8 @@ public:
90
90
  std::unique_ptr<PhysicalOperator> mapCopyRelFrom(
91
91
  const planner::LogicalOperator* logicalOperator);
92
92
  std::unique_ptr<PhysicalOperator> mapCopyTo(const planner::LogicalOperator* logicalOperator);
93
+ std::unique_ptr<PhysicalOperator> mapCountRelTable(
94
+ const planner::LogicalOperator* logicalOperator);
93
95
  std::unique_ptr<PhysicalOperator> mapCreateMacro(
94
96
  const planner::LogicalOperator* logicalOperator);
95
97
  std::unique_ptr<PhysicalOperator> mapCreateSequence(
@@ -97,6 +97,7 @@ private:
97
97
  bool enableCompression;
98
98
  bool inMemory;
99
99
  std::vector<IndexType> registeredIndexTypes;
100
+ std::unordered_map<common::table_id_t, std::string> tableNameCache;
100
101
  };
101
102
 
102
103
  } // namespace storage
@@ -13,13 +13,7 @@ using storage_version_t = uint64_t;
13
13
 
14
14
  struct StorageVersionInfo {
15
15
  static std::unordered_map<std::string, storage_version_t> getStorageVersionInfo() {
16
- return {{"0.11.1", 39}, {"0.11.0", 39}, {"0.10.0", 38}, {"0.9.0", 37}, {"0.8.0", 36},
17
- {"0.7.1.1", 35}, {"0.7.0", 34}, {"0.6.0.6", 33}, {"0.6.0.5", 32}, {"0.6.0.2", 31},
18
- {"0.6.0.1", 31}, {"0.6.0", 28}, {"0.5.0", 28}, {"0.4.2", 27}, {"0.4.1", 27},
19
- {"0.4.0", 27}, {"0.3.2", 26}, {"0.3.1", 26}, {"0.3.0", 26}, {"0.2.1", 25},
20
- {"0.2.0", 25}, {"0.1.0", 24}, {"0.0.12.3", 24}, {"0.0.12.2", 24}, {"0.0.12.1", 24},
21
- {"0.0.12", 23}, {"0.0.11", 23}, {"0.0.10", 23}, {"0.0.9", 23}, {"0.0.8", 17},
22
- {"0.0.7", 15}, {"0.0.6", 9}, {"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3", 1}};
16
+ return {{"0.12.0", 40}, {"0.12.2", 40}};
23
17
  }
24
18
 
25
19
  static LBUG_API storage_version_t getStorageVersion();
@@ -0,0 +1,56 @@
1
+ #pragma once
2
+
3
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
4
+ #include "common/exception/runtime.h"
5
+ #include "function/table/table_function.h"
6
+ #include "storage/table/rel_table.h"
7
+
8
+ namespace lbug {
9
+ namespace storage {
10
+
11
+ struct ForeignRelTableScanState final : RelTableScanState {
12
+ std::shared_ptr<function::TableFuncSharedState> sharedState;
13
+ std::shared_ptr<function::TableFuncLocalState> localState;
14
+ common::DataChunk dataChunk;
15
+
16
+ ForeignRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector,
17
+ std::vector<common::ValueVector*> outputVectors,
18
+ std::shared_ptr<common::DataChunkState> outChunkState);
19
+ };
20
+
21
+ class ForeignRelTable final : public RelTable {
22
+ public:
23
+ ForeignRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID,
24
+ common::table_id_t toTableID, const StorageManager* storageManager,
25
+ MemoryManager* memoryManager, function::TableFunction scanFunction,
26
+ std::shared_ptr<function::TableFuncBindData> scanBindData);
27
+
28
+ void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
29
+ bool resetCachedBoundNodeSelVec = true) const override;
30
+
31
+ bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
32
+
33
+ // For foreign-backed tables, we don't support modifications
34
+ void insert([[maybe_unused]] transaction::Transaction* transaction,
35
+ [[maybe_unused]] TableInsertState& insertState) override {
36
+ throw common::RuntimeException("Cannot insert into foreign-backed rel table");
37
+ }
38
+ void update([[maybe_unused]] transaction::Transaction* transaction,
39
+ [[maybe_unused]] TableUpdateState& updateState) override {
40
+ throw common::RuntimeException("Cannot update foreign-backed rel table");
41
+ }
42
+ bool delete_([[maybe_unused]] transaction::Transaction* transaction,
43
+ [[maybe_unused]] TableDeleteState& deleteState) override {
44
+ throw common::RuntimeException("Cannot delete from foreign-backed rel table");
45
+ return false;
46
+ }
47
+
48
+ common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
49
+
50
+ private:
51
+ function::TableFunction scanFunction;
52
+ std::shared_ptr<function::TableFuncBindData> scanBindData;
53
+ };
54
+
55
+ } // namespace storage
56
+ } // namespace lbug
@@ -107,7 +107,7 @@ private:
107
107
 
108
108
  class StorageManager;
109
109
 
110
- class LBUG_API NodeTable final : public Table {
110
+ class LBUG_API NodeTable : public Table {
111
111
  public:
112
112
  NodeTable(const StorageManager* storageManager,
113
113
  const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* mm);
@@ -119,6 +119,11 @@ public:
119
119
  void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
120
120
  common::table_id_t tableID, common::offset_t startOffset) const;
121
121
 
122
+ // Virtual method for operator-level scan coordination initialization
123
+ // Called once per scan operation (not per scan state)
124
+ virtual void initializeScanCoordination(
125
+ [[maybe_unused]] const transaction::Transaction* transaction) {}
126
+
122
127
  bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
123
128
  template<bool lock = true>
124
129
  bool lookup(const transaction::Transaction* transaction, const TableScanState& scanState) const;
@@ -0,0 +1,103 @@
1
+ #pragma once
2
+
3
+ #include <mutex>
4
+ #include <vector>
5
+
6
+ #include "catalog/catalog_entry/node_table_catalog_entry.h"
7
+ #include "common/exception/runtime.h"
8
+ #include "common/types/internal_id_util.h"
9
+ #include "common/types/value/value.h"
10
+ #include "processor/operator/persistent/reader/parquet/parquet_reader.h"
11
+ #include "storage/table/node_table.h"
12
+
13
+ namespace lbug {
14
+ namespace storage {
15
+
16
+ struct ParquetNodeTableScanState final : NodeTableScanState {
17
+ std::unique_ptr<processor::ParquetReader> parquetReader;
18
+ std::unique_ptr<processor::ParquetReaderScanState> parquetScanState;
19
+ bool initialized = false;
20
+ bool scanCompleted = false; // Track if this scan state has finished reading
21
+ bool dataRead = false;
22
+ std::vector<std::vector<std::unique_ptr<common::Value>>> allData;
23
+ size_t totalRows = 0;
24
+ size_t nextRowToDistribute = 0;
25
+ uint64_t lastQueryId = 0; // Track the last query ID to detect new queries
26
+
27
+ ParquetNodeTableScanState([[maybe_unused]] MemoryManager& mm, common::ValueVector* nodeIDVector,
28
+ std::vector<common::ValueVector*> outputVectors,
29
+ std::shared_ptr<common::DataChunkState> outChunkState)
30
+ : NodeTableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} {
31
+ parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
32
+ }
33
+ };
34
+
35
+ // Shared state to coordinate row group assignment across parallel scan states
36
+ struct ParquetNodeTableSharedState {
37
+ std::mutex mtx;
38
+ common::node_group_idx_t currentRowGroupIdx = 0;
39
+ common::node_group_idx_t numRowGroups = 0;
40
+
41
+ void reset(common::node_group_idx_t totalRowGroups) {
42
+ std::lock_guard<std::mutex> lock(mtx);
43
+ currentRowGroupIdx = 0;
44
+ numRowGroups = totalRowGroups;
45
+ }
46
+
47
+ bool getNextRowGroup(common::node_group_idx_t& assignedRowGroupIdx) {
48
+ std::lock_guard<std::mutex> lock(mtx);
49
+ if (currentRowGroupIdx < numRowGroups) {
50
+ assignedRowGroupIdx = currentRowGroupIdx++;
51
+ return true;
52
+ }
53
+ return false;
54
+ }
55
+ };
56
+
57
+ class ParquetNodeTable final : public NodeTable {
58
+ public:
59
+ ParquetNodeTable(const StorageManager* storageManager,
60
+ const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager);
61
+
62
+ void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
63
+ bool resetCachedBoundNodeSelVec = true) const override;
64
+
65
+ // Override to reset shared state for row group coordination at the start of each scan operation
66
+ void initializeScanCoordination(const transaction::Transaction* transaction) override;
67
+
68
+ bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
69
+
70
+ // For parquet-backed tables, we don't support modifications
71
+ void insert([[maybe_unused]] transaction::Transaction* transaction,
72
+ [[maybe_unused]] TableInsertState& insertState) override {
73
+ throw common::RuntimeException("Cannot insert into parquet-backed node table");
74
+ }
75
+ void update([[maybe_unused]] transaction::Transaction* transaction,
76
+ [[maybe_unused]] TableUpdateState& updateState) override {
77
+ throw common::RuntimeException("Cannot update parquet-backed node table");
78
+ }
79
+ bool delete_([[maybe_unused]] transaction::Transaction* transaction,
80
+ [[maybe_unused]] TableDeleteState& deleteState) override {
81
+ throw common::RuntimeException("Cannot delete from parquet-backed node table");
82
+ return false;
83
+ }
84
+
85
+ common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
86
+
87
+ const std::string& getParquetFilePath() const { return parquetFilePath; }
88
+
89
+ // Note: Cannot override getNumCommittedNodeGroups since it's not virtual in base class
90
+ // Will need a different approach
91
+
92
+ private:
93
+ std::string parquetFilePath;
94
+ const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry;
95
+ mutable std::unique_ptr<ParquetNodeTableSharedState> sharedState;
96
+
97
+ void initializeParquetReader(transaction::Transaction* transaction) const;
98
+ void initParquetScanForRowGroup(transaction::Transaction* transaction,
99
+ ParquetNodeTableScanState& scanState) const;
100
+ };
101
+
102
+ } // namespace storage
103
+ } // namespace lbug
@@ -0,0 +1,91 @@
1
+ #pragma once
2
+
3
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
4
+ #include "common/exception/runtime.h"
5
+ #include "common/types/internal_id_util.h"
6
+ #include "processor/operator/persistent/reader/parquet/parquet_reader.h"
7
+ #include "storage/table/rel_table.h"
8
+ #include "transaction/transaction.h"
9
+
10
+ namespace lbug {
11
+ namespace storage {
12
+
13
+ struct ParquetRelTableScanState final : RelTableScanState { // Scan state taken by ParquetRelTable's row-group scan helpers; adds parquet reader state on top of RelTableScanState.
14
+ std::unique_ptr<processor::ParquetReaderScanState> parquetScanState; // Allocated in the constructor below, so it is non-null once construction finishes.
15
+ // For CSR format: cursor over the rows matching the current bound node (presumably the index of the next such row to emit; the scan code that advances it is not in this header -- confirm).
16
+ size_t nextRowToProcess = 0;
17
+
18
+ // Row group range for morsel-driven parallelism. All three default to 0; whether endRowGroup is inclusive or exclusive is decided by the scan code, which is not visible here.
19
+ uint64_t startRowGroup = 0;
20
+ uint64_t endRowGroup = 0;
21
+ uint64_t currentRowGroup = 0;
22
+
23
+ // Per-scan-state readers for thread safety. The constructor does not create them, so both start out null.
24
+ std::unique_ptr<processor::ParquetReader> indicesReader;
25
+ std::unique_ptr<processor::ParquetReader> indptrReader;
26
+
27
+ ParquetRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector, // Forwards every argument to the RelTableScanState constructor, then allocates parquetScanState.
28
+ std::vector<common::ValueVector*> outputVectors,
29
+ std::shared_ptr<common::DataChunkState> outChunkState)
30
+ : RelTableScanState{mm, nodeIDVector, std::move(outputVectors), std::move(outChunkState)} { // outputVectors and outChunkState are moved into the base.
31
+ parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
32
+ }
33
+
34
+ void setToTable(const transaction::Transaction* transaction, Table* table_, // Overrides the base-class setToTable; declared here and defined out of line.
35
+ std::vector<common::column_id_t> columnIDs_,
36
+ std::vector<ColumnPredicateSet> columnPredicateSets_,
37
+ common::RelDataDirection direction_) override;
38
+ };
39
+
40
+ class ParquetRelTable final : public RelTable { // Rel table variant for parquet-backed data: scanning is overridden, while insert/update/delete_ always throw.
41
+ public:
42
+ ParquetRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID, // Defined out of line; how the arguments are used is not visible in this header.
43
+ common::table_id_t toTableID, const StorageManager* storageManager,
44
+ MemoryManager* memoryManager);
45
+
46
+ void initScanState(transaction::Transaction* transaction, TableScanState& scanState, // const override; resetCachedBoundNodeSelVec defaults to true.
47
+ bool resetCachedBoundNodeSelVec = true) const override;
48
+
49
+ bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override; // Scan entry point override; the meaning of the bool result is defined by the base class (not visible here).
50
+
51
+ // For parquet-backed tables, we don't support modifications: each of the three overrides below throws common::RuntimeException unconditionally.
52
+ void insert([[maybe_unused]] transaction::Transaction* transaction,
53
+ [[maybe_unused]] TableInsertState& insertState) override {
54
+ throw common::RuntimeException("Cannot insert into parquet-backed rel table");
55
+ }
56
+ void update([[maybe_unused]] transaction::Transaction* transaction,
57
+ [[maybe_unused]] TableUpdateState& updateState) override {
58
+ throw common::RuntimeException("Cannot update parquet-backed rel table");
59
+ }
60
+ bool delete_([[maybe_unused]] transaction::Transaction* transaction,
61
+ [[maybe_unused]] TableDeleteState& deleteState) override {
62
+ throw common::RuntimeException("Cannot delete from parquet-backed rel table");
63
+ return false; // NOTE(review): unreachable after the throw above; presumably kept so the bool function has an explicit return -- confirm before removing.
64
+ }
65
+
66
+ common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override; // Defined out of line.
67
+
68
+ private:
69
+ catalog::RelGroupCatalogEntry* relGroupEntry; // Raw pointer (not a C++ reference) to the rel group catalog entry holding the table schema.
70
+ std::string indicesFilePath; // Presumably the path of the CSR "indices" parquet file -- confirm against the constructor.
71
+ std::string indptrFilePath; // Presumably the path of the CSR "indptr" parquet file -- confirm against the constructor.
72
+ mutable std::unique_ptr<processor::ParquetReader> indicesReader; // mutable, so the const helpers declared below can modify it.
73
+ mutable std::unique_ptr<processor::ParquetReader> indptrReader; // mutable for the same reason as indicesReader.
74
+ mutable std::mutex parquetReaderMutex; // Presumably guards the two readers above -- confirm in the .cpp. NOTE(review): <mutex> is not included directly by this header.
75
+ mutable std::mutex indptrDataMutex; // Presumably guards indptrData -- confirm in the .cpp.
76
+ mutable std::vector<common::offset_t> indptrData; // Cached indptr data for CSR format; mutable so const helpers can fill it.
77
+
78
+ void initializeParquetReaders(transaction::Transaction* transaction) const; // const helper; can only change the mutable members above.
79
+ void initializeIndptrReader(transaction::Transaction* transaction) const; // const helper; can only change the mutable members above.
80
+ void loadIndptrData(transaction::Transaction* transaction) const; // const helper; presumably fills indptrData -- confirm in the .cpp.
81
+ bool scanInternalByRowGroups(transaction::Transaction* transaction, // Non-const scan helper operating on the parquet-specific scan state.
82
+ ParquetRelTableScanState& parquetRelScanState);
83
+ bool scanRowGroupForBoundNodes(transaction::Transaction* transaction, // Takes the row groups to process and a set of bound node offsets by const reference.
84
+ ParquetRelTableScanState& parquetRelScanState,
85
+ const std::vector<uint64_t>& rowGroupsToProcess,
86
+ const std::unordered_set<common::offset_t>& boundNodeOffsets); // NOTE(review): <unordered_set> is not included directly by this header.
87
+ common::offset_t findSourceNodeForRow(common::offset_t globalRowIdx) const; // Maps a global row index to a node offset; presumably a lookup over indptrData -- confirm.
88
+ };
89
+
90
+ } // namespace storage
91
+ } // namespace lbug
@@ -48,7 +48,7 @@ struct RelTableScanState : TableScanState {
48
48
  nodeGroupScanState = std::make_unique<CSRNodeGroupScanState>();
49
49
  }
50
50
 
51
- void setToTable(const transaction::Transaction* transaction, Table* table_,
51
+ virtual void setToTable(const transaction::Transaction* transaction, Table* table_,
52
52
  std::vector<common::column_id_t> columnIDs_,
53
53
  std::vector<ColumnPredicateSet> columnPredicateSets_,
54
54
  common::RelDataDirection direction_) override;
@@ -138,7 +138,7 @@ struct LBUG_API RelTableDeleteState final : TableDeleteState {
138
138
  relIDVector{relIDVector}, detachDeleteDirection{detachDeleteDirection} {}
139
139
  };
140
140
 
141
- class LBUG_API RelTable final : public Table {
141
+ class LBUG_API RelTable : public Table {
142
142
  public:
143
143
  using rel_multiplicity_constraint_throw_func_t =
144
144
  std::function<void(const std::string&, common::offset_t, common::RelDataDirection)>;
@@ -130,6 +130,8 @@ public:
130
130
  return getMinUncommittedNodeOffset(tableID) + localRowIdx;
131
131
  }
132
132
 
133
+ main::ClientContext* getClientContext() const { return clientContext; } // Accessor: returns the clientContext pointer as-is, without any null check.
134
+
133
135
  void pushCreateDropCatalogEntry(catalog::CatalogSet& catalogSet,
134
136
  catalog::CatalogEntry& catalogEntry, bool isInternal, bool skipLoggingToWAL = false);
135
137
  void pushAlterCatalogEntry(catalog::CatalogSet& catalogSet, catalog::CatalogEntry& catalogEntry,
@@ -79,8 +79,8 @@ std::string MaterializedQueryResult::toString() const {
79
79
  result += "\n";
80
80
  auto tuple_ = FlatTuple(this->columnTypes);
81
81
  auto iterator_ = FactorizedTableIterator(*table);
82
- while (iterator->hasNext()) {
83
- iterator->getNext(tuple_);
82
+ while (iterator_.hasNext()) {
83
+ iterator_.getNext(tuple_);
84
84
  result += tuple_.toString();
85
85
  }
86
86
  return result;