lbug 0.12.3-dev.13 → 0.12.3-dev.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
  2. package/lbug-source/CMakeLists.txt +15 -6
  3. package/lbug-source/Makefile +1 -2
  4. package/lbug-source/benchmark/serializer.py +13 -2
  5. package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
  6. package/lbug-source/scripts/generate_binary_demo.sh +1 -1
  7. package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
  8. package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
  9. package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
  10. package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
  11. package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
  12. package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
  13. package/lbug-source/src/include/processor/plan_mapper.h +2 -0
  14. package/lbug-source/src/optimizer/CMakeLists.txt +1 -0
  15. package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
  16. package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
  17. package/lbug-source/src/optimizer/optimizer.cpp +6 -0
  18. package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
  19. package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
  20. package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
  21. package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
  22. package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
  23. package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
  24. package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
  25. package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
  26. package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
  27. package/lbug-source/test/common/string_format.cpp +9 -1
  28. package/lbug-source/test/copy/copy_test.cpp +4 -4
  29. package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
  30. package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
  31. package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
  32. package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
  33. package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
  34. package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
  35. package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
  36. package/lbug-source/test/transaction/transaction_test.cpp +19 -15
  37. package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
  38. package/lbug-source/tools/shell/embedded_shell.cpp +11 -0
  39. package/lbug-source/tools/shell/linenoise.cpp +3 -3
  40. package/lbug-source/tools/shell/test/test_helper.py +1 -1
  41. package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
  42. package/package.json +1 -1
  43. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  44. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  45. package/prebuilt/lbugjs-linux-x64.node +0 -0
  46. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -1152,6 +1152,9 @@ jobs:
1152
1152
  name: minimal test
1153
1153
  runs-on: ubuntu-latest
1154
1154
  needs: [ sanity-checks ]
1155
+ env:
1156
+ GEN: Ninja
1157
+ USE_STD_FORMAT: 1
1155
1158
  steps:
1156
1159
  - uses: actions/checkout@v4
1157
1160
 
@@ -1159,12 +1162,14 @@ jobs:
1159
1162
  uses: hendrikmuhs/ccache-action@v1.2
1160
1163
  with:
1161
1164
  key: minimal-test-${{ runner.os }}
1165
+ max-size: 2G
1166
+ restore-keys: minimal-test-
1162
1167
 
1163
1168
  - name: Build
1164
- run: make GEN=Ninja release USE_STD_FORMAT=1
1169
+ run: make relwithdebinfo
1165
1170
 
1166
1171
  - name: Generate datasets
1167
- run: bash scripts/generate_binary_demo.sh
1172
+ run: bash scripts/generate_binary_demo.sh --lbug-shell-mode relwithdebinfo
1168
1173
 
1169
1174
  - name: Install uv
1170
1175
  run: pip3 install uv
@@ -1210,6 +1215,8 @@ jobs:
1210
1215
  uses: hendrikmuhs/ccache-action@v1.2
1211
1216
  with:
1212
1217
  key: minimal-extension-test-${{ runner.os }}
1218
+ max-size: 2G
1219
+ restore-keys: minimal-extension-test-
1213
1220
 
1214
1221
  - name: Update PostgreSQL host
1215
1222
  working-directory: extension/postgres/test/test_files
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
2
 
3
- project(Lbug VERSION 0.12.3.13 LANGUAGES CXX C)
3
+ project(Lbug VERSION 0.12.3.15 LANGUAGES CXX C)
4
4
 
5
5
  option(SINGLE_THREADED "Single-threaded mode" FALSE)
6
6
  if(SINGLE_THREADED)
@@ -14,15 +14,26 @@ endif()
14
14
 
15
15
  set(CMAKE_CXX_STANDARD 20)
16
16
  set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
17
- set(CMAKE_CXX_VISIBILITY_PRESET hidden)
18
- set(CMAKE_C_VISIBILITY_PRESET hidden)
19
17
  set(CMAKE_EXPORT_COMPILE_COMMANDS TRUE)
20
18
  set(CMAKE_FIND_PACKAGE_RESOLVE_SYMLINKS TRUE)
21
19
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
22
- set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
23
20
  # On Linux, symbols in executables are not accessible by loaded shared libraries (e.g. via dlopen(3)). However, we need to export public symbols in executables so that extensions can access public symbols. This enables that behaviour.
24
21
  set(CMAKE_ENABLE_EXPORTS TRUE)
25
22
 
23
+ # When building tests, we need all symbols visible so tests can link to the shared library
24
+ # instead of static linking (which bloats binary sizes significantly)
25
+ option(BUILD_TESTS "Build C++ tests." FALSE)
26
+ option(BUILD_EXTENSION_TESTS "Build C++ extension tests." FALSE)
27
+ if(BUILD_TESTS OR BUILD_EXTENSION_TESTS)
28
+ set(CMAKE_CXX_VISIBILITY_PRESET default)
29
+ set(CMAKE_C_VISIBILITY_PRESET default)
30
+ set(CMAKE_VISIBILITY_INLINES_HIDDEN OFF)
31
+ else()
32
+ set(CMAKE_CXX_VISIBILITY_PRESET hidden)
33
+ set(CMAKE_C_VISIBILITY_PRESET hidden)
34
+ set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
35
+ endif()
36
+
26
37
  option(ENABLE_WERROR "Treat all warnings as errors" FALSE)
27
38
  if(ENABLE_WERROR)
28
39
  if (CMAKE_VERSION VERSION_GREATER "3.24.0" OR CMAKE_VERSION VERSION_EQUAL "3.24.0")
@@ -302,8 +313,6 @@ option(BUILD_NODEJS "Build NodeJS API." FALSE)
302
313
  option(BUILD_PYTHON "Build Python API." FALSE)
303
314
  option(BUILD_SHELL "Build Interactive Shell" TRUE)
304
315
  option(BUILD_SINGLE_FILE_HEADER "Build single file header. Requires Python >= 3.9." TRUE)
305
- option(BUILD_TESTS "Build C++ tests." FALSE)
306
- option(BUILD_EXTENSION_TESTS "Build C++ extension tests." FALSE)
307
316
  option(BUILD_LBUG "Build Lbug." TRUE)
308
317
  option(ENABLE_BACKTRACES "Enable backtrace printing for exceptions and segfaults" FALSE)
309
318
  option(USE_STD_FORMAT "Use std::format instead of a custom formatter." FALSE)
@@ -294,9 +294,8 @@ extension-release:
294
294
  -DBUILD_LBUG=FALSE \
295
295
  )
296
296
 
297
- # pytest expects a `Release` build path.
298
297
  shell-test:
299
- $(call run-cmake-release, \
298
+ $(call run-cmake-relwithdebinfo, \
300
299
  -DBUILD_SHELL=TRUE \
301
300
  )
302
301
  $(MAKE) -C tools/shell/test test
@@ -95,17 +95,28 @@ if __name__ == '__main__':
95
95
  parser.add_argument("--single-thread",
96
96
  help="If true, copy single threaded, which makes the results more reproducible",
97
97
  action="store_true")
98
+ parser.add_argument("--lbug-shell-mode",
99
+ help="debug, release or relwithdebinfo",
100
+ default="release")
101
+ default_mode = "release"
98
102
  if sys.platform == "win32":
99
103
  default_lbug_exec_path = os.path.join(
100
- base_dir, '..', 'build', 'release', 'tools', 'shell', 'lbug_shell')
104
+ base_dir, '..', 'build', default_mode, 'tools', 'shell', 'lbug_shell')
101
105
  else:
102
106
  default_lbug_exec_path = os.path.join(
103
- base_dir, '..', 'build', 'release', 'tools', 'shell', 'lbug')
107
+ base_dir, '..', 'build', default_mode, 'tools', 'shell', 'lbug')
104
108
  parser.add_argument("--lbug-shell",
105
109
  help="Path of the lbug shell executable. Defaults to the path as built in the default release build directory",
106
110
  default=default_lbug_exec_path)
107
111
  args = parser.parse_args()
108
112
 
113
+ if args.lbug_shell == default_lbug_exec_path:
114
+ mode = args.lbug_shell_mode
115
+ if sys.platform == "win32":
116
+ args.lbug_shell = os.path.join(base_dir, '..', 'build', mode, 'tools', 'shell', 'lbug_shell')
117
+ else:
118
+ args.lbug_shell = os.path.join(base_dir, '..', 'build', mode, 'tools', 'shell', 'lbug')
119
+
109
120
  try:
110
121
  serialize(args.lbug_shell, args.dataset_name, args.dataset_path, args.serialized_graph_path,
111
122
  args.benchmark_copy_log_dir, args.single_thread)
@@ -148,6 +148,7 @@ tinysnb1|LBUG
148
148
  ---- ok
149
149
 
150
150
  -CASE AttachNotExistPath
151
+ -SKIP
151
152
  -LOAD_DYNAMIC_EXTENSION httpfs
152
153
  -STATEMENT attach 'http://localhost/dataset/databases/tinysnb1' as test (dbtype lbug)
153
154
  ---- error
@@ -2,4 +2,4 @@
2
2
 
3
3
  CD=`dirname "$0"`
4
4
  DATASET_DIR=$CD/../dataset
5
- python3 $CD/../benchmark/serializer.py DemoDB $DATASET_DIR/demo-db/parquet $DATASET_DIR/binary-demo --single-thread
5
+ python3 $CD/../benchmark/serializer.py DemoDB $DATASET_DIR/demo-db/parquet $DATASET_DIR/binary-demo --single-thread $*
@@ -0,0 +1,49 @@
1
+ #pragma once
2
+
3
+ #include "logical_operator_visitor.h"
4
+ #include "planner/operator/logical_plan.h"
5
+
6
+ namespace lbug {
7
+ namespace main {
8
+ class ClientContext;
9
+ }
10
+
11
+ namespace optimizer {
12
+
13
+ /**
14
+ * This optimizer detects patterns where we're counting all rows from a single rel table
15
+ * without any filters, and replaces the scan + aggregate with a single COUNT_REL_TABLE operator.
16
+ *
17
+ * Pattern detected:
18
+ * AGGREGATE (COUNT_STAR only, no keys) →
19
+ * PROJECTION (empty or pass-through) →
20
+ * EXTEND (single rel table) →
21
+ * SCAN_NODE_TABLE
22
+ *
23
+ * This pattern is replaced with:
24
+ * COUNT_REL_TABLE (new operator that counts the rel table's edges by scanning its bound nodes)
25
+ */
26
+ class CountRelTableOptimizer : public LogicalOperatorVisitor {
27
+ public:
28
+ explicit CountRelTableOptimizer(main::ClientContext* context) : context{context} {}
29
+
30
+ void rewrite(planner::LogicalPlan* plan);
31
+
32
+ private:
33
+ std::shared_ptr<planner::LogicalOperator> visitOperator(
34
+ const std::shared_ptr<planner::LogicalOperator>& op);
35
+
36
+ std::shared_ptr<planner::LogicalOperator> visitAggregateReplace(
37
+ std::shared_ptr<planner::LogicalOperator> op) override;
38
+
39
+ // Check if the aggregate is a simple COUNT(*) with no keys
40
+ bool isSimpleCountStar(planner::LogicalOperator* op) const;
41
+
42
+ // Check if the plan below aggregate matches the pattern for optimization
43
+ bool canOptimize(planner::LogicalOperator* aggregate) const;
44
+
45
+ main::ClientContext* context;
46
+ };
47
+
48
+ } // namespace optimizer
49
+ } // namespace lbug
@@ -39,6 +39,12 @@ protected:
39
39
  return op;
40
40
  }
41
41
 
42
+ virtual void visitCountRelTable(planner::LogicalOperator* /*op*/) {}
43
+ virtual std::shared_ptr<planner::LogicalOperator> visitCountRelTableReplace(
44
+ std::shared_ptr<planner::LogicalOperator> op) {
45
+ return op;
46
+ }
47
+
42
48
  virtual void visitDelete(planner::LogicalOperator* /*op*/) {}
43
49
  virtual std::shared_ptr<planner::LogicalOperator> visitDeleteReplace(
44
50
  std::shared_ptr<planner::LogicalOperator> op) {
@@ -17,6 +17,7 @@ enum class LogicalOperatorType : uint8_t {
17
17
  ATTACH_DATABASE,
18
18
  COPY_FROM,
19
19
  COPY_TO,
20
+ COUNT_REL_TABLE,
20
21
  CREATE_MACRO,
21
22
  CREATE_SEQUENCE,
22
23
  CREATE_TABLE,
@@ -0,0 +1,84 @@
1
+ #pragma once
2
+
3
+ #include "binder/expression/expression.h"
4
+ #include "binder/expression/node_expression.h"
5
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
6
+ #include "common/enums/extend_direction.h"
7
+ #include "planner/operator/logical_operator.h"
8
+
9
+ namespace lbug {
10
+ namespace planner {
11
+
12
+ struct LogicalCountRelTablePrintInfo final : OPPrintInfo {
13
+ std::string relTableName;
14
+ std::shared_ptr<binder::Expression> countExpr;
15
+
16
+ LogicalCountRelTablePrintInfo(std::string relTableName,
17
+ std::shared_ptr<binder::Expression> countExpr)
18
+ : relTableName{std::move(relTableName)}, countExpr{std::move(countExpr)} {}
19
+
20
+ std::string toString() const override {
21
+ return "Table: " + relTableName + ", Count: " + countExpr->toString();
22
+ }
23
+
24
+ std::unique_ptr<OPPrintInfo> copy() const override {
25
+ return std::make_unique<LogicalCountRelTablePrintInfo>(relTableName, countExpr);
26
+ }
27
+ };
28
+
29
+ /**
30
+ * LogicalCountRelTable is an optimized operator that counts the number of rows
31
+ * in a rel table by scanning through bound nodes and counting edges.
32
+ *
33
+ * This operator is created by CountRelTableOptimizer when it detects:
34
+ * COUNT(*) over a single rel table with no filters
35
+ */
36
+ class LogicalCountRelTable final : public LogicalOperator {
37
+ static constexpr LogicalOperatorType type_ = LogicalOperatorType::COUNT_REL_TABLE;
38
+
39
+ public:
40
+ LogicalCountRelTable(catalog::RelGroupCatalogEntry* relGroupEntry,
41
+ std::vector<common::table_id_t> relTableIDs,
42
+ std::vector<common::table_id_t> boundNodeTableIDs,
43
+ std::shared_ptr<binder::NodeExpression> boundNode, common::ExtendDirection direction,
44
+ std::shared_ptr<binder::Expression> countExpr)
45
+ : LogicalOperator{type_}, relGroupEntry{relGroupEntry}, relTableIDs{std::move(relTableIDs)},
46
+ boundNodeTableIDs{std::move(boundNodeTableIDs)}, boundNode{std::move(boundNode)},
47
+ direction{direction}, countExpr{std::move(countExpr)} {
48
+ cardinality = 1; // Always returns exactly one row
49
+ }
50
+
51
+ void computeFactorizedSchema() override;
52
+ void computeFlatSchema() override;
53
+
54
+ std::string getExpressionsForPrinting() const override { return countExpr->toString(); }
55
+
56
+ catalog::RelGroupCatalogEntry* getRelGroupEntry() const { return relGroupEntry; }
57
+ const std::vector<common::table_id_t>& getRelTableIDs() const { return relTableIDs; }
58
+ const std::vector<common::table_id_t>& getBoundNodeTableIDs() const {
59
+ return boundNodeTableIDs;
60
+ }
61
+ std::shared_ptr<binder::NodeExpression> getBoundNode() const { return boundNode; }
62
+ common::ExtendDirection getDirection() const { return direction; }
63
+ std::shared_ptr<binder::Expression> getCountExpr() const { return countExpr; }
64
+
65
+ std::unique_ptr<OPPrintInfo> getPrintInfo() const override {
66
+ return std::make_unique<LogicalCountRelTablePrintInfo>(relGroupEntry->getName(), countExpr);
67
+ }
68
+
69
+ std::unique_ptr<LogicalOperator> copy() override {
70
+ return std::make_unique<LogicalCountRelTable>(relGroupEntry, relTableIDs, boundNodeTableIDs,
71
+ boundNode, direction, countExpr);
72
+ }
73
+
74
+ private:
75
+ catalog::RelGroupCatalogEntry* relGroupEntry;
76
+ std::vector<common::table_id_t> relTableIDs;
77
+ std::vector<common::table_id_t> boundNodeTableIDs;
78
+ std::shared_ptr<binder::NodeExpression> boundNode;
79
+ common::ExtendDirection direction;
80
+ std::shared_ptr<binder::Expression> countExpr;
81
+ };
82
+
83
+ } // namespace planner
84
+ } // namespace lbug
@@ -22,6 +22,7 @@ enum class PhysicalOperatorType : uint8_t {
22
22
  ATTACH_DATABASE,
23
23
  BATCH_INSERT,
24
24
  COPY_TO,
25
+ COUNT_REL_TABLE,
25
26
  CREATE_MACRO,
26
27
  CREATE_SEQUENCE,
27
28
  CREATE_TABLE,
@@ -0,0 +1,62 @@
1
+ #pragma once
2
+
3
+ #include "common/enums/rel_direction.h"
4
+ #include "processor/operator/physical_operator.h"
5
+ #include "storage/table/node_table.h"
6
+ #include "storage/table/rel_table.h"
7
+
8
+ namespace lbug {
9
+ namespace processor {
10
+
11
+ struct CountRelTablePrintInfo final : OPPrintInfo {
12
+ std::string relTableName;
13
+
14
+ explicit CountRelTablePrintInfo(std::string relTableName)
15
+ : relTableName{std::move(relTableName)} {}
16
+
17
+ std::string toString() const override { return "Table: " + relTableName; }
18
+
19
+ std::unique_ptr<OPPrintInfo> copy() const override {
20
+ return std::make_unique<CountRelTablePrintInfo>(relTableName);
21
+ }
22
+ };
23
+
24
+ /**
25
+ * CountRelTable is a source operator that counts edges in a rel table
26
+ * by scanning through all bound nodes and counting their edges.
27
+ * It creates its own internal vectors for node scanning (not exposed in ResultSet).
28
+ */
29
+ class CountRelTable final : public PhysicalOperator {
30
+ static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::COUNT_REL_TABLE;
31
+
32
+ public:
33
+ CountRelTable(std::vector<storage::NodeTable*> nodeTables,
34
+ std::vector<storage::RelTable*> relTables, common::RelDataDirection direction,
35
+ DataPos countOutputPos, physical_op_id id, std::unique_ptr<OPPrintInfo> printInfo)
36
+ : PhysicalOperator{type_, id, std::move(printInfo)}, nodeTables{std::move(nodeTables)},
37
+ relTables{std::move(relTables)}, direction{direction}, countOutputPos{countOutputPos} {}
38
+
39
+ bool isSource() const override { return true; }
40
+ bool isParallel() const override { return false; }
41
+
42
+ void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override;
43
+
44
+ bool getNextTuplesInternal(ExecutionContext* context) override;
45
+
46
+ std::unique_ptr<PhysicalOperator> copy() override {
47
+ return std::make_unique<CountRelTable>(nodeTables, relTables, direction, countOutputPos, id,
48
+ printInfo->copy());
49
+ }
50
+
51
+ private:
52
+ std::vector<storage::NodeTable*> nodeTables;
53
+ std::vector<storage::RelTable*> relTables;
54
+ common::RelDataDirection direction;
55
+ DataPos countOutputPos;
56
+ common::ValueVector* countVector;
57
+ bool hasExecuted;
58
+ common::row_idx_t totalCount;
59
+ };
60
+
61
+ } // namespace processor
62
+ } // namespace lbug
@@ -90,6 +90,8 @@ public:
90
90
  std::unique_ptr<PhysicalOperator> mapCopyRelFrom(
91
91
  const planner::LogicalOperator* logicalOperator);
92
92
  std::unique_ptr<PhysicalOperator> mapCopyTo(const planner::LogicalOperator* logicalOperator);
93
+ std::unique_ptr<PhysicalOperator> mapCountRelTable(
94
+ const planner::LogicalOperator* logicalOperator);
93
95
  std::unique_ptr<PhysicalOperator> mapCreateMacro(
94
96
  const planner::LogicalOperator* logicalOperator);
95
97
  std::unique_ptr<PhysicalOperator> mapCreateSequence(
@@ -4,6 +4,7 @@ add_library(lbug_optimizer
4
4
  agg_key_dependency_optimizer.cpp
5
5
  cardinality_updater.cpp
6
6
  correlated_subquery_unnest_solver.cpp
7
+ count_rel_table_optimizer.cpp
7
8
  factorization_rewriter.cpp
8
9
  filter_push_down_optimizer.cpp
9
10
  logical_operator_collector.cpp
@@ -0,0 +1,217 @@
1
+ #include "optimizer/count_rel_table_optimizer.h"
2
+
3
+ #include "binder/expression/aggregate_function_expression.h"
4
+ #include "binder/expression/node_expression.h"
5
+ #include "catalog/catalog_entry/node_table_id_pair.h"
6
+ #include "function/aggregate/count_star.h"
7
+ #include "main/client_context.h"
8
+ #include "planner/operator/extend/logical_extend.h"
9
+ #include "planner/operator/logical_aggregate.h"
10
+ #include "planner/operator/logical_projection.h"
11
+ #include "planner/operator/scan/logical_count_rel_table.h"
12
+ #include "planner/operator/scan/logical_scan_node_table.h"
13
+
14
+ using namespace lbug::common;
15
+ using namespace lbug::planner;
16
+ using namespace lbug::binder;
17
+ using namespace lbug::catalog;
18
+
19
+ namespace lbug {
20
+ namespace optimizer {
21
+
22
+ void CountRelTableOptimizer::rewrite(LogicalPlan* plan) {
23
+ visitOperator(plan->getLastOperator());
24
+ }
25
+
26
+ std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitOperator(
27
+ const std::shared_ptr<LogicalOperator>& op) {
28
+ // bottom-up traversal
29
+ for (auto i = 0u; i < op->getNumChildren(); ++i) {
30
+ op->setChild(i, visitOperator(op->getChild(i)));
31
+ }
32
+ auto result = visitOperatorReplaceSwitch(op);
33
+ result->computeFlatSchema();
34
+ return result;
35
+ }
36
+
37
+ bool CountRelTableOptimizer::isSimpleCountStar(LogicalOperator* op) const {
38
+ if (op->getOperatorType() != LogicalOperatorType::AGGREGATE) {
39
+ return false;
40
+ }
41
+ auto& aggregate = op->constCast<LogicalAggregate>();
42
+
43
+ // Must have no keys (i.e., a simple aggregate without GROUP BY)
44
+ if (aggregate.hasKeys()) {
45
+ return false;
46
+ }
47
+
48
+ // Must have exactly one aggregate expression
49
+ auto aggregates = aggregate.getAggregates();
50
+ if (aggregates.size() != 1) {
51
+ return false;
52
+ }
53
+
54
+ // Must be COUNT_STAR
55
+ auto& aggExpr = aggregates[0];
56
+ if (aggExpr->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
57
+ return false;
58
+ }
59
+ auto& aggFuncExpr = aggExpr->constCast<AggregateFunctionExpression>();
60
+ if (aggFuncExpr.getFunction().name != function::CountStarFunction::name) {
61
+ return false;
62
+ }
63
+
64
+ // COUNT_STAR should not be DISTINCT (conceptually it doesn't make sense)
65
+ if (aggFuncExpr.isDistinct()) {
66
+ return false;
67
+ }
68
+
69
+ return true;
70
+ }
71
+
72
+ bool CountRelTableOptimizer::canOptimize(LogicalOperator* aggregate) const {
73
+ // Pattern we're looking for:
74
+ // AGGREGATE (COUNT_STAR, no keys)
75
+ // -> PROJECTION (empty expressions or pass-through)
76
+ // -> EXTEND (single rel table, no properties scanned)
77
+ // -> SCAN_NODE_TABLE (no properties scanned)
78
+ //
79
+ // Note: The projection between aggregate and extend might be empty or
80
+ // just projecting the count expression.
81
+
82
+ auto* current = aggregate->getChild(0).get();
83
+
84
+ // Skip any projections (they should be empty or just for count)
85
+ while (current->getOperatorType() == LogicalOperatorType::PROJECTION) {
86
+ auto& proj = current->constCast<LogicalProjection>();
87
+ // Empty projection is okay, it's just a passthrough
88
+ if (!proj.getExpressionsToProject().empty()) {
89
+ // If projection has expressions, they should all be aggregate expressions
90
+ // (which means they're just passing through the count)
91
+ for (auto& expr : proj.getExpressionsToProject()) {
92
+ if (expr->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
93
+ return false;
94
+ }
95
+ }
96
+ }
97
+ current = current->getChild(0).get();
98
+ }
99
+
100
+ // Now we should have EXTEND
101
+ if (current->getOperatorType() != LogicalOperatorType::EXTEND) {
102
+ return false;
103
+ }
104
+ auto& extend = current->constCast<LogicalExtend>();
105
+
106
+ // Don't optimize for undirected edges (BOTH direction) - the query pattern
107
+ // (a)-[e]-(b) generates a plan that scans both directions, and optimizing
108
+ // this would require special handling to avoid double counting.
109
+ if (extend.getDirection() == ExtendDirection::BOTH) {
110
+ return false;
111
+ }
112
+
113
+ // The rel should be a single table (not multi-labeled)
114
+ auto rel = extend.getRel();
115
+ if (rel->isMultiLabeled()) {
116
+ return false;
117
+ }
118
+
119
+ // Check if we're scanning any properties (we can only optimize when no properties needed)
120
+ if (!extend.getProperties().empty()) {
121
+ return false;
122
+ }
123
+
124
+ // The child of extend should be SCAN_NODE_TABLE
125
+ auto* extendChild = current->getChild(0).get();
126
+ if (extendChild->getOperatorType() != LogicalOperatorType::SCAN_NODE_TABLE) {
127
+ return false;
128
+ }
129
+ auto& scanNode = extendChild->constCast<LogicalScanNodeTable>();
130
+
131
+ // Check if node scan has any properties (we can only optimize when no properties needed)
132
+ if (!scanNode.getProperties().empty()) {
133
+ return false;
134
+ }
135
+
136
+ return true;
137
+ }
138
+
139
+ std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitAggregateReplace(
140
+ std::shared_ptr<LogicalOperator> op) {
141
+ if (!isSimpleCountStar(op.get())) {
142
+ return op;
143
+ }
144
+
145
+ if (!canOptimize(op.get())) {
146
+ return op;
147
+ }
148
+
149
+ // Find the EXTEND operator
150
+ auto* current = op->getChild(0).get();
151
+ while (current->getOperatorType() == LogicalOperatorType::PROJECTION) {
152
+ current = current->getChild(0).get();
153
+ }
154
+
155
+ KU_ASSERT(current->getOperatorType() == LogicalOperatorType::EXTEND);
156
+ auto& extend = current->constCast<LogicalExtend>();
157
+ auto rel = extend.getRel();
158
+ auto boundNode = extend.getBoundNode();
159
+ auto nbrNode = extend.getNbrNode();
160
+
161
+ // Get the rel group entry
162
+ KU_ASSERT(rel->getNumEntries() == 1);
163
+ auto* relGroupEntry = rel->getEntry(0)->ptrCast<RelGroupCatalogEntry>();
164
+
165
+ // Determine the source and destination node table IDs based on extend direction.
166
+ // If extendFromSourceNode() is true, boundNode is the source and nbrNode is the destination.
167
+ // If extendFromSourceNode() is false, boundNode is the destination and nbrNode is the source.
168
+ auto boundNodeTableIDs = boundNode->getTableIDsSet();
169
+ auto nbrNodeTableIDs = nbrNode->getTableIDsSet();
170
+
171
+ // Get only the rel table IDs that match the specific node table ID pairs in the query.
172
+ // A rel table connects a specific (srcTableID, dstTableID) pair.
173
+ std::vector<table_id_t> relTableIDs;
174
+ for (auto& info : relGroupEntry->getRelEntryInfos()) {
175
+ table_id_t srcTableID = info.nodePair.srcTableID;
176
+ table_id_t dstTableID = info.nodePair.dstTableID;
177
+
178
+ bool matches = false;
179
+ if (extend.extendFromSourceNode()) {
180
+ // boundNode is src, nbrNode is dst
181
+ matches =
182
+ boundNodeTableIDs.contains(srcTableID) && nbrNodeTableIDs.contains(dstTableID);
183
+ } else {
184
+ // boundNode is dst, nbrNode is src
185
+ matches =
186
+ boundNodeTableIDs.contains(dstTableID) && nbrNodeTableIDs.contains(srcTableID);
187
+ }
188
+
189
+ if (matches) {
190
+ relTableIDs.push_back(info.oid);
191
+ }
192
+ }
193
+
194
+ // If no matching rel tables, don't optimize (shouldn't happen for valid queries)
195
+ if (relTableIDs.empty()) {
196
+ return op;
197
+ }
198
+
199
+ // Get the count expression from the original aggregate
200
+ auto& aggregate = op->constCast<LogicalAggregate>();
201
+ auto countExpr = aggregate.getAggregates()[0];
202
+
203
+ // Get the bound node table IDs as a vector
204
+ std::vector<table_id_t> boundNodeTableIDsVec(boundNodeTableIDs.begin(),
205
+ boundNodeTableIDs.end());
206
+
207
+ // Create the new COUNT_REL_TABLE operator with all necessary information for scanning
208
+ auto countRelTable =
209
+ std::make_shared<LogicalCountRelTable>(relGroupEntry, std::move(relTableIDs),
210
+ std::move(boundNodeTableIDsVec), boundNode, extend.getDirection(), countExpr);
211
+ countRelTable->computeFlatSchema();
212
+
213
+ return countRelTable;
214
+ }
215
+
216
+ } // namespace optimizer
217
+ } // namespace lbug
@@ -19,6 +19,9 @@ void LogicalOperatorVisitor::visitOperatorSwitch(LogicalOperator* op) {
19
19
  case LogicalOperatorType::COPY_TO: {
20
20
  visitCopyTo(op);
21
21
  } break;
22
+ case LogicalOperatorType::COUNT_REL_TABLE: {
23
+ visitCountRelTable(op);
24
+ } break;
22
25
  case LogicalOperatorType::DELETE: {
23
26
  visitDelete(op);
24
27
  } break;
@@ -108,6 +111,9 @@ std::shared_ptr<LogicalOperator> LogicalOperatorVisitor::visitOperatorReplaceSwi
108
111
  case LogicalOperatorType::COPY_TO: {
109
112
  return visitCopyToReplace(op);
110
113
  }
114
+ case LogicalOperatorType::COUNT_REL_TABLE: {
115
+ return visitCountRelTableReplace(op);
116
+ }
111
117
  case LogicalOperatorType::DELETE: {
112
118
  return visitDeleteReplace(op);
113
119
  }
@@ -5,6 +5,7 @@
5
5
  #include "optimizer/agg_key_dependency_optimizer.h"
6
6
  #include "optimizer/cardinality_updater.h"
7
7
  #include "optimizer/correlated_subquery_unnest_solver.h"
8
+ #include "optimizer/count_rel_table_optimizer.h"
8
9
  #include "optimizer/factorization_rewriter.h"
9
10
  #include "optimizer/filter_push_down_optimizer.h"
10
11
  #include "optimizer/limit_push_down_optimizer.h"
@@ -32,6 +33,11 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
32
33
  auto removeUnnecessaryJoinOptimizer = RemoveUnnecessaryJoinOptimizer();
33
34
  removeUnnecessaryJoinOptimizer.rewrite(plan);
34
35
 
36
+ // CountRelTableOptimizer should be applied early before other optimizations
37
+ // that might change the plan structure.
38
+ auto countRelTableOptimizer = CountRelTableOptimizer(context);
39
+ countRelTableOptimizer.rewrite(plan);
40
+
35
41
  auto filterPushDownOptimizer = FilterPushDownOptimizer(context);
36
42
  filterPushDownOptimizer.rewrite(plan);
37
43
 
@@ -22,6 +22,8 @@ std::string LogicalOperatorUtils::logicalOperatorTypeToString(LogicalOperatorTyp
22
22
  return "COPY_FROM";
23
23
  case LogicalOperatorType::COPY_TO:
24
24
  return "COPY_TO";
25
+ case LogicalOperatorType::COUNT_REL_TABLE:
26
+ return "COUNT_REL_TABLE";
25
27
  case LogicalOperatorType::CREATE_MACRO:
26
28
  return "CREATE_MACRO";
27
29
  case LogicalOperatorType::CREATE_SEQUENCE:
@@ -1,5 +1,6 @@
1
1
  add_library(lbug_planner_scan
2
2
  OBJECT
3
+ logical_count_rel_table.cpp
3
4
  logical_expressions_scan.cpp
4
5
  logical_index_look_up.cpp
5
6
  logical_scan_node_table.cpp)