lbug 0.12.3-dev.3 → 0.12.3-dev.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/README.md +2 -6
  2. package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
  3. package/lbug-source/CMakeLists.txt +15 -6
  4. package/lbug-source/Makefile +15 -4
  5. package/lbug-source/README.md +2 -6
  6. package/lbug-source/benchmark/serializer.py +24 -3
  7. package/lbug-source/dataset/demo-db/csv/copy.cypher +4 -4
  8. package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
  9. package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
  10. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
  11. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
  12. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
  13. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
  14. package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
  15. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
  16. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
  17. package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
  18. package/lbug-source/dataset/demo-db/parquet/copy.cypher +4 -4
  19. package/lbug-source/extension/duckdb/src/catalog/duckdb_catalog.cpp +1 -1
  20. package/lbug-source/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp +43 -4
  21. package/lbug-source/extension/duckdb/src/connector/duckdb_result_converter.cpp +6 -0
  22. package/lbug-source/extension/duckdb/src/connector/duckdb_secret_manager.cpp +1 -1
  23. package/lbug-source/extension/duckdb/src/function/duckdb_scan.cpp +49 -4
  24. package/lbug-source/extension/duckdb/src/include/catalog/duckdb_table_catalog_entry.h +6 -1
  25. package/lbug-source/extension/duckdb/src/include/function/duckdb_scan.h +2 -0
  26. package/lbug-source/extension/duckdb/test/test_files/duckdb.test +28 -0
  27. package/lbug-source/extension/extension_config.cmake +3 -2
  28. package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
  29. package/lbug-source/scripts/antlr4/Cypher.g4 +4 -4
  30. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  31. package/lbug-source/scripts/generate_binary_demo.sh +1 -1
  32. package/lbug-source/src/antlr4/Cypher.g4 +4 -4
  33. package/lbug-source/src/binder/bind/bind_ddl.cpp +97 -15
  34. package/lbug-source/src/binder/bind/bind_graph_pattern.cpp +30 -3
  35. package/lbug-source/src/catalog/catalog.cpp +6 -4
  36. package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
  37. package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +46 -7
  38. package/lbug-source/src/catalog/catalog_set.cpp +1 -0
  39. package/lbug-source/src/function/function_collection.cpp +2 -1
  40. package/lbug-source/src/function/table/CMakeLists.txt +1 -0
  41. package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
  42. package/lbug-source/src/function/table/show_connection.cpp +6 -1
  43. package/lbug-source/src/function/table/show_tables.cpp +10 -2
  44. package/lbug-source/src/function/table/table_function.cpp +11 -2
  45. package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +23 -6
  46. package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
  47. package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +21 -2
  48. package/lbug-source/src/include/catalog/catalog_entry/table_catalog_entry.h +7 -0
  49. package/lbug-source/src/include/common/constants.h +1 -0
  50. package/lbug-source/src/include/common/string_format.h +2 -2
  51. package/lbug-source/src/include/common/types/types.h +1 -0
  52. package/lbug-source/src/include/function/table/bind_data.h +12 -1
  53. package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
  54. package/lbug-source/src/include/function/table/table_function.h +2 -0
  55. package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
  56. package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
  57. package/lbug-source/src/include/optimizer/order_by_push_down_optimizer.h +21 -0
  58. package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
  59. package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
  60. package/lbug-source/src/include/planner/operator/logical_table_function_call.h +14 -1
  61. package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
  62. package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
  63. package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
  64. package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
  65. package/lbug-source/src/include/processor/plan_mapper.h +2 -0
  66. package/lbug-source/src/include/storage/storage_manager.h +1 -0
  67. package/lbug-source/src/include/storage/storage_version_info.h +1 -7
  68. package/lbug-source/src/include/storage/table/foreign_rel_table.h +56 -0
  69. package/lbug-source/src/include/storage/table/node_table.h +6 -1
  70. package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
  71. package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
  72. package/lbug-source/src/include/storage/table/rel_table.h +2 -2
  73. package/lbug-source/src/include/transaction/transaction.h +2 -0
  74. package/lbug-source/src/main/query_result/materialized_query_result.cpp +2 -2
  75. package/lbug-source/src/optimizer/CMakeLists.txt +3 -1
  76. package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
  77. package/lbug-source/src/optimizer/limit_push_down_optimizer.cpp +12 -0
  78. package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
  79. package/lbug-source/src/optimizer/optimizer.cpp +10 -0
  80. package/lbug-source/src/optimizer/order_by_push_down_optimizer.cpp +123 -0
  81. package/lbug-source/src/optimizer/projection_push_down_optimizer.cpp +5 -1
  82. package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
  83. package/lbug-source/src/parser/transform/transform_expression.cpp +1 -1
  84. package/lbug-source/src/parser/transform/transform_graph_pattern.cpp +6 -1
  85. package/lbug-source/src/parser/transformer.cpp +7 -1
  86. package/lbug-source/src/planner/join_order/cardinality_estimator.cpp +11 -2
  87. package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
  88. package/lbug-source/src/planner/operator/logical_table_function_call.cpp +4 -0
  89. package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
  90. package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
  91. package/lbug-source/src/planner/plan/plan_join_order.cpp +16 -1
  92. package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
  93. package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
  94. package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
  95. package/lbug-source/src/processor/operator/index_lookup.cpp +31 -23
  96. package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
  97. package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
  98. package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
  99. package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
  100. package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
  101. package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
  102. package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +18 -2
  103. package/lbug-source/src/storage/storage_manager.cpp +43 -6
  104. package/lbug-source/src/storage/table/CMakeLists.txt +3 -0
  105. package/lbug-source/src/storage/table/foreign_rel_table.cpp +63 -0
  106. package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
  107. package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
  108. package/lbug-source/test/api/api_test.cpp +18 -0
  109. package/lbug-source/test/common/string_format.cpp +9 -1
  110. package/lbug-source/test/copy/copy_test.cpp +4 -4
  111. package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
  112. package/lbug-source/test/include/test_runner/test_group.h +11 -1
  113. package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
  114. package/lbug-source/test/runner/e2e_test.cpp +7 -1
  115. package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
  116. package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
  117. package/lbug-source/test/test_helper/test_helper.cpp +33 -1
  118. package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
  119. package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
  120. package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
  121. package/lbug-source/test/test_runner/test_parser.cpp +3 -0
  122. package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
  123. package/lbug-source/test/transaction/transaction_test.cpp +19 -15
  124. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2805 -2708
  125. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +7 -3
  126. package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
  127. package/lbug-source/tools/nodejs_api/package.json +4 -2
  128. package/lbug-source/tools/shell/embedded_shell.cpp +78 -3
  129. package/lbug-source/tools/shell/include/embedded_shell.h +2 -0
  130. package/lbug-source/tools/shell/linenoise.cpp +3 -3
  131. package/lbug-source/tools/shell/test/test_helper.py +1 -1
  132. package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
  133. package/lbug-source/tools/shell/test/test_shell_commands.py +19 -0
  134. package/package.json +9 -2
  135. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  136. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  137. package/prebuilt/lbugjs-linux-x64.node +0 -0
  138. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -4,6 +4,7 @@ add_library(lbug_optimizer
4
4
  agg_key_dependency_optimizer.cpp
5
5
  cardinality_updater.cpp
6
6
  correlated_subquery_unnest_solver.cpp
7
+ count_rel_table_optimizer.cpp
7
8
  factorization_rewriter.cpp
8
9
  filter_push_down_optimizer.cpp
9
10
  logical_operator_collector.cpp
@@ -14,7 +15,8 @@ add_library(lbug_optimizer
14
15
  remove_factorization_rewriter.cpp
15
16
  remove_unnecessary_join_optimizer.cpp
16
17
  top_k_optimizer.cpp
17
- limit_push_down_optimizer.cpp)
18
+ limit_push_down_optimizer.cpp
19
+ order_by_push_down_optimizer.cpp)
18
20
 
19
21
  set(ALL_OBJECT_FILES
20
22
  ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:lbug_optimizer>
@@ -0,0 +1,217 @@
1
+ #include "optimizer/count_rel_table_optimizer.h"
2
+
3
+ #include "binder/expression/aggregate_function_expression.h"
4
+ #include "binder/expression/node_expression.h"
5
+ #include "catalog/catalog_entry/node_table_id_pair.h"
6
+ #include "function/aggregate/count_star.h"
7
+ #include "main/client_context.h"
8
+ #include "planner/operator/extend/logical_extend.h"
9
+ #include "planner/operator/logical_aggregate.h"
10
+ #include "planner/operator/logical_projection.h"
11
+ #include "planner/operator/scan/logical_count_rel_table.h"
12
+ #include "planner/operator/scan/logical_scan_node_table.h"
13
+
14
+ using namespace lbug::common;
15
+ using namespace lbug::planner;
16
+ using namespace lbug::binder;
17
+ using namespace lbug::catalog;
18
+
19
+ namespace lbug {
20
+ namespace optimizer {
21
+
22
+ void CountRelTableOptimizer::rewrite(LogicalPlan* plan) {
23
+ visitOperator(plan->getLastOperator());
24
+ }
25
+
26
+ std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitOperator(
27
+ const std::shared_ptr<LogicalOperator>& op) {
28
+ // bottom-up traversal
29
+ for (auto i = 0u; i < op->getNumChildren(); ++i) {
30
+ op->setChild(i, visitOperator(op->getChild(i)));
31
+ }
32
+ auto result = visitOperatorReplaceSwitch(op);
33
+ result->computeFlatSchema();
34
+ return result;
35
+ }
36
+
37
+ bool CountRelTableOptimizer::isSimpleCountStar(LogicalOperator* op) const {
38
+ if (op->getOperatorType() != LogicalOperatorType::AGGREGATE) {
39
+ return false;
40
+ }
41
+ auto& aggregate = op->constCast<LogicalAggregate>();
42
+
43
+ // Must have no keys (i.e., a simple aggregate without GROUP BY)
44
+ if (aggregate.hasKeys()) {
45
+ return false;
46
+ }
47
+
48
+ // Must have exactly one aggregate expression
49
+ auto aggregates = aggregate.getAggregates();
50
+ if (aggregates.size() != 1) {
51
+ return false;
52
+ }
53
+
54
+ // Must be COUNT_STAR
55
+ auto& aggExpr = aggregates[0];
56
+ if (aggExpr->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
57
+ return false;
58
+ }
59
+ auto& aggFuncExpr = aggExpr->constCast<AggregateFunctionExpression>();
60
+ if (aggFuncExpr.getFunction().name != function::CountStarFunction::name) {
61
+ return false;
62
+ }
63
+
64
+ // COUNT_STAR should not be DISTINCT (conceptually it doesn't make sense)
65
+ if (aggFuncExpr.isDistinct()) {
66
+ return false;
67
+ }
68
+
69
+ return true;
70
+ }
71
+
72
+ bool CountRelTableOptimizer::canOptimize(LogicalOperator* aggregate) const {
73
+ // Pattern we're looking for:
74
+ // AGGREGATE (COUNT_STAR, no keys)
75
+ // -> PROJECTION (empty expressions or pass-through)
76
+ // -> EXTEND (single rel table, no properties scanned)
77
+ // -> SCAN_NODE_TABLE (no properties scanned)
78
+ //
79
+ // Note: The projection between aggregate and extend might be empty or
80
+ // just projecting the count expression.
81
+
82
+ auto* current = aggregate->getChild(0).get();
83
+
84
+ // Skip any projections (they should be empty or just for count)
85
+ while (current->getOperatorType() == LogicalOperatorType::PROJECTION) {
86
+ auto& proj = current->constCast<LogicalProjection>();
87
+ // Empty projection is okay, it's just a passthrough
88
+ if (!proj.getExpressionsToProject().empty()) {
89
+ // If projection has expressions, they should all be aggregate expressions
90
+ // (which means they're just passing through the count)
91
+ for (auto& expr : proj.getExpressionsToProject()) {
92
+ if (expr->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
93
+ return false;
94
+ }
95
+ }
96
+ }
97
+ current = current->getChild(0).get();
98
+ }
99
+
100
+ // Now we should have EXTEND
101
+ if (current->getOperatorType() != LogicalOperatorType::EXTEND) {
102
+ return false;
103
+ }
104
+ auto& extend = current->constCast<LogicalExtend>();
105
+
106
+ // Don't optimize for undirected edges (BOTH direction) - the query pattern
107
+ // (a)-[e]-(b) generates a plan that scans both directions, and optimizing
108
+ // this would require special handling to avoid double counting.
109
+ if (extend.getDirection() == ExtendDirection::BOTH) {
110
+ return false;
111
+ }
112
+
113
+ // The rel should be a single table (not multi-labeled)
114
+ auto rel = extend.getRel();
115
+ if (rel->isMultiLabeled()) {
116
+ return false;
117
+ }
118
+
119
+ // Check if we're scanning any properties (we can only optimize when no properties needed)
120
+ if (!extend.getProperties().empty()) {
121
+ return false;
122
+ }
123
+
124
+ // The child of extend should be SCAN_NODE_TABLE
125
+ auto* extendChild = current->getChild(0).get();
126
+ if (extendChild->getOperatorType() != LogicalOperatorType::SCAN_NODE_TABLE) {
127
+ return false;
128
+ }
129
+ auto& scanNode = extendChild->constCast<LogicalScanNodeTable>();
130
+
131
+ // Check if node scan has any properties (we can only optimize when no properties needed)
132
+ if (!scanNode.getProperties().empty()) {
133
+ return false;
134
+ }
135
+
136
+ return true;
137
+ }
138
+
139
+ std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitAggregateReplace(
140
+ std::shared_ptr<LogicalOperator> op) {
141
+ if (!isSimpleCountStar(op.get())) {
142
+ return op;
143
+ }
144
+
145
+ if (!canOptimize(op.get())) {
146
+ return op;
147
+ }
148
+
149
+ // Find the EXTEND operator
150
+ auto* current = op->getChild(0).get();
151
+ while (current->getOperatorType() == LogicalOperatorType::PROJECTION) {
152
+ current = current->getChild(0).get();
153
+ }
154
+
155
+ KU_ASSERT(current->getOperatorType() == LogicalOperatorType::EXTEND);
156
+ auto& extend = current->constCast<LogicalExtend>();
157
+ auto rel = extend.getRel();
158
+ auto boundNode = extend.getBoundNode();
159
+ auto nbrNode = extend.getNbrNode();
160
+
161
+ // Get the rel group entry
162
+ KU_ASSERT(rel->getNumEntries() == 1);
163
+ auto* relGroupEntry = rel->getEntry(0)->ptrCast<RelGroupCatalogEntry>();
164
+
165
+ // Determine the source and destination node table IDs based on extend direction.
166
+ // If extendFromSource is true, then boundNode is the source and nbrNode is the destination.
167
+ // If extendFromSource is false, then boundNode is the destination and nbrNode is the source.
168
+ auto boundNodeTableIDs = boundNode->getTableIDsSet();
169
+ auto nbrNodeTableIDs = nbrNode->getTableIDsSet();
170
+
171
+ // Get only the rel table IDs that match the specific node table ID pairs in the query.
172
+ // A rel table connects a specific (srcTableID, dstTableID) pair.
173
+ std::vector<table_id_t> relTableIDs;
174
+ for (auto& info : relGroupEntry->getRelEntryInfos()) {
175
+ table_id_t srcTableID = info.nodePair.srcTableID;
176
+ table_id_t dstTableID = info.nodePair.dstTableID;
177
+
178
+ bool matches = false;
179
+ if (extend.extendFromSourceNode()) {
180
+ // boundNode is src, nbrNode is dst
181
+ matches =
182
+ boundNodeTableIDs.contains(srcTableID) && nbrNodeTableIDs.contains(dstTableID);
183
+ } else {
184
+ // boundNode is dst, nbrNode is src
185
+ matches =
186
+ boundNodeTableIDs.contains(dstTableID) && nbrNodeTableIDs.contains(srcTableID);
187
+ }
188
+
189
+ if (matches) {
190
+ relTableIDs.push_back(info.oid);
191
+ }
192
+ }
193
+
194
+ // If no matching rel tables, don't optimize (shouldn't happen for valid queries)
195
+ if (relTableIDs.empty()) {
196
+ return op;
197
+ }
198
+
199
+ // Get the count expression from the original aggregate
200
+ auto& aggregate = op->constCast<LogicalAggregate>();
201
+ auto countExpr = aggregate.getAggregates()[0];
202
+
203
+ // Get the bound node table IDs as a vector
204
+ std::vector<table_id_t> boundNodeTableIDsVec(boundNodeTableIDs.begin(),
205
+ boundNodeTableIDs.end());
206
+
207
+ // Create the new COUNT_REL_TABLE operator with all necessary information for scanning
208
+ auto countRelTable =
209
+ std::make_shared<LogicalCountRelTable>(relGroupEntry, std::move(relTableIDs),
210
+ std::move(boundNodeTableIDsVec), boundNode, extend.getDirection(), countExpr);
211
+ countRelTable->computeFlatSchema();
212
+
213
+ return countRelTable;
214
+ }
215
+
216
+ } // namespace optimizer
217
+ } // namespace lbug
@@ -6,6 +6,7 @@
6
6
  #include "planner/operator/logical_distinct.h"
7
7
  #include "planner/operator/logical_hash_join.h"
8
8
  #include "planner/operator/logical_limit.h"
9
+ #include "planner/operator/logical_table_function_call.h"
9
10
 
10
11
  using namespace lbug::binder;
11
12
  using namespace lbug::common;
@@ -34,10 +35,21 @@ void LimitPushDownOptimizer::visitOperator(planner::LogicalOperator* op) {
34
35
  case LogicalOperatorType::MULTIPLICITY_REDUCER:
35
36
  case LogicalOperatorType::EXPLAIN:
36
37
  case LogicalOperatorType::ACCUMULATE:
38
+ case LogicalOperatorType::FILTER:
37
39
  case LogicalOperatorType::PROJECTION: {
38
40
  visitOperator(op->getChild(0).get());
39
41
  return;
40
42
  }
43
+ case LogicalOperatorType::TABLE_FUNCTION_CALL: {
44
+ if (limitNumber == INVALID_LIMIT && skipNumber == 0) {
45
+ return;
46
+ }
47
+ auto& tableFuncCall = op->cast<LogicalTableFunctionCall>();
48
+ if (tableFuncCall.getTableFunc().supportsPushDownFunc()) {
49
+ tableFuncCall.setLimitNum(skipNumber + limitNumber);
50
+ }
51
+ return;
52
+ }
41
53
  case LogicalOperatorType::DISTINCT: {
42
54
  if (limitNumber == INVALID_LIMIT && skipNumber == 0) {
43
55
  return;
@@ -19,6 +19,9 @@ void LogicalOperatorVisitor::visitOperatorSwitch(LogicalOperator* op) {
19
19
  case LogicalOperatorType::COPY_TO: {
20
20
  visitCopyTo(op);
21
21
  } break;
22
+ case LogicalOperatorType::COUNT_REL_TABLE: {
23
+ visitCountRelTable(op);
24
+ } break;
22
25
  case LogicalOperatorType::DELETE: {
23
26
  visitDelete(op);
24
27
  } break;
@@ -108,6 +111,9 @@ std::shared_ptr<LogicalOperator> LogicalOperatorVisitor::visitOperatorReplaceSwi
108
111
  case LogicalOperatorType::COPY_TO: {
109
112
  return visitCopyToReplace(op);
110
113
  }
114
+ case LogicalOperatorType::COUNT_REL_TABLE: {
115
+ return visitCountRelTableReplace(op);
116
+ }
111
117
  case LogicalOperatorType::DELETE: {
112
118
  return visitDeleteReplace(op);
113
119
  }
@@ -5,9 +5,11 @@
5
5
  #include "optimizer/agg_key_dependency_optimizer.h"
6
6
  #include "optimizer/cardinality_updater.h"
7
7
  #include "optimizer/correlated_subquery_unnest_solver.h"
8
+ #include "optimizer/count_rel_table_optimizer.h"
8
9
  #include "optimizer/factorization_rewriter.h"
9
10
  #include "optimizer/filter_push_down_optimizer.h"
10
11
  #include "optimizer/limit_push_down_optimizer.h"
12
+ #include "optimizer/order_by_push_down_optimizer.h"
11
13
  #include "optimizer/projection_push_down_optimizer.h"
12
14
  #include "optimizer/remove_factorization_rewriter.h"
13
15
  #include "optimizer/remove_unnecessary_join_optimizer.h"
@@ -32,6 +34,11 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
32
34
  auto removeUnnecessaryJoinOptimizer = RemoveUnnecessaryJoinOptimizer();
33
35
  removeUnnecessaryJoinOptimizer.rewrite(plan);
34
36
 
37
+ // CountRelTableOptimizer should be applied early before other optimizations
38
+ // that might change the plan structure.
39
+ auto countRelTableOptimizer = CountRelTableOptimizer(context);
40
+ countRelTableOptimizer.rewrite(plan);
41
+
35
42
  auto filterPushDownOptimizer = FilterPushDownOptimizer(context);
36
43
  filterPushDownOptimizer.rewrite(plan);
37
44
 
@@ -39,6 +46,9 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
39
46
  ProjectionPushDownOptimizer(context->getClientConfig()->recursivePatternSemantic);
40
47
  projectionPushDownOptimizer.rewrite(plan);
41
48
 
49
+ auto orderByPushDownOptimizer = OrderByPushDownOptimizer();
50
+ orderByPushDownOptimizer.rewrite(plan);
51
+
42
52
  auto limitPushDownOptimizer = LimitPushDownOptimizer();
43
53
  limitPushDownOptimizer.rewrite(plan);
44
54
 
@@ -0,0 +1,123 @@
1
+ #include "optimizer/order_by_push_down_optimizer.h"
2
+
3
+ #include "binder/expression/expression.h"
4
+ #include "binder/expression/expression_util.h"
5
+ #include "binder/expression/property_expression.h"
6
+ #include "binder/expression/variable_expression.h"
7
+ #include "common/exception/runtime.h"
8
+ #include "planner/operator/logical_order_by.h"
9
+ #include "planner/operator/logical_table_function_call.h"
10
+
11
+ using namespace lbug::binder;
12
+ using namespace lbug::common;
13
+ using namespace lbug::planner;
14
+
15
+ namespace lbug {
16
+ namespace optimizer {
17
+
18
+ // This ensures that ORDER BY can be pushed down only through operators that support it.
19
+ // It should not be pushed down for things like RECURSIVE_EXTEND etc.
20
+ bool isPushDownSupported(LogicalOperator* op) {
21
+ switch (op->getOperatorType()) {
22
+ case LogicalOperatorType::TABLE_FUNCTION_CALL: {
23
+ return op->cast<LogicalTableFunctionCall>().getTableFunc().supportsPushDownFunc();
24
+ }
25
+ case LogicalOperatorType::MULTIPLICITY_REDUCER:
26
+ case LogicalOperatorType::EXPLAIN:
27
+ case LogicalOperatorType::ACCUMULATE:
28
+ case LogicalOperatorType::FILTER:
29
+ case LogicalOperatorType::PROJECTION:
30
+ case LogicalOperatorType::LIMIT: {
31
+ if (op->getNumChildren() == 0) {
32
+ return false;
33
+ }
34
+ return isPushDownSupported(op->getChild(0).get());
35
+ }
36
+ default:
37
+ return false;
38
+ }
39
+ }
40
+
41
+ void OrderByPushDownOptimizer::rewrite(LogicalPlan* plan) {
42
+ plan->setLastOperator(visitOperator(plan->getLastOperator()));
43
+ }
44
+
45
+ std::shared_ptr<LogicalOperator> OrderByPushDownOptimizer::visitOperator(
46
+ std::shared_ptr<LogicalOperator> op, std::string currentOrderBy) {
47
+ switch (op->getOperatorType()) {
48
+ case LogicalOperatorType::ORDER_BY: {
49
+ auto& orderBy = op->constCast<LogicalOrderBy>();
50
+ std::string newOrderBy = currentOrderBy;
51
+ if (!currentOrderBy.empty()) {
52
+ newOrderBy += ", ";
53
+ }
54
+ newOrderBy +=
55
+ buildOrderByString(orderBy.getExpressionsToOrderBy(), orderBy.getIsAscOrders());
56
+ auto newChild = visitOperator(orderBy.getChild(0), newOrderBy);
57
+ if (isPushDownSupported(newChild.get())) {
58
+ return newChild;
59
+ }
60
+ return std::make_shared<LogicalOrderBy>(orderBy.getExpressionsToOrderBy(),
61
+ orderBy.getIsAscOrders(), newChild);
62
+ }
63
+ case LogicalOperatorType::MULTIPLICITY_REDUCER:
64
+ case LogicalOperatorType::EXPLAIN:
65
+ case LogicalOperatorType::ACCUMULATE:
66
+ case LogicalOperatorType::FILTER:
67
+ case LogicalOperatorType::PROJECTION:
68
+ case LogicalOperatorType::LIMIT: {
69
+ for (auto i = 0u; i < op->getNumChildren(); ++i) {
70
+ op->setChild(i, visitOperator(op->getChild(i), currentOrderBy));
71
+ }
72
+ return op;
73
+ }
74
+ case LogicalOperatorType::TABLE_FUNCTION_CALL: {
75
+ if (!currentOrderBy.empty()) {
76
+ auto& tableFunc = op->cast<LogicalTableFunctionCall>();
77
+ if (tableFunc.getTableFunc().supportsPushDownFunc()) {
78
+ tableFunc.setOrderBy(currentOrderBy);
79
+ }
80
+ }
81
+ return op;
82
+ }
83
+ default:
84
+ return op;
85
+ }
86
+ }
87
+
88
+ std::string OrderByPushDownOptimizer::buildOrderByString(
89
+ const binder::expression_vector& expressions, const std::vector<bool>& isAscOrders) {
90
+ if (expressions.empty()) {
91
+ return "";
92
+ }
93
+ std::string result = " ORDER BY ";
94
+ bool first = true;
95
+ for (size_t i = 0; i < expressions.size(); ++i) {
96
+ auto& expr = expressions[i];
97
+ std::string colName;
98
+ if (expr->expressionType == common::ExpressionType::VARIABLE) {
99
+ auto& var = expr->constCast<binder::VariableExpression>();
100
+ colName = var.getVariableName();
101
+ } else if (expr->expressionType == common::ExpressionType::PROPERTY) {
102
+ auto& prop = expr->constCast<binder::PropertyExpression>();
103
+ colName = prop.getPropertyName();
104
+ } else {
105
+ // Skip expressions that cannot be pushed down
106
+ continue;
107
+ }
108
+ if (!first) {
109
+ result += ", ";
110
+ }
111
+ result += colName;
112
+ result += isAscOrders[i] ? " ASC" : " DESC";
113
+ first = false;
114
+ }
115
+ if (first) {
116
+ // No expressions could be pushed down
117
+ return "";
118
+ }
119
+ return result;
120
+ }
121
+
122
+ } // namespace optimizer
123
+ } // namespace lbug
@@ -261,7 +261,11 @@ void ProjectionPushDownOptimizer::visitTableFunctionCall(LogicalOperator* op) {
261
261
  auto& tableFunctionCall = op->cast<LogicalTableFunctionCall>();
262
262
  std::vector<bool> columnSkips;
263
263
  for (auto& column : tableFunctionCall.getBindData()->columns) {
264
- columnSkips.push_back(!variablesInUse.contains(column));
264
+ // Check both variablesInUse and propertiesInUse since foreign table columns
265
+ // may be referenced as properties in the query (e.g., a.id) but represented
266
+ // as variables in the table function bind data
267
+ columnSkips.push_back(
268
+ !variablesInUse.contains(column) && !propertiesInUse.contains(column));
265
269
  }
266
270
  tableFunctionCall.setColumnSkips(std::move(columnSkips));
267
271
  }
@@ -81,7 +81,12 @@ std::unique_ptr<Statement> Transformer::transformCreateNodeTable(
81
81
  } else {
82
82
  createTableInfo.propertyDefinitions =
83
83
  transformPropertyDefinitions(*ctx.kU_PropertyDefinitions());
84
- createTableInfo.extraInfo = std::make_unique<ExtraCreateNodeTableInfo>(getPKName(ctx));
84
+ options_t options;
85
+ if (ctx.kU_Options()) {
86
+ options = transformOptions(*ctx.kU_Options());
87
+ }
88
+ createTableInfo.extraInfo =
89
+ std::make_unique<ExtraCreateNodeTableInfo>(getPKName(ctx), std::move(options));
85
90
  return std::make_unique<CreateTable>(std::move(createTableInfo));
86
91
  }
87
92
  }
@@ -663,7 +663,7 @@ std::unique_ptr<ParsedExpression> Transformer::transformProperty(
663
663
  }
664
664
 
665
665
  std::string Transformer::transformPropertyKeyName(CypherParser::OC_PropertyKeyNameContext& ctx) {
666
- return transformSchemaName(*ctx.oC_SchemaName());
666
+ return transformSymbolicName(*ctx.oC_SymbolicName());
667
667
  }
668
668
 
669
669
  std::unique_ptr<ParsedExpression> Transformer::transformIntegerLiteral(
@@ -202,7 +202,12 @@ std::vector<std::string> Transformer::transformNodeLabels(CypherParser::OC_NodeL
202
202
  }
203
203
 
204
204
  std::string Transformer::transformLabelName(CypherParser::OC_LabelNameContext& ctx) {
205
- return transformSchemaName(*ctx.oC_SchemaName());
205
+ auto schemaNames = ctx.oC_SchemaName();
206
+ if (schemaNames.size() == 1) {
207
+ return transformSchemaName(*schemaNames[0]);
208
+ }
209
+ // Qualified name: db.table
210
+ return transformSchemaName(*schemaNames[0]) + "." + transformSchemaName(*schemaNames[1]);
206
211
  }
207
212
 
208
213
  std::string Transformer::transformRelTypeName(CypherParser::OC_RelTypeNameContext& ctx) {
@@ -88,7 +88,13 @@ std::unique_ptr<ParsedExpression> Transformer::transformWhere(CypherParser::OC_W
88
88
  }
89
89
 
90
90
  std::string Transformer::transformSchemaName(CypherParser::OC_SchemaNameContext& ctx) {
91
- return transformSymbolicName(*ctx.oC_SymbolicName());
91
+ auto symbolicNames = ctx.oC_SymbolicName();
92
+ if (symbolicNames.size() == 1) {
93
+ return transformSymbolicName(*symbolicNames[0]);
94
+ }
95
+ // Qualified name: db.table
96
+ return transformSymbolicName(*symbolicNames[0]) + "." +
97
+ transformSymbolicName(*symbolicNames[1]);
92
98
  }
93
99
 
94
100
  std::string Transformer::transformStringLiteral(antlr4::tree::TerminalNode& stringLiteral) {
@@ -1,6 +1,7 @@
1
1
  #include "planner/join_order/cardinality_estimator.h"
2
2
 
3
3
  #include "binder/expression/property_expression.h"
4
+ #include "catalog/catalog_entry/table_catalog_entry.h"
4
5
  #include "main/client_context.h"
5
6
  #include "planner/join_order/join_order_util.h"
6
7
  #include "planner/operator/logical_aggregate.h"
@@ -39,7 +40,12 @@ void CardinalityEstimator::init(const NodeExpression& node) {
39
40
  cardinality_t numNodes = 0u;
40
41
  auto storageManager = storage::StorageManager::Get(*context);
41
42
  auto transaction = transaction::Transaction::Get(*context);
42
- for (auto tableID : node.getTableIDs()) {
43
+ for (auto entry : node.getEntries()) {
44
+ // Skip foreign tables - they don't have storage in the local database
45
+ if (entry->getType() == catalog::CatalogEntryType::FOREIGN_TABLE_ENTRY) {
46
+ continue;
47
+ }
48
+ auto tableID = entry->getTableID();
43
49
  auto stats =
44
50
  storageManager->getTable(tableID)->cast<storage::NodeTable>().getStats(transaction);
45
51
  numNodes += stats.getTableCard();
@@ -195,7 +201,10 @@ uint64_t CardinalityEstimator::getNumNodes(const Transaction*,
195
201
  const std::vector<table_id_t>& tableIDs) const {
196
202
  cardinality_t numNodes = 0u;
197
203
  for (auto& tableID : tableIDs) {
198
- KU_ASSERT(nodeTableStats.contains(tableID));
204
+ // Skip foreign tables - they won't be in nodeTableStats
205
+ if (!nodeTableStats.contains(tableID)) {
206
+ continue;
207
+ }
199
208
  numNodes += nodeTableStats.at(tableID).getTableCard();
200
209
  }
201
210
  return atLeastOne(numNodes);
@@ -22,6 +22,8 @@ std::string LogicalOperatorUtils::logicalOperatorTypeToString(LogicalOperatorTyp
22
22
  return "COPY_FROM";
23
23
  case LogicalOperatorType::COPY_TO:
24
24
  return "COPY_TO";
25
+ case LogicalOperatorType::COUNT_REL_TABLE:
26
+ return "COUNT_REL_TABLE";
25
27
  case LogicalOperatorType::CREATE_MACRO:
26
28
  return "CREATE_MACRO";
27
29
  case LogicalOperatorType::CREATE_SEQUENCE:
@@ -19,5 +19,9 @@ void LogicalTableFunctionCall::computeFactorizedSchema() {
19
19
  }
20
20
  }
21
21
 
22
+ std::unique_ptr<OPPrintInfo> LogicalTableFunctionCall::getPrintInfo() const {
23
+ return std::make_unique<LogicalTableFunctionCallPrintInfo>(getExpressionsForPrinting());
24
+ }
25
+
22
26
  } // namespace planner
23
27
  } // namespace lbug
@@ -1,5 +1,6 @@
1
1
  add_library(lbug_planner_scan
2
2
  OBJECT
3
+ logical_count_rel_table.cpp
3
4
  logical_expressions_scan.cpp
4
5
  logical_index_look_up.cpp
5
6
  logical_scan_node_table.cpp)
@@ -0,0 +1,24 @@
1
+ #include "planner/operator/scan/logical_count_rel_table.h"
2
+
3
+ namespace lbug {
4
+ namespace planner {
5
+
6
+ void LogicalCountRelTable::computeFactorizedSchema() {
7
+ createEmptySchema();
8
+ // Only output the count expression in a single-state group.
9
+ // This operator is a source - it has no child in the logical plan.
10
+ // The bound node is used internally for scanning but not exposed.
11
+ auto groupPos = schema->createGroup();
12
+ schema->insertToGroupAndScope(countExpr, groupPos);
13
+ schema->setGroupAsSingleState(groupPos);
14
+ }
15
+
16
+ void LogicalCountRelTable::computeFlatSchema() {
17
+ createEmptySchema();
18
+ // For flat schema, create a single group with the count expression.
19
+ auto groupPos = schema->createGroup();
20
+ schema->insertToGroupAndScope(countExpr, groupPos);
21
+ }
22
+
23
+ } // namespace planner
24
+ } // namespace lbug
@@ -1,8 +1,11 @@
1
1
  #include <cmath>
2
2
 
3
+ #include "binder/bound_scan_source.h"
3
4
  #include "binder/expression_visitor.h"
5
+ #include "catalog/catalog_entry/catalog_entry_type.h"
4
6
  #include "common/enums/join_type.h"
5
7
  #include "common/enums/rel_direction.h"
8
+ #include "common/enums/table_type.h"
6
9
  #include "common/utils.h"
7
10
  #include "planner/join_order/cost_model.h"
8
11
  #include "planner/join_order/join_plan_solver.h"
@@ -246,7 +249,19 @@ void Planner::planNodeScan(uint32_t nodePos) {
246
249
  newSubgraph.addQueryNode(nodePos);
247
250
  auto plan = LogicalPlan();
248
251
  auto properties = getProperties(*node);
249
- appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
252
+ if (node->getEntries().size() == 1 &&
253
+ node->getEntries()[0]->getType() == catalog::CatalogEntryType::FOREIGN_TABLE_ENTRY) {
254
+ auto boundScanInfo =
255
+ node->getEntries()[0]->getBoundScanInfo(clientContext, node->getUniqueName());
256
+ if (boundScanInfo != nullptr) {
257
+ // Use table function call for foreign tables
258
+ appendTableFunctionCall(*boundScanInfo, plan);
259
+ } else {
260
+ appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
261
+ }
262
+ } else {
263
+ appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
264
+ }
250
265
  auto predicates = getNewlyMatchedExprs(context.getEmptySubqueryGraph(), newSubgraph,
251
266
  context.getWhereExpressions());
252
267
  appendFilters(predicates, plan);
@@ -7,6 +7,7 @@ add_library(lbug_processor_mapper
7
7
  map_acc_hash_join.cpp
8
8
  map_accumulate.cpp
9
9
  map_aggregate.cpp
10
+ map_count_rel_table.cpp
10
11
  map_standalone_call.cpp
11
12
  map_table_function_call.cpp
12
13
  map_copy_to.cpp