lbug 0.12.3-dev.9 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
- package/lbug-source/CMakeLists.txt +16 -7
- package/lbug-source/Makefile +15 -4
- package/lbug-source/benchmark/serializer.py +24 -3
- package/lbug-source/dataset/demo-db/csv/copy.cypher +4 -4
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
- package/lbug-source/dataset/demo-db/parquet/copy.cypher +4 -4
- package/lbug-source/extension/duckdb/src/catalog/duckdb_catalog.cpp +1 -1
- package/lbug-source/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp +43 -4
- package/lbug-source/extension/duckdb/src/connector/duckdb_result_converter.cpp +6 -0
- package/lbug-source/extension/duckdb/src/connector/duckdb_secret_manager.cpp +1 -1
- package/lbug-source/extension/duckdb/src/function/duckdb_scan.cpp +49 -4
- package/lbug-source/extension/duckdb/src/include/catalog/duckdb_table_catalog_entry.h +6 -1
- package/lbug-source/extension/duckdb/src/include/function/duckdb_scan.h +2 -0
- package/lbug-source/extension/duckdb/test/test_files/duckdb.test +28 -0
- package/lbug-source/extension/extension_config.cmake +3 -2
- package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
- package/lbug-source/scripts/antlr4/Cypher.g4 +4 -4
- package/lbug-source/scripts/antlr4/hash.md5 +1 -1
- package/lbug-source/scripts/generate_binary_demo.sh +1 -1
- package/lbug-source/src/antlr4/Cypher.g4 +4 -4
- package/lbug-source/src/binder/bind/bind_ddl.cpp +97 -15
- package/lbug-source/src/binder/bind/bind_graph_pattern.cpp +30 -3
- package/lbug-source/src/catalog/catalog.cpp +6 -4
- package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
- package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +46 -7
- package/lbug-source/src/catalog/catalog_set.cpp +1 -0
- package/lbug-source/src/function/function_collection.cpp +2 -1
- package/lbug-source/src/function/table/CMakeLists.txt +1 -0
- package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
- package/lbug-source/src/function/table/show_connection.cpp +6 -1
- package/lbug-source/src/function/table/show_tables.cpp +10 -2
- package/lbug-source/src/function/table/table_function.cpp +11 -2
- package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +23 -6
- package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
- package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +21 -2
- package/lbug-source/src/include/catalog/catalog_entry/table_catalog_entry.h +7 -0
- package/lbug-source/src/include/common/constants.h +1 -0
- package/lbug-source/src/include/common/string_format.h +2 -2
- package/lbug-source/src/include/common/types/types.h +1 -0
- package/lbug-source/src/include/function/table/bind_data.h +12 -1
- package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
- package/lbug-source/src/include/function/table/table_function.h +2 -0
- package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
- package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
- package/lbug-source/src/include/optimizer/order_by_push_down_optimizer.h +21 -0
- package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
- package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
- package/lbug-source/src/include/planner/operator/logical_table_function_call.h +14 -1
- package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
- package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
- package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
- package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
- package/lbug-source/src/include/processor/plan_mapper.h +2 -0
- package/lbug-source/src/include/storage/storage_manager.h +1 -0
- package/lbug-source/src/include/storage/storage_version_info.h +1 -1
- package/lbug-source/src/include/storage/table/foreign_rel_table.h +56 -0
- package/lbug-source/src/include/storage/table/node_table.h +6 -1
- package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
- package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
- package/lbug-source/src/include/storage/table/rel_table.h +2 -2
- package/lbug-source/src/include/transaction/transaction.h +2 -0
- package/lbug-source/src/optimizer/CMakeLists.txt +3 -1
- package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
- package/lbug-source/src/optimizer/limit_push_down_optimizer.cpp +12 -0
- package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
- package/lbug-source/src/optimizer/optimizer.cpp +10 -0
- package/lbug-source/src/optimizer/order_by_push_down_optimizer.cpp +123 -0
- package/lbug-source/src/optimizer/projection_push_down_optimizer.cpp +5 -1
- package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
- package/lbug-source/src/parser/transform/transform_expression.cpp +1 -1
- package/lbug-source/src/parser/transform/transform_graph_pattern.cpp +6 -1
- package/lbug-source/src/parser/transformer.cpp +7 -1
- package/lbug-source/src/planner/join_order/cardinality_estimator.cpp +11 -2
- package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
- package/lbug-source/src/planner/operator/logical_table_function_call.cpp +4 -0
- package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
- package/lbug-source/src/planner/plan/plan_join_order.cpp +16 -1
- package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
- package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
- package/lbug-source/src/processor/operator/index_lookup.cpp +31 -23
- package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
- package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
- package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
- package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
- package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
- package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +18 -2
- package/lbug-source/src/storage/storage_manager.cpp +43 -6
- package/lbug-source/src/storage/table/CMakeLists.txt +3 -0
- package/lbug-source/src/storage/table/foreign_rel_table.cpp +63 -0
- package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
- package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
- package/lbug-source/test/common/string_format.cpp +9 -1
- package/lbug-source/test/copy/copy_test.cpp +4 -4
- package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
- package/lbug-source/test/include/test_runner/test_group.h +11 -1
- package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
- package/lbug-source/test/runner/e2e_test.cpp +7 -1
- package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
- package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
- package/lbug-source/test/test_helper/test_helper.cpp +33 -1
- package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
- package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
- package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
- package/lbug-source/test/test_runner/test_parser.cpp +3 -0
- package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
- package/lbug-source/test/transaction/transaction_test.cpp +19 -15
- package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2805 -2708
- package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +7 -3
- package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
- package/lbug-source/tools/nodejs_api/package.json +4 -2
- package/lbug-source/tools/shell/embedded_shell.cpp +78 -3
- package/lbug-source/tools/shell/include/embedded_shell.h +2 -0
- package/lbug-source/tools/shell/linenoise.cpp +3 -3
- package/lbug-source/tools/shell/test/test_helper.py +1 -1
- package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
- package/lbug-source/tools/shell/test/test_shell_commands.py +19 -0
- package/package.json +9 -2
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
#include "optimizer/count_rel_table_optimizer.h"
|
|
2
|
+
|
|
3
|
+
#include "binder/expression/aggregate_function_expression.h"
|
|
4
|
+
#include "binder/expression/node_expression.h"
|
|
5
|
+
#include "catalog/catalog_entry/node_table_id_pair.h"
|
|
6
|
+
#include "function/aggregate/count_star.h"
|
|
7
|
+
#include "main/client_context.h"
|
|
8
|
+
#include "planner/operator/extend/logical_extend.h"
|
|
9
|
+
#include "planner/operator/logical_aggregate.h"
|
|
10
|
+
#include "planner/operator/logical_projection.h"
|
|
11
|
+
#include "planner/operator/scan/logical_count_rel_table.h"
|
|
12
|
+
#include "planner/operator/scan/logical_scan_node_table.h"
|
|
13
|
+
|
|
14
|
+
using namespace lbug::common;
|
|
15
|
+
using namespace lbug::planner;
|
|
16
|
+
using namespace lbug::binder;
|
|
17
|
+
using namespace lbug::catalog;
|
|
18
|
+
|
|
19
|
+
namespace lbug {
|
|
20
|
+
namespace optimizer {
|
|
21
|
+
|
|
22
|
+
// Entry point of the optimizer. Walks the plan bottom-up (see visitOperator) and installs
// the operator returned for the root back on the plan.
//
// visitOperator() returns the possibly replaced operator for the node it is given.
// Replacements of inner nodes are wired in through setChild(), but a replacement of the
// root itself is only visible through the return value, so it must be stored with
// setLastOperator(); otherwise the rewrite of a root-level aggregate would be lost.
void CountRelTableOptimizer::rewrite(LogicalPlan* plan) {
    plan->setLastOperator(visitOperator(plan->getLastOperator()));
}
|
|
25
|
+
|
|
26
|
+
std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitOperator(
|
|
27
|
+
const std::shared_ptr<LogicalOperator>& op) {
|
|
28
|
+
// bottom-up traversal
|
|
29
|
+
for (auto i = 0u; i < op->getNumChildren(); ++i) {
|
|
30
|
+
op->setChild(i, visitOperator(op->getChild(i)));
|
|
31
|
+
}
|
|
32
|
+
auto result = visitOperatorReplaceSwitch(op);
|
|
33
|
+
result->computeFlatSchema();
|
|
34
|
+
return result;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Returns true only when `op` is an AGGREGATE operator that
//   - has no keys (no GROUP BY),
//   - carries exactly one aggregate expression,
//   - whose expression is an aggregate function named like CountStarFunction, and
//   - is not marked DISTINCT.
bool CountRelTableOptimizer::isSimpleCountStar(LogicalOperator* op) const {
    if (op->getOperatorType() != LogicalOperatorType::AGGREGATE) {
        return false;
    }
    const auto& aggregateOp = op->constCast<LogicalAggregate>();
    if (aggregateOp.hasKeys()) {
        return false;
    }

    const auto& aggregateExprs = aggregateOp.getAggregates();
    if (aggregateExprs.size() != 1 ||
        aggregateExprs[0]->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
        return false;
    }

    // Only a plain (non-DISTINCT) COUNT_STAR qualifies.
    const auto& candidate = aggregateExprs[0]->constCast<AggregateFunctionExpression>();
    const bool isCountStar = candidate.getFunction().name == function::CountStarFunction::name;
    return isCountStar && !candidate.isDistinct();
}
|
|
71
|
+
|
|
72
|
+
// Checks that the sub-plan below `aggregate` has the shape this optimizer can replace:
//
//   AGGREGATE
//     -> zero or more PROJECTIONs (empty, or projecting only aggregate-function expressions)
//       -> EXTEND (direction is not BOTH, rel is not multi-labeled, no properties)
//         -> SCAN_NODE_TABLE (no properties)
//
// The aggregate itself is not inspected here; visitAggregateReplace checks it with
// isSimpleCountStar before calling this function.
bool CountRelTableOptimizer::canOptimize(LogicalOperator* aggregate) const {
    auto* cursor = aggregate->getChild(0).get();

    // Walk past projections. An empty projection list is a pure pass-through; a non-empty
    // one is accepted only if every projected expression is an aggregate function.
    for (; cursor->getOperatorType() == LogicalOperatorType::PROJECTION;
         cursor = cursor->getChild(0).get()) {
        const auto& projection = cursor->constCast<LogicalProjection>();
        for (auto& projected : projection.getExpressionsToProject()) {
            if (projected->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
                return false;
            }
        }
    }

    if (cursor->getOperatorType() != LogicalOperatorType::EXTEND) {
        return false;
    }
    const auto& extendOp = cursor->constCast<LogicalExtend>();

    // Rejected cases:
    //  - BOTH direction: (a)-[e]-(b) scans both directions and would need special handling
    //    to avoid double counting.
    //  - multi-labeled rel: only a single rel table is supported.
    //  - any rel property being scanned: the rewrite is only valid when none are needed.
    if (extendOp.getDirection() == ExtendDirection::BOTH ||
        extendOp.getRel()->isMultiLabeled() || !extendOp.getProperties().empty()) {
        return false;
    }

    // The extend must sit directly on a node table scan that reads no properties.
    auto* scanCandidate = cursor->getChild(0).get();
    if (scanCandidate->getOperatorType() != LogicalOperatorType::SCAN_NODE_TABLE) {
        return false;
    }
    return scanCandidate->constCast<LogicalScanNodeTable>().getProperties().empty();
}
|
|
138
|
+
|
|
139
|
+
std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitAggregateReplace(
|
|
140
|
+
std::shared_ptr<LogicalOperator> op) {
|
|
141
|
+
if (!isSimpleCountStar(op.get())) {
|
|
142
|
+
return op;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
if (!canOptimize(op.get())) {
|
|
146
|
+
return op;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// Find the EXTEND operator
|
|
150
|
+
auto* current = op->getChild(0).get();
|
|
151
|
+
while (current->getOperatorType() == LogicalOperatorType::PROJECTION) {
|
|
152
|
+
current = current->getChild(0).get();
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
KU_ASSERT(current->getOperatorType() == LogicalOperatorType::EXTEND);
|
|
156
|
+
auto& extend = current->constCast<LogicalExtend>();
|
|
157
|
+
auto rel = extend.getRel();
|
|
158
|
+
auto boundNode = extend.getBoundNode();
|
|
159
|
+
auto nbrNode = extend.getNbrNode();
|
|
160
|
+
|
|
161
|
+
// Get the rel group entry
|
|
162
|
+
KU_ASSERT(rel->getNumEntries() == 1);
|
|
163
|
+
auto* relGroupEntry = rel->getEntry(0)->ptrCast<RelGroupCatalogEntry>();
|
|
164
|
+
|
|
165
|
+
// Determine the source and destination node table IDs based on extend direction.
|
|
166
|
+
// If extendFromSource is true, then boundNode is the source and nbrNode is the destination.
|
|
167
|
+
// If extendFromSource is false, then boundNode is the destination and nbrNode is the source.
|
|
168
|
+
auto boundNodeTableIDs = boundNode->getTableIDsSet();
|
|
169
|
+
auto nbrNodeTableIDs = nbrNode->getTableIDsSet();
|
|
170
|
+
|
|
171
|
+
// Get only the rel table IDs that match the specific node table ID pairs in the query.
|
|
172
|
+
// A rel table connects a specific (srcTableID, dstTableID) pair.
|
|
173
|
+
std::vector<table_id_t> relTableIDs;
|
|
174
|
+
for (auto& info : relGroupEntry->getRelEntryInfos()) {
|
|
175
|
+
table_id_t srcTableID = info.nodePair.srcTableID;
|
|
176
|
+
table_id_t dstTableID = info.nodePair.dstTableID;
|
|
177
|
+
|
|
178
|
+
bool matches = false;
|
|
179
|
+
if (extend.extendFromSourceNode()) {
|
|
180
|
+
// boundNode is src, nbrNode is dst
|
|
181
|
+
matches =
|
|
182
|
+
boundNodeTableIDs.contains(srcTableID) && nbrNodeTableIDs.contains(dstTableID);
|
|
183
|
+
} else {
|
|
184
|
+
// boundNode is dst, nbrNode is src
|
|
185
|
+
matches =
|
|
186
|
+
boundNodeTableIDs.contains(dstTableID) && nbrNodeTableIDs.contains(srcTableID);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
if (matches) {
|
|
190
|
+
relTableIDs.push_back(info.oid);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// If no matching rel tables, don't optimize (shouldn't happen for valid queries)
|
|
195
|
+
if (relTableIDs.empty()) {
|
|
196
|
+
return op;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Get the count expression from the original aggregate
|
|
200
|
+
auto& aggregate = op->constCast<LogicalAggregate>();
|
|
201
|
+
auto countExpr = aggregate.getAggregates()[0];
|
|
202
|
+
|
|
203
|
+
// Get the bound node table IDs as a vector
|
|
204
|
+
std::vector<table_id_t> boundNodeTableIDsVec(boundNodeTableIDs.begin(),
|
|
205
|
+
boundNodeTableIDs.end());
|
|
206
|
+
|
|
207
|
+
// Create the new COUNT_REL_TABLE operator with all necessary information for scanning
|
|
208
|
+
auto countRelTable =
|
|
209
|
+
std::make_shared<LogicalCountRelTable>(relGroupEntry, std::move(relTableIDs),
|
|
210
|
+
std::move(boundNodeTableIDsVec), boundNode, extend.getDirection(), countExpr);
|
|
211
|
+
countRelTable->computeFlatSchema();
|
|
212
|
+
|
|
213
|
+
return countRelTable;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
} // namespace optimizer
|
|
217
|
+
} // namespace lbug
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
#include "planner/operator/logical_distinct.h"
|
|
7
7
|
#include "planner/operator/logical_hash_join.h"
|
|
8
8
|
#include "planner/operator/logical_limit.h"
|
|
9
|
+
#include "planner/operator/logical_table_function_call.h"
|
|
9
10
|
|
|
10
11
|
using namespace lbug::binder;
|
|
11
12
|
using namespace lbug::common;
|
|
@@ -34,10 +35,21 @@ void LimitPushDownOptimizer::visitOperator(planner::LogicalOperator* op) {
|
|
|
34
35
|
case LogicalOperatorType::MULTIPLICITY_REDUCER:
|
|
35
36
|
case LogicalOperatorType::EXPLAIN:
|
|
36
37
|
case LogicalOperatorType::ACCUMULATE:
|
|
38
|
+
case LogicalOperatorType::FILTER:
|
|
37
39
|
case LogicalOperatorType::PROJECTION: {
|
|
38
40
|
visitOperator(op->getChild(0).get());
|
|
39
41
|
return;
|
|
40
42
|
}
|
|
43
|
+
case LogicalOperatorType::TABLE_FUNCTION_CALL: {
|
|
44
|
+
if (limitNumber == INVALID_LIMIT && skipNumber == 0) {
|
|
45
|
+
return;
|
|
46
|
+
}
|
|
47
|
+
auto& tableFuncCall = op->cast<LogicalTableFunctionCall>();
|
|
48
|
+
if (tableFuncCall.getTableFunc().supportsPushDownFunc()) {
|
|
49
|
+
tableFuncCall.setLimitNum(skipNumber + limitNumber);
|
|
50
|
+
}
|
|
51
|
+
return;
|
|
52
|
+
}
|
|
41
53
|
case LogicalOperatorType::DISTINCT: {
|
|
42
54
|
if (limitNumber == INVALID_LIMIT && skipNumber == 0) {
|
|
43
55
|
return;
|
|
@@ -19,6 +19,9 @@ void LogicalOperatorVisitor::visitOperatorSwitch(LogicalOperator* op) {
|
|
|
19
19
|
case LogicalOperatorType::COPY_TO: {
|
|
20
20
|
visitCopyTo(op);
|
|
21
21
|
} break;
|
|
22
|
+
case LogicalOperatorType::COUNT_REL_TABLE: {
|
|
23
|
+
visitCountRelTable(op);
|
|
24
|
+
} break;
|
|
22
25
|
case LogicalOperatorType::DELETE: {
|
|
23
26
|
visitDelete(op);
|
|
24
27
|
} break;
|
|
@@ -108,6 +111,9 @@ std::shared_ptr<LogicalOperator> LogicalOperatorVisitor::visitOperatorReplaceSwi
|
|
|
108
111
|
case LogicalOperatorType::COPY_TO: {
|
|
109
112
|
return visitCopyToReplace(op);
|
|
110
113
|
}
|
|
114
|
+
case LogicalOperatorType::COUNT_REL_TABLE: {
|
|
115
|
+
return visitCountRelTableReplace(op);
|
|
116
|
+
}
|
|
111
117
|
case LogicalOperatorType::DELETE: {
|
|
112
118
|
return visitDeleteReplace(op);
|
|
113
119
|
}
|
|
@@ -5,9 +5,11 @@
|
|
|
5
5
|
#include "optimizer/agg_key_dependency_optimizer.h"
|
|
6
6
|
#include "optimizer/cardinality_updater.h"
|
|
7
7
|
#include "optimizer/correlated_subquery_unnest_solver.h"
|
|
8
|
+
#include "optimizer/count_rel_table_optimizer.h"
|
|
8
9
|
#include "optimizer/factorization_rewriter.h"
|
|
9
10
|
#include "optimizer/filter_push_down_optimizer.h"
|
|
10
11
|
#include "optimizer/limit_push_down_optimizer.h"
|
|
12
|
+
#include "optimizer/order_by_push_down_optimizer.h"
|
|
11
13
|
#include "optimizer/projection_push_down_optimizer.h"
|
|
12
14
|
#include "optimizer/remove_factorization_rewriter.h"
|
|
13
15
|
#include "optimizer/remove_unnecessary_join_optimizer.h"
|
|
@@ -32,6 +34,11 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
|
|
|
32
34
|
auto removeUnnecessaryJoinOptimizer = RemoveUnnecessaryJoinOptimizer();
|
|
33
35
|
removeUnnecessaryJoinOptimizer.rewrite(plan);
|
|
34
36
|
|
|
37
|
+
// CountRelTableOptimizer should be applied early before other optimizations
|
|
38
|
+
// that might change the plan structure.
|
|
39
|
+
auto countRelTableOptimizer = CountRelTableOptimizer(context);
|
|
40
|
+
countRelTableOptimizer.rewrite(plan);
|
|
41
|
+
|
|
35
42
|
auto filterPushDownOptimizer = FilterPushDownOptimizer(context);
|
|
36
43
|
filterPushDownOptimizer.rewrite(plan);
|
|
37
44
|
|
|
@@ -39,6 +46,9 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
|
|
|
39
46
|
ProjectionPushDownOptimizer(context->getClientConfig()->recursivePatternSemantic);
|
|
40
47
|
projectionPushDownOptimizer.rewrite(plan);
|
|
41
48
|
|
|
49
|
+
auto orderByPushDownOptimizer = OrderByPushDownOptimizer();
|
|
50
|
+
orderByPushDownOptimizer.rewrite(plan);
|
|
51
|
+
|
|
42
52
|
auto limitPushDownOptimizer = LimitPushDownOptimizer();
|
|
43
53
|
limitPushDownOptimizer.rewrite(plan);
|
|
44
54
|
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
#include "optimizer/order_by_push_down_optimizer.h"
|
|
2
|
+
|
|
3
|
+
#include "binder/expression/expression.h"
|
|
4
|
+
#include "binder/expression/expression_util.h"
|
|
5
|
+
#include "binder/expression/property_expression.h"
|
|
6
|
+
#include "binder/expression/variable_expression.h"
|
|
7
|
+
#include "common/exception/runtime.h"
|
|
8
|
+
#include "planner/operator/logical_order_by.h"
|
|
9
|
+
#include "planner/operator/logical_table_function_call.h"
|
|
10
|
+
|
|
11
|
+
using namespace lbug::binder;
|
|
12
|
+
using namespace lbug::common;
|
|
13
|
+
using namespace lbug::planner;
|
|
14
|
+
|
|
15
|
+
namespace lbug {
|
|
16
|
+
namespace optimizer {
|
|
17
|
+
|
|
18
|
+
// This ensures that ORDER BY can be pushed down only through operators that support it.
|
|
19
|
+
// It should not be pushed down for things like RECURSIVE_EXTEND etc.
|
|
20
|
+
bool isPushDownSupported(LogicalOperator* op) {
|
|
21
|
+
switch (op->getOperatorType()) {
|
|
22
|
+
case LogicalOperatorType::TABLE_FUNCTION_CALL: {
|
|
23
|
+
return op->cast<LogicalTableFunctionCall>().getTableFunc().supportsPushDownFunc();
|
|
24
|
+
}
|
|
25
|
+
case LogicalOperatorType::MULTIPLICITY_REDUCER:
|
|
26
|
+
case LogicalOperatorType::EXPLAIN:
|
|
27
|
+
case LogicalOperatorType::ACCUMULATE:
|
|
28
|
+
case LogicalOperatorType::FILTER:
|
|
29
|
+
case LogicalOperatorType::PROJECTION:
|
|
30
|
+
case LogicalOperatorType::LIMIT: {
|
|
31
|
+
if (op->getNumChildren() == 0) {
|
|
32
|
+
return false;
|
|
33
|
+
}
|
|
34
|
+
return isPushDownSupported(op->getChild(0).get());
|
|
35
|
+
}
|
|
36
|
+
default:
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Rewrites the plan starting from its last (root) operator and stores the operator
// returned by visitOperator as the new root, since the root itself may be replaced.
void OrderByPushDownOptimizer::rewrite(LogicalPlan* plan) {
    auto rewrittenRoot = visitOperator(plan->getLastOperator());
    plan->setLastOperator(rewrittenRoot);
}
|
|
44
|
+
|
|
45
|
+
std::shared_ptr<LogicalOperator> OrderByPushDownOptimizer::visitOperator(
|
|
46
|
+
std::shared_ptr<LogicalOperator> op, std::string currentOrderBy) {
|
|
47
|
+
switch (op->getOperatorType()) {
|
|
48
|
+
case LogicalOperatorType::ORDER_BY: {
|
|
49
|
+
auto& orderBy = op->constCast<LogicalOrderBy>();
|
|
50
|
+
std::string newOrderBy = currentOrderBy;
|
|
51
|
+
if (!currentOrderBy.empty()) {
|
|
52
|
+
newOrderBy += ", ";
|
|
53
|
+
}
|
|
54
|
+
newOrderBy +=
|
|
55
|
+
buildOrderByString(orderBy.getExpressionsToOrderBy(), orderBy.getIsAscOrders());
|
|
56
|
+
auto newChild = visitOperator(orderBy.getChild(0), newOrderBy);
|
|
57
|
+
if (isPushDownSupported(newChild.get())) {
|
|
58
|
+
return newChild;
|
|
59
|
+
}
|
|
60
|
+
return std::make_shared<LogicalOrderBy>(orderBy.getExpressionsToOrderBy(),
|
|
61
|
+
orderBy.getIsAscOrders(), newChild);
|
|
62
|
+
}
|
|
63
|
+
case LogicalOperatorType::MULTIPLICITY_REDUCER:
|
|
64
|
+
case LogicalOperatorType::EXPLAIN:
|
|
65
|
+
case LogicalOperatorType::ACCUMULATE:
|
|
66
|
+
case LogicalOperatorType::FILTER:
|
|
67
|
+
case LogicalOperatorType::PROJECTION:
|
|
68
|
+
case LogicalOperatorType::LIMIT: {
|
|
69
|
+
for (auto i = 0u; i < op->getNumChildren(); ++i) {
|
|
70
|
+
op->setChild(i, visitOperator(op->getChild(i), currentOrderBy));
|
|
71
|
+
}
|
|
72
|
+
return op;
|
|
73
|
+
}
|
|
74
|
+
case LogicalOperatorType::TABLE_FUNCTION_CALL: {
|
|
75
|
+
if (!currentOrderBy.empty()) {
|
|
76
|
+
auto& tableFunc = op->cast<LogicalTableFunctionCall>();
|
|
77
|
+
if (tableFunc.getTableFunc().supportsPushDownFunc()) {
|
|
78
|
+
tableFunc.setOrderBy(currentOrderBy);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
return op;
|
|
82
|
+
}
|
|
83
|
+
default:
|
|
84
|
+
return op;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
std::string OrderByPushDownOptimizer::buildOrderByString(
|
|
89
|
+
const binder::expression_vector& expressions, const std::vector<bool>& isAscOrders) {
|
|
90
|
+
if (expressions.empty()) {
|
|
91
|
+
return "";
|
|
92
|
+
}
|
|
93
|
+
std::string result = " ORDER BY ";
|
|
94
|
+
bool first = true;
|
|
95
|
+
for (size_t i = 0; i < expressions.size(); ++i) {
|
|
96
|
+
auto& expr = expressions[i];
|
|
97
|
+
std::string colName;
|
|
98
|
+
if (expr->expressionType == common::ExpressionType::VARIABLE) {
|
|
99
|
+
auto& var = expr->constCast<binder::VariableExpression>();
|
|
100
|
+
colName = var.getVariableName();
|
|
101
|
+
} else if (expr->expressionType == common::ExpressionType::PROPERTY) {
|
|
102
|
+
auto& prop = expr->constCast<binder::PropertyExpression>();
|
|
103
|
+
colName = prop.getPropertyName();
|
|
104
|
+
} else {
|
|
105
|
+
// Skip expressions that cannot be pushed down
|
|
106
|
+
continue;
|
|
107
|
+
}
|
|
108
|
+
if (!first) {
|
|
109
|
+
result += ", ";
|
|
110
|
+
}
|
|
111
|
+
result += colName;
|
|
112
|
+
result += isAscOrders[i] ? " ASC" : " DESC";
|
|
113
|
+
first = false;
|
|
114
|
+
}
|
|
115
|
+
if (first) {
|
|
116
|
+
// No expressions could be pushed down
|
|
117
|
+
return "";
|
|
118
|
+
}
|
|
119
|
+
return result;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
} // namespace optimizer
|
|
123
|
+
} // namespace lbug
|
|
@@ -261,7 +261,11 @@ void ProjectionPushDownOptimizer::visitTableFunctionCall(LogicalOperator* op) {
|
|
|
261
261
|
auto& tableFunctionCall = op->cast<LogicalTableFunctionCall>();
|
|
262
262
|
std::vector<bool> columnSkips;
|
|
263
263
|
for (auto& column : tableFunctionCall.getBindData()->columns) {
|
|
264
|
-
|
|
264
|
+
// Check both variablesInUse and propertiesInUse since foreign table columns
|
|
265
|
+
// may be referenced as properties in the query (e.g., a.id) but represented
|
|
266
|
+
// as variables in the table function bind data
|
|
267
|
+
columnSkips.push_back(
|
|
268
|
+
!variablesInUse.contains(column) && !propertiesInUse.contains(column));
|
|
265
269
|
}
|
|
266
270
|
tableFunctionCall.setColumnSkips(std::move(columnSkips));
|
|
267
271
|
}
|
|
@@ -81,7 +81,12 @@ std::unique_ptr<Statement> Transformer::transformCreateNodeTable(
|
|
|
81
81
|
} else {
|
|
82
82
|
createTableInfo.propertyDefinitions =
|
|
83
83
|
transformPropertyDefinitions(*ctx.kU_PropertyDefinitions());
|
|
84
|
-
|
|
84
|
+
options_t options;
|
|
85
|
+
if (ctx.kU_Options()) {
|
|
86
|
+
options = transformOptions(*ctx.kU_Options());
|
|
87
|
+
}
|
|
88
|
+
createTableInfo.extraInfo =
|
|
89
|
+
std::make_unique<ExtraCreateNodeTableInfo>(getPKName(ctx), std::move(options));
|
|
85
90
|
return std::make_unique<CreateTable>(std::move(createTableInfo));
|
|
86
91
|
}
|
|
87
92
|
}
|
|
@@ -663,7 +663,7 @@ std::unique_ptr<ParsedExpression> Transformer::transformProperty(
|
|
|
663
663
|
}
|
|
664
664
|
|
|
665
665
|
// A property key name is a single symbolic name; delegate to transformSymbolicName.
std::string Transformer::transformPropertyKeyName(CypherParser::OC_PropertyKeyNameContext& ctx) {
    auto* symbolicName = ctx.oC_SymbolicName();
    return transformSymbolicName(*symbolicName);
}
|
|
668
668
|
|
|
669
669
|
std::unique_ptr<ParsedExpression> Transformer::transformIntegerLiteral(
|
|
@@ -202,7 +202,12 @@ std::vector<std::string> Transformer::transformNodeLabels(CypherParser::OC_NodeL
|
|
|
202
202
|
}
|
|
203
203
|
|
|
204
204
|
// Transforms a label name made of one schema name, or of two schema names forming a
// qualified "db.table" name (joined with a dot).
// NOTE(review): assumes the parser always yields at least one schema name, and exactly two
// for the qualified form — confirm against the grammar.
std::string Transformer::transformLabelName(CypherParser::OC_LabelNameContext& ctx) {
    const auto parts = ctx.oC_SchemaName();
    auto labelName = transformSchemaName(*parts[0]);
    if (parts.size() != 1) {
        // Qualified name: db.table
        labelName += ".";
        labelName += transformSchemaName(*parts[1]);
    }
    return labelName;
}
|
|
207
212
|
|
|
208
213
|
std::string Transformer::transformRelTypeName(CypherParser::OC_RelTypeNameContext& ctx) {
|
|
@@ -88,7 +88,13 @@ std::unique_ptr<ParsedExpression> Transformer::transformWhere(CypherParser::OC_W
|
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
// Transforms a schema name made of one symbolic name, or of two symbolic names forming a
// qualified "db.table" name (joined with a dot).
// NOTE(review): assumes the parser always yields at least one symbolic name, and exactly
// two for the qualified form — confirm against the grammar.
std::string Transformer::transformSchemaName(CypherParser::OC_SchemaNameContext& ctx) {
    const auto parts = ctx.oC_SymbolicName();
    auto schemaName = transformSymbolicName(*parts[0]);
    if (parts.size() != 1) {
        // Qualified name: db.table
        schemaName += ".";
        schemaName += transformSymbolicName(*parts[1]);
    }
    return schemaName;
}
|
|
93
99
|
|
|
94
100
|
std::string Transformer::transformStringLiteral(antlr4::tree::TerminalNode& stringLiteral) {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include "planner/join_order/cardinality_estimator.h"
|
|
2
2
|
|
|
3
3
|
#include "binder/expression/property_expression.h"
|
|
4
|
+
#include "catalog/catalog_entry/table_catalog_entry.h"
|
|
4
5
|
#include "main/client_context.h"
|
|
5
6
|
#include "planner/join_order/join_order_util.h"
|
|
6
7
|
#include "planner/operator/logical_aggregate.h"
|
|
@@ -39,7 +40,12 @@ void CardinalityEstimator::init(const NodeExpression& node) {
|
|
|
39
40
|
cardinality_t numNodes = 0u;
|
|
40
41
|
auto storageManager = storage::StorageManager::Get(*context);
|
|
41
42
|
auto transaction = transaction::Transaction::Get(*context);
|
|
42
|
-
for (auto
|
|
43
|
+
for (auto entry : node.getEntries()) {
|
|
44
|
+
// Skip foreign tables - they don't have storage in the local database
|
|
45
|
+
if (entry->getType() == catalog::CatalogEntryType::FOREIGN_TABLE_ENTRY) {
|
|
46
|
+
continue;
|
|
47
|
+
}
|
|
48
|
+
auto tableID = entry->getTableID();
|
|
43
49
|
auto stats =
|
|
44
50
|
storageManager->getTable(tableID)->cast<storage::NodeTable>().getStats(transaction);
|
|
45
51
|
numNodes += stats.getTableCard();
|
|
@@ -195,7 +201,10 @@ uint64_t CardinalityEstimator::getNumNodes(const Transaction*,
|
|
|
195
201
|
const std::vector<table_id_t>& tableIDs) const {
|
|
196
202
|
cardinality_t numNodes = 0u;
|
|
197
203
|
for (auto& tableID : tableIDs) {
|
|
198
|
-
|
|
204
|
+
// Skip foreign tables - they won't be in nodeTableStats
|
|
205
|
+
if (!nodeTableStats.contains(tableID)) {
|
|
206
|
+
continue;
|
|
207
|
+
}
|
|
199
208
|
numNodes += nodeTableStats.at(tableID).getTableCard();
|
|
200
209
|
}
|
|
201
210
|
return atLeastOne(numNodes);
|
|
@@ -22,6 +22,8 @@ std::string LogicalOperatorUtils::logicalOperatorTypeToString(LogicalOperatorTyp
|
|
|
22
22
|
return "COPY_FROM";
|
|
23
23
|
case LogicalOperatorType::COPY_TO:
|
|
24
24
|
return "COPY_TO";
|
|
25
|
+
case LogicalOperatorType::COUNT_REL_TABLE:
|
|
26
|
+
return "COUNT_REL_TABLE";
|
|
25
27
|
case LogicalOperatorType::CREATE_MACRO:
|
|
26
28
|
return "CREATE_MACRO";
|
|
27
29
|
case LogicalOperatorType::CREATE_SEQUENCE:
|
|
@@ -19,5 +19,9 @@ void LogicalTableFunctionCall::computeFactorizedSchema() {
|
|
|
19
19
|
}
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
+
std::unique_ptr<OPPrintInfo> LogicalTableFunctionCall::getPrintInfo() const {
|
|
23
|
+
return std::make_unique<LogicalTableFunctionCallPrintInfo>(getExpressionsForPrinting());
|
|
24
|
+
}
|
|
25
|
+
|
|
22
26
|
} // namespace planner
|
|
23
27
|
} // namespace lbug
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#include "planner/operator/scan/logical_count_rel_table.h"
|
|
2
|
+
|
|
3
|
+
namespace lbug {
|
|
4
|
+
namespace planner {
|
|
5
|
+
|
|
6
|
+
// Builds the factorized schema for the rel-table count operator.
// This operator is a source: it has no child in the logical plan, and the bound node is
// used internally for scanning but not exposed. The schema therefore holds only the
// count expression.
void LogicalCountRelTable::computeFactorizedSchema() {
    createEmptySchema();
    const auto countGroupPos = schema->createGroup();
    // The count expression is the only output and sits in a single-state group.
    schema->insertToGroupAndScope(countExpr, countGroupPos);
    schema->setGroupAsSingleState(countGroupPos);
}
|
|
15
|
+
|
|
16
|
+
// Builds the flat schema for the rel-table count operator: one freshly created group
// that contains only the count expression.
void LogicalCountRelTable::computeFlatSchema() {
    createEmptySchema();
    const auto countGroupPos = schema->createGroup();
    schema->insertToGroupAndScope(countExpr, countGroupPos);
}
|
|
22
|
+
|
|
23
|
+
} // namespace planner
|
|
24
|
+
} // namespace lbug
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
#include <cmath>
|
|
2
2
|
|
|
3
|
+
#include "binder/bound_scan_source.h"
|
|
3
4
|
#include "binder/expression_visitor.h"
|
|
5
|
+
#include "catalog/catalog_entry/catalog_entry_type.h"
|
|
4
6
|
#include "common/enums/join_type.h"
|
|
5
7
|
#include "common/enums/rel_direction.h"
|
|
8
|
+
#include "common/enums/table_type.h"
|
|
6
9
|
#include "common/utils.h"
|
|
7
10
|
#include "planner/join_order/cost_model.h"
|
|
8
11
|
#include "planner/join_order/join_plan_solver.h"
|
|
@@ -246,7 +249,19 @@ void Planner::planNodeScan(uint32_t nodePos) {
|
|
|
246
249
|
newSubgraph.addQueryNode(nodePos);
|
|
247
250
|
auto plan = LogicalPlan();
|
|
248
251
|
auto properties = getProperties(*node);
|
|
249
|
-
|
|
252
|
+
if (node->getEntries().size() == 1 &&
|
|
253
|
+
node->getEntries()[0]->getType() == catalog::CatalogEntryType::FOREIGN_TABLE_ENTRY) {
|
|
254
|
+
auto boundScanInfo =
|
|
255
|
+
node->getEntries()[0]->getBoundScanInfo(clientContext, node->getUniqueName());
|
|
256
|
+
if (boundScanInfo != nullptr) {
|
|
257
|
+
// Use table function call for foreign tables
|
|
258
|
+
appendTableFunctionCall(*boundScanInfo, plan);
|
|
259
|
+
} else {
|
|
260
|
+
appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
|
|
261
|
+
}
|
|
262
|
+
} else {
|
|
263
|
+
appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
|
|
264
|
+
}
|
|
250
265
|
auto predicates = getNewlyMatchedExprs(context.getEmptySubqueryGraph(), newSubgraph,
|
|
251
266
|
context.getWhereExpressions());
|
|
252
267
|
appendFilters(predicates, plan);
|