lbug 0.12.3-dev.26 → 0.12.3-dev.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lbug-source/CMakeLists.txt +1 -1
- package/lbug-source/Makefile +14 -2
- package/lbug-source/extension/duckdb/src/catalog/duckdb_catalog.cpp +1 -1
- package/lbug-source/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp +43 -4
- package/lbug-source/extension/duckdb/src/connector/duckdb_result_converter.cpp +6 -0
- package/lbug-source/extension/duckdb/src/connector/duckdb_secret_manager.cpp +1 -1
- package/lbug-source/extension/duckdb/src/function/duckdb_scan.cpp +49 -4
- package/lbug-source/extension/duckdb/src/include/catalog/duckdb_table_catalog_entry.h +6 -1
- package/lbug-source/extension/duckdb/src/include/function/duckdb_scan.h +2 -0
- package/lbug-source/extension/duckdb/test/test_files/duckdb.test +28 -0
- package/lbug-source/extension/extension_config.cmake +3 -2
- package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
- package/lbug-source/scripts/antlr4/hash.md5 +1 -1
- package/lbug-source/src/antlr4/Cypher.g4 +1 -1
- package/lbug-source/src/binder/bind/bind_graph_pattern.cpp +30 -3
- package/lbug-source/src/catalog/catalog_set.cpp +1 -0
- package/lbug-source/src/function/table/table_function.cpp +11 -2
- package/lbug-source/src/include/catalog/catalog_entry/table_catalog_entry.h +7 -0
- package/lbug-source/src/include/common/string_format.h +2 -2
- package/lbug-source/src/include/function/table/bind_data.h +12 -1
- package/lbug-source/src/include/function/table/table_function.h +2 -0
- package/lbug-source/src/include/optimizer/order_by_push_down_optimizer.h +21 -0
- package/lbug-source/src/include/planner/operator/logical_table_function_call.h +14 -1
- package/lbug-source/src/optimizer/CMakeLists.txt +2 -1
- package/lbug-source/src/optimizer/limit_push_down_optimizer.cpp +12 -0
- package/lbug-source/src/optimizer/optimizer.cpp +4 -0
- package/lbug-source/src/optimizer/order_by_push_down_optimizer.cpp +123 -0
- package/lbug-source/src/optimizer/projection_push_down_optimizer.cpp +5 -1
- package/lbug-source/src/parser/transform/transform_graph_pattern.cpp +6 -1
- package/lbug-source/src/planner/join_order/cardinality_estimator.cpp +11 -2
- package/lbug-source/src/planner/operator/logical_table_function_call.cpp +4 -0
- package/lbug-source/src/planner/plan/plan_join_order.cpp +16 -1
- package/lbug-source/src/processor/operator/index_lookup.cpp +31 -23
- package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +1332 -1316
- package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -1
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
#include "optimizer/order_by_push_down_optimizer.h"
|
|
2
|
+
|
|
3
|
+
#include "binder/expression/expression.h"
|
|
4
|
+
#include "binder/expression/expression_util.h"
|
|
5
|
+
#include "binder/expression/property_expression.h"
|
|
6
|
+
#include "binder/expression/variable_expression.h"
|
|
7
|
+
#include "common/exception/runtime.h"
|
|
8
|
+
#include "planner/operator/logical_order_by.h"
|
|
9
|
+
#include "planner/operator/logical_table_function_call.h"
|
|
10
|
+
|
|
11
|
+
using namespace lbug::binder;
|
|
12
|
+
using namespace lbug::common;
|
|
13
|
+
using namespace lbug::planner;
|
|
14
|
+
|
|
15
|
+
namespace lbug {
|
|
16
|
+
namespace optimizer {
|
|
17
|
+
|
|
18
|
+
// This ensures that ORDER BY can be pushed down only through operators that support it.
|
|
19
|
+
// It should not be pushed down for things like RECURSIVE_EXTEND etc.
|
|
20
|
+
bool isPushDownSupported(LogicalOperator* op) {
|
|
21
|
+
switch (op->getOperatorType()) {
|
|
22
|
+
case LogicalOperatorType::TABLE_FUNCTION_CALL: {
|
|
23
|
+
return op->cast<LogicalTableFunctionCall>().getTableFunc().supportsPushDownFunc();
|
|
24
|
+
}
|
|
25
|
+
case LogicalOperatorType::MULTIPLICITY_REDUCER:
|
|
26
|
+
case LogicalOperatorType::EXPLAIN:
|
|
27
|
+
case LogicalOperatorType::ACCUMULATE:
|
|
28
|
+
case LogicalOperatorType::FILTER:
|
|
29
|
+
case LogicalOperatorType::PROJECTION:
|
|
30
|
+
case LogicalOperatorType::LIMIT: {
|
|
31
|
+
if (op->getNumChildren() == 0) {
|
|
32
|
+
return false;
|
|
33
|
+
}
|
|
34
|
+
return isPushDownSupported(op->getChild(0).get());
|
|
35
|
+
}
|
|
36
|
+
default:
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Entry point of the optimizer: runs visitOperator over the plan starting at its last
// operator and installs whatever operator comes back as the plan's new last operator.
void OrderByPushDownOptimizer::rewrite(LogicalPlan* plan) {
    auto rewrittenRoot = visitOperator(plan->getLastOperator());
    plan->setLastOperator(rewrittenRoot);
}
|
|
44
|
+
|
|
45
|
+
std::shared_ptr<LogicalOperator> OrderByPushDownOptimizer::visitOperator(
|
|
46
|
+
std::shared_ptr<LogicalOperator> op, std::string currentOrderBy) {
|
|
47
|
+
switch (op->getOperatorType()) {
|
|
48
|
+
case LogicalOperatorType::ORDER_BY: {
|
|
49
|
+
auto& orderBy = op->constCast<LogicalOrderBy>();
|
|
50
|
+
std::string newOrderBy = currentOrderBy;
|
|
51
|
+
if (!currentOrderBy.empty()) {
|
|
52
|
+
newOrderBy += ", ";
|
|
53
|
+
}
|
|
54
|
+
newOrderBy +=
|
|
55
|
+
buildOrderByString(orderBy.getExpressionsToOrderBy(), orderBy.getIsAscOrders());
|
|
56
|
+
auto newChild = visitOperator(orderBy.getChild(0), newOrderBy);
|
|
57
|
+
if (isPushDownSupported(newChild.get())) {
|
|
58
|
+
return newChild;
|
|
59
|
+
}
|
|
60
|
+
return std::make_shared<LogicalOrderBy>(orderBy.getExpressionsToOrderBy(),
|
|
61
|
+
orderBy.getIsAscOrders(), newChild);
|
|
62
|
+
}
|
|
63
|
+
case LogicalOperatorType::MULTIPLICITY_REDUCER:
|
|
64
|
+
case LogicalOperatorType::EXPLAIN:
|
|
65
|
+
case LogicalOperatorType::ACCUMULATE:
|
|
66
|
+
case LogicalOperatorType::FILTER:
|
|
67
|
+
case LogicalOperatorType::PROJECTION:
|
|
68
|
+
case LogicalOperatorType::LIMIT: {
|
|
69
|
+
for (auto i = 0u; i < op->getNumChildren(); ++i) {
|
|
70
|
+
op->setChild(i, visitOperator(op->getChild(i), currentOrderBy));
|
|
71
|
+
}
|
|
72
|
+
return op;
|
|
73
|
+
}
|
|
74
|
+
case LogicalOperatorType::TABLE_FUNCTION_CALL: {
|
|
75
|
+
if (!currentOrderBy.empty()) {
|
|
76
|
+
auto& tableFunc = op->cast<LogicalTableFunctionCall>();
|
|
77
|
+
if (tableFunc.getTableFunc().supportsPushDownFunc()) {
|
|
78
|
+
tableFunc.setOrderBy(currentOrderBy);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
return op;
|
|
82
|
+
}
|
|
83
|
+
default:
|
|
84
|
+
return op;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
std::string OrderByPushDownOptimizer::buildOrderByString(
|
|
89
|
+
const binder::expression_vector& expressions, const std::vector<bool>& isAscOrders) {
|
|
90
|
+
if (expressions.empty()) {
|
|
91
|
+
return "";
|
|
92
|
+
}
|
|
93
|
+
std::string result = " ORDER BY ";
|
|
94
|
+
bool first = true;
|
|
95
|
+
for (size_t i = 0; i < expressions.size(); ++i) {
|
|
96
|
+
auto& expr = expressions[i];
|
|
97
|
+
std::string colName;
|
|
98
|
+
if (expr->expressionType == common::ExpressionType::VARIABLE) {
|
|
99
|
+
auto& var = expr->constCast<binder::VariableExpression>();
|
|
100
|
+
colName = var.getVariableName();
|
|
101
|
+
} else if (expr->expressionType == common::ExpressionType::PROPERTY) {
|
|
102
|
+
auto& prop = expr->constCast<binder::PropertyExpression>();
|
|
103
|
+
colName = prop.getPropertyName();
|
|
104
|
+
} else {
|
|
105
|
+
// Skip expressions that cannot be pushed down
|
|
106
|
+
continue;
|
|
107
|
+
}
|
|
108
|
+
if (!first) {
|
|
109
|
+
result += ", ";
|
|
110
|
+
}
|
|
111
|
+
result += colName;
|
|
112
|
+
result += isAscOrders[i] ? " ASC" : " DESC";
|
|
113
|
+
first = false;
|
|
114
|
+
}
|
|
115
|
+
if (first) {
|
|
116
|
+
// No expressions could be pushed down
|
|
117
|
+
return "";
|
|
118
|
+
}
|
|
119
|
+
return result;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
} // namespace optimizer
|
|
123
|
+
} // namespace lbug
|
|
@@ -261,7 +261,11 @@ void ProjectionPushDownOptimizer::visitTableFunctionCall(LogicalOperator* op) {
|
|
|
261
261
|
auto& tableFunctionCall = op->cast<LogicalTableFunctionCall>();
|
|
262
262
|
std::vector<bool> columnSkips;
|
|
263
263
|
for (auto& column : tableFunctionCall.getBindData()->columns) {
|
|
264
|
-
|
|
264
|
+
// Check both variablesInUse and propertiesInUse since foreign table columns
|
|
265
|
+
// may be referenced as properties in the query (e.g., a.id) but represented
|
|
266
|
+
// as variables in the table function bind data
|
|
267
|
+
columnSkips.push_back(
|
|
268
|
+
!variablesInUse.contains(column) && !propertiesInUse.contains(column));
|
|
265
269
|
}
|
|
266
270
|
tableFunctionCall.setColumnSkips(std::move(columnSkips));
|
|
267
271
|
}
|
|
@@ -202,7 +202,12 @@ std::vector<std::string> Transformer::transformNodeLabels(CypherParser::OC_NodeL
|
|
|
202
202
|
}
|
|
203
203
|
|
|
204
204
|
// Converts a parsed label name into its string form. A label made of a single schema name
// is returned as-is; otherwise the first two schema names are joined with a dot, giving a
// qualified name such as db.table.
std::string Transformer::transformLabelName(CypherParser::OC_LabelNameContext& ctx) {
    const auto nameParts = ctx.oC_SchemaName();
    auto labelName = transformSchemaName(*nameParts[0]);
    if (nameParts.size() != 1) {
        // Qualified name: db.table
        labelName += ".";
        labelName += transformSchemaName(*nameParts[1]);
    }
    return labelName;
}
|
|
207
212
|
|
|
208
213
|
std::string Transformer::transformRelTypeName(CypherParser::OC_RelTypeNameContext& ctx) {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include "planner/join_order/cardinality_estimator.h"
|
|
2
2
|
|
|
3
3
|
#include "binder/expression/property_expression.h"
|
|
4
|
+
#include "catalog/catalog_entry/table_catalog_entry.h"
|
|
4
5
|
#include "main/client_context.h"
|
|
5
6
|
#include "planner/join_order/join_order_util.h"
|
|
6
7
|
#include "planner/operator/logical_aggregate.h"
|
|
@@ -39,7 +40,12 @@ void CardinalityEstimator::init(const NodeExpression& node) {
|
|
|
39
40
|
cardinality_t numNodes = 0u;
|
|
40
41
|
auto storageManager = storage::StorageManager::Get(*context);
|
|
41
42
|
auto transaction = transaction::Transaction::Get(*context);
|
|
42
|
-
for (auto
|
|
43
|
+
for (auto entry : node.getEntries()) {
|
|
44
|
+
// Skip foreign tables - they don't have storage in the local database
|
|
45
|
+
if (entry->getType() == catalog::CatalogEntryType::FOREIGN_TABLE_ENTRY) {
|
|
46
|
+
continue;
|
|
47
|
+
}
|
|
48
|
+
auto tableID = entry->getTableID();
|
|
43
49
|
auto stats =
|
|
44
50
|
storageManager->getTable(tableID)->cast<storage::NodeTable>().getStats(transaction);
|
|
45
51
|
numNodes += stats.getTableCard();
|
|
@@ -195,7 +201,10 @@ uint64_t CardinalityEstimator::getNumNodes(const Transaction*,
|
|
|
195
201
|
const std::vector<table_id_t>& tableIDs) const {
|
|
196
202
|
cardinality_t numNodes = 0u;
|
|
197
203
|
for (auto& tableID : tableIDs) {
|
|
198
|
-
|
|
204
|
+
// Skip foreign tables - they won't be in nodeTableStats
|
|
205
|
+
if (!nodeTableStats.contains(tableID)) {
|
|
206
|
+
continue;
|
|
207
|
+
}
|
|
199
208
|
numNodes += nodeTableStats.at(tableID).getTableCard();
|
|
200
209
|
}
|
|
201
210
|
return atLeastOne(numNodes);
|
|
@@ -19,5 +19,9 @@ void LogicalTableFunctionCall::computeFactorizedSchema() {
|
|
|
19
19
|
}
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
+
std::unique_ptr<OPPrintInfo> LogicalTableFunctionCall::getPrintInfo() const {
|
|
23
|
+
return std::make_unique<LogicalTableFunctionCallPrintInfo>(getExpressionsForPrinting());
|
|
24
|
+
}
|
|
25
|
+
|
|
22
26
|
} // namespace planner
|
|
23
27
|
} // namespace lbug
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
#include <cmath>
|
|
2
2
|
|
|
3
|
+
#include "binder/bound_scan_source.h"
|
|
3
4
|
#include "binder/expression_visitor.h"
|
|
5
|
+
#include "catalog/catalog_entry/catalog_entry_type.h"
|
|
4
6
|
#include "common/enums/join_type.h"
|
|
5
7
|
#include "common/enums/rel_direction.h"
|
|
8
|
+
#include "common/enums/table_type.h"
|
|
6
9
|
#include "common/utils.h"
|
|
7
10
|
#include "planner/join_order/cost_model.h"
|
|
8
11
|
#include "planner/join_order/join_plan_solver.h"
|
|
@@ -246,7 +249,19 @@ void Planner::planNodeScan(uint32_t nodePos) {
|
|
|
246
249
|
newSubgraph.addQueryNode(nodePos);
|
|
247
250
|
auto plan = LogicalPlan();
|
|
248
251
|
auto properties = getProperties(*node);
|
|
249
|
-
|
|
252
|
+
if (node->getEntries().size() == 1 &&
|
|
253
|
+
node->getEntries()[0]->getType() == catalog::CatalogEntryType::FOREIGN_TABLE_ENTRY) {
|
|
254
|
+
auto boundScanInfo =
|
|
255
|
+
node->getEntries()[0]->getBoundScanInfo(clientContext, node->getUniqueName());
|
|
256
|
+
if (boundScanInfo != nullptr) {
|
|
257
|
+
// Use table function call for foreign tables
|
|
258
|
+
appendTableFunctionCall(*boundScanInfo, plan);
|
|
259
|
+
} else {
|
|
260
|
+
appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
|
|
261
|
+
}
|
|
262
|
+
} else {
|
|
263
|
+
appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
|
|
264
|
+
}
|
|
250
265
|
auto predicates = getNewlyMatchedExprs(context.getEmptySubqueryGraph(), newSubgraph,
|
|
251
266
|
context.getWhereExpressions());
|
|
252
267
|
appendFilters(predicates, plan);
|
|
@@ -28,7 +28,6 @@ std::optional<WarningSourceData> getWarningSourceData(
|
|
|
28
28
|
return ret;
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
-
// TODO(Guodong): Add short path for unfiltered case.
|
|
32
31
|
bool checkNullKey(ValueVector* keyVector, offset_t vectorOffset,
|
|
33
32
|
BatchInsertErrorHandler* errorHandler, const std::vector<ValueVector*>& warningDataVectors) {
|
|
34
33
|
bool isNull = keyVector->isNull(vectorOffset);
|
|
@@ -71,27 +70,17 @@ struct OffsetVectorManager {
|
|
|
71
70
|
offset_t insertOffset;
|
|
72
71
|
};
|
|
73
72
|
|
|
74
|
-
// TODO(Guodong): Add short path for unfiltered case.
|
|
75
73
|
template<bool hasNoNullsGuarantee>
|
|
76
|
-
void
|
|
77
|
-
ValueVector* keyVector, ValueVector* resultVector,
|
|
78
|
-
const std::vector<ValueVector*>& warningDataVectors, BatchInsertErrorHandler* errorHandler
|
|
79
|
-
|
|
74
|
+
void fillOffsetArraysFromVectorInternal(transaction::Transaction* transaction,
|
|
75
|
+
const IndexLookupInfo& info, ValueVector* keyVector, ValueVector* resultVector,
|
|
76
|
+
const std::vector<ValueVector*>& warningDataVectors, BatchInsertErrorHandler* errorHandler,
|
|
77
|
+
const sel_t* selVector, sel_t numKeys) {
|
|
80
78
|
TypeUtils::visit(
|
|
81
79
|
keyVector->dataType.getPhysicalType(),
|
|
82
80
|
[&]<IndexHashable T>(T) {
|
|
83
|
-
auto numKeys = keyVector->state->getSelVector().getSelSize();
|
|
84
|
-
|
|
85
|
-
// fetch all the selection pos at the start
|
|
86
|
-
// since we may modify the selection vector in the middle of the lookup
|
|
87
|
-
std::vector<sel_t> lookupPos(numKeys);
|
|
88
|
-
for (idx_t i = 0; i < numKeys; ++i) {
|
|
89
|
-
lookupPos[i] = (keyVector->state->getSelVector()[i]);
|
|
90
|
-
}
|
|
91
|
-
|
|
92
81
|
OffsetVectorManager resultManager{resultVector, errorHandler};
|
|
93
|
-
for (
|
|
94
|
-
auto pos =
|
|
82
|
+
for (sel_t i = 0u; i < numKeys; i++) {
|
|
83
|
+
auto pos = selVector ? selVector[i] : i;
|
|
95
84
|
if constexpr (!hasNoNullsGuarantee) {
|
|
96
85
|
if (!checkNullKey(keyVector, pos, errorHandler, warningDataVectors)) {
|
|
97
86
|
continue;
|
|
@@ -99,12 +88,9 @@ void fillOffsetArraysFromVector(transaction::Transaction* transaction, const Ind
|
|
|
99
88
|
}
|
|
100
89
|
offset_t lookupOffset = 0;
|
|
101
90
|
if (!info.nodeTable->lookupPK(transaction, keyVector, pos, lookupOffset)) {
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
TypeUtils::toString(keyVector->getValue<type>(pos), keyVector)),
|
|
106
|
-
getWarningSourceData(warningDataVectors, pos));
|
|
107
|
-
});
|
|
91
|
+
errorHandler->handleError(ExceptionMessage::nonExistentPKException(
|
|
92
|
+
keyVector->getAsValue(pos)->toString()),
|
|
93
|
+
getWarningSourceData(warningDataVectors, pos));
|
|
108
94
|
} else {
|
|
109
95
|
resultManager.insertEntry(lookupOffset, pos);
|
|
110
96
|
}
|
|
@@ -112,6 +98,28 @@ void fillOffsetArraysFromVector(transaction::Transaction* transaction, const Ind
|
|
|
112
98
|
},
|
|
113
99
|
[&](auto) { KU_UNREACHABLE; });
|
|
114
100
|
}
|
|
101
|
+
|
|
102
|
+
template<bool hasNoNullsGuarantee>
|
|
103
|
+
void fillOffsetArraysFromVector(transaction::Transaction* transaction, const IndexLookupInfo& info,
|
|
104
|
+
ValueVector* keyVector, ValueVector* resultVector,
|
|
105
|
+
const std::vector<ValueVector*>& warningDataVectors, BatchInsertErrorHandler* errorHandler) {
|
|
106
|
+
KU_ASSERT(resultVector->dataType.getPhysicalType() == PhysicalTypeID::INT64);
|
|
107
|
+
auto& selVector = keyVector->state->getSelVector();
|
|
108
|
+
auto numKeys = selVector.getSelSize();
|
|
109
|
+
if (selVector.isUnfiltered()) {
|
|
110
|
+
// Fast path: selection vector is unfiltered - pass a null selection vector
|
|
111
|
+
fillOffsetArraysFromVectorInternal<hasNoNullsGuarantee>(transaction, info, keyVector,
|
|
112
|
+
resultVector, warningDataVectors, errorHandler, nullptr /* selVector */, numKeys);
|
|
113
|
+
} else {
|
|
114
|
+
// Filtered case: copy selection positions since we may modify the selection vector
|
|
115
|
+
std::vector<sel_t> lookupPos(numKeys);
|
|
116
|
+
for (idx_t i = 0; i < numKeys; ++i) {
|
|
117
|
+
lookupPos[i] = selVector[i];
|
|
118
|
+
}
|
|
119
|
+
fillOffsetArraysFromVectorInternal<hasNoNullsGuarantee>(transaction, info, keyVector,
|
|
120
|
+
resultVector, warningDataVectors, errorHandler, lookupPos.data(), numKeys);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
115
123
|
} // namespace
|
|
116
124
|
|
|
117
125
|
std::string IndexLookupPrintInfo::toString() const {
|