lbug 0.12.3-dev.26 → 0.12.3-dev.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/lbug-source/CMakeLists.txt +1 -1
  2. package/lbug-source/Makefile +14 -2
  3. package/lbug-source/extension/duckdb/src/catalog/duckdb_catalog.cpp +1 -1
  4. package/lbug-source/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp +43 -4
  5. package/lbug-source/extension/duckdb/src/connector/duckdb_result_converter.cpp +6 -0
  6. package/lbug-source/extension/duckdb/src/connector/duckdb_secret_manager.cpp +1 -1
  7. package/lbug-source/extension/duckdb/src/function/duckdb_scan.cpp +49 -4
  8. package/lbug-source/extension/duckdb/src/include/catalog/duckdb_table_catalog_entry.h +6 -1
  9. package/lbug-source/extension/duckdb/src/include/function/duckdb_scan.h +2 -0
  10. package/lbug-source/extension/duckdb/test/test_files/duckdb.test +28 -0
  11. package/lbug-source/extension/extension_config.cmake +3 -2
  12. package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
  13. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  14. package/lbug-source/src/antlr4/Cypher.g4 +1 -1
  15. package/lbug-source/src/binder/bind/bind_graph_pattern.cpp +30 -3
  16. package/lbug-source/src/catalog/catalog_set.cpp +1 -0
  17. package/lbug-source/src/function/table/table_function.cpp +11 -2
  18. package/lbug-source/src/include/catalog/catalog_entry/table_catalog_entry.h +7 -0
  19. package/lbug-source/src/include/common/string_format.h +2 -2
  20. package/lbug-source/src/include/function/table/bind_data.h +12 -1
  21. package/lbug-source/src/include/function/table/table_function.h +2 -0
  22. package/lbug-source/src/include/optimizer/order_by_push_down_optimizer.h +21 -0
  23. package/lbug-source/src/include/planner/operator/logical_table_function_call.h +14 -1
  24. package/lbug-source/src/optimizer/CMakeLists.txt +2 -1
  25. package/lbug-source/src/optimizer/limit_push_down_optimizer.cpp +12 -0
  26. package/lbug-source/src/optimizer/optimizer.cpp +4 -0
  27. package/lbug-source/src/optimizer/order_by_push_down_optimizer.cpp +123 -0
  28. package/lbug-source/src/optimizer/projection_push_down_optimizer.cpp +5 -1
  29. package/lbug-source/src/parser/transform/transform_graph_pattern.cpp +6 -1
  30. package/lbug-source/src/planner/join_order/cardinality_estimator.cpp +11 -2
  31. package/lbug-source/src/planner/operator/logical_table_function_call.cpp +4 -0
  32. package/lbug-source/src/planner/plan/plan_join_order.cpp +16 -1
  33. package/lbug-source/src/processor/operator/index_lookup.cpp +31 -23
  34. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +1332 -1316
  35. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -1
  36. package/package.json +1 -1
  37. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  38. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  39. package/prebuilt/lbugjs-linux-x64.node +0 -0
  40. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -0,0 +1,123 @@
1
+ #include "optimizer/order_by_push_down_optimizer.h"
2
+
3
+ #include "binder/expression/expression.h"
4
+ #include "binder/expression/expression_util.h"
5
+ #include "binder/expression/property_expression.h"
6
+ #include "binder/expression/variable_expression.h"
7
+ #include "common/exception/runtime.h"
8
+ #include "planner/operator/logical_order_by.h"
9
+ #include "planner/operator/logical_table_function_call.h"
10
+
11
+ using namespace lbug::binder;
12
+ using namespace lbug::common;
13
+ using namespace lbug::planner;
14
+
15
+ namespace lbug {
16
+ namespace optimizer {
17
+
18
+ // This ensures that ORDER BY can be pushed down only through operators that support it.
19
+ // It should not be pushed down for things like RECURSIVE_EXTEND etc.
20
+ bool isPushDownSupported(LogicalOperator* op) {
21
+ switch (op->getOperatorType()) {
22
+ case LogicalOperatorType::TABLE_FUNCTION_CALL: {
23
+ return op->cast<LogicalTableFunctionCall>().getTableFunc().supportsPushDownFunc();
24
+ }
25
+ case LogicalOperatorType::MULTIPLICITY_REDUCER:
26
+ case LogicalOperatorType::EXPLAIN:
27
+ case LogicalOperatorType::ACCUMULATE:
28
+ case LogicalOperatorType::FILTER:
29
+ case LogicalOperatorType::PROJECTION:
30
+ case LogicalOperatorType::LIMIT: {
31
+ if (op->getNumChildren() == 0) {
32
+ return false;
33
+ }
34
+ return isPushDownSupported(op->getChild(0).get());
35
+ }
36
+ default:
37
+ return false;
38
+ }
39
+ }
40
+
41
+ void OrderByPushDownOptimizer::rewrite(LogicalPlan* plan) {
42
+ plan->setLastOperator(visitOperator(plan->getLastOperator()));
43
+ }
44
+
45
+ std::shared_ptr<LogicalOperator> OrderByPushDownOptimizer::visitOperator(
46
+ std::shared_ptr<LogicalOperator> op, std::string currentOrderBy) {
47
+ switch (op->getOperatorType()) {
48
+ case LogicalOperatorType::ORDER_BY: {
49
+ auto& orderBy = op->constCast<LogicalOrderBy>();
50
+ std::string newOrderBy = currentOrderBy;
51
+ if (!currentOrderBy.empty()) {
52
+ newOrderBy += ", ";
53
+ }
54
+ newOrderBy +=
55
+ buildOrderByString(orderBy.getExpressionsToOrderBy(), orderBy.getIsAscOrders());
56
+ auto newChild = visitOperator(orderBy.getChild(0), newOrderBy);
57
+ if (isPushDownSupported(newChild.get())) {
58
+ return newChild;
59
+ }
60
+ return std::make_shared<LogicalOrderBy>(orderBy.getExpressionsToOrderBy(),
61
+ orderBy.getIsAscOrders(), newChild);
62
+ }
63
+ case LogicalOperatorType::MULTIPLICITY_REDUCER:
64
+ case LogicalOperatorType::EXPLAIN:
65
+ case LogicalOperatorType::ACCUMULATE:
66
+ case LogicalOperatorType::FILTER:
67
+ case LogicalOperatorType::PROJECTION:
68
+ case LogicalOperatorType::LIMIT: {
69
+ for (auto i = 0u; i < op->getNumChildren(); ++i) {
70
+ op->setChild(i, visitOperator(op->getChild(i), currentOrderBy));
71
+ }
72
+ return op;
73
+ }
74
+ case LogicalOperatorType::TABLE_FUNCTION_CALL: {
75
+ if (!currentOrderBy.empty()) {
76
+ auto& tableFunc = op->cast<LogicalTableFunctionCall>();
77
+ if (tableFunc.getTableFunc().supportsPushDownFunc()) {
78
+ tableFunc.setOrderBy(currentOrderBy);
79
+ }
80
+ }
81
+ return op;
82
+ }
83
+ default:
84
+ return op;
85
+ }
86
+ }
87
+
88
+ std::string OrderByPushDownOptimizer::buildOrderByString(
89
+ const binder::expression_vector& expressions, const std::vector<bool>& isAscOrders) {
90
+ if (expressions.empty()) {
91
+ return "";
92
+ }
93
+ std::string result = " ORDER BY ";
94
+ bool first = true;
95
+ for (size_t i = 0; i < expressions.size(); ++i) {
96
+ auto& expr = expressions[i];
97
+ std::string colName;
98
+ if (expr->expressionType == common::ExpressionType::VARIABLE) {
99
+ auto& var = expr->constCast<binder::VariableExpression>();
100
+ colName = var.getVariableName();
101
+ } else if (expr->expressionType == common::ExpressionType::PROPERTY) {
102
+ auto& prop = expr->constCast<binder::PropertyExpression>();
103
+ colName = prop.getPropertyName();
104
+ } else {
105
+ // Skip expressions that cannot be pushed down
106
+ continue;
107
+ }
108
+ if (!first) {
109
+ result += ", ";
110
+ }
111
+ result += colName;
112
+ result += isAscOrders[i] ? " ASC" : " DESC";
113
+ first = false;
114
+ }
115
+ if (first) {
116
+ // No expressions could be pushed down
117
+ return "";
118
+ }
119
+ return result;
120
+ }
121
+
122
+ } // namespace optimizer
123
+ } // namespace lbug
@@ -261,7 +261,11 @@ void ProjectionPushDownOptimizer::visitTableFunctionCall(LogicalOperator* op) {
261
261
  auto& tableFunctionCall = op->cast<LogicalTableFunctionCall>();
262
262
  std::vector<bool> columnSkips;
263
263
  for (auto& column : tableFunctionCall.getBindData()->columns) {
264
- columnSkips.push_back(!variablesInUse.contains(column));
264
+ // Check both variablesInUse and propertiesInUse since foreign table columns
265
+ // may be referenced as properties in the query (e.g., a.id) but represented
266
+ // as variables in the table function bind data
267
+ columnSkips.push_back(
268
+ !variablesInUse.contains(column) && !propertiesInUse.contains(column));
265
269
  }
266
270
  tableFunctionCall.setColumnSkips(std::move(columnSkips));
267
271
  }
@@ -202,7 +202,12 @@ std::vector<std::string> Transformer::transformNodeLabels(CypherParser::OC_NodeL
202
202
  }
203
203
 
204
204
  std::string Transformer::transformLabelName(CypherParser::OC_LabelNameContext& ctx) {
205
- return transformSchemaName(*ctx.oC_SchemaName());
205
+ auto schemaNames = ctx.oC_SchemaName();
206
+ if (schemaNames.size() == 1) {
207
+ return transformSchemaName(*schemaNames[0]);
208
+ }
209
+ // Qualified name: db.table
210
+ return transformSchemaName(*schemaNames[0]) + "." + transformSchemaName(*schemaNames[1]);
206
211
  }
207
212
 
208
213
  std::string Transformer::transformRelTypeName(CypherParser::OC_RelTypeNameContext& ctx) {
@@ -1,6 +1,7 @@
1
1
  #include "planner/join_order/cardinality_estimator.h"
2
2
 
3
3
  #include "binder/expression/property_expression.h"
4
+ #include "catalog/catalog_entry/table_catalog_entry.h"
4
5
  #include "main/client_context.h"
5
6
  #include "planner/join_order/join_order_util.h"
6
7
  #include "planner/operator/logical_aggregate.h"
@@ -39,7 +40,12 @@ void CardinalityEstimator::init(const NodeExpression& node) {
39
40
  cardinality_t numNodes = 0u;
40
41
  auto storageManager = storage::StorageManager::Get(*context);
41
42
  auto transaction = transaction::Transaction::Get(*context);
42
- for (auto tableID : node.getTableIDs()) {
43
+ for (auto entry : node.getEntries()) {
44
+ // Skip foreign tables - they don't have storage in the local database
45
+ if (entry->getType() == catalog::CatalogEntryType::FOREIGN_TABLE_ENTRY) {
46
+ continue;
47
+ }
48
+ auto tableID = entry->getTableID();
43
49
  auto stats =
44
50
  storageManager->getTable(tableID)->cast<storage::NodeTable>().getStats(transaction);
45
51
  numNodes += stats.getTableCard();
@@ -195,7 +201,10 @@ uint64_t CardinalityEstimator::getNumNodes(const Transaction*,
195
201
  const std::vector<table_id_t>& tableIDs) const {
196
202
  cardinality_t numNodes = 0u;
197
203
  for (auto& tableID : tableIDs) {
198
- KU_ASSERT(nodeTableStats.contains(tableID));
204
+ // Skip foreign tables - they won't be in nodeTableStats
205
+ if (!nodeTableStats.contains(tableID)) {
206
+ continue;
207
+ }
199
208
  numNodes += nodeTableStats.at(tableID).getTableCard();
200
209
  }
201
210
  return atLeastOne(numNodes);
@@ -19,5 +19,9 @@ void LogicalTableFunctionCall::computeFactorizedSchema() {
19
19
  }
20
20
  }
21
21
 
22
+ std::unique_ptr<OPPrintInfo> LogicalTableFunctionCall::getPrintInfo() const {
23
+ return std::make_unique<LogicalTableFunctionCallPrintInfo>(getExpressionsForPrinting());
24
+ }
25
+
22
26
  } // namespace planner
23
27
  } // namespace lbug
@@ -1,8 +1,11 @@
1
1
  #include <cmath>
2
2
 
3
+ #include "binder/bound_scan_source.h"
3
4
  #include "binder/expression_visitor.h"
5
+ #include "catalog/catalog_entry/catalog_entry_type.h"
4
6
  #include "common/enums/join_type.h"
5
7
  #include "common/enums/rel_direction.h"
8
+ #include "common/enums/table_type.h"
6
9
  #include "common/utils.h"
7
10
  #include "planner/join_order/cost_model.h"
8
11
  #include "planner/join_order/join_plan_solver.h"
@@ -246,7 +249,19 @@ void Planner::planNodeScan(uint32_t nodePos) {
246
249
  newSubgraph.addQueryNode(nodePos);
247
250
  auto plan = LogicalPlan();
248
251
  auto properties = getProperties(*node);
249
- appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
252
+ if (node->getEntries().size() == 1 &&
253
+ node->getEntries()[0]->getType() == catalog::CatalogEntryType::FOREIGN_TABLE_ENTRY) {
254
+ auto boundScanInfo =
255
+ node->getEntries()[0]->getBoundScanInfo(clientContext, node->getUniqueName());
256
+ if (boundScanInfo != nullptr) {
257
+ // Use table function call for foreign tables
258
+ appendTableFunctionCall(*boundScanInfo, plan);
259
+ } else {
260
+ appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
261
+ }
262
+ } else {
263
+ appendScanNodeTable(node->getInternalID(), node->getTableIDs(), properties, plan);
264
+ }
250
265
  auto predicates = getNewlyMatchedExprs(context.getEmptySubqueryGraph(), newSubgraph,
251
266
  context.getWhereExpressions());
252
267
  appendFilters(predicates, plan);
@@ -28,7 +28,6 @@ std::optional<WarningSourceData> getWarningSourceData(
28
28
  return ret;
29
29
  }
30
30
 
31
- // TODO(Guodong): Add short path for unfiltered case.
32
31
  bool checkNullKey(ValueVector* keyVector, offset_t vectorOffset,
33
32
  BatchInsertErrorHandler* errorHandler, const std::vector<ValueVector*>& warningDataVectors) {
34
33
  bool isNull = keyVector->isNull(vectorOffset);
@@ -71,27 +70,17 @@ struct OffsetVectorManager {
71
70
  offset_t insertOffset;
72
71
  };
73
72
 
74
- // TODO(Guodong): Add short path for unfiltered case.
75
73
  template<bool hasNoNullsGuarantee>
76
- void fillOffsetArraysFromVector(transaction::Transaction* transaction, const IndexLookupInfo& info,
77
- ValueVector* keyVector, ValueVector* resultVector,
78
- const std::vector<ValueVector*>& warningDataVectors, BatchInsertErrorHandler* errorHandler) {
79
- KU_ASSERT(resultVector->dataType.getPhysicalType() == PhysicalTypeID::INT64);
74
+ void fillOffsetArraysFromVectorInternal(transaction::Transaction* transaction,
75
+ const IndexLookupInfo& info, ValueVector* keyVector, ValueVector* resultVector,
76
+ const std::vector<ValueVector*>& warningDataVectors, BatchInsertErrorHandler* errorHandler,
77
+ const sel_t* selVector, sel_t numKeys) {
80
78
  TypeUtils::visit(
81
79
  keyVector->dataType.getPhysicalType(),
82
80
  [&]<IndexHashable T>(T) {
83
- auto numKeys = keyVector->state->getSelVector().getSelSize();
84
-
85
- // fetch all the selection pos at the start
86
- // since we may modify the selection vector in the middle of the lookup
87
- std::vector<sel_t> lookupPos(numKeys);
88
- for (idx_t i = 0; i < numKeys; ++i) {
89
- lookupPos[i] = (keyVector->state->getSelVector()[i]);
90
- }
91
-
92
81
  OffsetVectorManager resultManager{resultVector, errorHandler};
93
- for (auto i = 0u; i < numKeys; i++) {
94
- auto pos = lookupPos[i];
82
+ for (sel_t i = 0u; i < numKeys; i++) {
83
+ auto pos = selVector ? selVector[i] : i;
95
84
  if constexpr (!hasNoNullsGuarantee) {
96
85
  if (!checkNullKey(keyVector, pos, errorHandler, warningDataVectors)) {
97
86
  continue;
@@ -99,12 +88,9 @@ void fillOffsetArraysFromVector(transaction::Transaction* transaction, const Ind
99
88
  }
100
89
  offset_t lookupOffset = 0;
101
90
  if (!info.nodeTable->lookupPK(transaction, keyVector, pos, lookupOffset)) {
102
- TypeUtils::visit(keyVector->dataType, [&]<typename type>(type) {
103
- errorHandler->handleError(
104
- ExceptionMessage::nonExistentPKException(
105
- TypeUtils::toString(keyVector->getValue<type>(pos), keyVector)),
106
- getWarningSourceData(warningDataVectors, pos));
107
- });
91
+ errorHandler->handleError(ExceptionMessage::nonExistentPKException(
92
+ keyVector->getAsValue(pos)->toString()),
93
+ getWarningSourceData(warningDataVectors, pos));
108
94
  } else {
109
95
  resultManager.insertEntry(lookupOffset, pos);
110
96
  }
@@ -112,6 +98,28 @@ void fillOffsetArraysFromVector(transaction::Transaction* transaction, const Ind
112
98
  },
113
99
  [&](auto) { KU_UNREACHABLE; });
114
100
  }
101
+
102
+ template<bool hasNoNullsGuarantee>
103
+ void fillOffsetArraysFromVector(transaction::Transaction* transaction, const IndexLookupInfo& info,
104
+ ValueVector* keyVector, ValueVector* resultVector,
105
+ const std::vector<ValueVector*>& warningDataVectors, BatchInsertErrorHandler* errorHandler) {
106
+ KU_ASSERT(resultVector->dataType.getPhysicalType() == PhysicalTypeID::INT64);
107
+ auto& selVector = keyVector->state->getSelVector();
108
+ auto numKeys = selVector.getSelSize();
109
+ if (selVector.isUnfiltered()) {
110
+ // Fast path: selection vector is unfiltered - pass a null selection vector
111
+ fillOffsetArraysFromVectorInternal<hasNoNullsGuarantee>(transaction, info, keyVector,
112
+ resultVector, warningDataVectors, errorHandler, nullptr /* selVector */, numKeys);
113
+ } else {
114
+ // Filtered case: copy selection positions since we may modify the selection vector
115
+ std::vector<sel_t> lookupPos(numKeys);
116
+ for (idx_t i = 0; i < numKeys; ++i) {
117
+ lookupPos[i] = selVector[i];
118
+ }
119
+ fillOffsetArraysFromVectorInternal<hasNoNullsGuarantee>(transaction, info, keyVector,
120
+ resultVector, warningDataVectors, errorHandler, lookupPos.data(), numKeys);
121
+ }
122
+ }
115
123
  } // namespace
116
124
 
117
125
  std::string IndexLookupPrintInfo::toString() const {