lbug 0.12.3-dev.27 → 0.12.3-dev.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/lbug-source/CMakeLists.txt +1 -1
  2. package/lbug-source/Makefile +14 -2
  3. package/lbug-source/extension/duckdb/src/catalog/duckdb_catalog.cpp +1 -1
  4. package/lbug-source/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp +43 -4
  5. package/lbug-source/extension/duckdb/src/connector/duckdb_result_converter.cpp +6 -0
  6. package/lbug-source/extension/duckdb/src/connector/duckdb_secret_manager.cpp +1 -1
  7. package/lbug-source/extension/duckdb/src/function/duckdb_scan.cpp +49 -4
  8. package/lbug-source/extension/duckdb/src/include/catalog/duckdb_table_catalog_entry.h +6 -1
  9. package/lbug-source/extension/duckdb/src/include/function/duckdb_scan.h +2 -0
  10. package/lbug-source/extension/duckdb/test/test_files/duckdb.test +28 -0
  11. package/lbug-source/extension/extension_config.cmake +3 -2
  12. package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
  13. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  14. package/lbug-source/src/antlr4/Cypher.g4 +1 -1
  15. package/lbug-source/src/binder/bind/bind_graph_pattern.cpp +30 -3
  16. package/lbug-source/src/catalog/catalog_set.cpp +1 -0
  17. package/lbug-source/src/function/table/table_function.cpp +11 -2
  18. package/lbug-source/src/include/catalog/catalog_entry/table_catalog_entry.h +7 -0
  19. package/lbug-source/src/include/common/string_format.h +2 -2
  20. package/lbug-source/src/include/function/table/bind_data.h +12 -1
  21. package/lbug-source/src/include/function/table/table_function.h +2 -0
  22. package/lbug-source/src/include/optimizer/order_by_push_down_optimizer.h +21 -0
  23. package/lbug-source/src/include/planner/operator/logical_table_function_call.h +14 -1
  24. package/lbug-source/src/optimizer/CMakeLists.txt +2 -1
  25. package/lbug-source/src/optimizer/limit_push_down_optimizer.cpp +12 -0
  26. package/lbug-source/src/optimizer/optimizer.cpp +4 -0
  27. package/lbug-source/src/optimizer/order_by_push_down_optimizer.cpp +123 -0
  28. package/lbug-source/src/optimizer/projection_push_down_optimizer.cpp +5 -1
  29. package/lbug-source/src/parser/transform/transform_graph_pattern.cpp +6 -1
  30. package/lbug-source/src/planner/join_order/cardinality_estimator.cpp +11 -2
  31. package/lbug-source/src/planner/operator/logical_table_function_call.cpp +4 -0
  32. package/lbug-source/src/planner/plan/plan_join_order.cpp +16 -1
  33. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +1332 -1316
  34. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -1
  35. package/package.json +1 -1
  36. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  37. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  38. package/prebuilt/lbugjs-linux-x64.node +0 -0
  39. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
2
 
3
- project(Lbug VERSION 0.12.3.27 LANGUAGES CXX C)
3
+ project(Lbug VERSION 0.12.3.29 LANGUAGES CXX C)
4
4
 
5
5
  option(SINGLE_THREADED "Single-threaded mode" FALSE)
6
6
  if(SINGLE_THREADED)
@@ -14,7 +14,7 @@
14
14
  benchmark example \
15
15
  extension-test-build extension-test extension-json-test-build extension-json-test \
16
16
  extension-debug extension-release \
17
- shell-test \
17
+ shell shell-debug shell-test \
18
18
  tidy tidy-analyzer clangd-diagnostics \
19
19
  install \
20
20
  clean-extension clean-python-api clean-java clean
@@ -234,7 +234,7 @@ example:
234
234
  $(call run-cmake-release, -DBUILD_EXAMPLES=TRUE)
235
235
 
236
236
  extension-build:
237
- $(call run-cmake-relwithdebinfo,-DBUILD_EXTENSIONS="$(EXTENSION_LIST)")
237
+ $(call run-cmake-relwithdebinfo,-DBUILD_EXTENSIONS="$(EXTENSION_LIST)" -DEXTENSION_STATIC_LINK_LIST="$(EXTENSION_STATIC_LINK_LIST)")
238
238
 
239
239
  extension-test-build:
240
240
  $(call run-cmake-relwithdebinfo, \
@@ -294,6 +294,18 @@ extension-release:
294
294
  -DBUILD_LBUG=FALSE \
295
295
  )
296
296
 
297
+ shell:
298
+ BM_MALLOC=1 $(call run-cmake-release, \
299
+ -DBUILD_SHELL=TRUE \
300
+ -DEXTENSION_STATIC_LINK_LIST=duckdb \
301
+ )
302
+
303
+ shell-debug:
304
+ BM_MALLOC=1 $(call run-cmake-debug, \
305
+ -DBUILD_SHELL=TRUE \
306
+ -DEXTENSION_STATIC_LINK_LIST=duckdb \
307
+ )
308
+
297
309
  shell-test:
298
310
  $(call run-cmake-relwithdebinfo, \
299
311
  -DBUILD_SHELL=TRUE \
@@ -91,7 +91,7 @@ void DuckDBCatalog::createForeignTable(const std::string& tableName) {
91
91
  auto duckdbTableInfo =
92
92
  connector.getTableScanInfo(getQuery(*info), std::move(columnTypes), columnNames);
93
93
  auto tableEntry = std::make_unique<catalog::DuckDBTableCatalogEntry>(info->tableName,
94
- getScanFunction(duckdbTableInfo));
94
+ getScanFunction(duckdbTableInfo), duckdbTableInfo);
95
95
  for (auto& definition : extraInfo->propertyDefinitions) {
96
96
  tableEntry->addProperty(definition);
97
97
  }
@@ -1,21 +1,60 @@
1
1
  #include "catalog/duckdb_table_catalog_entry.h"
2
2
 
3
- #include "binder/ddl/bound_create_table_info.h"
3
+ #include "binder/bound_scan_source.h"
4
+ #include "binder/expression/variable_expression.h"
5
+ #include "common/constants.h"
6
+ #include "function/duckdb_scan.h"
4
7
 
5
8
  namespace lbug {
6
9
  namespace catalog {
7
10
 
8
11
  DuckDBTableCatalogEntry::DuckDBTableCatalogEntry(std::string name,
9
- function::TableFunction scanFunction)
12
+ function::TableFunction scanFunction,
13
+ std::shared_ptr<duckdb_extension::DuckDBTableScanInfo> scanInfo)
10
14
  : TableCatalogEntry{CatalogEntryType::FOREIGN_TABLE_ENTRY, std::move(name)},
11
- scanFunction{std::move(scanFunction)} {}
15
+ scanFunction{std::move(scanFunction)}, scanInfo{std::move(scanInfo)} {}
12
16
 
13
17
  common::TableType DuckDBTableCatalogEntry::getTableType() const {
14
18
  return common::TableType::FOREIGN;
15
19
  }
16
20
 
21
+ std::unique_ptr<binder::BoundTableScanInfo> DuckDBTableCatalogEntry::getBoundScanInfo(
22
+ main::ClientContext* context, const std::string& nodeUniqueName) {
23
+ auto columnNames = scanInfo->getColumnNames();
24
+ auto columnTypes = scanInfo->getColumnTypes(*context);
25
+ binder::expression_vector columns;
26
+
27
+ // Add rowid as _ID (internal ID) if nodeUniqueName is provided
28
+ if (!nodeUniqueName.empty()) {
29
+ auto idUniqueName = nodeUniqueName + "." + std::string(common::InternalKeyword::ID);
30
+ columns.push_back(std::make_shared<binder::VariableExpression>(common::LogicalType::INT64(),
31
+ idUniqueName, "rowid"));
32
+ }
33
+
34
+ for (auto i = 0u; i < columnNames.size(); i++) {
35
+ std::string uniqueName = columnNames[i];
36
+ if (!nodeUniqueName.empty()) {
37
+ uniqueName = nodeUniqueName + "." + columnNames[i];
38
+ }
39
+ columns.push_back(std::make_shared<binder::VariableExpression>(std::move(columnTypes[i]),
40
+ uniqueName, columnNames[i]));
41
+ }
42
+
43
+ // Build column names for DuckDB query - include rowid if needed
44
+ std::vector<std::string> duckdbColumnNames;
45
+ if (!nodeUniqueName.empty()) {
46
+ duckdbColumnNames.push_back("rowid");
47
+ }
48
+ duckdbColumnNames.insert(duckdbColumnNames.end(), columnNames.begin(), columnNames.end());
49
+
50
+ auto bindData =
51
+ std::make_unique<duckdb_extension::DuckDBScanBindData>(scanInfo->getTemplateQuery(*context),
52
+ duckdbColumnNames, scanInfo->getConnector(), std::move(columns));
53
+ return std::make_unique<binder::BoundTableScanInfo>(scanFunction, std::move(bindData));
54
+ }
55
+
17
56
  std::unique_ptr<TableCatalogEntry> DuckDBTableCatalogEntry::copy() const {
18
- auto other = std::make_unique<DuckDBTableCatalogEntry>(name, scanFunction);
57
+ auto other = std::make_unique<DuckDBTableCatalogEntry>(name, scanFunction, scanInfo);
19
58
  other->copyFrom(*this);
20
59
  return other;
21
60
  }
@@ -121,10 +121,16 @@ void DuckDBResultConverter::convertDuckDBResultToVector(duckdb::DataChunk& duckD
121
121
  for (auto i = 0u; i < conversionFunctions.size(); i++) {
122
122
  result.state->getSelVectorUnsafe().setSelSize(duckDBResult.size());
123
123
  if (columnSkips && columnSkips.value()[i]) {
124
+ // For rowid (first column), we always fetch it from DuckDB but skip writing to output.
125
+ // This keeps DuckDB result columns aligned with our expected order.
126
+ if (i == 0) {
127
+ duckdbResultColIdx++;
128
+ }
124
129
  continue;
125
130
  }
126
131
  KU_ASSERT(duckDBResult.data[duckdbResultColIdx].GetVectorType() ==
127
132
  duckdb::VectorType::FLAT_VECTOR);
133
+ // Write to output vector at position i (the original column index)
128
134
  conversionFunctions[i](duckDBResult.data[duckdbResultColIdx],
129
135
  result.getValueVectorMutable(i), result.state->getSelVector().getSelSize());
130
136
  duckdbResultColIdx++;
@@ -18,7 +18,7 @@ static std::string getDuckDBExtensionOptions(httpfs_extension::S3AuthParams lbug
18
18
  std::string DuckDBSecretManager::getRemoteS3FSSecret(main::ClientContext* context,
19
19
  const httpfs_extension::S3FileSystemConfig& config) {
20
20
  KU_ASSERT(config.fsName == "S3" || config.fsName == "GCS");
21
- std::string templateQuery = R"(CREATE SECRET {}_secret (
21
+ static constexpr std::string_view templateQuery = R"(CREATE SECRET {}_secret (
22
22
  {}
23
23
  TYPE {}
24
24
  );)";
@@ -15,21 +15,56 @@ namespace duckdb_extension {
15
15
 
16
16
  std::string DuckDBScanBindData::getColumnsToSelect() const {
17
17
  std::string columnNames = "";
18
+ auto columnSkips = getColumnSkips();
18
19
  auto numSkippedColumns =
19
20
  std::count_if(columnSkips.begin(), columnSkips.end(), [](auto item) { return item; });
20
21
  if (getNumColumns() == numSkippedColumns) {
21
- columnNames = columnNamesInDuckDB[0];
22
+ return columnNamesInDuckDB[0];
22
23
  }
24
+ bool first = true;
23
25
  for (auto i = 0u; i < getNumColumns(); i++) {
24
- if (columnSkips[i]) {
26
+ // Always include rowid (first column) even if marked as skipped.
27
+ // This ensures consistent column ordering between DuckDB results and the converter.
28
+ bool isRowid =
29
+ (i == 0 && !columnNamesInDuckDB.empty() && columnNamesInDuckDB[0] == "rowid");
30
+ if (columnSkips[i] && !isRowid) {
25
31
  continue;
26
32
  }
33
+ if (!first) {
34
+ columnNames += ",";
35
+ }
27
36
  columnNames += columnNamesInDuckDB[i];
28
- columnNames += (i == getNumColumns() - 1) ? "" : ",";
37
+ first = false;
29
38
  }
30
39
  return columnNames;
31
40
  }
32
41
 
42
+ std::string DuckDBScanBindData::getDescription() const {
43
+ auto columns = getColumnsToSelect();
44
+ std::string predicatesString = "";
45
+ for (auto& predicates : getColumnPredicates()) {
46
+ if (predicates.isEmpty()) {
47
+ continue;
48
+ }
49
+ if (predicatesString.empty()) {
50
+ predicatesString = " WHERE " + predicates.toString();
51
+ } else {
52
+ predicatesString += common::stringFormat(" AND {}", predicates.toString());
53
+ }
54
+ }
55
+ std::string q = query;
56
+ size_t pos = q.find("{}");
57
+ if (pos != std::string::npos) {
58
+ q.replace(pos, 2, columns);
59
+ }
60
+ q += predicatesString;
61
+ q += getOrderBy();
62
+ if (getLimitNum() != common::INVALID_ROW_IDX) {
63
+ q += common::stringFormat(" LIMIT {}", getLimitNum());
64
+ }
65
+ return q;
66
+ }
67
+
33
68
  DuckDBScanSharedState::DuckDBScanSharedState(
34
69
  std::shared_ptr<duckdb::MaterializedQueryResult> queryResult)
35
70
  : function::TableFuncSharedState{queryResult->RowCount()}, queryResult{std::move(queryResult)} {
@@ -66,7 +101,16 @@ std::unique_ptr<TableFuncSharedState> DuckDBScanFunction::initSharedState(
66
101
  predicatesString += stringFormat(" AND {}", predicates.toString());
67
102
  }
68
103
  }
69
- auto finalQuery = stringFormat(scanBindData->query, columnNames) + predicatesString;
104
+ std::string finalQuery = scanBindData->query;
105
+ size_t pos = finalQuery.find("{}");
106
+ if (pos != std::string::npos) {
107
+ finalQuery.replace(pos, 2, columnNames);
108
+ }
109
+ finalQuery += predicatesString;
110
+ finalQuery += scanBindData->getOrderBy();
111
+ if (scanBindData->getLimitNum() != INVALID_ROW_IDX) {
112
+ finalQuery += stringFormat(" LIMIT {}", scanBindData->getLimitNum());
113
+ }
70
114
  auto result = scanBindData->connector.executeQuery(finalQuery);
71
115
  if (result->HasError()) {
72
116
  throw RuntimeException(
@@ -117,6 +161,7 @@ TableFunction getScanFunction(std::shared_ptr<DuckDBTableScanInfo> scanInfo) {
117
161
  std::placeholders::_2);
118
162
  function.initSharedStateFunc = DuckDBScanFunction::initSharedState;
119
163
  function.initLocalStateFunc = DuckDBScanFunction::initLocalState;
164
+ function.supportsPushDownFunc = [] { return true; };
120
165
  return function;
121
166
  }
122
167
 
@@ -1,6 +1,7 @@
1
1
  #pragma once
2
2
 
3
3
  #include "catalog/catalog_entry/table_catalog_entry.h"
4
+ #include "function/duckdb_scan.h"
4
5
  #include "function/table/table_function.h"
5
6
 
6
7
  namespace lbug {
@@ -11,13 +12,16 @@ public:
11
12
  //===--------------------------------------------------------------------===//
12
13
  // constructors
13
14
  //===--------------------------------------------------------------------===//
14
- DuckDBTableCatalogEntry(std::string name, function::TableFunction scanFunction);
15
+ DuckDBTableCatalogEntry(std::string name, function::TableFunction scanFunction,
16
+ std::shared_ptr<duckdb_extension::DuckDBTableScanInfo> scanInfo);
15
17
 
16
18
  //===--------------------------------------------------------------------===//
17
19
  // getter & setter
18
20
  //===--------------------------------------------------------------------===//
19
21
  common::TableType getTableType() const override;
20
22
  function::TableFunction getScanFunction() override { return scanFunction; }
23
+ std::unique_ptr<binder::BoundTableScanInfo> getBoundScanInfo(main::ClientContext* context,
24
+ const std::string& nodeUniqueName = "") override;
21
25
 
22
26
  //===--------------------------------------------------------------------===//
23
27
  // serialization & deserialization
@@ -30,6 +34,7 @@ private:
30
34
 
31
35
  private:
32
36
  function::TableFunction scanFunction;
37
+ std::shared_ptr<duckdb_extension::DuckDBTableScanInfo> scanInfo;
33
38
  };
34
39
 
35
40
  } // namespace catalog
@@ -60,6 +60,8 @@ struct DuckDBScanBindData : function::TableFuncBindData {
60
60
 
61
61
  std::string getColumnsToSelect() const;
62
62
 
63
+ std::string getDescription() const override;
64
+
63
65
  std::unique_ptr<TableFuncBindData> copy() const override {
64
66
  return std::make_unique<DuckDBScanBindData>(*this);
65
67
  }
@@ -321,3 +321,31 @@ Attached database successfully.
321
321
  7|Elizabeth|1|False|True|20|4.700000|1980-10-26|1976-12-23 11:21:42|48:24:11|[2]|[Ein]|[[6],[7],[8]]|1.463000|a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a15|[96,59,65,88]
322
322
  8|Farooq|2|True|False|25|4.500000|1980-10-26|1972-07-31 13:22:30.678559|00:18:00.024|[3,4,5,6,7]|[Fesdwe]|[[8]]|1.510000|a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a16|[80,78,34,83]
323
323
  9|Greg|2|False|False|40|4.900000|1980-10-26|1976-12-23 11:21:42|10 years 5 months 13:00:00.000024|[1]|[Grad]|[[10]]|1.600000|a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a17|[43,83,67,43]
324
+
325
+ -CASE DuckDBNodeTable
326
+ -LOAD_DYNAMIC_EXTENSION duckdb
327
+ -STATEMENT ATTACH '${LBUG_ROOT_DIRECTORY}/dataset/databases/duckdb_database/tinysnb.db' as ts (dbtype duckdb, skip_unsupported_table = true);
328
+ ---- 1
329
+ Attached database successfully.
330
+ -STATEMENT MATCH (a:ts.person) RETURN count(*);
331
+ ---- 1
332
+ 8
333
+ -STATEMENT MATCH (a:ts.person) WHERE a.age > 30 RETURN count(*);
334
+ ---- 1
335
+ 4
336
+ -STATEMENT MATCH (a:ts.person) RETURN a.ID ORDER BY a.ID LIMIT 3;
337
+ ---- 3
338
+ 0
339
+ 2
340
+ 3
341
+ -STATEMENT MATCH (a:ts.person) RETURN a.ID ORDER BY a.ID DESC LIMIT 3;
342
+ ---- 3
343
+ 10
344
+ 9
345
+ 8
346
+ -STATEMENT MATCH (a:ts.person) WHERE a.age > 20 RETURN a.ID ORDER BY a.age DESC LIMIT 2;
347
+ ---- 2
348
+ 10
349
+ 3
350
+ -STATEMENT DETACH ts;
351
+ ---- ok
@@ -1,8 +1,6 @@
1
1
  set(EXTENSION_LIST azure delta duckdb fts httpfs iceberg json llm postgres sqlite unity_catalog vector neo4j algo)
2
2
 
3
3
  #set(EXTENSION_STATIC_LINK_LIST fts)
4
- string(JOIN ", " joined_extensions ${EXTENSION_STATIC_LINK_LIST})
5
- message(STATUS "Static link extensions: ${joined_extensions}")
6
4
  foreach(extension IN LISTS EXTENSION_STATIC_LINK_LIST)
7
5
  add_static_link_extension(${extension})
8
6
  endforeach()
@@ -30,3 +28,6 @@ if(${BUILD_SWIFT})
30
28
  add_static_link_extension(vector)
31
29
  add_static_link_extension(algo)
32
30
  endif()
31
+
32
+ string(JOIN ", " joined_extensions ${STATICALLY_LINKED_EXTENSIONS})
33
+ message(STATUS "Static link extensions: ${joined_extensions}")
@@ -699,7 +699,7 @@ oC_UpperBound
699
699
  : DecimalInteger ;
700
700
 
701
701
  oC_LabelName
702
- : oC_SchemaName ;
702
+ : oC_SchemaName ( '.' oC_SchemaName )? ;
703
703
 
704
704
  oC_RelTypeName
705
705
  : oC_SchemaName ;
@@ -1 +1 @@
1
- 52606d4848c2f224b8e480fec2923081
1
+ d606604ea3991978c8b514d4ac36b8f6
@@ -452,7 +452,7 @@ oC_UpperBound
452
452
  : DecimalInteger ;
453
453
 
454
454
  oC_LabelName
455
- : oC_SchemaName ;
455
+ : oC_SchemaName ( '.' oC_SchemaName )? ;
456
456
 
457
457
  oC_RelTypeName
458
458
  : oC_SchemaName ;
@@ -15,6 +15,7 @@
15
15
  #include "function/rewrite_function.h"
16
16
  #include "function/schema/vector_node_rel_functions.h"
17
17
  #include "main/client_context.h"
18
+ #include "main/database_manager.h"
18
19
  #include "transaction/transaction.h"
19
20
 
20
21
  using namespace lbug::common;
@@ -644,7 +645,8 @@ std::vector<TableCatalogEntry*> Binder::bindNodeTableEntries(
644
645
  } else {
645
646
  for (auto& name : tableNames) {
646
647
  auto entry = bindNodeTableEntry(name);
647
- if (entry->getType() != CatalogEntryType::NODE_TABLE_ENTRY) {
648
+ if (entry->getType() != CatalogEntryType::NODE_TABLE_ENTRY &&
649
+ entry->getType() != CatalogEntryType::FOREIGN_TABLE_ENTRY) {
648
650
  throw BinderException(
649
651
  stringFormat("Cannot bind {} as a node pattern label.", entry->getName()));
650
652
  }
@@ -658,10 +660,35 @@ TableCatalogEntry* Binder::bindNodeTableEntry(const std::string& name) const {
658
660
  auto transaction = transaction::Transaction::Get(*clientContext);
659
661
  auto catalog = Catalog::Get(*clientContext);
660
662
  auto useInternal = clientContext->useInternalCatalogEntry();
661
- if (!catalog->containsTable(transaction, name, useInternal)) {
663
+
664
+ std::string dbName;
665
+ std::string tableName = name;
666
+ auto dotPos = name.find('.');
667
+ if (dotPos != std::string::npos) {
668
+ dbName = name.substr(0, dotPos);
669
+ tableName = name.substr(dotPos + 1);
670
+ }
671
+
672
+ if (!dbName.empty()) {
673
+ // Qualified name: db.table
674
+ auto attachedDB = main::DatabaseManager::Get(*clientContext)->getAttachedDatabase(dbName);
675
+ if (!attachedDB) {
676
+ throw BinderException(stringFormat("Attached database {} does not exist.", dbName));
677
+ }
678
+ auto attachedCatalog = attachedDB->getCatalog();
679
+ if (!attachedCatalog->containsTable(transaction, tableName, useInternal)) {
680
+ throw BinderException(stringFormat("Table {} does not exist in attached database {}.",
681
+ tableName, dbName));
682
+ }
683
+ return attachedCatalog->getTableCatalogEntry(transaction, tableName, useInternal);
684
+ } else {
685
+ // Unqualified name: only search main catalog
686
+ // Foreign tables require qualified names (db.table) to avoid ambiguity
687
+ if (catalog->containsTable(transaction, name, useInternal)) {
688
+ return catalog->getTableCatalogEntry(transaction, name, useInternal);
689
+ }
662
690
  throw BinderException(stringFormat("Table {} does not exist.", name));
663
691
  }
664
- return catalog->getTableCatalogEntry(transaction, name, useInternal);
665
692
  }
666
693
 
667
694
  std::vector<TableCatalogEntry*> Binder::bindRelGroupEntries(
@@ -246,6 +246,7 @@ void CatalogSet::serialize(Serializer serializer) const {
246
246
  case CatalogEntryType::COPY_FUNCTION_ENTRY:
247
247
  case CatalogEntryType::TABLE_FUNCTION_ENTRY:
248
248
  case CatalogEntryType::STANDALONE_TABLE_FUNCTION_ENTRY:
249
+ case CatalogEntryType::FOREIGN_TABLE_ENTRY:
249
250
  continue;
250
251
  default: {
251
252
  auto committedEntry = getCommittedEntryNoLock(entry.get());
@@ -100,8 +100,17 @@ std::unique_ptr<PhysicalOperator> TableFunction::getPhysicalPlan(PlanMapper* pla
100
100
  auto initInput =
101
101
  TableFuncInitSharedStateInput(info.bindData.get(), planMapper->executionContext);
102
102
  auto sharedState = info.function.initSharedStateFunc(initInput);
103
- auto printInfo = std::make_unique<TableFunctionCallPrintInfo>(call.getTableFunc().name,
104
- call.getBindData()->columns);
103
+ // Filter columns for print info based on column skips
104
+ binder::expression_vector printExprs;
105
+ auto columnSkips = call.getBindData()->getColumnSkips();
106
+ for (auto i = 0u; i < call.getBindData()->columns.size(); ++i) {
107
+ if (columnSkips.empty() || !columnSkips[i]) {
108
+ printExprs.push_back(call.getBindData()->columns[i]);
109
+ }
110
+ }
111
+ auto desc = call.getBindData()->getDescription();
112
+ auto printInfo = std::make_unique<TableFunctionCallPrintInfo>(
113
+ desc.empty() ? call.getTableFunc().name : desc, printExprs);
105
114
  return std::make_unique<TableFunctionCall>(std::move(info), sharedState,
106
115
  planMapper->getOperatorID(), std::move(printInfo));
107
116
  }
@@ -2,6 +2,7 @@
2
2
 
3
3
  #include <vector>
4
4
 
5
+ #include "binder/bound_scan_source.h"
5
6
  #include "binder/ddl/bound_alter_info.h"
6
7
  #include "binder/ddl/bound_create_table_info.h"
7
8
  #include "catalog/catalog_entry/catalog_entry.h"
@@ -43,6 +44,12 @@ public:
43
44
 
44
45
  virtual function::TableFunction getScanFunction() { KU_UNREACHABLE; }
45
46
 
47
+ virtual std::unique_ptr<binder::BoundTableScanInfo> getBoundScanInfo(
48
+ [[maybe_unused]] main::ClientContext* context,
49
+ [[maybe_unused]] const std::string& nodeUniqueName = "") {
50
+ return nullptr;
51
+ }
52
+
46
53
  common::column_id_t getMaxColumnID() const;
47
54
  void vacuumColumnIDs(common::column_id_t nextColumnID);
48
55
  std::vector<binder::PropertyDefinition> getProperties() const {
@@ -14,8 +14,8 @@ namespace common {
14
14
  #if USE_STD_FORMAT
15
15
 
16
16
  template<typename... Args>
17
- inline std::string stringFormat(std::format_string<Args...> format, Args&&... args) {
18
- return std::format(format, std::forward<Args>(args)...);
17
+ inline std::string stringFormat(std::string_view format, Args&&... args) {
18
+ return std::vformat(format, std::make_format_args(args...));
19
19
  }
20
20
 
21
21
  #else
@@ -25,7 +25,8 @@ struct LBUG_API TableFuncBindData {
25
25
  TableFuncBindData(const TableFuncBindData& other)
26
26
  : columns{other.columns}, numRows{other.numRows},
27
27
  optionalParams{other.optionalParams == nullptr ? nullptr : other.optionalParams->copy()},
28
- columnSkips{other.columnSkips}, columnPredicates{copyVector(other.columnPredicates)} {}
28
+ columnSkips{other.columnSkips}, columnPredicates{copyVector(other.columnPredicates)},
29
+ limitNum{other.limitNum}, orderBy{other.orderBy} {}
29
30
  TableFuncBindData& operator=(const TableFuncBindData& other) = delete;
30
31
  virtual ~TableFuncBindData() = default;
31
32
 
@@ -46,10 +47,18 @@ struct LBUG_API TableFuncBindData {
46
47
  return columnPredicates;
47
48
  }
48
49
 
50
+ void setLimitNum(common::row_idx_t limit) { limitNum = limit; }
51
+ common::row_idx_t getLimitNum() const { return limitNum; }
52
+
53
+ void setOrderBy(std::string orderBy) { this->orderBy = orderBy; }
54
+ std::string getOrderBy() const { return orderBy; }
55
+
49
56
  virtual bool getIgnoreErrorsOption() const;
50
57
 
51
58
  virtual std::unique_ptr<TableFuncBindData> copy() const;
52
59
 
60
+ virtual std::string getDescription() const { return ""; }
61
+
53
62
  template<class TARGET>
54
63
  const TARGET* constPtrCast() const {
55
64
  return common::ku_dynamic_cast<const TARGET*>(this);
@@ -63,6 +72,8 @@ struct LBUG_API TableFuncBindData {
63
72
  protected:
64
73
  std::vector<bool> columnSkips;
65
74
  std::vector<storage::ColumnPredicateSet> columnPredicates;
75
+ common::row_idx_t limitNum = common::INVALID_ROW_IDX;
76
+ std::string orderBy;
66
77
  };
67
78
 
68
79
  } // namespace function
@@ -133,6 +133,7 @@ using table_func_init_local_t =
133
133
  using table_func_init_output_t =
134
134
  std::function<std::unique_ptr<TableFuncOutput>(const TableFuncInitOutputInput&)>;
135
135
  using table_func_can_parallel_t = std::function<bool()>;
136
+ using table_func_supports_push_down_t = std::function<bool()>;
136
137
  using table_func_progress_t = std::function<double(TableFuncSharedState* sharedState)>;
137
138
  using table_func_finalize_t =
138
139
  std::function<void(const processor::ExecutionContext*, TableFuncSharedState*)>;
@@ -153,6 +154,7 @@ struct LBUG_API TableFunction final : Function {
153
154
  table_func_init_local_t initLocalStateFunc = nullptr;
154
155
  table_func_init_output_t initOutputFunc = nullptr;
155
156
  table_func_can_parallel_t canParallelFunc = [] { return true; };
157
+ table_func_supports_push_down_t supportsPushDownFunc = [] { return false; };
156
158
  table_func_progress_t progressFunc = [](TableFuncSharedState*) { return 0.0; };
157
159
  table_func_finalize_t finalizeFunc = [](auto, auto) {};
158
160
  table_func_rewrite_t rewriteFunc = nullptr;
@@ -0,0 +1,21 @@
1
+ #pragma once
2
+
3
+ #include "planner/operator/logical_plan.h"
4
+
5
+ namespace lbug {
6
+ namespace optimizer {
7
+
8
+ class OrderByPushDownOptimizer {
9
+ public:
10
+ void rewrite(planner::LogicalPlan* plan);
11
+
12
+ private:
13
+ std::shared_ptr<planner::LogicalOperator> visitOperator(
14
+ std::shared_ptr<planner::LogicalOperator> op, std::string currentOrderBy = "");
15
+
16
+ static std::string buildOrderByString(const binder::expression_vector& expressions,
17
+ const std::vector<bool>& isAscOrders);
18
+ };
19
+
20
+ } // namespace optimizer
21
+ } // namespace lbug
@@ -27,11 +27,18 @@ public:
27
27
  void setColumnPredicates(std::vector<storage::ColumnPredicateSet> predicates) {
28
28
  bindData->setColumnPredicates(std::move(predicates));
29
29
  }
30
+ void setLimitNum(common::row_idx_t limit) { bindData->setLimitNum(limit); }
31
+ void setOrderBy(std::string orderBy) { bindData->setOrderBy(orderBy); }
30
32
 
31
33
  void computeFlatSchema() override;
32
34
  void computeFactorizedSchema() override;
33
35
 
34
- std::string getExpressionsForPrinting() const override { return tableFunc.name; }
36
+ std::string getExpressionsForPrinting() const override {
37
+ auto desc = bindData->getDescription();
38
+ return desc.empty() ? tableFunc.name : desc;
39
+ }
40
+
41
+ std::unique_ptr<OPPrintInfo> getPrintInfo() const override;
35
42
 
36
43
  std::unique_ptr<LogicalOperator> copy() override {
37
44
  return std::make_unique<LogicalTableFunctionCall>(tableFunc, bindData->copy());
@@ -42,5 +49,11 @@ private:
42
49
  std::unique_ptr<function::TableFuncBindData> bindData;
43
50
  };
44
51
 
52
+ struct LogicalTableFunctionCallPrintInfo final : OPPrintInfo {
53
+ std::string desc;
54
+ explicit LogicalTableFunctionCallPrintInfo(std::string desc) : desc{std::move(desc)} {}
55
+ std::string toString() const override { return desc; }
56
+ };
57
+
45
58
  } // namespace planner
46
59
  } // namespace lbug
@@ -15,7 +15,8 @@ add_library(lbug_optimizer
15
15
  remove_factorization_rewriter.cpp
16
16
  remove_unnecessary_join_optimizer.cpp
17
17
  top_k_optimizer.cpp
18
- limit_push_down_optimizer.cpp)
18
+ limit_push_down_optimizer.cpp
19
+ order_by_push_down_optimizer.cpp)
19
20
 
20
21
  set(ALL_OBJECT_FILES
21
22
  ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:lbug_optimizer>
@@ -6,6 +6,7 @@
6
6
  #include "planner/operator/logical_distinct.h"
7
7
  #include "planner/operator/logical_hash_join.h"
8
8
  #include "planner/operator/logical_limit.h"
9
+ #include "planner/operator/logical_table_function_call.h"
9
10
 
10
11
  using namespace lbug::binder;
11
12
  using namespace lbug::common;
@@ -34,10 +35,21 @@ void LimitPushDownOptimizer::visitOperator(planner::LogicalOperator* op) {
34
35
  case LogicalOperatorType::MULTIPLICITY_REDUCER:
35
36
  case LogicalOperatorType::EXPLAIN:
36
37
  case LogicalOperatorType::ACCUMULATE:
38
+ case LogicalOperatorType::FILTER:
37
39
  case LogicalOperatorType::PROJECTION: {
38
40
  visitOperator(op->getChild(0).get());
39
41
  return;
40
42
  }
43
+ case LogicalOperatorType::TABLE_FUNCTION_CALL: {
44
+ if (limitNumber == INVALID_LIMIT && skipNumber == 0) {
45
+ return;
46
+ }
47
+ auto& tableFuncCall = op->cast<LogicalTableFunctionCall>();
48
+ if (tableFuncCall.getTableFunc().supportsPushDownFunc()) {
49
+ tableFuncCall.setLimitNum(skipNumber + limitNumber);
50
+ }
51
+ return;
52
+ }
41
53
  case LogicalOperatorType::DISTINCT: {
42
54
  if (limitNumber == INVALID_LIMIT && skipNumber == 0) {
43
55
  return;
@@ -9,6 +9,7 @@
9
9
  #include "optimizer/factorization_rewriter.h"
10
10
  #include "optimizer/filter_push_down_optimizer.h"
11
11
  #include "optimizer/limit_push_down_optimizer.h"
12
+ #include "optimizer/order_by_push_down_optimizer.h"
12
13
  #include "optimizer/projection_push_down_optimizer.h"
13
14
  #include "optimizer/remove_factorization_rewriter.h"
14
15
  #include "optimizer/remove_unnecessary_join_optimizer.h"
@@ -45,6 +46,9 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
45
46
  ProjectionPushDownOptimizer(context->getClientConfig()->recursivePatternSemantic);
46
47
  projectionPushDownOptimizer.rewrite(plan);
47
48
 
49
+ auto orderByPushDownOptimizer = OrderByPushDownOptimizer();
50
+ orderByPushDownOptimizer.rewrite(plan);
51
+
48
52
  auto limitPushDownOptimizer = LimitPushDownOptimizer();
49
53
  limitPushDownOptimizer.rewrite(plan);
50
54