lbug 0.12.3-dev.26 → 0.12.3-dev.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lbug-source/CMakeLists.txt +1 -1
- package/lbug-source/Makefile +14 -2
- package/lbug-source/extension/duckdb/src/catalog/duckdb_catalog.cpp +1 -1
- package/lbug-source/extension/duckdb/src/catalog/duckdb_table_catalog_entry.cpp +43 -4
- package/lbug-source/extension/duckdb/src/connector/duckdb_result_converter.cpp +6 -0
- package/lbug-source/extension/duckdb/src/connector/duckdb_secret_manager.cpp +1 -1
- package/lbug-source/extension/duckdb/src/function/duckdb_scan.cpp +49 -4
- package/lbug-source/extension/duckdb/src/include/catalog/duckdb_table_catalog_entry.h +6 -1
- package/lbug-source/extension/duckdb/src/include/function/duckdb_scan.h +2 -0
- package/lbug-source/extension/duckdb/test/test_files/duckdb.test +28 -0
- package/lbug-source/extension/extension_config.cmake +3 -2
- package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
- package/lbug-source/scripts/antlr4/hash.md5 +1 -1
- package/lbug-source/src/antlr4/Cypher.g4 +1 -1
- package/lbug-source/src/binder/bind/bind_graph_pattern.cpp +30 -3
- package/lbug-source/src/catalog/catalog_set.cpp +1 -0
- package/lbug-source/src/function/table/table_function.cpp +11 -2
- package/lbug-source/src/include/catalog/catalog_entry/table_catalog_entry.h +7 -0
- package/lbug-source/src/include/common/string_format.h +2 -2
- package/lbug-source/src/include/function/table/bind_data.h +12 -1
- package/lbug-source/src/include/function/table/table_function.h +2 -0
- package/lbug-source/src/include/optimizer/order_by_push_down_optimizer.h +21 -0
- package/lbug-source/src/include/planner/operator/logical_table_function_call.h +14 -1
- package/lbug-source/src/optimizer/CMakeLists.txt +2 -1
- package/lbug-source/src/optimizer/limit_push_down_optimizer.cpp +12 -0
- package/lbug-source/src/optimizer/optimizer.cpp +4 -0
- package/lbug-source/src/optimizer/order_by_push_down_optimizer.cpp +123 -0
- package/lbug-source/src/optimizer/projection_push_down_optimizer.cpp +5 -1
- package/lbug-source/src/parser/transform/transform_graph_pattern.cpp +6 -1
- package/lbug-source/src/planner/join_order/cardinality_estimator.cpp +11 -2
- package/lbug-source/src/planner/operator/logical_table_function_call.cpp +4 -0
- package/lbug-source/src/planner/plan/plan_join_order.cpp +16 -1
- package/lbug-source/src/processor/operator/index_lookup.cpp +31 -23
- package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +1332 -1316
- package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -1
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
package/lbug-source/Makefile
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
benchmark example \
|
|
15
15
|
extension-test-build extension-test extension-json-test-build extension-json-test \
|
|
16
16
|
extension-debug extension-release \
|
|
17
|
-
shell-test \
|
|
17
|
+
shell shell-debug shell-test \
|
|
18
18
|
tidy tidy-analyzer clangd-diagnostics \
|
|
19
19
|
install \
|
|
20
20
|
clean-extension clean-python-api clean-java clean
|
|
@@ -234,7 +234,7 @@ example:
|
|
|
234
234
|
$(call run-cmake-release, -DBUILD_EXAMPLES=TRUE)
|
|
235
235
|
|
|
236
236
|
extension-build:
|
|
237
|
-
$(call run-cmake-relwithdebinfo,-DBUILD_EXTENSIONS="$(EXTENSION_LIST)")
|
|
237
|
+
$(call run-cmake-relwithdebinfo,-DBUILD_EXTENSIONS="$(EXTENSION_LIST)" -DEXTENSION_STATIC_LINK_LIST="$(EXTENSION_STATIC_LINK_LIST)")
|
|
238
238
|
|
|
239
239
|
extension-test-build:
|
|
240
240
|
$(call run-cmake-relwithdebinfo, \
|
|
@@ -294,6 +294,18 @@ extension-release:
|
|
|
294
294
|
-DBUILD_LBUG=FALSE \
|
|
295
295
|
)
|
|
296
296
|
|
|
297
|
+
shell:
|
|
298
|
+
BM_MALLOC=1 $(call run-cmake-release, \
|
|
299
|
+
-DBUILD_SHELL=TRUE \
|
|
300
|
+
-DEXTENSION_STATIC_LINK_LIST=duckdb \
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
shell-debug:
|
|
304
|
+
BM_MALLOC=1 $(call run-cmake-debug, \
|
|
305
|
+
-DBUILD_SHELL=TRUE \
|
|
306
|
+
-DEXTENSION_STATIC_LINK_LIST=duckdb \
|
|
307
|
+
)
|
|
308
|
+
|
|
297
309
|
shell-test:
|
|
298
310
|
$(call run-cmake-relwithdebinfo, \
|
|
299
311
|
-DBUILD_SHELL=TRUE \
|
|
@@ -91,7 +91,7 @@ void DuckDBCatalog::createForeignTable(const std::string& tableName) {
|
|
|
91
91
|
auto duckdbTableInfo =
|
|
92
92
|
connector.getTableScanInfo(getQuery(*info), std::move(columnTypes), columnNames);
|
|
93
93
|
auto tableEntry = std::make_unique<catalog::DuckDBTableCatalogEntry>(info->tableName,
|
|
94
|
-
getScanFunction(duckdbTableInfo));
|
|
94
|
+
getScanFunction(duckdbTableInfo), duckdbTableInfo);
|
|
95
95
|
for (auto& definition : extraInfo->propertyDefinitions) {
|
|
96
96
|
tableEntry->addProperty(definition);
|
|
97
97
|
}
|
|
@@ -1,21 +1,60 @@
|
|
|
1
1
|
#include "catalog/duckdb_table_catalog_entry.h"
|
|
2
2
|
|
|
3
|
-
#include "binder/
|
|
3
|
+
#include "binder/bound_scan_source.h"
|
|
4
|
+
#include "binder/expression/variable_expression.h"
|
|
5
|
+
#include "common/constants.h"
|
|
6
|
+
#include "function/duckdb_scan.h"
|
|
4
7
|
|
|
5
8
|
namespace lbug {
|
|
6
9
|
namespace catalog {
|
|
7
10
|
|
|
8
11
|
DuckDBTableCatalogEntry::DuckDBTableCatalogEntry(std::string name,
|
|
9
|
-
function::TableFunction scanFunction
|
|
12
|
+
function::TableFunction scanFunction,
|
|
13
|
+
std::shared_ptr<duckdb_extension::DuckDBTableScanInfo> scanInfo)
|
|
10
14
|
: TableCatalogEntry{CatalogEntryType::FOREIGN_TABLE_ENTRY, std::move(name)},
|
|
11
|
-
scanFunction{std::move(scanFunction)} {}
|
|
15
|
+
scanFunction{std::move(scanFunction)}, scanInfo{std::move(scanInfo)} {}
|
|
12
16
|
|
|
13
17
|
common::TableType DuckDBTableCatalogEntry::getTableType() const {
|
|
14
18
|
return common::TableType::FOREIGN;
|
|
15
19
|
}
|
|
16
20
|
|
|
21
|
+
std::unique_ptr<binder::BoundTableScanInfo> DuckDBTableCatalogEntry::getBoundScanInfo(
|
|
22
|
+
main::ClientContext* context, const std::string& nodeUniqueName) {
|
|
23
|
+
auto columnNames = scanInfo->getColumnNames();
|
|
24
|
+
auto columnTypes = scanInfo->getColumnTypes(*context);
|
|
25
|
+
binder::expression_vector columns;
|
|
26
|
+
|
|
27
|
+
// Add rowid as _ID (internal ID) if nodeUniqueName is provided
|
|
28
|
+
if (!nodeUniqueName.empty()) {
|
|
29
|
+
auto idUniqueName = nodeUniqueName + "." + std::string(common::InternalKeyword::ID);
|
|
30
|
+
columns.push_back(std::make_shared<binder::VariableExpression>(common::LogicalType::INT64(),
|
|
31
|
+
idUniqueName, "rowid"));
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
for (auto i = 0u; i < columnNames.size(); i++) {
|
|
35
|
+
std::string uniqueName = columnNames[i];
|
|
36
|
+
if (!nodeUniqueName.empty()) {
|
|
37
|
+
uniqueName = nodeUniqueName + "." + columnNames[i];
|
|
38
|
+
}
|
|
39
|
+
columns.push_back(std::make_shared<binder::VariableExpression>(std::move(columnTypes[i]),
|
|
40
|
+
uniqueName, columnNames[i]));
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Build column names for DuckDB query - include rowid if needed
|
|
44
|
+
std::vector<std::string> duckdbColumnNames;
|
|
45
|
+
if (!nodeUniqueName.empty()) {
|
|
46
|
+
duckdbColumnNames.push_back("rowid");
|
|
47
|
+
}
|
|
48
|
+
duckdbColumnNames.insert(duckdbColumnNames.end(), columnNames.begin(), columnNames.end());
|
|
49
|
+
|
|
50
|
+
auto bindData =
|
|
51
|
+
std::make_unique<duckdb_extension::DuckDBScanBindData>(scanInfo->getTemplateQuery(*context),
|
|
52
|
+
duckdbColumnNames, scanInfo->getConnector(), std::move(columns));
|
|
53
|
+
return std::make_unique<binder::BoundTableScanInfo>(scanFunction, std::move(bindData));
|
|
54
|
+
}
|
|
55
|
+
|
|
17
56
|
std::unique_ptr<TableCatalogEntry> DuckDBTableCatalogEntry::copy() const {
|
|
18
|
-
auto other = std::make_unique<DuckDBTableCatalogEntry>(name, scanFunction);
|
|
57
|
+
auto other = std::make_unique<DuckDBTableCatalogEntry>(name, scanFunction, scanInfo);
|
|
19
58
|
other->copyFrom(*this);
|
|
20
59
|
return other;
|
|
21
60
|
}
|
|
@@ -121,10 +121,16 @@ void DuckDBResultConverter::convertDuckDBResultToVector(duckdb::DataChunk& duckD
|
|
|
121
121
|
for (auto i = 0u; i < conversionFunctions.size(); i++) {
|
|
122
122
|
result.state->getSelVectorUnsafe().setSelSize(duckDBResult.size());
|
|
123
123
|
if (columnSkips && columnSkips.value()[i]) {
|
|
124
|
+
// For rowid (first column), we always fetch it from DuckDB but skip writing to output.
|
|
125
|
+
// This keeps DuckDB result columns aligned with our expected order.
|
|
126
|
+
if (i == 0) {
|
|
127
|
+
duckdbResultColIdx++;
|
|
128
|
+
}
|
|
124
129
|
continue;
|
|
125
130
|
}
|
|
126
131
|
KU_ASSERT(duckDBResult.data[duckdbResultColIdx].GetVectorType() ==
|
|
127
132
|
duckdb::VectorType::FLAT_VECTOR);
|
|
133
|
+
// Write to output vector at position i (the original column index)
|
|
128
134
|
conversionFunctions[i](duckDBResult.data[duckdbResultColIdx],
|
|
129
135
|
result.getValueVectorMutable(i), result.state->getSelVector().getSelSize());
|
|
130
136
|
duckdbResultColIdx++;
|
|
@@ -18,7 +18,7 @@ static std::string getDuckDBExtensionOptions(httpfs_extension::S3AuthParams lbug
|
|
|
18
18
|
std::string DuckDBSecretManager::getRemoteS3FSSecret(main::ClientContext* context,
|
|
19
19
|
const httpfs_extension::S3FileSystemConfig& config) {
|
|
20
20
|
KU_ASSERT(config.fsName == "S3" || config.fsName == "GCS");
|
|
21
|
-
std::
|
|
21
|
+
static constexpr std::string_view templateQuery = R"(CREATE SECRET {}_secret (
|
|
22
22
|
{}
|
|
23
23
|
TYPE {}
|
|
24
24
|
);)";
|
|
@@ -15,21 +15,56 @@ namespace duckdb_extension {
|
|
|
15
15
|
|
|
16
16
|
std::string DuckDBScanBindData::getColumnsToSelect() const {
|
|
17
17
|
std::string columnNames = "";
|
|
18
|
+
auto columnSkips = getColumnSkips();
|
|
18
19
|
auto numSkippedColumns =
|
|
19
20
|
std::count_if(columnSkips.begin(), columnSkips.end(), [](auto item) { return item; });
|
|
20
21
|
if (getNumColumns() == numSkippedColumns) {
|
|
21
|
-
|
|
22
|
+
return columnNamesInDuckDB[0];
|
|
22
23
|
}
|
|
24
|
+
bool first = true;
|
|
23
25
|
for (auto i = 0u; i < getNumColumns(); i++) {
|
|
24
|
-
|
|
26
|
+
// Always include rowid (first column) even if marked as skipped.
|
|
27
|
+
// This ensures consistent column ordering between DuckDB results and the converter.
|
|
28
|
+
bool isRowid =
|
|
29
|
+
(i == 0 && !columnNamesInDuckDB.empty() && columnNamesInDuckDB[0] == "rowid");
|
|
30
|
+
if (columnSkips[i] && !isRowid) {
|
|
25
31
|
continue;
|
|
26
32
|
}
|
|
33
|
+
if (!first) {
|
|
34
|
+
columnNames += ",";
|
|
35
|
+
}
|
|
27
36
|
columnNames += columnNamesInDuckDB[i];
|
|
28
|
-
|
|
37
|
+
first = false;
|
|
29
38
|
}
|
|
30
39
|
return columnNames;
|
|
31
40
|
}
|
|
32
41
|
|
|
42
|
+
std::string DuckDBScanBindData::getDescription() const {
|
|
43
|
+
auto columns = getColumnsToSelect();
|
|
44
|
+
std::string predicatesString = "";
|
|
45
|
+
for (auto& predicates : getColumnPredicates()) {
|
|
46
|
+
if (predicates.isEmpty()) {
|
|
47
|
+
continue;
|
|
48
|
+
}
|
|
49
|
+
if (predicatesString.empty()) {
|
|
50
|
+
predicatesString = " WHERE " + predicates.toString();
|
|
51
|
+
} else {
|
|
52
|
+
predicatesString += common::stringFormat(" AND {}", predicates.toString());
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
std::string q = query;
|
|
56
|
+
size_t pos = q.find("{}");
|
|
57
|
+
if (pos != std::string::npos) {
|
|
58
|
+
q.replace(pos, 2, columns);
|
|
59
|
+
}
|
|
60
|
+
q += predicatesString;
|
|
61
|
+
q += getOrderBy();
|
|
62
|
+
if (getLimitNum() != common::INVALID_ROW_IDX) {
|
|
63
|
+
q += common::stringFormat(" LIMIT {}", getLimitNum());
|
|
64
|
+
}
|
|
65
|
+
return q;
|
|
66
|
+
}
|
|
67
|
+
|
|
33
68
|
DuckDBScanSharedState::DuckDBScanSharedState(
|
|
34
69
|
std::shared_ptr<duckdb::MaterializedQueryResult> queryResult)
|
|
35
70
|
: function::TableFuncSharedState{queryResult->RowCount()}, queryResult{std::move(queryResult)} {
|
|
@@ -66,7 +101,16 @@ std::unique_ptr<TableFuncSharedState> DuckDBScanFunction::initSharedState(
|
|
|
66
101
|
predicatesString += stringFormat(" AND {}", predicates.toString());
|
|
67
102
|
}
|
|
68
103
|
}
|
|
69
|
-
|
|
104
|
+
std::string finalQuery = scanBindData->query;
|
|
105
|
+
size_t pos = finalQuery.find("{}");
|
|
106
|
+
if (pos != std::string::npos) {
|
|
107
|
+
finalQuery.replace(pos, 2, columnNames);
|
|
108
|
+
}
|
|
109
|
+
finalQuery += predicatesString;
|
|
110
|
+
finalQuery += scanBindData->getOrderBy();
|
|
111
|
+
if (scanBindData->getLimitNum() != INVALID_ROW_IDX) {
|
|
112
|
+
finalQuery += stringFormat(" LIMIT {}", scanBindData->getLimitNum());
|
|
113
|
+
}
|
|
70
114
|
auto result = scanBindData->connector.executeQuery(finalQuery);
|
|
71
115
|
if (result->HasError()) {
|
|
72
116
|
throw RuntimeException(
|
|
@@ -117,6 +161,7 @@ TableFunction getScanFunction(std::shared_ptr<DuckDBTableScanInfo> scanInfo) {
|
|
|
117
161
|
std::placeholders::_2);
|
|
118
162
|
function.initSharedStateFunc = DuckDBScanFunction::initSharedState;
|
|
119
163
|
function.initLocalStateFunc = DuckDBScanFunction::initLocalState;
|
|
164
|
+
function.supportsPushDownFunc = [] { return true; };
|
|
120
165
|
return function;
|
|
121
166
|
}
|
|
122
167
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include "catalog/catalog_entry/table_catalog_entry.h"
|
|
4
|
+
#include "function/duckdb_scan.h"
|
|
4
5
|
#include "function/table/table_function.h"
|
|
5
6
|
|
|
6
7
|
namespace lbug {
|
|
@@ -11,13 +12,16 @@ public:
|
|
|
11
12
|
//===--------------------------------------------------------------------===//
|
|
12
13
|
// constructors
|
|
13
14
|
//===--------------------------------------------------------------------===//
|
|
14
|
-
DuckDBTableCatalogEntry(std::string name, function::TableFunction scanFunction
|
|
15
|
+
DuckDBTableCatalogEntry(std::string name, function::TableFunction scanFunction,
|
|
16
|
+
std::shared_ptr<duckdb_extension::DuckDBTableScanInfo> scanInfo);
|
|
15
17
|
|
|
16
18
|
//===--------------------------------------------------------------------===//
|
|
17
19
|
// getter & setter
|
|
18
20
|
//===--------------------------------------------------------------------===//
|
|
19
21
|
common::TableType getTableType() const override;
|
|
20
22
|
function::TableFunction getScanFunction() override { return scanFunction; }
|
|
23
|
+
std::unique_ptr<binder::BoundTableScanInfo> getBoundScanInfo(main::ClientContext* context,
|
|
24
|
+
const std::string& nodeUniqueName = "") override;
|
|
21
25
|
|
|
22
26
|
//===--------------------------------------------------------------------===//
|
|
23
27
|
// serialization & deserialization
|
|
@@ -30,6 +34,7 @@ private:
|
|
|
30
34
|
|
|
31
35
|
private:
|
|
32
36
|
function::TableFunction scanFunction;
|
|
37
|
+
std::shared_ptr<duckdb_extension::DuckDBTableScanInfo> scanInfo;
|
|
33
38
|
};
|
|
34
39
|
|
|
35
40
|
} // namespace catalog
|
|
@@ -60,6 +60,8 @@ struct DuckDBScanBindData : function::TableFuncBindData {
|
|
|
60
60
|
|
|
61
61
|
std::string getColumnsToSelect() const;
|
|
62
62
|
|
|
63
|
+
std::string getDescription() const override;
|
|
64
|
+
|
|
63
65
|
std::unique_ptr<TableFuncBindData> copy() const override {
|
|
64
66
|
return std::make_unique<DuckDBScanBindData>(*this);
|
|
65
67
|
}
|
|
@@ -321,3 +321,31 @@ Attached database successfully.
|
|
|
321
321
|
7|Elizabeth|1|False|True|20|4.700000|1980-10-26|1976-12-23 11:21:42|48:24:11|[2]|[Ein]|[[6],[7],[8]]|1.463000|a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a15|[96,59,65,88]
|
|
322
322
|
8|Farooq|2|True|False|25|4.500000|1980-10-26|1972-07-31 13:22:30.678559|00:18:00.024|[3,4,5,6,7]|[Fesdwe]|[[8]]|1.510000|a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a16|[80,78,34,83]
|
|
323
323
|
9|Greg|2|False|False|40|4.900000|1980-10-26|1976-12-23 11:21:42|10 years 5 months 13:00:00.000024|[1]|[Grad]|[[10]]|1.600000|a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a17|[43,83,67,43]
|
|
324
|
+
|
|
325
|
+
-CASE DuckDBNodeTable
|
|
326
|
+
-LOAD_DYNAMIC_EXTENSION duckdb
|
|
327
|
+
-STATEMENT ATTACH '${LBUG_ROOT_DIRECTORY}/dataset/databases/duckdb_database/tinysnb.db' as ts (dbtype duckdb, skip_unsupported_table = true);
|
|
328
|
+
---- 1
|
|
329
|
+
Attached database successfully.
|
|
330
|
+
-STATEMENT MATCH (a:ts.person) RETURN count(*);
|
|
331
|
+
---- 1
|
|
332
|
+
8
|
|
333
|
+
-STATEMENT MATCH (a:ts.person) WHERE a.age > 30 RETURN count(*);
|
|
334
|
+
---- 1
|
|
335
|
+
4
|
|
336
|
+
-STATEMENT MATCH (a:ts.person) RETURN a.ID ORDER BY a.ID LIMIT 3;
|
|
337
|
+
---- 3
|
|
338
|
+
0
|
|
339
|
+
2
|
|
340
|
+
3
|
|
341
|
+
-STATEMENT MATCH (a:ts.person) RETURN a.ID ORDER BY a.ID DESC LIMIT 3;
|
|
342
|
+
---- 3
|
|
343
|
+
10
|
|
344
|
+
9
|
|
345
|
+
8
|
|
346
|
+
-STATEMENT MATCH (a:ts.person) WHERE a.age > 20 RETURN a.ID ORDER BY a.age DESC LIMIT 2;
|
|
347
|
+
---- 2
|
|
348
|
+
10
|
|
349
|
+
3
|
|
350
|
+
-STATEMENT DETACH ts;
|
|
351
|
+
---- ok
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
set(EXTENSION_LIST azure delta duckdb fts httpfs iceberg json llm postgres sqlite unity_catalog vector neo4j algo)
|
|
2
2
|
|
|
3
3
|
#set(EXTENSION_STATIC_LINK_LIST fts)
|
|
4
|
-
string(JOIN ", " joined_extensions ${EXTENSION_STATIC_LINK_LIST})
|
|
5
|
-
message(STATUS "Static link extensions: ${joined_extensions}")
|
|
6
4
|
foreach(extension IN LISTS EXTENSION_STATIC_LINK_LIST)
|
|
7
5
|
add_static_link_extension(${extension})
|
|
8
6
|
endforeach()
|
|
@@ -30,3 +28,6 @@ if(${BUILD_SWIFT})
|
|
|
30
28
|
add_static_link_extension(vector)
|
|
31
29
|
add_static_link_extension(algo)
|
|
32
30
|
endif()
|
|
31
|
+
|
|
32
|
+
string(JOIN ", " joined_extensions ${STATICALLY_LINKED_EXTENSIONS})
|
|
33
|
+
message(STATUS "Static link extensions: ${joined_extensions}")
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
d606604ea3991978c8b514d4ac36b8f6
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
#include "function/rewrite_function.h"
|
|
16
16
|
#include "function/schema/vector_node_rel_functions.h"
|
|
17
17
|
#include "main/client_context.h"
|
|
18
|
+
#include "main/database_manager.h"
|
|
18
19
|
#include "transaction/transaction.h"
|
|
19
20
|
|
|
20
21
|
using namespace lbug::common;
|
|
@@ -644,7 +645,8 @@ std::vector<TableCatalogEntry*> Binder::bindNodeTableEntries(
|
|
|
644
645
|
} else {
|
|
645
646
|
for (auto& name : tableNames) {
|
|
646
647
|
auto entry = bindNodeTableEntry(name);
|
|
647
|
-
if (entry->getType() != CatalogEntryType::NODE_TABLE_ENTRY
|
|
648
|
+
if (entry->getType() != CatalogEntryType::NODE_TABLE_ENTRY &&
|
|
649
|
+
entry->getType() != CatalogEntryType::FOREIGN_TABLE_ENTRY) {
|
|
648
650
|
throw BinderException(
|
|
649
651
|
stringFormat("Cannot bind {} as a node pattern label.", entry->getName()));
|
|
650
652
|
}
|
|
@@ -658,10 +660,35 @@ TableCatalogEntry* Binder::bindNodeTableEntry(const std::string& name) const {
|
|
|
658
660
|
auto transaction = transaction::Transaction::Get(*clientContext);
|
|
659
661
|
auto catalog = Catalog::Get(*clientContext);
|
|
660
662
|
auto useInternal = clientContext->useInternalCatalogEntry();
|
|
661
|
-
|
|
663
|
+
|
|
664
|
+
std::string dbName;
|
|
665
|
+
std::string tableName = name;
|
|
666
|
+
auto dotPos = name.find('.');
|
|
667
|
+
if (dotPos != std::string::npos) {
|
|
668
|
+
dbName = name.substr(0, dotPos);
|
|
669
|
+
tableName = name.substr(dotPos + 1);
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
if (!dbName.empty()) {
|
|
673
|
+
// Qualified name: db.table
|
|
674
|
+
auto attachedDB = main::DatabaseManager::Get(*clientContext)->getAttachedDatabase(dbName);
|
|
675
|
+
if (!attachedDB) {
|
|
676
|
+
throw BinderException(stringFormat("Attached database {} does not exist.", dbName));
|
|
677
|
+
}
|
|
678
|
+
auto attachedCatalog = attachedDB->getCatalog();
|
|
679
|
+
if (!attachedCatalog->containsTable(transaction, tableName, useInternal)) {
|
|
680
|
+
throw BinderException(stringFormat("Table {} does not exist in attached database {}.",
|
|
681
|
+
tableName, dbName));
|
|
682
|
+
}
|
|
683
|
+
return attachedCatalog->getTableCatalogEntry(transaction, tableName, useInternal);
|
|
684
|
+
} else {
|
|
685
|
+
// Unqualified name: only search main catalog
|
|
686
|
+
// Foreign tables require qualified names (db.table) to avoid ambiguity
|
|
687
|
+
if (catalog->containsTable(transaction, name, useInternal)) {
|
|
688
|
+
return catalog->getTableCatalogEntry(transaction, name, useInternal);
|
|
689
|
+
}
|
|
662
690
|
throw BinderException(stringFormat("Table {} does not exist.", name));
|
|
663
691
|
}
|
|
664
|
-
return catalog->getTableCatalogEntry(transaction, name, useInternal);
|
|
665
692
|
}
|
|
666
693
|
|
|
667
694
|
std::vector<TableCatalogEntry*> Binder::bindRelGroupEntries(
|
|
@@ -246,6 +246,7 @@ void CatalogSet::serialize(Serializer serializer) const {
|
|
|
246
246
|
case CatalogEntryType::COPY_FUNCTION_ENTRY:
|
|
247
247
|
case CatalogEntryType::TABLE_FUNCTION_ENTRY:
|
|
248
248
|
case CatalogEntryType::STANDALONE_TABLE_FUNCTION_ENTRY:
|
|
249
|
+
case CatalogEntryType::FOREIGN_TABLE_ENTRY:
|
|
249
250
|
continue;
|
|
250
251
|
default: {
|
|
251
252
|
auto committedEntry = getCommittedEntryNoLock(entry.get());
|
|
@@ -100,8 +100,17 @@ std::unique_ptr<PhysicalOperator> TableFunction::getPhysicalPlan(PlanMapper* pla
|
|
|
100
100
|
auto initInput =
|
|
101
101
|
TableFuncInitSharedStateInput(info.bindData.get(), planMapper->executionContext);
|
|
102
102
|
auto sharedState = info.function.initSharedStateFunc(initInput);
|
|
103
|
-
|
|
104
|
-
|
|
103
|
+
// Filter columns for print info based on column skips
|
|
104
|
+
binder::expression_vector printExprs;
|
|
105
|
+
auto columnSkips = call.getBindData()->getColumnSkips();
|
|
106
|
+
for (auto i = 0u; i < call.getBindData()->columns.size(); ++i) {
|
|
107
|
+
if (columnSkips.empty() || !columnSkips[i]) {
|
|
108
|
+
printExprs.push_back(call.getBindData()->columns[i]);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
auto desc = call.getBindData()->getDescription();
|
|
112
|
+
auto printInfo = std::make_unique<TableFunctionCallPrintInfo>(
|
|
113
|
+
desc.empty() ? call.getTableFunc().name : desc, printExprs);
|
|
105
114
|
return std::make_unique<TableFunctionCall>(std::move(info), sharedState,
|
|
106
115
|
planMapper->getOperatorID(), std::move(printInfo));
|
|
107
116
|
}
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
#include <vector>
|
|
4
4
|
|
|
5
|
+
#include "binder/bound_scan_source.h"
|
|
5
6
|
#include "binder/ddl/bound_alter_info.h"
|
|
6
7
|
#include "binder/ddl/bound_create_table_info.h"
|
|
7
8
|
#include "catalog/catalog_entry/catalog_entry.h"
|
|
@@ -43,6 +44,12 @@ public:
|
|
|
43
44
|
|
|
44
45
|
virtual function::TableFunction getScanFunction() { KU_UNREACHABLE; }
|
|
45
46
|
|
|
47
|
+
virtual std::unique_ptr<binder::BoundTableScanInfo> getBoundScanInfo(
|
|
48
|
+
[[maybe_unused]] main::ClientContext* context,
|
|
49
|
+
[[maybe_unused]] const std::string& nodeUniqueName = "") {
|
|
50
|
+
return nullptr;
|
|
51
|
+
}
|
|
52
|
+
|
|
46
53
|
common::column_id_t getMaxColumnID() const;
|
|
47
54
|
void vacuumColumnIDs(common::column_id_t nextColumnID);
|
|
48
55
|
std::vector<binder::PropertyDefinition> getProperties() const {
|
|
@@ -14,8 +14,8 @@ namespace common {
|
|
|
14
14
|
#if USE_STD_FORMAT
|
|
15
15
|
|
|
16
16
|
template<typename... Args>
|
|
17
|
-
inline std::string stringFormat(std::
|
|
18
|
-
return std::
|
|
17
|
+
inline std::string stringFormat(std::string_view format, Args&&... args) {
|
|
18
|
+
return std::vformat(format, std::make_format_args(args...));
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
#else
|
|
@@ -25,7 +25,8 @@ struct LBUG_API TableFuncBindData {
|
|
|
25
25
|
TableFuncBindData(const TableFuncBindData& other)
|
|
26
26
|
: columns{other.columns}, numRows{other.numRows},
|
|
27
27
|
optionalParams{other.optionalParams == nullptr ? nullptr : other.optionalParams->copy()},
|
|
28
|
-
columnSkips{other.columnSkips}, columnPredicates{copyVector(other.columnPredicates)}
|
|
28
|
+
columnSkips{other.columnSkips}, columnPredicates{copyVector(other.columnPredicates)},
|
|
29
|
+
limitNum{other.limitNum}, orderBy{other.orderBy} {}
|
|
29
30
|
TableFuncBindData& operator=(const TableFuncBindData& other) = delete;
|
|
30
31
|
virtual ~TableFuncBindData() = default;
|
|
31
32
|
|
|
@@ -46,10 +47,18 @@ struct LBUG_API TableFuncBindData {
|
|
|
46
47
|
return columnPredicates;
|
|
47
48
|
}
|
|
48
49
|
|
|
50
|
+
void setLimitNum(common::row_idx_t limit) { limitNum = limit; }
|
|
51
|
+
common::row_idx_t getLimitNum() const { return limitNum; }
|
|
52
|
+
|
|
53
|
+
void setOrderBy(std::string orderBy) { this->orderBy = orderBy; }
|
|
54
|
+
std::string getOrderBy() const { return orderBy; }
|
|
55
|
+
|
|
49
56
|
virtual bool getIgnoreErrorsOption() const;
|
|
50
57
|
|
|
51
58
|
virtual std::unique_ptr<TableFuncBindData> copy() const;
|
|
52
59
|
|
|
60
|
+
virtual std::string getDescription() const { return ""; }
|
|
61
|
+
|
|
53
62
|
template<class TARGET>
|
|
54
63
|
const TARGET* constPtrCast() const {
|
|
55
64
|
return common::ku_dynamic_cast<const TARGET*>(this);
|
|
@@ -63,6 +72,8 @@ struct LBUG_API TableFuncBindData {
|
|
|
63
72
|
protected:
|
|
64
73
|
std::vector<bool> columnSkips;
|
|
65
74
|
std::vector<storage::ColumnPredicateSet> columnPredicates;
|
|
75
|
+
common::row_idx_t limitNum = common::INVALID_ROW_IDX;
|
|
76
|
+
std::string orderBy;
|
|
66
77
|
};
|
|
67
78
|
|
|
68
79
|
} // namespace function
|
|
@@ -133,6 +133,7 @@ using table_func_init_local_t =
|
|
|
133
133
|
using table_func_init_output_t =
|
|
134
134
|
std::function<std::unique_ptr<TableFuncOutput>(const TableFuncInitOutputInput&)>;
|
|
135
135
|
using table_func_can_parallel_t = std::function<bool()>;
|
|
136
|
+
using table_func_supports_push_down_t = std::function<bool()>;
|
|
136
137
|
using table_func_progress_t = std::function<double(TableFuncSharedState* sharedState)>;
|
|
137
138
|
using table_func_finalize_t =
|
|
138
139
|
std::function<void(const processor::ExecutionContext*, TableFuncSharedState*)>;
|
|
@@ -153,6 +154,7 @@ struct LBUG_API TableFunction final : Function {
|
|
|
153
154
|
table_func_init_local_t initLocalStateFunc = nullptr;
|
|
154
155
|
table_func_init_output_t initOutputFunc = nullptr;
|
|
155
156
|
table_func_can_parallel_t canParallelFunc = [] { return true; };
|
|
157
|
+
table_func_supports_push_down_t supportsPushDownFunc = [] { return false; };
|
|
156
158
|
table_func_progress_t progressFunc = [](TableFuncSharedState*) { return 0.0; };
|
|
157
159
|
table_func_finalize_t finalizeFunc = [](auto, auto) {};
|
|
158
160
|
table_func_rewrite_t rewriteFunc = nullptr;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "planner/operator/logical_plan.h"
|
|
4
|
+
|
|
5
|
+
namespace lbug {
|
|
6
|
+
namespace optimizer {
|
|
7
|
+
|
|
8
|
+
class OrderByPushDownOptimizer {
|
|
9
|
+
public:
|
|
10
|
+
void rewrite(planner::LogicalPlan* plan);
|
|
11
|
+
|
|
12
|
+
private:
|
|
13
|
+
std::shared_ptr<planner::LogicalOperator> visitOperator(
|
|
14
|
+
std::shared_ptr<planner::LogicalOperator> op, std::string currentOrderBy = "");
|
|
15
|
+
|
|
16
|
+
static std::string buildOrderByString(const binder::expression_vector& expressions,
|
|
17
|
+
const std::vector<bool>& isAscOrders);
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
} // namespace optimizer
|
|
21
|
+
} // namespace lbug
|
|
@@ -27,11 +27,18 @@ public:
|
|
|
27
27
|
void setColumnPredicates(std::vector<storage::ColumnPredicateSet> predicates) {
|
|
28
28
|
bindData->setColumnPredicates(std::move(predicates));
|
|
29
29
|
}
|
|
30
|
+
void setLimitNum(common::row_idx_t limit) { bindData->setLimitNum(limit); }
|
|
31
|
+
void setOrderBy(std::string orderBy) { bindData->setOrderBy(orderBy); }
|
|
30
32
|
|
|
31
33
|
void computeFlatSchema() override;
|
|
32
34
|
void computeFactorizedSchema() override;
|
|
33
35
|
|
|
34
|
-
std::string getExpressionsForPrinting() const override {
|
|
36
|
+
std::string getExpressionsForPrinting() const override {
|
|
37
|
+
auto desc = bindData->getDescription();
|
|
38
|
+
return desc.empty() ? tableFunc.name : desc;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
std::unique_ptr<OPPrintInfo> getPrintInfo() const override;
|
|
35
42
|
|
|
36
43
|
std::unique_ptr<LogicalOperator> copy() override {
|
|
37
44
|
return std::make_unique<LogicalTableFunctionCall>(tableFunc, bindData->copy());
|
|
@@ -42,5 +49,11 @@ private:
|
|
|
42
49
|
std::unique_ptr<function::TableFuncBindData> bindData;
|
|
43
50
|
};
|
|
44
51
|
|
|
52
|
+
struct LogicalTableFunctionCallPrintInfo final : OPPrintInfo {
|
|
53
|
+
std::string desc;
|
|
54
|
+
explicit LogicalTableFunctionCallPrintInfo(std::string desc) : desc{std::move(desc)} {}
|
|
55
|
+
std::string toString() const override { return desc; }
|
|
56
|
+
};
|
|
57
|
+
|
|
45
58
|
} // namespace planner
|
|
46
59
|
} // namespace lbug
|
|
@@ -15,7 +15,8 @@ add_library(lbug_optimizer
|
|
|
15
15
|
remove_factorization_rewriter.cpp
|
|
16
16
|
remove_unnecessary_join_optimizer.cpp
|
|
17
17
|
top_k_optimizer.cpp
|
|
18
|
-
limit_push_down_optimizer.cpp
|
|
18
|
+
limit_push_down_optimizer.cpp
|
|
19
|
+
order_by_push_down_optimizer.cpp)
|
|
19
20
|
|
|
20
21
|
set(ALL_OBJECT_FILES
|
|
21
22
|
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:lbug_optimizer>
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
#include "planner/operator/logical_distinct.h"
|
|
7
7
|
#include "planner/operator/logical_hash_join.h"
|
|
8
8
|
#include "planner/operator/logical_limit.h"
|
|
9
|
+
#include "planner/operator/logical_table_function_call.h"
|
|
9
10
|
|
|
10
11
|
using namespace lbug::binder;
|
|
11
12
|
using namespace lbug::common;
|
|
@@ -34,10 +35,21 @@ void LimitPushDownOptimizer::visitOperator(planner::LogicalOperator* op) {
|
|
|
34
35
|
case LogicalOperatorType::MULTIPLICITY_REDUCER:
|
|
35
36
|
case LogicalOperatorType::EXPLAIN:
|
|
36
37
|
case LogicalOperatorType::ACCUMULATE:
|
|
38
|
+
case LogicalOperatorType::FILTER:
|
|
37
39
|
case LogicalOperatorType::PROJECTION: {
|
|
38
40
|
visitOperator(op->getChild(0).get());
|
|
39
41
|
return;
|
|
40
42
|
}
|
|
43
|
+
case LogicalOperatorType::TABLE_FUNCTION_CALL: {
|
|
44
|
+
if (limitNumber == INVALID_LIMIT && skipNumber == 0) {
|
|
45
|
+
return;
|
|
46
|
+
}
|
|
47
|
+
auto& tableFuncCall = op->cast<LogicalTableFunctionCall>();
|
|
48
|
+
if (tableFuncCall.getTableFunc().supportsPushDownFunc()) {
|
|
49
|
+
tableFuncCall.setLimitNum(skipNumber + limitNumber);
|
|
50
|
+
}
|
|
51
|
+
return;
|
|
52
|
+
}
|
|
41
53
|
case LogicalOperatorType::DISTINCT: {
|
|
42
54
|
if (limitNumber == INVALID_LIMIT && skipNumber == 0) {
|
|
43
55
|
return;
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
#include "optimizer/factorization_rewriter.h"
|
|
10
10
|
#include "optimizer/filter_push_down_optimizer.h"
|
|
11
11
|
#include "optimizer/limit_push_down_optimizer.h"
|
|
12
|
+
#include "optimizer/order_by_push_down_optimizer.h"
|
|
12
13
|
#include "optimizer/projection_push_down_optimizer.h"
|
|
13
14
|
#include "optimizer/remove_factorization_rewriter.h"
|
|
14
15
|
#include "optimizer/remove_unnecessary_join_optimizer.h"
|
|
@@ -45,6 +46,9 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
|
|
|
45
46
|
ProjectionPushDownOptimizer(context->getClientConfig()->recursivePatternSemantic);
|
|
46
47
|
projectionPushDownOptimizer.rewrite(plan);
|
|
47
48
|
|
|
49
|
+
auto orderByPushDownOptimizer = OrderByPushDownOptimizer();
|
|
50
|
+
orderByPushDownOptimizer.rewrite(plan);
|
|
51
|
+
|
|
48
52
|
auto limitPushDownOptimizer = LimitPushDownOptimizer();
|
|
49
53
|
limitPushDownOptimizer.rewrite(plan);
|
|
50
54
|
|