lbug 0.12.3-dev.13 → 0.12.3-dev.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
- package/lbug-source/CMakeLists.txt +15 -6
- package/lbug-source/Makefile +1 -2
- package/lbug-source/benchmark/serializer.py +13 -2
- package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
- package/lbug-source/scripts/generate_binary_demo.sh +1 -1
- package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
- package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
- package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
- package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
- package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
- package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
- package/lbug-source/src/include/processor/plan_mapper.h +2 -0
- package/lbug-source/src/optimizer/CMakeLists.txt +1 -0
- package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
- package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
- package/lbug-source/src/optimizer/optimizer.cpp +6 -0
- package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
- package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
- package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
- package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
- package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
- package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
- package/lbug-source/test/common/string_format.cpp +9 -1
- package/lbug-source/test/copy/copy_test.cpp +4 -4
- package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
- package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
- package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
- package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
- package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
- package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
- package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
- package/lbug-source/test/transaction/transaction_test.cpp +19 -15
- package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
- package/lbug-source/tools/shell/embedded_shell.cpp +11 -0
- package/lbug-source/tools/shell/linenoise.cpp +3 -3
- package/lbug-source/tools/shell/test/test_helper.py +1 -1
- package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
|
@@ -1152,6 +1152,9 @@ jobs:
|
|
|
1152
1152
|
name: minimal test
|
|
1153
1153
|
runs-on: ubuntu-latest
|
|
1154
1154
|
needs: [ sanity-checks ]
|
|
1155
|
+
env:
|
|
1156
|
+
GEN: Ninja
|
|
1157
|
+
USE_STD_FORMAT: 1
|
|
1155
1158
|
steps:
|
|
1156
1159
|
- uses: actions/checkout@v4
|
|
1157
1160
|
|
|
@@ -1159,12 +1162,14 @@ jobs:
|
|
|
1159
1162
|
uses: hendrikmuhs/ccache-action@v1.2
|
|
1160
1163
|
with:
|
|
1161
1164
|
key: minimal-test-${{ runner.os }}
|
|
1165
|
+
max-size: 2G
|
|
1166
|
+
restore-keys: minimal-test-
|
|
1162
1167
|
|
|
1163
1168
|
- name: Build
|
|
1164
|
-
run: make
|
|
1169
|
+
run: make relwithdebinfo
|
|
1165
1170
|
|
|
1166
1171
|
- name: Generate datasets
|
|
1167
|
-
run: bash scripts/generate_binary_demo.sh
|
|
1172
|
+
run: bash scripts/generate_binary_demo.sh --lbug-shell-mode relwithdebinfo
|
|
1168
1173
|
|
|
1169
1174
|
- name: Install uv
|
|
1170
1175
|
run: pip3 install uv
|
|
@@ -1210,6 +1215,8 @@ jobs:
|
|
|
1210
1215
|
uses: hendrikmuhs/ccache-action@v1.2
|
|
1211
1216
|
with:
|
|
1212
1217
|
key: minimal-extension-test-${{ runner.os }}
|
|
1218
|
+
max-size: 2G
|
|
1219
|
+
restore-keys: minimal-extension-test-
|
|
1213
1220
|
|
|
1214
1221
|
- name: Update PostgreSQL host
|
|
1215
1222
|
working-directory: extension/postgres/test/test_files
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
cmake_minimum_required(VERSION 3.15)
|
|
2
2
|
|
|
3
|
-
project(Lbug VERSION 0.12.3.
|
|
3
|
+
project(Lbug VERSION 0.12.3.15 LANGUAGES CXX C)
|
|
4
4
|
|
|
5
5
|
option(SINGLE_THREADED "Single-threaded mode" FALSE)
|
|
6
6
|
if(SINGLE_THREADED)
|
|
@@ -14,15 +14,26 @@ endif()
|
|
|
14
14
|
|
|
15
15
|
set(CMAKE_CXX_STANDARD 20)
|
|
16
16
|
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
|
17
|
-
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
|
|
18
|
-
set(CMAKE_C_VISIBILITY_PRESET hidden)
|
|
19
17
|
set(CMAKE_EXPORT_COMPILE_COMMANDS TRUE)
|
|
20
18
|
set(CMAKE_FIND_PACKAGE_RESOLVE_SYMLINKS TRUE)
|
|
21
19
|
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
|
22
|
-
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
|
|
23
20
|
# On Linux, symbols in executables are not accessible by loaded shared libraries (e.g. via dlopen(3)). However, we need to export public symbols in executables so that extensions can access public symbols. This enables that behaviour.
|
|
24
21
|
set(CMAKE_ENABLE_EXPORTS TRUE)
|
|
25
22
|
|
|
23
|
+
# When building tests, we need all symbols visible so tests can link to the shared library
|
|
24
|
+
# instead of static linking (which bloats binary sizes significantly)
|
|
25
|
+
option(BUILD_TESTS "Build C++ tests." FALSE)
|
|
26
|
+
option(BUILD_EXTENSION_TESTS "Build C++ extension tests." FALSE)
|
|
27
|
+
if(BUILD_TESTS OR BUILD_EXTENSION_TESTS)
|
|
28
|
+
set(CMAKE_CXX_VISIBILITY_PRESET default)
|
|
29
|
+
set(CMAKE_C_VISIBILITY_PRESET default)
|
|
30
|
+
set(CMAKE_VISIBILITY_INLINES_HIDDEN OFF)
|
|
31
|
+
else()
|
|
32
|
+
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
|
|
33
|
+
set(CMAKE_C_VISIBILITY_PRESET hidden)
|
|
34
|
+
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
|
|
35
|
+
endif()
|
|
36
|
+
|
|
26
37
|
option(ENABLE_WERROR "Treat all warnings as errors" FALSE)
|
|
27
38
|
if(ENABLE_WERROR)
|
|
28
39
|
if (CMAKE_VERSION VERSION_GREATER "3.24.0" OR CMAKE_VERSION VERSION_EQUAL "3.24.0")
|
|
@@ -302,8 +313,6 @@ option(BUILD_NODEJS "Build NodeJS API." FALSE)
|
|
|
302
313
|
option(BUILD_PYTHON "Build Python API." FALSE)
|
|
303
314
|
option(BUILD_SHELL "Build Interactive Shell" TRUE)
|
|
304
315
|
option(BUILD_SINGLE_FILE_HEADER "Build single file header. Requires Python >= 3.9." TRUE)
|
|
305
|
-
option(BUILD_TESTS "Build C++ tests." FALSE)
|
|
306
|
-
option(BUILD_EXTENSION_TESTS "Build C++ extension tests." FALSE)
|
|
307
316
|
option(BUILD_LBUG "Build Lbug." TRUE)
|
|
308
317
|
option(ENABLE_BACKTRACES "Enable backtrace printing for exceptions and segfaults" FALSE)
|
|
309
318
|
option(USE_STD_FORMAT "Use std::format instead of a custom formatter." FALSE)
|
package/lbug-source/Makefile
CHANGED
|
@@ -294,9 +294,8 @@ extension-release:
|
|
|
294
294
|
-DBUILD_LBUG=FALSE \
|
|
295
295
|
)
|
|
296
296
|
|
|
297
|
-
# pytest expects a `Release` build path.
|
|
298
297
|
shell-test:
|
|
299
|
-
$(call run-cmake-
|
|
298
|
+
$(call run-cmake-relwithdebinfo, \
|
|
300
299
|
-DBUILD_SHELL=TRUE \
|
|
301
300
|
)
|
|
302
301
|
$(MAKE) -C tools/shell/test test
|
|
@@ -95,17 +95,28 @@ if __name__ == '__main__':
|
|
|
95
95
|
parser.add_argument("--single-thread",
|
|
96
96
|
help="If true, copy single threaded, which makes the results more reproducible",
|
|
97
97
|
action="store_true")
|
|
98
|
+
parser.add_argument("--lbug-shell-mode",
|
|
99
|
+
help="debug, release or relwithdebinfo",
|
|
100
|
+
default="release")
|
|
101
|
+
default_mode = "release"
|
|
98
102
|
if sys.platform == "win32":
|
|
99
103
|
default_lbug_exec_path = os.path.join(
|
|
100
|
-
base_dir, '..', 'build',
|
|
104
|
+
base_dir, '..', 'build', default_mode, 'tools', 'shell', 'lbug_shell')
|
|
101
105
|
else:
|
|
102
106
|
default_lbug_exec_path = os.path.join(
|
|
103
|
-
base_dir, '..', 'build',
|
|
107
|
+
base_dir, '..', 'build', default_mode, 'tools', 'shell', 'lbug')
|
|
104
108
|
parser.add_argument("--lbug-shell",
|
|
105
109
|
help="Path of the lbug shell executable. Defaults to the path as built in the default release build directory",
|
|
106
110
|
default=default_lbug_exec_path)
|
|
107
111
|
args = parser.parse_args()
|
|
108
112
|
|
|
113
|
+
if args.lbug_shell == default_lbug_exec_path:
|
|
114
|
+
mode = args.lbug_shell_mode
|
|
115
|
+
if sys.platform == "win32":
|
|
116
|
+
args.lbug_shell = os.path.join(base_dir, '..', 'build', mode, 'tools', 'shell', 'lbug_shell')
|
|
117
|
+
else:
|
|
118
|
+
args.lbug_shell = os.path.join(base_dir, '..', 'build', mode, 'tools', 'shell', 'lbug')
|
|
119
|
+
|
|
109
120
|
try:
|
|
110
121
|
serialize(args.lbug_shell, args.dataset_name, args.dataset_path, args.serialized_graph_path,
|
|
111
122
|
args.benchmark_copy_log_dir, args.single_thread)
|
|
@@ -2,4 +2,4 @@
|
|
|
2
2
|
|
|
3
3
|
CD=`dirname "$0"`
|
|
4
4
|
DATASET_DIR=$CD/../dataset
|
|
5
|
-
python3 $CD/../benchmark/serializer.py DemoDB $DATASET_DIR/demo-db/parquet $DATASET_DIR/binary-demo --single-thread
|
|
5
|
+
# Forward any extra command-line arguments (e.g. --lbug-shell-mode <mode>) to the serializer.
# "$@" hands each argument over unchanged; an unquoted $* would re-split arguments that contain
# whitespace. The path expansions are quoted for the same reason.
python3 "$CD/../benchmark/serializer.py" DemoDB "$DATASET_DIR/demo-db/parquet" "$DATASET_DIR/binary-demo" --single-thread "$@"
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "logical_operator_visitor.h"
|
|
4
|
+
#include "planner/operator/logical_plan.h"
|
|
5
|
+
|
|
6
|
+
namespace lbug {
|
|
7
|
+
namespace main {
|
|
8
|
+
class ClientContext;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
namespace optimizer {
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* This optimizer detects patterns where we're counting all rows from a single rel table
|
|
15
|
+
* without any filters, and replaces the scan + aggregate with a single COUNT_REL_TABLE operator.
|
|
16
|
+
*
|
|
17
|
+
* Pattern detected:
|
|
18
|
+
* AGGREGATE (COUNT_STAR only, no keys) →
|
|
19
|
+
* PROJECTION (empty or pass-through) →
|
|
20
|
+
* EXTEND (single rel table) →
|
|
21
|
+
* SCAN_NODE_TABLE
|
|
22
|
+
*
|
|
23
|
+
* This pattern is replaced with:
|
|
24
|
+
* COUNT_REL_TABLE (new operator that counts the edges itself by scanning the bound nodes)
|
|
25
|
+
*/
|
|
26
|
+
class CountRelTableOptimizer : public LogicalOperatorVisitor {
|
|
27
|
+
public:
|
|
28
|
+
explicit CountRelTableOptimizer(main::ClientContext* context) : context{context} {}
|
|
29
|
+
|
|
30
|
+
void rewrite(planner::LogicalPlan* plan);
|
|
31
|
+
|
|
32
|
+
private:
|
|
33
|
+
std::shared_ptr<planner::LogicalOperator> visitOperator(
|
|
34
|
+
const std::shared_ptr<planner::LogicalOperator>& op);
|
|
35
|
+
|
|
36
|
+
std::shared_ptr<planner::LogicalOperator> visitAggregateReplace(
|
|
37
|
+
std::shared_ptr<planner::LogicalOperator> op) override;
|
|
38
|
+
|
|
39
|
+
// Check if the aggregate is a simple COUNT(*) with no keys
|
|
40
|
+
bool isSimpleCountStar(planner::LogicalOperator* op) const;
|
|
41
|
+
|
|
42
|
+
// Check if the plan below aggregate matches the pattern for optimization
|
|
43
|
+
bool canOptimize(planner::LogicalOperator* aggregate) const;
|
|
44
|
+
|
|
45
|
+
main::ClientContext* context;
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
} // namespace optimizer
|
|
49
|
+
} // namespace lbug
|
|
@@ -39,6 +39,12 @@ protected:
|
|
|
39
39
|
return op;
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
+
// Visit hook for COUNT_REL_TABLE operators; the default implementation does nothing.
virtual void visitCountRelTable(planner::LogicalOperator* /*op*/) {}
|
|
43
|
+
virtual std::shared_ptr<planner::LogicalOperator> visitCountRelTableReplace(
|
|
44
|
+
std::shared_ptr<planner::LogicalOperator> op) {
|
|
45
|
+
return op;
|
|
46
|
+
}
|
|
47
|
+
|
|
42
48
|
virtual void visitDelete(planner::LogicalOperator* /*op*/) {}
|
|
43
49
|
virtual std::shared_ptr<planner::LogicalOperator> visitDeleteReplace(
|
|
44
50
|
std::shared_ptr<planner::LogicalOperator> op) {
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "binder/expression/expression.h"
|
|
4
|
+
#include "binder/expression/node_expression.h"
|
|
5
|
+
#include "catalog/catalog_entry/rel_group_catalog_entry.h"
|
|
6
|
+
#include "common/enums/extend_direction.h"
|
|
7
|
+
#include "planner/operator/logical_operator.h"
|
|
8
|
+
|
|
9
|
+
namespace lbug {
|
|
10
|
+
namespace planner {
|
|
11
|
+
|
|
12
|
+
struct LogicalCountRelTablePrintInfo final : OPPrintInfo {
|
|
13
|
+
std::string relTableName;
|
|
14
|
+
std::shared_ptr<binder::Expression> countExpr;
|
|
15
|
+
|
|
16
|
+
LogicalCountRelTablePrintInfo(std::string relTableName,
|
|
17
|
+
std::shared_ptr<binder::Expression> countExpr)
|
|
18
|
+
: relTableName{std::move(relTableName)}, countExpr{std::move(countExpr)} {}
|
|
19
|
+
|
|
20
|
+
std::string toString() const override {
|
|
21
|
+
return "Table: " + relTableName + ", Count: " + countExpr->toString();
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
std::unique_ptr<OPPrintInfo> copy() const override {
|
|
25
|
+
return std::make_unique<LogicalCountRelTablePrintInfo>(relTableName, countExpr);
|
|
26
|
+
}
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* LogicalCountRelTable is an optimized operator that counts the number of rows
|
|
31
|
+
* in a rel table by scanning through bound nodes and counting edges.
|
|
32
|
+
*
|
|
33
|
+
* This operator is created by CountRelTableOptimizer when it detects:
|
|
34
|
+
* COUNT(*) over a single rel table with no filters
|
|
35
|
+
*/
|
|
36
|
+
class LogicalCountRelTable final : public LogicalOperator {
|
|
37
|
+
static constexpr LogicalOperatorType type_ = LogicalOperatorType::COUNT_REL_TABLE;
|
|
38
|
+
|
|
39
|
+
public:
|
|
40
|
+
LogicalCountRelTable(catalog::RelGroupCatalogEntry* relGroupEntry,
|
|
41
|
+
std::vector<common::table_id_t> relTableIDs,
|
|
42
|
+
std::vector<common::table_id_t> boundNodeTableIDs,
|
|
43
|
+
std::shared_ptr<binder::NodeExpression> boundNode, common::ExtendDirection direction,
|
|
44
|
+
std::shared_ptr<binder::Expression> countExpr)
|
|
45
|
+
: LogicalOperator{type_}, relGroupEntry{relGroupEntry}, relTableIDs{std::move(relTableIDs)},
|
|
46
|
+
boundNodeTableIDs{std::move(boundNodeTableIDs)}, boundNode{std::move(boundNode)},
|
|
47
|
+
direction{direction}, countExpr{std::move(countExpr)} {
|
|
48
|
+
cardinality = 1; // Always returns exactly one row
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
void computeFactorizedSchema() override;
|
|
52
|
+
void computeFlatSchema() override;
|
|
53
|
+
|
|
54
|
+
std::string getExpressionsForPrinting() const override { return countExpr->toString(); }
|
|
55
|
+
|
|
56
|
+
catalog::RelGroupCatalogEntry* getRelGroupEntry() const { return relGroupEntry; }
|
|
57
|
+
const std::vector<common::table_id_t>& getRelTableIDs() const { return relTableIDs; }
|
|
58
|
+
const std::vector<common::table_id_t>& getBoundNodeTableIDs() const {
|
|
59
|
+
return boundNodeTableIDs;
|
|
60
|
+
}
|
|
61
|
+
std::shared_ptr<binder::NodeExpression> getBoundNode() const { return boundNode; }
|
|
62
|
+
common::ExtendDirection getDirection() const { return direction; }
|
|
63
|
+
std::shared_ptr<binder::Expression> getCountExpr() const { return countExpr; }
|
|
64
|
+
|
|
65
|
+
std::unique_ptr<OPPrintInfo> getPrintInfo() const override {
|
|
66
|
+
return std::make_unique<LogicalCountRelTablePrintInfo>(relGroupEntry->getName(), countExpr);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
std::unique_ptr<LogicalOperator> copy() override {
|
|
70
|
+
return std::make_unique<LogicalCountRelTable>(relGroupEntry, relTableIDs, boundNodeTableIDs,
|
|
71
|
+
boundNode, direction, countExpr);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
private:
|
|
75
|
+
catalog::RelGroupCatalogEntry* relGroupEntry;
|
|
76
|
+
std::vector<common::table_id_t> relTableIDs;
|
|
77
|
+
std::vector<common::table_id_t> boundNodeTableIDs;
|
|
78
|
+
std::shared_ptr<binder::NodeExpression> boundNode;
|
|
79
|
+
common::ExtendDirection direction;
|
|
80
|
+
std::shared_ptr<binder::Expression> countExpr;
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
} // namespace planner
|
|
84
|
+
} // namespace lbug
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "common/enums/rel_direction.h"
|
|
4
|
+
#include "processor/operator/physical_operator.h"
|
|
5
|
+
#include "storage/table/node_table.h"
|
|
6
|
+
#include "storage/table/rel_table.h"
|
|
7
|
+
|
|
8
|
+
namespace lbug {
|
|
9
|
+
namespace processor {
|
|
10
|
+
|
|
11
|
+
struct CountRelTablePrintInfo final : OPPrintInfo {
|
|
12
|
+
std::string relTableName;
|
|
13
|
+
|
|
14
|
+
explicit CountRelTablePrintInfo(std::string relTableName)
|
|
15
|
+
: relTableName{std::move(relTableName)} {}
|
|
16
|
+
|
|
17
|
+
std::string toString() const override { return "Table: " + relTableName; }
|
|
18
|
+
|
|
19
|
+
std::unique_ptr<OPPrintInfo> copy() const override {
|
|
20
|
+
return std::make_unique<CountRelTablePrintInfo>(relTableName);
|
|
21
|
+
}
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* CountRelTable is a source operator that counts edges in a rel table
|
|
26
|
+
* by scanning through all bound nodes and counting their edges.
|
|
27
|
+
* It creates its own internal vectors for node scanning (not exposed in ResultSet).
|
|
28
|
+
*/
|
|
29
|
+
class CountRelTable final : public PhysicalOperator {
|
|
30
|
+
static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::COUNT_REL_TABLE;
|
|
31
|
+
|
|
32
|
+
public:
|
|
33
|
+
CountRelTable(std::vector<storage::NodeTable*> nodeTables,
|
|
34
|
+
std::vector<storage::RelTable*> relTables, common::RelDataDirection direction,
|
|
35
|
+
DataPos countOutputPos, physical_op_id id, std::unique_ptr<OPPrintInfo> printInfo)
|
|
36
|
+
: PhysicalOperator{type_, id, std::move(printInfo)}, nodeTables{std::move(nodeTables)},
|
|
37
|
+
relTables{std::move(relTables)}, direction{direction}, countOutputPos{countOutputPos} {}
|
|
38
|
+
|
|
39
|
+
bool isSource() const override { return true; }
|
|
40
|
+
bool isParallel() const override { return false; }
|
|
41
|
+
|
|
42
|
+
void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override;
|
|
43
|
+
|
|
44
|
+
bool getNextTuplesInternal(ExecutionContext* context) override;
|
|
45
|
+
|
|
46
|
+
std::unique_ptr<PhysicalOperator> copy() override {
|
|
47
|
+
return std::make_unique<CountRelTable>(nodeTables, relTables, direction, countOutputPos, id,
|
|
48
|
+
printInfo->copy());
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
private:
|
|
52
|
+
std::vector<storage::NodeTable*> nodeTables;
|
|
53
|
+
std::vector<storage::RelTable*> relTables;
|
|
54
|
+
common::RelDataDirection direction;
|
|
55
|
+
DataPos countOutputPos;
|
|
56
|
+
common::ValueVector* countVector;
|
|
57
|
+
bool hasExecuted;
|
|
58
|
+
common::row_idx_t totalCount;
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
} // namespace processor
|
|
62
|
+
} // namespace lbug
|
|
@@ -90,6 +90,8 @@ public:
|
|
|
90
90
|
std::unique_ptr<PhysicalOperator> mapCopyRelFrom(
|
|
91
91
|
const planner::LogicalOperator* logicalOperator);
|
|
92
92
|
std::unique_ptr<PhysicalOperator> mapCopyTo(const planner::LogicalOperator* logicalOperator);
|
|
93
|
+
std::unique_ptr<PhysicalOperator> mapCountRelTable(
|
|
94
|
+
const planner::LogicalOperator* logicalOperator);
|
|
93
95
|
std::unique_ptr<PhysicalOperator> mapCreateMacro(
|
|
94
96
|
const planner::LogicalOperator* logicalOperator);
|
|
95
97
|
std::unique_ptr<PhysicalOperator> mapCreateSequence(
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
#include "optimizer/count_rel_table_optimizer.h"
|
|
2
|
+
|
|
3
|
+
#include "binder/expression/aggregate_function_expression.h"
|
|
4
|
+
#include "binder/expression/node_expression.h"
|
|
5
|
+
#include "catalog/catalog_entry/node_table_id_pair.h"
|
|
6
|
+
#include "function/aggregate/count_star.h"
|
|
7
|
+
#include "main/client_context.h"
|
|
8
|
+
#include "planner/operator/extend/logical_extend.h"
|
|
9
|
+
#include "planner/operator/logical_aggregate.h"
|
|
10
|
+
#include "planner/operator/logical_projection.h"
|
|
11
|
+
#include "planner/operator/scan/logical_count_rel_table.h"
|
|
12
|
+
#include "planner/operator/scan/logical_scan_node_table.h"
|
|
13
|
+
|
|
14
|
+
using namespace lbug::common;
|
|
15
|
+
using namespace lbug::planner;
|
|
16
|
+
using namespace lbug::binder;
|
|
17
|
+
using namespace lbug::catalog;
|
|
18
|
+
|
|
19
|
+
namespace lbug {
|
|
20
|
+
namespace optimizer {
|
|
21
|
+
|
|
22
|
+
// Runs the rewrite over the whole plan, starting from its last (root) operator.
// visitOperator() returns the operator that should stand in place of the one it was given,
// so the result is stored back into the plan. Discarding it would lose the rewrite whenever
// the root operator itself is the one being replaced.
void CountRelTableOptimizer::rewrite(LogicalPlan* plan) {
    plan->setLastOperator(visitOperator(plan->getLastOperator()));
}
|
|
25
|
+
|
|
26
|
+
std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitOperator(
|
|
27
|
+
const std::shared_ptr<LogicalOperator>& op) {
|
|
28
|
+
// bottom-up traversal
|
|
29
|
+
for (auto i = 0u; i < op->getNumChildren(); ++i) {
|
|
30
|
+
op->setChild(i, visitOperator(op->getChild(i)));
|
|
31
|
+
}
|
|
32
|
+
auto result = visitOperatorReplaceSwitch(op);
|
|
33
|
+
result->computeFlatSchema();
|
|
34
|
+
return result;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Returns true only when `op` is an AGGREGATE that has no keys (no GROUP BY) and consists of
// exactly one aggregate expression, which must be a non-DISTINCT COUNT_STAR function.
bool CountRelTableOptimizer::isSimpleCountStar(LogicalOperator* op) const {
    if (op->getOperatorType() != LogicalOperatorType::AGGREGATE) {
        return false;
    }
    auto& agg = op->constCast<LogicalAggregate>();

    // A keyed aggregate is a GROUP BY, which this rewrite does not cover.
    if (agg.hasKeys()) {
        return false;
    }

    // Anything other than a single aggregate expression is out of scope.
    auto aggExprs = agg.getAggregates();
    if (aggExprs.size() != 1) {
        return false;
    }

    // The type check must come before the cast to AggregateFunctionExpression.
    auto& candidate = aggExprs[0];
    if (candidate->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
        return false;
    }
    auto& aggFunc = candidate->constCast<AggregateFunctionExpression>();

    // Accept COUNT_STAR only, and only when it is not flagged DISTINCT.
    const bool isCountStar = aggFunc.getFunction().name == function::CountStarFunction::name;
    return isCountStar && !aggFunc.isDistinct();
}
|
|
71
|
+
|
|
72
|
+
// Checks whether the sub-plan below `aggregate` has the shape
//   [PROJECTION ...] -> EXTEND -> SCAN_NODE_TABLE
// where every projection is empty or projects aggregate-function expressions only, the
// extend is not in BOTH direction, its rel is not multi-labeled, and neither the extend nor
// the node scan reads any property.
bool CountRelTableOptimizer::canOptimize(LogicalOperator* aggregate) const {
    auto* node = aggregate->getChild(0).get();

    // Walk down through any chain of projections. An empty projection has nothing to check.
    for (; node->getOperatorType() == LogicalOperatorType::PROJECTION;
         node = node->getChild(0).get()) {
        auto& projection = node->constCast<LogicalProjection>();
        for (auto& projected : projection.getExpressionsToProject()) {
            if (projected->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
                return false;
            }
        }
    }

    // Below the projections there has to be an EXTEND.
    if (node->getOperatorType() != LogicalOperatorType::EXTEND) {
        return false;
    }
    auto& extend = node->constCast<LogicalExtend>();

    // Undirected patterns such as (a)-[e]-(b) scan both directions; replacing them would need
    // extra handling to avoid double counting, so they are left alone.
    if (extend.getDirection() == ExtendDirection::BOTH) {
        return false;
    }

    // Only a rel bound to a single table qualifies.
    if (extend.getRel()->isMultiLabeled()) {
        return false;
    }

    // Any property read on the rel rules the rewrite out.
    if (!extend.getProperties().empty()) {
        return false;
    }

    // The extend has to sit directly on a node table scan...
    auto* scanCandidate = node->getChild(0).get();
    if (scanCandidate->getOperatorType() != LogicalOperatorType::SCAN_NODE_TABLE) {
        return false;
    }

    // ...and that scan must not read any node property either.
    return scanCandidate->constCast<LogicalScanNodeTable>().getProperties().empty();
}
|
|
138
|
+
|
|
139
|
+
std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitAggregateReplace(
|
|
140
|
+
std::shared_ptr<LogicalOperator> op) {
|
|
141
|
+
if (!isSimpleCountStar(op.get())) {
|
|
142
|
+
return op;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
if (!canOptimize(op.get())) {
|
|
146
|
+
return op;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// Find the EXTEND operator
|
|
150
|
+
auto* current = op->getChild(0).get();
|
|
151
|
+
while (current->getOperatorType() == LogicalOperatorType::PROJECTION) {
|
|
152
|
+
current = current->getChild(0).get();
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
KU_ASSERT(current->getOperatorType() == LogicalOperatorType::EXTEND);
|
|
156
|
+
auto& extend = current->constCast<LogicalExtend>();
|
|
157
|
+
auto rel = extend.getRel();
|
|
158
|
+
auto boundNode = extend.getBoundNode();
|
|
159
|
+
auto nbrNode = extend.getNbrNode();
|
|
160
|
+
|
|
161
|
+
// Get the rel group entry
|
|
162
|
+
KU_ASSERT(rel->getNumEntries() == 1);
|
|
163
|
+
auto* relGroupEntry = rel->getEntry(0)->ptrCast<RelGroupCatalogEntry>();
|
|
164
|
+
|
|
165
|
+
// Determine the source and destination node table IDs based on extend direction.
|
|
166
|
+
// If extendFromSource is true, then boundNode is the source and nbrNode is the destination.
|
|
167
|
+
// If extendFromSource is false, then boundNode is the destination and nbrNode is the source.
|
|
168
|
+
auto boundNodeTableIDs = boundNode->getTableIDsSet();
|
|
169
|
+
auto nbrNodeTableIDs = nbrNode->getTableIDsSet();
|
|
170
|
+
|
|
171
|
+
// Get only the rel table IDs that match the specific node table ID pairs in the query.
|
|
172
|
+
// A rel table connects a specific (srcTableID, dstTableID) pair.
|
|
173
|
+
std::vector<table_id_t> relTableIDs;
|
|
174
|
+
for (auto& info : relGroupEntry->getRelEntryInfos()) {
|
|
175
|
+
table_id_t srcTableID = info.nodePair.srcTableID;
|
|
176
|
+
table_id_t dstTableID = info.nodePair.dstTableID;
|
|
177
|
+
|
|
178
|
+
bool matches = false;
|
|
179
|
+
if (extend.extendFromSourceNode()) {
|
|
180
|
+
// boundNode is src, nbrNode is dst
|
|
181
|
+
matches =
|
|
182
|
+
boundNodeTableIDs.contains(srcTableID) && nbrNodeTableIDs.contains(dstTableID);
|
|
183
|
+
} else {
|
|
184
|
+
// boundNode is dst, nbrNode is src
|
|
185
|
+
matches =
|
|
186
|
+
boundNodeTableIDs.contains(dstTableID) && nbrNodeTableIDs.contains(srcTableID);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
if (matches) {
|
|
190
|
+
relTableIDs.push_back(info.oid);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// If no matching rel tables, don't optimize (shouldn't happen for valid queries)
|
|
195
|
+
if (relTableIDs.empty()) {
|
|
196
|
+
return op;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Get the count expression from the original aggregate
|
|
200
|
+
auto& aggregate = op->constCast<LogicalAggregate>();
|
|
201
|
+
auto countExpr = aggregate.getAggregates()[0];
|
|
202
|
+
|
|
203
|
+
// Get the bound node table IDs as a vector
|
|
204
|
+
std::vector<table_id_t> boundNodeTableIDsVec(boundNodeTableIDs.begin(),
|
|
205
|
+
boundNodeTableIDs.end());
|
|
206
|
+
|
|
207
|
+
// Create the new COUNT_REL_TABLE operator with all necessary information for scanning
|
|
208
|
+
auto countRelTable =
|
|
209
|
+
std::make_shared<LogicalCountRelTable>(relGroupEntry, std::move(relTableIDs),
|
|
210
|
+
std::move(boundNodeTableIDsVec), boundNode, extend.getDirection(), countExpr);
|
|
211
|
+
countRelTable->computeFlatSchema();
|
|
212
|
+
|
|
213
|
+
return countRelTable;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
} // namespace optimizer
|
|
217
|
+
} // namespace lbug
|
|
@@ -19,6 +19,9 @@ void LogicalOperatorVisitor::visitOperatorSwitch(LogicalOperator* op) {
|
|
|
19
19
|
case LogicalOperatorType::COPY_TO: {
|
|
20
20
|
visitCopyTo(op);
|
|
21
21
|
} break;
|
|
22
|
+
case LogicalOperatorType::COUNT_REL_TABLE: {
|
|
23
|
+
visitCountRelTable(op);
|
|
24
|
+
} break;
|
|
22
25
|
case LogicalOperatorType::DELETE: {
|
|
23
26
|
visitDelete(op);
|
|
24
27
|
} break;
|
|
@@ -108,6 +111,9 @@ std::shared_ptr<LogicalOperator> LogicalOperatorVisitor::visitOperatorReplaceSwi
|
|
|
108
111
|
case LogicalOperatorType::COPY_TO: {
|
|
109
112
|
return visitCopyToReplace(op);
|
|
110
113
|
}
|
|
114
|
+
case LogicalOperatorType::COUNT_REL_TABLE: {
|
|
115
|
+
return visitCountRelTableReplace(op);
|
|
116
|
+
}
|
|
111
117
|
case LogicalOperatorType::DELETE: {
|
|
112
118
|
return visitDeleteReplace(op);
|
|
113
119
|
}
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
#include "optimizer/agg_key_dependency_optimizer.h"
|
|
6
6
|
#include "optimizer/cardinality_updater.h"
|
|
7
7
|
#include "optimizer/correlated_subquery_unnest_solver.h"
|
|
8
|
+
#include "optimizer/count_rel_table_optimizer.h"
|
|
8
9
|
#include "optimizer/factorization_rewriter.h"
|
|
9
10
|
#include "optimizer/filter_push_down_optimizer.h"
|
|
10
11
|
#include "optimizer/limit_push_down_optimizer.h"
|
|
@@ -32,6 +33,11 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
|
|
|
32
33
|
auto removeUnnecessaryJoinOptimizer = RemoveUnnecessaryJoinOptimizer();
|
|
33
34
|
removeUnnecessaryJoinOptimizer.rewrite(plan);
|
|
34
35
|
|
|
36
|
+
// CountRelTableOptimizer should be applied early before other optimizations
|
|
37
|
+
// that might change the plan structure.
|
|
38
|
+
auto countRelTableOptimizer = CountRelTableOptimizer(context);
|
|
39
|
+
countRelTableOptimizer.rewrite(plan);
|
|
40
|
+
|
|
35
41
|
auto filterPushDownOptimizer = FilterPushDownOptimizer(context);
|
|
36
42
|
filterPushDownOptimizer.rewrite(plan);
|
|
37
43
|
|
|
@@ -22,6 +22,8 @@ std::string LogicalOperatorUtils::logicalOperatorTypeToString(LogicalOperatorTyp
|
|
|
22
22
|
return "COPY_FROM";
|
|
23
23
|
case LogicalOperatorType::COPY_TO:
|
|
24
24
|
return "COPY_TO";
|
|
25
|
+
case LogicalOperatorType::COUNT_REL_TABLE:
|
|
26
|
+
return "COUNT_REL_TABLE";
|
|
25
27
|
case LogicalOperatorType::CREATE_MACRO:
|
|
26
28
|
return "CREATE_MACRO";
|
|
27
29
|
case LogicalOperatorType::CREATE_SEQUENCE:
|