lbug 0.12.3-dev.2 → 0.12.3-dev.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -6
- package/lbug-source/.github/workflows/ci-workflow.yml +9 -2
- package/lbug-source/CMakeLists.txt +15 -6
- package/lbug-source/Makefile +1 -2
- package/lbug-source/README.md +2 -6
- package/lbug-source/benchmark/serializer.py +24 -3
- package/lbug-source/dataset/demo-db/csv/copy.cypher +4 -4
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
- package/lbug-source/dataset/demo-db/parquet/copy.cypher +4 -4
- package/lbug-source/extension/httpfs/test/test_files/http.test +1 -0
- package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
- package/lbug-source/scripts/antlr4/hash.md5 +1 -1
- package/lbug-source/scripts/generate_binary_demo.sh +1 -1
- package/lbug-source/src/antlr4/Cypher.g4 +1 -1
- package/lbug-source/src/binder/bind/bind_ddl.cpp +23 -13
- package/lbug-source/src/catalog/catalog.cpp +5 -4
- package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
- package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +7 -0
- package/lbug-source/src/function/function_collection.cpp +2 -1
- package/lbug-source/src/function/table/CMakeLists.txt +1 -0
- package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
- package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +10 -6
- package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
- package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +4 -2
- package/lbug-source/src/include/common/constants.h +1 -0
- package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
- package/lbug-source/src/include/optimizer/count_rel_table_optimizer.h +49 -0
- package/lbug-source/src/include/optimizer/logical_operator_visitor.h +6 -0
- package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
- package/lbug-source/src/include/planner/operator/logical_operator.h +1 -0
- package/lbug-source/src/include/planner/operator/scan/logical_count_rel_table.h +84 -0
- package/lbug-source/src/include/processor/operator/physical_operator.h +1 -0
- package/lbug-source/src/include/processor/operator/scan/count_rel_table.h +62 -0
- package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
- package/lbug-source/src/include/processor/plan_mapper.h +2 -0
- package/lbug-source/src/include/storage/storage_manager.h +1 -0
- package/lbug-source/src/include/storage/storage_version_info.h +1 -7
- package/lbug-source/src/include/storage/table/node_table.h +6 -1
- package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
- package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
- package/lbug-source/src/include/storage/table/rel_table.h +2 -2
- package/lbug-source/src/include/transaction/transaction.h +2 -0
- package/lbug-source/src/main/query_result/materialized_query_result.cpp +2 -2
- package/lbug-source/src/optimizer/CMakeLists.txt +1 -0
- package/lbug-source/src/optimizer/count_rel_table_optimizer.cpp +217 -0
- package/lbug-source/src/optimizer/logical_operator_visitor.cpp +6 -0
- package/lbug-source/src/optimizer/optimizer.cpp +6 -0
- package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
- package/lbug-source/src/planner/operator/logical_operator.cpp +2 -0
- package/lbug-source/src/planner/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/planner/operator/scan/logical_count_rel_table.cpp +24 -0
- package/lbug-source/src/processor/map/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/map/map_count_rel_table.cpp +55 -0
- package/lbug-source/src/processor/map/plan_mapper.cpp +3 -0
- package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
- package/lbug-source/src/processor/operator/physical_operator.cpp +2 -0
- package/lbug-source/src/processor/operator/scan/CMakeLists.txt +1 -0
- package/lbug-source/src/processor/operator/scan/count_rel_table.cpp +137 -0
- package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
- package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
- package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +12 -2
- package/lbug-source/src/storage/storage_manager.cpp +37 -6
- package/lbug-source/src/storage/table/CMakeLists.txt +2 -0
- package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
- package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
- package/lbug-source/test/api/api_test.cpp +18 -0
- package/lbug-source/test/common/string_format.cpp +9 -1
- package/lbug-source/test/copy/copy_test.cpp +4 -4
- package/lbug-source/test/graph_test/CMakeLists.txt +1 -1
- package/lbug-source/test/include/test_runner/test_group.h +11 -1
- package/lbug-source/test/optimizer/optimizer_test.cpp +46 -0
- package/lbug-source/test/runner/e2e_test.cpp +7 -1
- package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
- package/lbug-source/test/test_helper/CMakeLists.txt +1 -1
- package/lbug-source/test/test_helper/test_helper.cpp +33 -1
- package/lbug-source/test/test_runner/CMakeLists.txt +1 -1
- package/lbug-source/test/test_runner/insert_by_row.cpp +6 -8
- package/lbug-source/test/test_runner/multi_copy_split.cpp +2 -4
- package/lbug-source/test/test_runner/test_parser.cpp +3 -0
- package/lbug-source/test/transaction/checkpoint_test.cpp +1 -1
- package/lbug-source/test/transaction/transaction_test.cpp +19 -15
- package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2761 -2701
- package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -0
- package/lbug-source/tools/benchmark/count_rel_table.benchmark +5 -0
- package/lbug-source/tools/shell/embedded_shell.cpp +78 -3
- package/lbug-source/tools/shell/include/embedded_shell.h +2 -0
- package/lbug-source/tools/shell/linenoise.cpp +3 -3
- package/lbug-source/tools/shell/test/test_helper.py +1 -1
- package/lbug-source/tools/shell/test/test_shell_basics.py +12 -0
- package/lbug-source/tools/shell/test/test_shell_commands.py +19 -0
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "common/enums/rel_direction.h"
|
|
4
|
+
#include "processor/operator/physical_operator.h"
|
|
5
|
+
#include "storage/table/node_table.h"
|
|
6
|
+
#include "storage/table/rel_table.h"
|
|
7
|
+
|
|
8
|
+
namespace lbug {
|
|
9
|
+
namespace processor {
|
|
10
|
+
|
|
11
|
+
struct CountRelTablePrintInfo final : OPPrintInfo {
|
|
12
|
+
std::string relTableName;
|
|
13
|
+
|
|
14
|
+
explicit CountRelTablePrintInfo(std::string relTableName)
|
|
15
|
+
: relTableName{std::move(relTableName)} {}
|
|
16
|
+
|
|
17
|
+
std::string toString() const override { return "Table: " + relTableName; }
|
|
18
|
+
|
|
19
|
+
std::unique_ptr<OPPrintInfo> copy() const override {
|
|
20
|
+
return std::make_unique<CountRelTablePrintInfo>(relTableName);
|
|
21
|
+
}
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* CountRelTable is a source operator that counts edges in a rel table
|
|
26
|
+
* by scanning through all bound nodes and counting their edges.
|
|
27
|
+
* It creates its own internal vectors for node scanning (not exposed in ResultSet).
|
|
28
|
+
*/
|
|
29
|
+
class CountRelTable final : public PhysicalOperator {
|
|
30
|
+
static constexpr PhysicalOperatorType type_ = PhysicalOperatorType::COUNT_REL_TABLE;
|
|
31
|
+
|
|
32
|
+
public:
|
|
33
|
+
CountRelTable(std::vector<storage::NodeTable*> nodeTables,
|
|
34
|
+
std::vector<storage::RelTable*> relTables, common::RelDataDirection direction,
|
|
35
|
+
DataPos countOutputPos, physical_op_id id, std::unique_ptr<OPPrintInfo> printInfo)
|
|
36
|
+
: PhysicalOperator{type_, id, std::move(printInfo)}, nodeTables{std::move(nodeTables)},
|
|
37
|
+
relTables{std::move(relTables)}, direction{direction}, countOutputPos{countOutputPos} {}
|
|
38
|
+
|
|
39
|
+
bool isSource() const override { return true; }
|
|
40
|
+
bool isParallel() const override { return false; }
|
|
41
|
+
|
|
42
|
+
void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override;
|
|
43
|
+
|
|
44
|
+
bool getNextTuplesInternal(ExecutionContext* context) override;
|
|
45
|
+
|
|
46
|
+
std::unique_ptr<PhysicalOperator> copy() override {
|
|
47
|
+
return std::make_unique<CountRelTable>(nodeTables, relTables, direction, countOutputPos, id,
|
|
48
|
+
printInfo->copy());
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
private:
|
|
52
|
+
std::vector<storage::NodeTable*> nodeTables;
|
|
53
|
+
std::vector<storage::RelTable*> relTables;
|
|
54
|
+
common::RelDataDirection direction;
|
|
55
|
+
DataPos countOutputPos;
|
|
56
|
+
common::ValueVector* countVector;
|
|
57
|
+
bool hasExecuted;
|
|
58
|
+
common::row_idx_t totalCount;
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
} // namespace processor
|
|
62
|
+
} // namespace lbug
|
|
@@ -24,7 +24,7 @@ public:
|
|
|
24
24
|
void initialize(const transaction::Transaction* transaction, storage::NodeTable* table,
|
|
25
25
|
ScanNodeTableProgressSharedState& progressSharedState);
|
|
26
26
|
|
|
27
|
-
void nextMorsel(storage::
|
|
27
|
+
void nextMorsel(storage::TableScanState& scanState,
|
|
28
28
|
ScanNodeTableProgressSharedState& progressSharedState);
|
|
29
29
|
|
|
30
30
|
common::SemiMask* getSemiMask() const { return semiMask.get(); }
|
|
@@ -116,7 +116,7 @@ private:
|
|
|
116
116
|
|
|
117
117
|
private:
|
|
118
118
|
common::idx_t currentTableIdx;
|
|
119
|
-
std::unique_ptr<storage::
|
|
119
|
+
std::unique_ptr<storage::TableScanState> scanState;
|
|
120
120
|
std::vector<ScanNodeTableInfo> tableInfos;
|
|
121
121
|
std::vector<std::shared_ptr<ScanNodeTableSharedState>> sharedStates;
|
|
122
122
|
std::shared_ptr<ScanNodeTableProgressSharedState> progressSharedState;
|
|
@@ -90,6 +90,8 @@ public:
|
|
|
90
90
|
std::unique_ptr<PhysicalOperator> mapCopyRelFrom(
|
|
91
91
|
const planner::LogicalOperator* logicalOperator);
|
|
92
92
|
std::unique_ptr<PhysicalOperator> mapCopyTo(const planner::LogicalOperator* logicalOperator);
|
|
93
|
+
std::unique_ptr<PhysicalOperator> mapCountRelTable(
|
|
94
|
+
const planner::LogicalOperator* logicalOperator);
|
|
93
95
|
std::unique_ptr<PhysicalOperator> mapCreateMacro(
|
|
94
96
|
const planner::LogicalOperator* logicalOperator);
|
|
95
97
|
std::unique_ptr<PhysicalOperator> mapCreateSequence(
|
|
@@ -13,13 +13,7 @@ using storage_version_t = uint64_t;
|
|
|
13
13
|
|
|
14
14
|
struct StorageVersionInfo {
|
|
15
15
|
static std::unordered_map<std::string, storage_version_t> getStorageVersionInfo() {
|
|
16
|
-
return {{"0.
|
|
17
|
-
{"0.7.1.1", 35}, {"0.7.0", 34}, {"0.6.0.6", 33}, {"0.6.0.5", 32}, {"0.6.0.2", 31},
|
|
18
|
-
{"0.6.0.1", 31}, {"0.6.0", 28}, {"0.5.0", 28}, {"0.4.2", 27}, {"0.4.1", 27},
|
|
19
|
-
{"0.4.0", 27}, {"0.3.2", 26}, {"0.3.1", 26}, {"0.3.0", 26}, {"0.2.1", 25},
|
|
20
|
-
{"0.2.0", 25}, {"0.1.0", 24}, {"0.0.12.3", 24}, {"0.0.12.2", 24}, {"0.0.12.1", 24},
|
|
21
|
-
{"0.0.12", 23}, {"0.0.11", 23}, {"0.0.10", 23}, {"0.0.9", 23}, {"0.0.8", 17},
|
|
22
|
-
{"0.0.7", 15}, {"0.0.6", 9}, {"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3", 1}};
|
|
16
|
+
return {{"0.12.0", 40}, {"0.12.2", 40}};
|
|
23
17
|
}
|
|
24
18
|
|
|
25
19
|
static LBUG_API storage_version_t getStorageVersion();
|
|
@@ -107,7 +107,7 @@ private:
|
|
|
107
107
|
|
|
108
108
|
class StorageManager;
|
|
109
109
|
|
|
110
|
-
class LBUG_API NodeTable
|
|
110
|
+
class LBUG_API NodeTable : public Table {
|
|
111
111
|
public:
|
|
112
112
|
NodeTable(const StorageManager* storageManager,
|
|
113
113
|
const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* mm);
|
|
@@ -119,6 +119,11 @@ public:
|
|
|
119
119
|
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
120
120
|
common::table_id_t tableID, common::offset_t startOffset) const;
|
|
121
121
|
|
|
122
|
+
// Virtual method for operator-level scan coordination initialization
|
|
123
|
+
// Called once per scan operation (not per scan state)
|
|
124
|
+
virtual void initializeScanCoordination(
|
|
125
|
+
[[maybe_unused]] const transaction::Transaction* transaction) {}
|
|
126
|
+
|
|
122
127
|
bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
|
|
123
128
|
template<bool lock = true>
|
|
124
129
|
bool lookup(const transaction::Transaction* transaction, const TableScanState& scanState) const;
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <mutex>
|
|
4
|
+
#include <vector>
|
|
5
|
+
|
|
6
|
+
#include "catalog/catalog_entry/node_table_catalog_entry.h"
|
|
7
|
+
#include "common/exception/runtime.h"
|
|
8
|
+
#include "common/types/internal_id_util.h"
|
|
9
|
+
#include "common/types/value/value.h"
|
|
10
|
+
#include "processor/operator/persistent/reader/parquet/parquet_reader.h"
|
|
11
|
+
#include "storage/table/node_table.h"
|
|
12
|
+
|
|
13
|
+
namespace lbug {
|
|
14
|
+
namespace storage {
|
|
15
|
+
|
|
16
|
+
struct ParquetNodeTableScanState final : NodeTableScanState {
|
|
17
|
+
std::unique_ptr<processor::ParquetReader> parquetReader;
|
|
18
|
+
std::unique_ptr<processor::ParquetReaderScanState> parquetScanState;
|
|
19
|
+
bool initialized = false;
|
|
20
|
+
bool scanCompleted = false; // Track if this scan state has finished reading
|
|
21
|
+
bool dataRead = false;
|
|
22
|
+
std::vector<std::vector<std::unique_ptr<common::Value>>> allData;
|
|
23
|
+
size_t totalRows = 0;
|
|
24
|
+
size_t nextRowToDistribute = 0;
|
|
25
|
+
uint64_t lastQueryId = 0; // Track the last query ID to detect new queries
|
|
26
|
+
|
|
27
|
+
ParquetNodeTableScanState([[maybe_unused]] MemoryManager& mm, common::ValueVector* nodeIDVector,
|
|
28
|
+
std::vector<common::ValueVector*> outputVectors,
|
|
29
|
+
std::shared_ptr<common::DataChunkState> outChunkState)
|
|
30
|
+
: NodeTableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} {
|
|
31
|
+
parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
|
|
32
|
+
}
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
// Shared state to coordinate row group assignment across parallel scan states
|
|
36
|
+
struct ParquetNodeTableSharedState {
|
|
37
|
+
std::mutex mtx;
|
|
38
|
+
common::node_group_idx_t currentRowGroupIdx = 0;
|
|
39
|
+
common::node_group_idx_t numRowGroups = 0;
|
|
40
|
+
|
|
41
|
+
void reset(common::node_group_idx_t totalRowGroups) {
|
|
42
|
+
std::lock_guard<std::mutex> lock(mtx);
|
|
43
|
+
currentRowGroupIdx = 0;
|
|
44
|
+
numRowGroups = totalRowGroups;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
bool getNextRowGroup(common::node_group_idx_t& assignedRowGroupIdx) {
|
|
48
|
+
std::lock_guard<std::mutex> lock(mtx);
|
|
49
|
+
if (currentRowGroupIdx < numRowGroups) {
|
|
50
|
+
assignedRowGroupIdx = currentRowGroupIdx++;
|
|
51
|
+
return true;
|
|
52
|
+
}
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
class ParquetNodeTable final : public NodeTable {
|
|
58
|
+
public:
|
|
59
|
+
ParquetNodeTable(const StorageManager* storageManager,
|
|
60
|
+
const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager);
|
|
61
|
+
|
|
62
|
+
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
63
|
+
bool resetCachedBoundNodeSelVec = true) const override;
|
|
64
|
+
|
|
65
|
+
// Override to reset shared state for row group coordination at the start of each scan operation
|
|
66
|
+
void initializeScanCoordination(const transaction::Transaction* transaction) override;
|
|
67
|
+
|
|
68
|
+
bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
|
|
69
|
+
|
|
70
|
+
// For parquet-backed tables, we don't support modifications
|
|
71
|
+
void insert([[maybe_unused]] transaction::Transaction* transaction,
|
|
72
|
+
[[maybe_unused]] TableInsertState& insertState) override {
|
|
73
|
+
throw common::RuntimeException("Cannot insert into parquet-backed node table");
|
|
74
|
+
}
|
|
75
|
+
void update([[maybe_unused]] transaction::Transaction* transaction,
|
|
76
|
+
[[maybe_unused]] TableUpdateState& updateState) override {
|
|
77
|
+
throw common::RuntimeException("Cannot update parquet-backed node table");
|
|
78
|
+
}
|
|
79
|
+
bool delete_([[maybe_unused]] transaction::Transaction* transaction,
|
|
80
|
+
[[maybe_unused]] TableDeleteState& deleteState) override {
|
|
81
|
+
throw common::RuntimeException("Cannot delete from parquet-backed node table");
|
|
82
|
+
return false;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
|
|
86
|
+
|
|
87
|
+
const std::string& getParquetFilePath() const { return parquetFilePath; }
|
|
88
|
+
|
|
89
|
+
// Note: Cannot override getNumCommittedNodeGroups since it's not virtual in base class
|
|
90
|
+
// Will need a different approach
|
|
91
|
+
|
|
92
|
+
private:
|
|
93
|
+
std::string parquetFilePath;
|
|
94
|
+
const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry;
|
|
95
|
+
mutable std::unique_ptr<ParquetNodeTableSharedState> sharedState;
|
|
96
|
+
|
|
97
|
+
void initializeParquetReader(transaction::Transaction* transaction) const;
|
|
98
|
+
void initParquetScanForRowGroup(transaction::Transaction* transaction,
|
|
99
|
+
ParquetNodeTableScanState& scanState) const;
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
} // namespace storage
|
|
103
|
+
} // namespace lbug
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <memory>
#include <mutex>
#include <string>
#include <unordered_set>
#include <vector>

#include "catalog/catalog_entry/rel_group_catalog_entry.h"
#include "common/exception/runtime.h"
#include "common/types/internal_id_util.h"
#include "processor/operator/persistent/reader/parquet/parquet_reader.h"
#include "storage/table/rel_table.h"
#include "transaction/transaction.h"
|
|
9
|
+
|
|
10
|
+
namespace lbug {
|
|
11
|
+
namespace storage {
|
|
12
|
+
|
|
13
|
+
struct ParquetRelTableScanState final : RelTableScanState {
|
|
14
|
+
std::unique_ptr<processor::ParquetReaderScanState> parquetScanState;
|
|
15
|
+
// For CSR format: store matching rows for current bound node
|
|
16
|
+
size_t nextRowToProcess = 0;
|
|
17
|
+
|
|
18
|
+
// Row group range for morsel-driven parallelism
|
|
19
|
+
uint64_t startRowGroup = 0;
|
|
20
|
+
uint64_t endRowGroup = 0;
|
|
21
|
+
uint64_t currentRowGroup = 0;
|
|
22
|
+
|
|
23
|
+
// Per-scan-state readers for thread safety
|
|
24
|
+
std::unique_ptr<processor::ParquetReader> indicesReader;
|
|
25
|
+
std::unique_ptr<processor::ParquetReader> indptrReader;
|
|
26
|
+
|
|
27
|
+
ParquetRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector,
|
|
28
|
+
std::vector<common::ValueVector*> outputVectors,
|
|
29
|
+
std::shared_ptr<common::DataChunkState> outChunkState)
|
|
30
|
+
: RelTableScanState{mm, nodeIDVector, std::move(outputVectors), std::move(outChunkState)} {
|
|
31
|
+
parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
void setToTable(const transaction::Transaction* transaction, Table* table_,
|
|
35
|
+
std::vector<common::column_id_t> columnIDs_,
|
|
36
|
+
std::vector<ColumnPredicateSet> columnPredicateSets_,
|
|
37
|
+
common::RelDataDirection direction_) override;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
class ParquetRelTable final : public RelTable {
|
|
41
|
+
public:
|
|
42
|
+
ParquetRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID,
|
|
43
|
+
common::table_id_t toTableID, const StorageManager* storageManager,
|
|
44
|
+
MemoryManager* memoryManager);
|
|
45
|
+
|
|
46
|
+
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
47
|
+
bool resetCachedBoundNodeSelVec = true) const override;
|
|
48
|
+
|
|
49
|
+
bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
|
|
50
|
+
|
|
51
|
+
// For parquet-backed tables, we don't support modifications
|
|
52
|
+
void insert([[maybe_unused]] transaction::Transaction* transaction,
|
|
53
|
+
[[maybe_unused]] TableInsertState& insertState) override {
|
|
54
|
+
throw common::RuntimeException("Cannot insert into parquet-backed rel table");
|
|
55
|
+
}
|
|
56
|
+
void update([[maybe_unused]] transaction::Transaction* transaction,
|
|
57
|
+
[[maybe_unused]] TableUpdateState& updateState) override {
|
|
58
|
+
throw common::RuntimeException("Cannot update parquet-backed rel table");
|
|
59
|
+
}
|
|
60
|
+
bool delete_([[maybe_unused]] transaction::Transaction* transaction,
|
|
61
|
+
[[maybe_unused]] TableDeleteState& deleteState) override {
|
|
62
|
+
throw common::RuntimeException("Cannot delete from parquet-backed rel table");
|
|
63
|
+
return false;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
|
|
67
|
+
|
|
68
|
+
private:
|
|
69
|
+
catalog::RelGroupCatalogEntry* relGroupEntry; // Store reference to table schema
|
|
70
|
+
std::string indicesFilePath;
|
|
71
|
+
std::string indptrFilePath;
|
|
72
|
+
mutable std::unique_ptr<processor::ParquetReader> indicesReader;
|
|
73
|
+
mutable std::unique_ptr<processor::ParquetReader> indptrReader;
|
|
74
|
+
mutable std::mutex parquetReaderMutex;
|
|
75
|
+
mutable std::mutex indptrDataMutex;
|
|
76
|
+
mutable std::vector<common::offset_t> indptrData; // Cached indptr data for CSR format
|
|
77
|
+
|
|
78
|
+
void initializeParquetReaders(transaction::Transaction* transaction) const;
|
|
79
|
+
void initializeIndptrReader(transaction::Transaction* transaction) const;
|
|
80
|
+
void loadIndptrData(transaction::Transaction* transaction) const;
|
|
81
|
+
bool scanInternalByRowGroups(transaction::Transaction* transaction,
|
|
82
|
+
ParquetRelTableScanState& parquetRelScanState);
|
|
83
|
+
bool scanRowGroupForBoundNodes(transaction::Transaction* transaction,
|
|
84
|
+
ParquetRelTableScanState& parquetRelScanState,
|
|
85
|
+
const std::vector<uint64_t>& rowGroupsToProcess,
|
|
86
|
+
const std::unordered_set<common::offset_t>& boundNodeOffsets);
|
|
87
|
+
common::offset_t findSourceNodeForRow(common::offset_t globalRowIdx) const;
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
} // namespace storage
|
|
91
|
+
} // namespace lbug
|
|
@@ -48,7 +48,7 @@ struct RelTableScanState : TableScanState {
|
|
|
48
48
|
nodeGroupScanState = std::make_unique<CSRNodeGroupScanState>();
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
-
void setToTable(const transaction::Transaction* transaction, Table* table_,
|
|
51
|
+
virtual void setToTable(const transaction::Transaction* transaction, Table* table_,
|
|
52
52
|
std::vector<common::column_id_t> columnIDs_,
|
|
53
53
|
std::vector<ColumnPredicateSet> columnPredicateSets_,
|
|
54
54
|
common::RelDataDirection direction_) override;
|
|
@@ -138,7 +138,7 @@ struct LBUG_API RelTableDeleteState final : TableDeleteState {
|
|
|
138
138
|
relIDVector{relIDVector}, detachDeleteDirection{detachDeleteDirection} {}
|
|
139
139
|
};
|
|
140
140
|
|
|
141
|
-
class LBUG_API RelTable
|
|
141
|
+
class LBUG_API RelTable : public Table {
|
|
142
142
|
public:
|
|
143
143
|
using rel_multiplicity_constraint_throw_func_t =
|
|
144
144
|
std::function<void(const std::string&, common::offset_t, common::RelDataDirection)>;
|
|
@@ -130,6 +130,8 @@ public:
|
|
|
130
130
|
return getMinUncommittedNodeOffset(tableID) + localRowIdx;
|
|
131
131
|
}
|
|
132
132
|
|
|
133
|
+
main::ClientContext* getClientContext() const { return clientContext; }
|
|
134
|
+
|
|
133
135
|
void pushCreateDropCatalogEntry(catalog::CatalogSet& catalogSet,
|
|
134
136
|
catalog::CatalogEntry& catalogEntry, bool isInternal, bool skipLoggingToWAL = false);
|
|
135
137
|
void pushAlterCatalogEntry(catalog::CatalogSet& catalogSet, catalog::CatalogEntry& catalogEntry,
|
|
@@ -79,8 +79,8 @@ std::string MaterializedQueryResult::toString() const {
|
|
|
79
79
|
result += "\n";
|
|
80
80
|
auto tuple_ = FlatTuple(this->columnTypes);
|
|
81
81
|
auto iterator_ = FactorizedTableIterator(*table);
|
|
82
|
-
while (
|
|
83
|
-
|
|
82
|
+
while (iterator_.hasNext()) {
|
|
83
|
+
iterator_.getNext(tuple_);
|
|
84
84
|
result += tuple_.toString();
|
|
85
85
|
}
|
|
86
86
|
return result;
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
#include "optimizer/count_rel_table_optimizer.h"
|
|
2
|
+
|
|
3
|
+
#include "binder/expression/aggregate_function_expression.h"
|
|
4
|
+
#include "binder/expression/node_expression.h"
|
|
5
|
+
#include "catalog/catalog_entry/node_table_id_pair.h"
|
|
6
|
+
#include "function/aggregate/count_star.h"
|
|
7
|
+
#include "main/client_context.h"
|
|
8
|
+
#include "planner/operator/extend/logical_extend.h"
|
|
9
|
+
#include "planner/operator/logical_aggregate.h"
|
|
10
|
+
#include "planner/operator/logical_projection.h"
|
|
11
|
+
#include "planner/operator/scan/logical_count_rel_table.h"
|
|
12
|
+
#include "planner/operator/scan/logical_scan_node_table.h"
|
|
13
|
+
|
|
14
|
+
using namespace lbug::common;
|
|
15
|
+
using namespace lbug::planner;
|
|
16
|
+
using namespace lbug::binder;
|
|
17
|
+
using namespace lbug::catalog;
|
|
18
|
+
|
|
19
|
+
namespace lbug {
|
|
20
|
+
namespace optimizer {
|
|
21
|
+
|
|
22
|
+
// Entry point of the pass: rewrites the operator tree of `plan`.
// visitOperator() returns the (possibly replaced) operator for the node it was
// given. Child replacements are installed via setChild inside visitOperator, but
// a replacement of the root itself is only visible through the return value, so
// it has to be stored back into the plan instead of being discarded.
// NOTE(review): relies on LogicalPlan::setLastOperator(std::shared_ptr<LogicalOperator>)
// being available — confirm against planner/operator/logical_plan.h.
void CountRelTableOptimizer::rewrite(LogicalPlan* plan) {
    plan->setLastOperator(visitOperator(plan->getLastOperator()));
}
|
|
25
|
+
|
|
26
|
+
std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitOperator(
|
|
27
|
+
const std::shared_ptr<LogicalOperator>& op) {
|
|
28
|
+
// bottom-up traversal
|
|
29
|
+
for (auto i = 0u; i < op->getNumChildren(); ++i) {
|
|
30
|
+
op->setChild(i, visitOperator(op->getChild(i)));
|
|
31
|
+
}
|
|
32
|
+
auto result = visitOperatorReplaceSwitch(op);
|
|
33
|
+
result->computeFlatSchema();
|
|
34
|
+
return result;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Returns true only when `op` is an AGGREGATE operator without keys whose single
// aggregate expression is a non-DISTINCT COUNT_STAR function.
bool CountRelTableOptimizer::isSimpleCountStar(LogicalOperator* op) const {
    const bool isAggregate = op->getOperatorType() == LogicalOperatorType::AGGREGATE;
    if (!isAggregate) {
        return false;
    }
    auto& aggregateOp = op->constCast<LogicalAggregate>();

    // Keys mean GROUP BY, which is outside the pattern.
    if (aggregateOp.hasKeys()) {
        return false;
    }

    // Exactly one aggregate expression is required.
    auto aggregateExprs = aggregateOp.getAggregates();
    if (aggregateExprs.size() != 1) {
        return false;
    }

    auto& soleExpr = aggregateExprs[0];
    if (soleExpr->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
        return false;
    }

    auto& countCandidate = soleExpr->constCast<AggregateFunctionExpression>();
    const bool namedCountStar =
        !(countCandidate.getFunction().name != function::CountStarFunction::name);
    if (!namedCountStar) {
        return false;
    }

    // A DISTINCT COUNT_STAR is rejected (conceptually it doesn't make sense).
    return !countCandidate.isDistinct();
}
|
|
71
|
+
|
|
72
|
+
// Checks whether the subtree below `aggregate` has the shape this pass can replace:
//
//   AGGREGATE (COUNT_STAR, no keys)
//     -> PROJECTION* (each either empty or projecting only aggregate-function expressions)
//       -> EXTEND (not BOTH direction, rel not multi-labeled, no properties)
//         -> SCAN_NODE_TABLE (no properties)
//
// The aggregate itself is not inspected here; only its first child chain is.
bool CountRelTableOptimizer::canOptimize(LogicalOperator* aggregate) const {
    auto* cursor = aggregate->getChild(0).get();

    // Walk down through any stack of projections, rejecting the plan as soon as
    // one of them projects something other than an aggregate-function expression.
    for (; cursor->getOperatorType() == LogicalOperatorType::PROJECTION;
         cursor = cursor->getChild(0).get()) {
        auto& projection = cursor->constCast<LogicalProjection>();
        if (projection.getExpressionsToProject().empty()) {
            // Empty projection is a plain passthrough.
            continue;
        }
        for (auto& projected : projection.getExpressionsToProject()) {
            if (projected->expressionType != ExpressionType::AGGREGATE_FUNCTION) {
                return false;
            }
        }
    }

    // The first non-projection operator has to be an EXTEND.
    if (cursor->getOperatorType() != LogicalOperatorType::EXTEND) {
        return false;
    }
    auto& extendOp = cursor->constCast<LogicalExtend>();

    // Undirected patterns such as (a)-[e]-(b) scan both directions; replacing them
    // would need special handling to avoid double counting, so they are left alone.
    if (extendOp.getDirection() == ExtendDirection::BOTH) {
        return false;
    }

    // Only a single (not multi-labeled) rel is handled.
    if (extendOp.getRel()->isMultiLabeled()) {
        return false;
    }

    // Any scanned rel property disqualifies the plan.
    if (!extendOp.getProperties().empty()) {
        return false;
    }

    // Below the EXTEND there must be a SCAN_NODE_TABLE that scans no properties.
    auto* belowExtend = cursor->getChild(0).get();
    if (belowExtend->getOperatorType() != LogicalOperatorType::SCAN_NODE_TABLE) {
        return false;
    }
    auto& nodeScan = belowExtend->constCast<LogicalScanNodeTable>();
    return nodeScan.getProperties().empty();
}
|
|
138
|
+
|
|
139
|
+
std::shared_ptr<LogicalOperator> CountRelTableOptimizer::visitAggregateReplace(
|
|
140
|
+
std::shared_ptr<LogicalOperator> op) {
|
|
141
|
+
if (!isSimpleCountStar(op.get())) {
|
|
142
|
+
return op;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
if (!canOptimize(op.get())) {
|
|
146
|
+
return op;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// Find the EXTEND operator
|
|
150
|
+
auto* current = op->getChild(0).get();
|
|
151
|
+
while (current->getOperatorType() == LogicalOperatorType::PROJECTION) {
|
|
152
|
+
current = current->getChild(0).get();
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
KU_ASSERT(current->getOperatorType() == LogicalOperatorType::EXTEND);
|
|
156
|
+
auto& extend = current->constCast<LogicalExtend>();
|
|
157
|
+
auto rel = extend.getRel();
|
|
158
|
+
auto boundNode = extend.getBoundNode();
|
|
159
|
+
auto nbrNode = extend.getNbrNode();
|
|
160
|
+
|
|
161
|
+
// Get the rel group entry
|
|
162
|
+
KU_ASSERT(rel->getNumEntries() == 1);
|
|
163
|
+
auto* relGroupEntry = rel->getEntry(0)->ptrCast<RelGroupCatalogEntry>();
|
|
164
|
+
|
|
165
|
+
// Determine the source and destination node table IDs based on extend direction.
|
|
166
|
+
// If extendFromSource is true, then boundNode is the source and nbrNode is the destination.
|
|
167
|
+
// If extendFromSource is false, then boundNode is the destination and nbrNode is the source.
|
|
168
|
+
auto boundNodeTableIDs = boundNode->getTableIDsSet();
|
|
169
|
+
auto nbrNodeTableIDs = nbrNode->getTableIDsSet();
|
|
170
|
+
|
|
171
|
+
// Get only the rel table IDs that match the specific node table ID pairs in the query.
|
|
172
|
+
// A rel table connects a specific (srcTableID, dstTableID) pair.
|
|
173
|
+
std::vector<table_id_t> relTableIDs;
|
|
174
|
+
for (auto& info : relGroupEntry->getRelEntryInfos()) {
|
|
175
|
+
table_id_t srcTableID = info.nodePair.srcTableID;
|
|
176
|
+
table_id_t dstTableID = info.nodePair.dstTableID;
|
|
177
|
+
|
|
178
|
+
bool matches = false;
|
|
179
|
+
if (extend.extendFromSourceNode()) {
|
|
180
|
+
// boundNode is src, nbrNode is dst
|
|
181
|
+
matches =
|
|
182
|
+
boundNodeTableIDs.contains(srcTableID) && nbrNodeTableIDs.contains(dstTableID);
|
|
183
|
+
} else {
|
|
184
|
+
// boundNode is dst, nbrNode is src
|
|
185
|
+
matches =
|
|
186
|
+
boundNodeTableIDs.contains(dstTableID) && nbrNodeTableIDs.contains(srcTableID);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
if (matches) {
|
|
190
|
+
relTableIDs.push_back(info.oid);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// If no matching rel tables, don't optimize (shouldn't happen for valid queries)
|
|
195
|
+
if (relTableIDs.empty()) {
|
|
196
|
+
return op;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Get the count expression from the original aggregate
|
|
200
|
+
auto& aggregate = op->constCast<LogicalAggregate>();
|
|
201
|
+
auto countExpr = aggregate.getAggregates()[0];
|
|
202
|
+
|
|
203
|
+
// Get the bound node table IDs as a vector
|
|
204
|
+
std::vector<table_id_t> boundNodeTableIDsVec(boundNodeTableIDs.begin(),
|
|
205
|
+
boundNodeTableIDs.end());
|
|
206
|
+
|
|
207
|
+
// Create the new COUNT_REL_TABLE operator with all necessary information for scanning
|
|
208
|
+
auto countRelTable =
|
|
209
|
+
std::make_shared<LogicalCountRelTable>(relGroupEntry, std::move(relTableIDs),
|
|
210
|
+
std::move(boundNodeTableIDsVec), boundNode, extend.getDirection(), countExpr);
|
|
211
|
+
countRelTable->computeFlatSchema();
|
|
212
|
+
|
|
213
|
+
return countRelTable;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
} // namespace optimizer
|
|
217
|
+
} // namespace lbug
|
|
@@ -19,6 +19,9 @@ void LogicalOperatorVisitor::visitOperatorSwitch(LogicalOperator* op) {
|
|
|
19
19
|
case LogicalOperatorType::COPY_TO: {
|
|
20
20
|
visitCopyTo(op);
|
|
21
21
|
} break;
|
|
22
|
+
case LogicalOperatorType::COUNT_REL_TABLE: {
|
|
23
|
+
visitCountRelTable(op);
|
|
24
|
+
} break;
|
|
22
25
|
case LogicalOperatorType::DELETE: {
|
|
23
26
|
visitDelete(op);
|
|
24
27
|
} break;
|
|
@@ -108,6 +111,9 @@ std::shared_ptr<LogicalOperator> LogicalOperatorVisitor::visitOperatorReplaceSwi
|
|
|
108
111
|
case LogicalOperatorType::COPY_TO: {
|
|
109
112
|
return visitCopyToReplace(op);
|
|
110
113
|
}
|
|
114
|
+
case LogicalOperatorType::COUNT_REL_TABLE: {
|
|
115
|
+
return visitCountRelTableReplace(op);
|
|
116
|
+
}
|
|
111
117
|
case LogicalOperatorType::DELETE: {
|
|
112
118
|
return visitDeleteReplace(op);
|
|
113
119
|
}
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
#include "optimizer/agg_key_dependency_optimizer.h"
|
|
6
6
|
#include "optimizer/cardinality_updater.h"
|
|
7
7
|
#include "optimizer/correlated_subquery_unnest_solver.h"
|
|
8
|
+
#include "optimizer/count_rel_table_optimizer.h"
|
|
8
9
|
#include "optimizer/factorization_rewriter.h"
|
|
9
10
|
#include "optimizer/filter_push_down_optimizer.h"
|
|
10
11
|
#include "optimizer/limit_push_down_optimizer.h"
|
|
@@ -32,6 +33,11 @@ void Optimizer::optimize(planner::LogicalPlan* plan, main::ClientContext* contex
|
|
|
32
33
|
auto removeUnnecessaryJoinOptimizer = RemoveUnnecessaryJoinOptimizer();
|
|
33
34
|
removeUnnecessaryJoinOptimizer.rewrite(plan);
|
|
34
35
|
|
|
36
|
+
// CountRelTableOptimizer should be applied early before other optimizations
|
|
37
|
+
// that might change the plan structure.
|
|
38
|
+
auto countRelTableOptimizer = CountRelTableOptimizer(context);
|
|
39
|
+
countRelTableOptimizer.rewrite(plan);
|
|
40
|
+
|
|
35
41
|
auto filterPushDownOptimizer = FilterPushDownOptimizer(context);
|
|
36
42
|
filterPushDownOptimizer.rewrite(plan);
|
|
37
43
|
|
|
@@ -81,7 +81,12 @@ std::unique_ptr<Statement> Transformer::transformCreateNodeTable(
|
|
|
81
81
|
} else {
|
|
82
82
|
createTableInfo.propertyDefinitions =
|
|
83
83
|
transformPropertyDefinitions(*ctx.kU_PropertyDefinitions());
|
|
84
|
-
|
|
84
|
+
options_t options;
|
|
85
|
+
if (ctx.kU_Options()) {
|
|
86
|
+
options = transformOptions(*ctx.kU_Options());
|
|
87
|
+
}
|
|
88
|
+
createTableInfo.extraInfo =
|
|
89
|
+
std::make_unique<ExtraCreateNodeTableInfo>(getPKName(ctx), std::move(options));
|
|
85
90
|
return std::make_unique<CreateTable>(std::move(createTableInfo));
|
|
86
91
|
}
|
|
87
92
|
}
|
|
@@ -22,6 +22,8 @@ std::string LogicalOperatorUtils::logicalOperatorTypeToString(LogicalOperatorTyp
|
|
|
22
22
|
return "COPY_FROM";
|
|
23
23
|
case LogicalOperatorType::COPY_TO:
|
|
24
24
|
return "COPY_TO";
|
|
25
|
+
case LogicalOperatorType::COUNT_REL_TABLE:
|
|
26
|
+
return "COUNT_REL_TABLE";
|
|
25
27
|
case LogicalOperatorType::CREATE_MACRO:
|
|
26
28
|
return "CREATE_MACRO";
|
|
27
29
|
case LogicalOperatorType::CREATE_SEQUENCE:
|