lbug 0.12.3-dev.15 → 0.12.3-dev.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lbug-source/CMakeLists.txt +1 -1
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
- package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
- package/lbug-source/scripts/antlr4/hash.md5 +1 -1
- package/lbug-source/src/antlr4/Cypher.g4 +1 -1
- package/lbug-source/src/binder/bind/bind_ddl.cpp +23 -13
- package/lbug-source/src/catalog/catalog.cpp +5 -4
- package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
- package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +7 -0
- package/lbug-source/src/function/function_collection.cpp +2 -1
- package/lbug-source/src/function/table/CMakeLists.txt +1 -0
- package/lbug-source/src/function/table/disk_size_info.cpp +322 -0
- package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +10 -6
- package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
- package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +4 -2
- package/lbug-source/src/include/common/constants.h +1 -0
- package/lbug-source/src/include/function/table/simple_table_function.h +6 -0
- package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
- package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
- package/lbug-source/src/include/storage/storage_manager.h +1 -0
- package/lbug-source/src/include/storage/table/node_table.h +6 -1
- package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
- package/lbug-source/src/include/storage/table/parquet_rel_table.h +99 -0
- package/lbug-source/src/include/storage/table/rel_table.h +2 -2
- package/lbug-source/src/include/transaction/transaction.h +2 -0
- package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
- package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
- package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
- package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
- package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +12 -2
- package/lbug-source/src/storage/storage_manager.cpp +40 -6
- package/lbug-source/src/storage/table/CMakeLists.txt +2 -0
- package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
- package/lbug-source/src/storage/table/parquet_rel_table.cpp +470 -0
- package/lbug-source/test/include/test_runner/test_group.h +11 -1
- package/lbug-source/test/runner/e2e_test.cpp +7 -1
- package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +43 -0
- package/lbug-source/test/test_helper/test_helper.cpp +24 -0
- package/lbug-source/test/test_runner/test_parser.cpp +3 -0
- package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2761 -2701
- package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -0
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
|
@@ -34,10 +34,10 @@ public:
|
|
|
34
34
|
RelGroupCatalogEntry() = default;
|
|
35
35
|
RelGroupCatalogEntry(std::string tableName, common::RelMultiplicity srcMultiplicity,
|
|
36
36
|
common::RelMultiplicity dstMultiplicity, common::ExtendDirection storageDirection,
|
|
37
|
-
std::vector<RelTableCatalogInfo> relTableInfos)
|
|
37
|
+
std::vector<RelTableCatalogInfo> relTableInfos, std::string storage = "")
|
|
38
38
|
: TableCatalogEntry{type_, std::move(tableName)}, srcMultiplicity{srcMultiplicity},
|
|
39
39
|
dstMultiplicity{dstMultiplicity}, storageDirection{storageDirection},
|
|
40
|
-
relTableInfos{std::move(relTableInfos)} {
|
|
40
|
+
relTableInfos{std::move(relTableInfos)}, storage{std::move(storage)} {
|
|
41
41
|
propertyCollection =
|
|
42
42
|
PropertyDefinitionCollection{1}; // Skip NBR_NODE_ID column as the first one.
|
|
43
43
|
}
|
|
@@ -53,6 +53,7 @@ public:
|
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
common::ExtendDirection getStorageDirection() const { return storageDirection; }
|
|
56
|
+
const std::string& getStorage() const { return storage; }
|
|
56
57
|
|
|
57
58
|
common::idx_t getNumRelTables() const { return relTableInfos.size(); }
|
|
58
59
|
const std::vector<RelTableCatalogInfo>& getRelEntryInfos() const { return relTableInfos; }
|
|
@@ -97,6 +98,7 @@ private:
|
|
|
97
98
|
// TODO(Guodong): Avoid using extend direction for storage direction
|
|
98
99
|
common::ExtendDirection storageDirection = common::ExtendDirection::BOTH;
|
|
99
100
|
std::vector<RelTableCatalogInfo> relTableInfos;
|
|
101
|
+
std::string storage;
|
|
100
102
|
};
|
|
101
103
|
|
|
102
104
|
} // namespace catalog
|
|
@@ -134,6 +134,12 @@ struct FileInfoFunction final {
|
|
|
134
134
|
static function_set getFunctionSet();
|
|
135
135
|
};
|
|
136
136
|
|
|
137
|
+
struct DiskSizeInfoFunction final {
|
|
138
|
+
static constexpr const char* name = "DISK_SIZE_INFO";
|
|
139
|
+
|
|
140
|
+
static function_set getFunctionSet();
|
|
141
|
+
};
|
|
142
|
+
|
|
137
143
|
struct ShowAttachedDatabasesFunction final {
|
|
138
144
|
static constexpr const char* name = "SHOW_ATTACHED_DATABASES";
|
|
139
145
|
|
|
@@ -36,8 +36,10 @@ struct CreateTableInfo {
|
|
|
36
36
|
|
|
37
37
|
struct ExtraCreateNodeTableInfo final : ExtraCreateTableInfo {
|
|
38
38
|
std::string pKName;
|
|
39
|
+
options_t options;
|
|
39
40
|
|
|
40
|
-
explicit ExtraCreateNodeTableInfo(std::string pKName
|
|
41
|
+
explicit ExtraCreateNodeTableInfo(std::string pKName, options_t options = {})
|
|
42
|
+
: pKName{std::move(pKName)}, options{std::move(options)} {}
|
|
41
43
|
};
|
|
42
44
|
|
|
43
45
|
struct ExtraCreateRelTableGroupInfo final : ExtraCreateTableInfo {
|
|
@@ -24,7 +24,7 @@ public:
|
|
|
24
24
|
void initialize(const transaction::Transaction* transaction, storage::NodeTable* table,
|
|
25
25
|
ScanNodeTableProgressSharedState& progressSharedState);
|
|
26
26
|
|
|
27
|
-
void nextMorsel(storage::
|
|
27
|
+
void nextMorsel(storage::TableScanState& scanState,
|
|
28
28
|
ScanNodeTableProgressSharedState& progressSharedState);
|
|
29
29
|
|
|
30
30
|
common::SemiMask* getSemiMask() const { return semiMask.get(); }
|
|
@@ -116,7 +116,7 @@ private:
|
|
|
116
116
|
|
|
117
117
|
private:
|
|
118
118
|
common::idx_t currentTableIdx;
|
|
119
|
-
std::unique_ptr<storage::
|
|
119
|
+
std::unique_ptr<storage::TableScanState> scanState;
|
|
120
120
|
std::vector<ScanNodeTableInfo> tableInfos;
|
|
121
121
|
std::vector<std::shared_ptr<ScanNodeTableSharedState>> sharedStates;
|
|
122
122
|
std::shared_ptr<ScanNodeTableProgressSharedState> progressSharedState;
|
|
@@ -107,7 +107,7 @@ private:
|
|
|
107
107
|
|
|
108
108
|
class StorageManager;
|
|
109
109
|
|
|
110
|
-
class LBUG_API NodeTable
|
|
110
|
+
class LBUG_API NodeTable : public Table {
|
|
111
111
|
public:
|
|
112
112
|
NodeTable(const StorageManager* storageManager,
|
|
113
113
|
const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* mm);
|
|
@@ -119,6 +119,11 @@ public:
|
|
|
119
119
|
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
120
120
|
common::table_id_t tableID, common::offset_t startOffset) const;
|
|
121
121
|
|
|
122
|
+
// Virtual method for operator-level scan coordination initialization
|
|
123
|
+
// Called once per scan operation (not per scan state)
|
|
124
|
+
virtual void initializeScanCoordination(
|
|
125
|
+
[[maybe_unused]] const transaction::Transaction* transaction) {}
|
|
126
|
+
|
|
122
127
|
bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
|
|
123
128
|
template<bool lock = true>
|
|
124
129
|
bool lookup(const transaction::Transaction* transaction, const TableScanState& scanState) const;
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <mutex>
|
|
4
|
+
#include <vector>
|
|
5
|
+
|
|
6
|
+
#include "catalog/catalog_entry/node_table_catalog_entry.h"
|
|
7
|
+
#include "common/exception/runtime.h"
|
|
8
|
+
#include "common/types/internal_id_util.h"
|
|
9
|
+
#include "common/types/value/value.h"
|
|
10
|
+
#include "processor/operator/persistent/reader/parquet/parquet_reader.h"
|
|
11
|
+
#include "storage/table/node_table.h"
|
|
12
|
+
|
|
13
|
+
namespace lbug {
|
|
14
|
+
namespace storage {
|
|
15
|
+
|
|
16
|
+
struct ParquetNodeTableScanState final : NodeTableScanState {
|
|
17
|
+
std::unique_ptr<processor::ParquetReader> parquetReader;
|
|
18
|
+
std::unique_ptr<processor::ParquetReaderScanState> parquetScanState;
|
|
19
|
+
bool initialized = false;
|
|
20
|
+
bool scanCompleted = false; // Track if this scan state has finished reading
|
|
21
|
+
bool dataRead = false;
|
|
22
|
+
std::vector<std::vector<std::unique_ptr<common::Value>>> allData;
|
|
23
|
+
size_t totalRows = 0;
|
|
24
|
+
size_t nextRowToDistribute = 0;
|
|
25
|
+
uint64_t lastQueryId = 0; // Track the last query ID to detect new queries
|
|
26
|
+
|
|
27
|
+
ParquetNodeTableScanState([[maybe_unused]] MemoryManager& mm, common::ValueVector* nodeIDVector,
|
|
28
|
+
std::vector<common::ValueVector*> outputVectors,
|
|
29
|
+
std::shared_ptr<common::DataChunkState> outChunkState)
|
|
30
|
+
: NodeTableScanState{nodeIDVector, std::move(outputVectors), std::move(outChunkState)} {
|
|
31
|
+
parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
|
|
32
|
+
}
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
// Shared state to coordinate row group assignment across parallel scan states
|
|
36
|
+
struct ParquetNodeTableSharedState {
|
|
37
|
+
std::mutex mtx;
|
|
38
|
+
common::node_group_idx_t currentRowGroupIdx = 0;
|
|
39
|
+
common::node_group_idx_t numRowGroups = 0;
|
|
40
|
+
|
|
41
|
+
void reset(common::node_group_idx_t totalRowGroups) {
|
|
42
|
+
std::lock_guard<std::mutex> lock(mtx);
|
|
43
|
+
currentRowGroupIdx = 0;
|
|
44
|
+
numRowGroups = totalRowGroups;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
bool getNextRowGroup(common::node_group_idx_t& assignedRowGroupIdx) {
|
|
48
|
+
std::lock_guard<std::mutex> lock(mtx);
|
|
49
|
+
if (currentRowGroupIdx < numRowGroups) {
|
|
50
|
+
assignedRowGroupIdx = currentRowGroupIdx++;
|
|
51
|
+
return true;
|
|
52
|
+
}
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
class ParquetNodeTable final : public NodeTable {
|
|
58
|
+
public:
|
|
59
|
+
ParquetNodeTable(const StorageManager* storageManager,
|
|
60
|
+
const catalog::NodeTableCatalogEntry* nodeTableEntry, MemoryManager* memoryManager);
|
|
61
|
+
|
|
62
|
+
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
63
|
+
bool resetCachedBoundNodeSelVec = true) const override;
|
|
64
|
+
|
|
65
|
+
// Override to reset shared state for row group coordination at the start of each scan operation
|
|
66
|
+
void initializeScanCoordination(const transaction::Transaction* transaction) override;
|
|
67
|
+
|
|
68
|
+
bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
|
|
69
|
+
|
|
70
|
+
// For parquet-backed tables, we don't support modifications
|
|
71
|
+
void insert([[maybe_unused]] transaction::Transaction* transaction,
|
|
72
|
+
[[maybe_unused]] TableInsertState& insertState) override {
|
|
73
|
+
throw common::RuntimeException("Cannot insert into parquet-backed node table");
|
|
74
|
+
}
|
|
75
|
+
void update([[maybe_unused]] transaction::Transaction* transaction,
|
|
76
|
+
[[maybe_unused]] TableUpdateState& updateState) override {
|
|
77
|
+
throw common::RuntimeException("Cannot update parquet-backed node table");
|
|
78
|
+
}
|
|
79
|
+
bool delete_([[maybe_unused]] transaction::Transaction* transaction,
|
|
80
|
+
[[maybe_unused]] TableDeleteState& deleteState) override {
|
|
81
|
+
throw common::RuntimeException("Cannot delete from parquet-backed node table");
|
|
82
|
+
return false;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
|
|
86
|
+
|
|
87
|
+
const std::string& getParquetFilePath() const { return parquetFilePath; }
|
|
88
|
+
|
|
89
|
+
// Note: Cannot override getNumCommittedNodeGroups since it's not virtual in base class
|
|
90
|
+
// Will need a different approach
|
|
91
|
+
|
|
92
|
+
private:
|
|
93
|
+
std::string parquetFilePath;
|
|
94
|
+
const catalog::NodeTableCatalogEntry* nodeTableCatalogEntry;
|
|
95
|
+
mutable std::unique_ptr<ParquetNodeTableSharedState> sharedState;
|
|
96
|
+
|
|
97
|
+
void initializeParquetReader(transaction::Transaction* transaction) const;
|
|
98
|
+
void initParquetScanForRowGroup(transaction::Transaction* transaction,
|
|
99
|
+
ParquetNodeTableScanState& scanState) const;
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
} // namespace storage
|
|
103
|
+
} // namespace lbug
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "catalog/catalog_entry/rel_group_catalog_entry.h"
|
|
4
|
+
#include "common/exception/runtime.h"
|
|
5
|
+
#include "common/types/internal_id_util.h"
|
|
6
|
+
#include "processor/operator/persistent/reader/parquet/parquet_reader.h"
|
|
7
|
+
#include "storage/table/rel_table.h"
|
|
8
|
+
#include "transaction/transaction.h"
|
|
9
|
+
|
|
10
|
+
namespace lbug {
|
|
11
|
+
namespace storage {
|
|
12
|
+
|
|
13
|
+
struct ParquetRelTableScanState final : RelTableScanState {
|
|
14
|
+
std::unique_ptr<processor::ParquetReaderScanState> parquetScanState;
|
|
15
|
+
// For CSR format: store matching rows for current bound node
|
|
16
|
+
size_t nextRowToProcess = 0;
|
|
17
|
+
|
|
18
|
+
// Row group range for morsel-driven parallelism
|
|
19
|
+
uint64_t startRowGroup = 0;
|
|
20
|
+
uint64_t endRowGroup = 0;
|
|
21
|
+
uint64_t currentRowGroup = 0;
|
|
22
|
+
|
|
23
|
+
// Per-scan-state readers for thread safety
|
|
24
|
+
std::unique_ptr<processor::ParquetReader> nodeMappingReader;
|
|
25
|
+
std::unique_ptr<processor::ParquetReader> indicesReader;
|
|
26
|
+
std::unique_ptr<processor::ParquetReader> indptrReader;
|
|
27
|
+
|
|
28
|
+
ParquetRelTableScanState(MemoryManager& mm, common::ValueVector* nodeIDVector,
|
|
29
|
+
std::vector<common::ValueVector*> outputVectors,
|
|
30
|
+
std::shared_ptr<common::DataChunkState> outChunkState)
|
|
31
|
+
: RelTableScanState{mm, nodeIDVector, std::move(outputVectors), std::move(outChunkState)} {
|
|
32
|
+
parquetScanState = std::make_unique<processor::ParquetReaderScanState>();
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
void setToTable(const transaction::Transaction* transaction, Table* table_,
|
|
36
|
+
std::vector<common::column_id_t> columnIDs_,
|
|
37
|
+
std::vector<ColumnPredicateSet> columnPredicateSets_,
|
|
38
|
+
common::RelDataDirection direction_) override;
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
class ParquetRelTable final : public RelTable {
|
|
42
|
+
public:
|
|
43
|
+
ParquetRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID,
|
|
44
|
+
common::table_id_t toTableID, const StorageManager* storageManager,
|
|
45
|
+
MemoryManager* memoryManager, std::string fromNodeTableName);
|
|
46
|
+
|
|
47
|
+
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
48
|
+
bool resetCachedBoundNodeSelVec = true) const override;
|
|
49
|
+
|
|
50
|
+
bool scanInternal(transaction::Transaction* transaction, TableScanState& scanState) override;
|
|
51
|
+
|
|
52
|
+
// For parquet-backed tables, we don't support modifications
|
|
53
|
+
void insert([[maybe_unused]] transaction::Transaction* transaction,
|
|
54
|
+
[[maybe_unused]] TableInsertState& insertState) override {
|
|
55
|
+
throw common::RuntimeException("Cannot insert into parquet-backed rel table");
|
|
56
|
+
}
|
|
57
|
+
void update([[maybe_unused]] transaction::Transaction* transaction,
|
|
58
|
+
[[maybe_unused]] TableUpdateState& updateState) override {
|
|
59
|
+
throw common::RuntimeException("Cannot update parquet-backed rel table");
|
|
60
|
+
}
|
|
61
|
+
bool delete_([[maybe_unused]] transaction::Transaction* transaction,
|
|
62
|
+
[[maybe_unused]] TableDeleteState& deleteState) override {
|
|
63
|
+
throw common::RuntimeException("Cannot delete from parquet-backed rel table");
|
|
64
|
+
return false;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
common::row_idx_t getNumTotalRows(const transaction::Transaction* transaction) override;
|
|
68
|
+
|
|
69
|
+
private:
|
|
70
|
+
catalog::RelGroupCatalogEntry* relGroupEntry; // Store reference to table schema
|
|
71
|
+
std::string nodeMappingFilePath;
|
|
72
|
+
std::string indicesFilePath;
|
|
73
|
+
std::string indptrFilePath;
|
|
74
|
+
mutable std::unique_ptr<processor::ParquetReader> nodeMappingReader;
|
|
75
|
+
mutable std::unique_ptr<processor::ParquetReader> indicesReader;
|
|
76
|
+
mutable std::unique_ptr<processor::ParquetReader> indptrReader;
|
|
77
|
+
mutable std::mutex parquetReaderMutex;
|
|
78
|
+
mutable std::mutex indptrDataMutex;
|
|
79
|
+
mutable std::vector<common::offset_t> indptrData; // Cached indptr data for CSR format
|
|
80
|
+
mutable common::internal_id_map_t<common::offset_t>
|
|
81
|
+
nodeMapping; // Maps node IDs to CSR node IDs
|
|
82
|
+
mutable std::unordered_map<common::offset_t, common::offset_t>
|
|
83
|
+
csrToNodeTableIdMap; // Reverse mapping: CSR node ID to node table ID
|
|
84
|
+
|
|
85
|
+
void initializeParquetReaders(transaction::Transaction* transaction) const;
|
|
86
|
+
void initializeIndptrReader(transaction::Transaction* transaction) const;
|
|
87
|
+
void loadIndptrData(transaction::Transaction* transaction) const;
|
|
88
|
+
void loadNodeMappingData(transaction::Transaction* transaction) const;
|
|
89
|
+
bool scanInternalByRowGroups(transaction::Transaction* transaction,
|
|
90
|
+
ParquetRelTableScanState& parquetRelScanState);
|
|
91
|
+
bool scanRowGroupForBoundNodes(transaction::Transaction* transaction,
|
|
92
|
+
ParquetRelTableScanState& parquetRelScanState,
|
|
93
|
+
const std::vector<uint64_t>& rowGroupsToProcess,
|
|
94
|
+
const std::unordered_set<common::offset_t>& boundNodeOffsets);
|
|
95
|
+
common::offset_t findSourceNodeForRow(common::offset_t globalRowIdx) const;
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
} // namespace storage
|
|
99
|
+
} // namespace lbug
|
|
@@ -48,7 +48,7 @@ struct RelTableScanState : TableScanState {
|
|
|
48
48
|
nodeGroupScanState = std::make_unique<CSRNodeGroupScanState>();
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
-
void setToTable(const transaction::Transaction* transaction, Table* table_,
|
|
51
|
+
virtual void setToTable(const transaction::Transaction* transaction, Table* table_,
|
|
52
52
|
std::vector<common::column_id_t> columnIDs_,
|
|
53
53
|
std::vector<ColumnPredicateSet> columnPredicateSets_,
|
|
54
54
|
common::RelDataDirection direction_) override;
|
|
@@ -138,7 +138,7 @@ struct LBUG_API RelTableDeleteState final : TableDeleteState {
|
|
|
138
138
|
relIDVector{relIDVector}, detachDeleteDirection{detachDeleteDirection} {}
|
|
139
139
|
};
|
|
140
140
|
|
|
141
|
-
class LBUG_API RelTable
|
|
141
|
+
class LBUG_API RelTable : public Table {
|
|
142
142
|
public:
|
|
143
143
|
using rel_multiplicity_constraint_throw_func_t =
|
|
144
144
|
std::function<void(const std::string&, common::offset_t, common::RelDataDirection)>;
|
|
@@ -130,6 +130,8 @@ public:
|
|
|
130
130
|
return getMinUncommittedNodeOffset(tableID) + localRowIdx;
|
|
131
131
|
}
|
|
132
132
|
|
|
133
|
+
main::ClientContext* getClientContext() const { return clientContext; }
|
|
134
|
+
|
|
133
135
|
void pushCreateDropCatalogEntry(catalog::CatalogSet& catalogSet,
|
|
134
136
|
catalog::CatalogEntry& catalogEntry, bool isInternal, bool skipLoggingToWAL = false);
|
|
135
137
|
void pushAlterCatalogEntry(catalog::CatalogSet& catalogSet, catalog::CatalogEntry& catalogEntry,
|
|
@@ -81,7 +81,12 @@ std::unique_ptr<Statement> Transformer::transformCreateNodeTable(
|
|
|
81
81
|
} else {
|
|
82
82
|
createTableInfo.propertyDefinitions =
|
|
83
83
|
transformPropertyDefinitions(*ctx.kU_PropertyDefinitions());
|
|
84
|
-
|
|
84
|
+
options_t options;
|
|
85
|
+
if (ctx.kU_Options()) {
|
|
86
|
+
options = transformOptions(*ctx.kU_Options());
|
|
87
|
+
}
|
|
88
|
+
createTableInfo.extraInfo =
|
|
89
|
+
std::make_unique<ExtraCreateNodeTableInfo>(getPKName(ctx), std::move(options));
|
|
85
90
|
return std::make_unique<CreateTable>(std::move(createTableInfo));
|
|
86
91
|
}
|
|
87
92
|
}
|
|
@@ -340,6 +340,10 @@ std::unique_ptr<ColumnReader> ParquetReader::createReader() {
|
|
|
340
340
|
throw CopyException{"Root element of Parquet file must be a struct"};
|
|
341
341
|
}
|
|
342
342
|
// LCOV_EXCL_STOP
|
|
343
|
+
// Clear existing column metadata before populating (in case createReader is called multiple
|
|
344
|
+
// times)
|
|
345
|
+
columnNames.clear();
|
|
346
|
+
columnTypes.clear();
|
|
343
347
|
for (auto& field : StructType::getFields(rootReader->getDataType())) {
|
|
344
348
|
columnNames.push_back(field.getName());
|
|
345
349
|
columnTypes.push_back(field.getType().copy());
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
#include "processor/execution_context.h"
|
|
4
4
|
#include "storage/local_storage/local_storage.h"
|
|
5
|
+
#include "storage/table/parquet_rel_table.h"
|
|
5
6
|
|
|
6
7
|
using namespace lbug::common;
|
|
7
8
|
using namespace lbug::storage;
|
|
@@ -54,8 +55,29 @@ void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionCo
|
|
|
54
55
|
auto clientContext = context->clientContext;
|
|
55
56
|
boundNodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get();
|
|
56
57
|
auto nbrNodeIDVector = outVectors[0];
|
|
57
|
-
|
|
58
|
-
|
|
58
|
+
|
|
59
|
+
// Check if any table in any scanner is a ParquetRelTable
|
|
60
|
+
bool hasParquetTable = false;
|
|
61
|
+
for (auto& [_, scanner] : scanners) {
|
|
62
|
+
for (auto& relInfo : scanner.relInfos) {
|
|
63
|
+
if (dynamic_cast<storage::ParquetRelTable*>(relInfo.table) != nullptr) {
|
|
64
|
+
hasParquetTable = true;
|
|
65
|
+
break;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
if (hasParquetTable)
|
|
69
|
+
break;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// Create appropriate scan state type
|
|
73
|
+
if (hasParquetTable) {
|
|
74
|
+
scanState =
|
|
75
|
+
std::make_unique<storage::ParquetRelTableScanState>(*MemoryManager::Get(*clientContext),
|
|
76
|
+
boundNodeIDVector, outVectors, nbrNodeIDVector->state);
|
|
77
|
+
} else {
|
|
78
|
+
scanState = std::make_unique<RelTableScanState>(*MemoryManager::Get(*clientContext),
|
|
79
|
+
boundNodeIDVector, outVectors, nbrNodeIDVector->state);
|
|
80
|
+
}
|
|
59
81
|
for (auto& [_, scanner] : scanners) {
|
|
60
82
|
for (auto& relInfo : scanner.relInfos) {
|
|
61
83
|
if (directionInfo.directionPos.isValid()) {
|
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
#include "binder/expression/expression_util.h"
|
|
4
4
|
#include "processor/execution_context.h"
|
|
5
|
+
#include "storage/buffer_manager/memory_manager.h"
|
|
5
6
|
#include "storage/local_storage/local_node_table.h"
|
|
6
7
|
#include "storage/local_storage/local_storage.h"
|
|
8
|
+
#include "storage/table/parquet_node_table.h"
|
|
7
9
|
|
|
8
10
|
using namespace lbug::common;
|
|
9
11
|
using namespace lbug::storage;
|
|
@@ -35,7 +37,23 @@ void ScanNodeTableSharedState::initialize(const transaction::Transaction* transa
|
|
|
35
37
|
this->table = table;
|
|
36
38
|
this->currentCommittedGroupIdx = 0;
|
|
37
39
|
this->currentUnCommittedGroupIdx = 0;
|
|
38
|
-
|
|
40
|
+
|
|
41
|
+
// Initialize table-specific scan coordination (e.g., for ParquetNodeTable)
|
|
42
|
+
table->initializeScanCoordination(transaction);
|
|
43
|
+
|
|
44
|
+
if (const auto parquetTable = dynamic_cast<ParquetNodeTable*>(table)) {
|
|
45
|
+
// For parquet tables, set numCommittedNodeGroups to number of row groups
|
|
46
|
+
std::vector<bool> columnSkips;
|
|
47
|
+
try {
|
|
48
|
+
auto tempReader = std::make_unique<processor::ParquetReader>(
|
|
49
|
+
parquetTable->getParquetFilePath(), columnSkips, transaction->getClientContext());
|
|
50
|
+
this->numCommittedNodeGroups = tempReader->getNumRowsGroups();
|
|
51
|
+
} catch (const std::exception& e) {
|
|
52
|
+
this->numCommittedNodeGroups = 1;
|
|
53
|
+
}
|
|
54
|
+
} else {
|
|
55
|
+
this->numCommittedNodeGroups = table->getNumCommittedNodeGroups();
|
|
56
|
+
}
|
|
39
57
|
if (transaction->isWriteTransaction()) {
|
|
40
58
|
if (const auto localTable =
|
|
41
59
|
transaction->getLocalStorage()->getLocalTable(this->table->getTableID())) {
|
|
@@ -46,21 +64,23 @@ void ScanNodeTableSharedState::initialize(const transaction::Transaction* transa
|
|
|
46
64
|
progressSharedState.numGroups += numCommittedNodeGroups;
|
|
47
65
|
}
|
|
48
66
|
|
|
49
|
-
void ScanNodeTableSharedState::nextMorsel(
|
|
67
|
+
void ScanNodeTableSharedState::nextMorsel(TableScanState& scanState,
|
|
50
68
|
ScanNodeTableProgressSharedState& progressSharedState) {
|
|
51
69
|
std::unique_lock lck{mtx};
|
|
70
|
+
// Cast to NodeTableScanState since we know this is for node tables
|
|
71
|
+
auto& nodeScanState = scanState.cast<NodeTableScanState>();
|
|
52
72
|
if (currentCommittedGroupIdx < numCommittedNodeGroups) {
|
|
53
|
-
|
|
73
|
+
nodeScanState.nodeGroupIdx = currentCommittedGroupIdx++;
|
|
54
74
|
progressSharedState.numGroupsScanned++;
|
|
55
|
-
|
|
75
|
+
nodeScanState.source = TableScanSource::COMMITTED;
|
|
56
76
|
return;
|
|
57
77
|
}
|
|
58
78
|
if (currentUnCommittedGroupIdx < numUnCommittedNodeGroups) {
|
|
59
|
-
|
|
60
|
-
|
|
79
|
+
nodeScanState.nodeGroupIdx = currentUnCommittedGroupIdx++;
|
|
80
|
+
nodeScanState.source = TableScanSource::UNCOMMITTED;
|
|
61
81
|
return;
|
|
62
82
|
}
|
|
63
|
-
|
|
83
|
+
nodeScanState.source = TableScanSource::NONE;
|
|
64
84
|
}
|
|
65
85
|
|
|
66
86
|
table_id_map_t<SemiMask*> ScanNodeTable::getSemiMasks() const {
|
|
@@ -82,7 +102,18 @@ void ScanNodeTableInfo::initScanState(TableScanState& scanState,
|
|
|
82
102
|
void ScanNodeTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) {
|
|
83
103
|
ScanTable::initLocalStateInternal(resultSet, context);
|
|
84
104
|
auto nodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get();
|
|
85
|
-
|
|
105
|
+
|
|
106
|
+
// Check if the first table is a ParquetNodeTable and create appropriate scan state
|
|
107
|
+
auto* parquetTable = dynamic_cast<ParquetNodeTable*>(tableInfos[0].table);
|
|
108
|
+
if (parquetTable) {
|
|
109
|
+
scanState = std::make_unique<ParquetNodeTableScanState>(
|
|
110
|
+
*MemoryManager::Get(*context->clientContext), nodeIDVector, outVectors,
|
|
111
|
+
nodeIDVector->state);
|
|
112
|
+
} else {
|
|
113
|
+
scanState =
|
|
114
|
+
std::make_unique<NodeTableScanState>(nodeIDVector, outVectors, nodeIDVector->state);
|
|
115
|
+
}
|
|
116
|
+
|
|
86
117
|
currentTableIdx = 0;
|
|
87
118
|
initCurrentTable(context);
|
|
88
119
|
}
|
|
@@ -91,6 +122,11 @@ void ScanNodeTable::initCurrentTable(ExecutionContext* context) {
|
|
|
91
122
|
auto& currentInfo = tableInfos[currentTableIdx];
|
|
92
123
|
currentInfo.initScanState(*scanState, outVectors, context->clientContext);
|
|
93
124
|
scanState->semiMask = sharedStates[currentTableIdx]->getSemiMask();
|
|
125
|
+
// Call table->initScanState for ParquetNodeTable
|
|
126
|
+
if (dynamic_cast<ParquetNodeTable*>(tableInfos[currentTableIdx].table)) {
|
|
127
|
+
auto transaction = transaction::Transaction::Get(*context->clientContext);
|
|
128
|
+
tableInfos[currentTableIdx].table->initScanState(transaction, *scanState);
|
|
129
|
+
}
|
|
94
130
|
}
|
|
95
131
|
|
|
96
132
|
void ScanNodeTable::initGlobalStateInternal(ExecutionContext* context) {
|
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
#include "binder/expression/expression_util.h"
|
|
4
4
|
#include "processor/execution_context.h"
|
|
5
|
+
#include "storage/buffer_manager/memory_manager.h"
|
|
5
6
|
#include "storage/local_storage/local_rel_table.h"
|
|
7
|
+
#include "storage/table/parquet_rel_table.h"
|
|
6
8
|
|
|
7
9
|
using namespace lbug::common;
|
|
8
10
|
using namespace lbug::storage;
|
|
@@ -66,8 +68,16 @@ void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext
|
|
|
66
68
|
auto clientContext = context->clientContext;
|
|
67
69
|
auto boundNodeIDVector = resultSet->getValueVector(opInfo.nodeIDPos).get();
|
|
68
70
|
auto nbrNodeIDVector = outVectors[0];
|
|
69
|
-
|
|
70
|
-
|
|
71
|
+
// Check if this is a ParquetRelTable and create appropriate scan state
|
|
72
|
+
auto* parquetTable = dynamic_cast<storage::ParquetRelTable*>(tableInfo.table);
|
|
73
|
+
if (parquetTable) {
|
|
74
|
+
scanState =
|
|
75
|
+
std::make_unique<storage::ParquetRelTableScanState>(*MemoryManager::Get(*clientContext),
|
|
76
|
+
boundNodeIDVector, outVectors, nbrNodeIDVector->state);
|
|
77
|
+
} else {
|
|
78
|
+
scanState = std::make_unique<RelTableScanState>(*MemoryManager::Get(*clientContext),
|
|
79
|
+
boundNodeIDVector, outVectors, nbrNodeIDVector->state);
|
|
80
|
+
}
|
|
71
81
|
tableInfo.initScanState(*scanState, outVectors, clientContext);
|
|
72
82
|
}
|
|
73
83
|
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
#include "storage/buffer_manager/memory_manager.h"
|
|
14
14
|
#include "storage/checkpointer.h"
|
|
15
15
|
#include "storage/table/node_table.h"
|
|
16
|
+
#include "storage/table/parquet_node_table.h"
|
|
17
|
+
#include "storage/table/parquet_rel_table.h"
|
|
16
18
|
#include "storage/table/rel_table.h"
|
|
17
19
|
#include "storage/wal/wal_replayer.h"
|
|
18
20
|
#include "transaction/transaction.h"
|
|
@@ -77,15 +79,31 @@ void StorageManager::recover(main::ClientContext& clientContext, bool throwOnWal
|
|
|
77
79
|
}
|
|
78
80
|
|
|
79
81
|
void StorageManager::createNodeTable(NodeTableCatalogEntry* entry) {
|
|
80
|
-
|
|
82
|
+
tableNameCache[entry->getTableID()] = entry->getName();
|
|
83
|
+
if (!entry->getStorage().empty()) {
|
|
84
|
+
// Create parquet-backed node table
|
|
85
|
+
tables[entry->getTableID()] =
|
|
86
|
+
std::make_unique<ParquetNodeTable>(this, entry, &memoryManager);
|
|
87
|
+
} else {
|
|
88
|
+
// Create regular node table
|
|
89
|
+
tables[entry->getTableID()] = std::make_unique<NodeTable>(this, entry, &memoryManager);
|
|
90
|
+
}
|
|
81
91
|
}
|
|
82
92
|
|
|
83
93
|
// TODO(Guodong): This API is added since storageManager doesn't provide an API to add a single
|
|
84
94
|
// rel table. We may have to refactor the existing StorageManager::createTable(TableCatalogEntry*
|
|
85
95
|
// entry).
|
|
86
96
|
void StorageManager::addRelTable(RelGroupCatalogEntry* entry, const RelTableCatalogInfo& info) {
|
|
87
|
-
|
|
88
|
-
|
|
97
|
+
if (!entry->getStorage().empty()) {
|
|
98
|
+
// Create parquet-backed rel table
|
|
99
|
+
std::string fromNodeTableName = tableNameCache.at(info.nodePair.srcTableID);
|
|
100
|
+
tables[info.oid] = std::make_unique<ParquetRelTable>(entry, info.nodePair.srcTableID,
|
|
101
|
+
info.nodePair.dstTableID, this, &memoryManager, fromNodeTableName);
|
|
102
|
+
} else {
|
|
103
|
+
// Create regular rel table
|
|
104
|
+
tables[info.oid] = std::make_unique<RelTable>(entry, info.nodePair.srcTableID,
|
|
105
|
+
info.nodePair.dstTableID, this, &memoryManager);
|
|
106
|
+
}
|
|
89
107
|
}
|
|
90
108
|
|
|
91
109
|
void StorageManager::createRelTableGroup(RelGroupCatalogEntry* entry) {
|
|
@@ -257,7 +275,14 @@ void StorageManager::deserialize(main::ClientContext* context, const Catalog* ca
|
|
|
257
275
|
KU_ASSERT(!tables.contains(tableID));
|
|
258
276
|
auto tableEntry = catalog->getTableCatalogEntry(&DUMMY_TRANSACTION, tableID)
|
|
259
277
|
->ptrCast<NodeTableCatalogEntry>();
|
|
260
|
-
|
|
278
|
+
tableNameCache[tableID] = tableEntry->getName();
|
|
279
|
+
if (!tableEntry->getStorage().empty()) {
|
|
280
|
+
// Create parquet-backed node table
|
|
281
|
+
tables[tableID] = std::make_unique<ParquetNodeTable>(this, tableEntry, &memoryManager);
|
|
282
|
+
} else {
|
|
283
|
+
// Create regular node table
|
|
284
|
+
tables[tableID] = std::make_unique<NodeTable>(this, tableEntry, &memoryManager);
|
|
285
|
+
}
|
|
261
286
|
tables[tableID]->deserialize(context, this, deSer);
|
|
262
287
|
}
|
|
263
288
|
deSer.validateDebuggingInfo(key, "num_rel_groups");
|
|
@@ -279,8 +304,17 @@ void StorageManager::deserialize(main::ClientContext* context, const Catalog* ca
|
|
|
279
304
|
for (auto k = 0u; k < numInnerRelTables; k++) {
|
|
280
305
|
RelTableCatalogInfo info = RelTableCatalogInfo::deserialize(deSer);
|
|
281
306
|
KU_ASSERT(!tables.contains(info.oid));
|
|
282
|
-
|
|
283
|
-
|
|
307
|
+
if (!relGroupEntry->getStorage().empty()) {
|
|
308
|
+
// Create parquet-backed rel table
|
|
309
|
+
std::string fromNodeTableName = tableNameCache.at(info.nodePair.srcTableID);
|
|
310
|
+
tables[info.oid] =
|
|
311
|
+
std::make_unique<ParquetRelTable>(relGroupEntry, info.nodePair.srcTableID,
|
|
312
|
+
info.nodePair.dstTableID, this, &memoryManager, fromNodeTableName);
|
|
313
|
+
} else {
|
|
314
|
+
// Create regular rel table
|
|
315
|
+
tables[info.oid] = std::make_unique<RelTable>(relGroupEntry,
|
|
316
|
+
info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager);
|
|
317
|
+
}
|
|
284
318
|
tables.at(info.oid)->deserialize(context, this, deSer);
|
|
285
319
|
}
|
|
286
320
|
}
|