lbug 0.12.3-dev.17 → 0.12.3-dev.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lbug-source/CMakeLists.txt +1 -1
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
- package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
- package/lbug-source/src/include/storage/table/parquet_rel_table.h +1 -9
- package/lbug-source/src/storage/storage_manager.cpp +3 -6
- package/lbug-source/src/storage/table/parquet_rel_table.cpp +4 -86
- package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +49 -15
- package/package.json +1 -1
- package/prebuilt/lbugjs-darwin-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-arm64.node +0 -0
- package/prebuilt/lbugjs-linux-x64.node +0 -0
- package/prebuilt/lbugjs-win32-x64.node +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -21,7 +21,6 @@ struct ParquetRelTableScanState final : RelTableScanState {
|
|
|
21
21
|
uint64_t currentRowGroup = 0;
|
|
22
22
|
|
|
23
23
|
// Per-scan-state readers for thread safety
|
|
24
|
-
std::unique_ptr<processor::ParquetReader> nodeMappingReader;
|
|
25
24
|
std::unique_ptr<processor::ParquetReader> indicesReader;
|
|
26
25
|
std::unique_ptr<processor::ParquetReader> indptrReader;
|
|
27
26
|
|
|
@@ -42,7 +41,7 @@ class ParquetRelTable final : public RelTable {
|
|
|
42
41
|
public:
|
|
43
42
|
ParquetRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID,
|
|
44
43
|
common::table_id_t toTableID, const StorageManager* storageManager,
|
|
45
|
-
MemoryManager* memoryManager
|
|
44
|
+
MemoryManager* memoryManager);
|
|
46
45
|
|
|
47
46
|
void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
|
|
48
47
|
bool resetCachedBoundNodeSelVec = true) const override;
|
|
@@ -68,24 +67,17 @@ public:
|
|
|
68
67
|
|
|
69
68
|
private:
|
|
70
69
|
catalog::RelGroupCatalogEntry* relGroupEntry; // Store reference to table schema
|
|
71
|
-
std::string nodeMappingFilePath;
|
|
72
70
|
std::string indicesFilePath;
|
|
73
71
|
std::string indptrFilePath;
|
|
74
|
-
mutable std::unique_ptr<processor::ParquetReader> nodeMappingReader;
|
|
75
72
|
mutable std::unique_ptr<processor::ParquetReader> indicesReader;
|
|
76
73
|
mutable std::unique_ptr<processor::ParquetReader> indptrReader;
|
|
77
74
|
mutable std::mutex parquetReaderMutex;
|
|
78
75
|
mutable std::mutex indptrDataMutex;
|
|
79
76
|
mutable std::vector<common::offset_t> indptrData; // Cached indptr data for CSR format
|
|
80
|
-
mutable common::internal_id_map_t<common::offset_t>
|
|
81
|
-
nodeMapping; // Maps node IDs to CSR node IDs
|
|
82
|
-
mutable std::unordered_map<common::offset_t, common::offset_t>
|
|
83
|
-
csrToNodeTableIdMap; // Reverse mapping: CSR node ID to node table ID
|
|
84
77
|
|
|
85
78
|
void initializeParquetReaders(transaction::Transaction* transaction) const;
|
|
86
79
|
void initializeIndptrReader(transaction::Transaction* transaction) const;
|
|
87
80
|
void loadIndptrData(transaction::Transaction* transaction) const;
|
|
88
|
-
void loadNodeMappingData(transaction::Transaction* transaction) const;
|
|
89
81
|
bool scanInternalByRowGroups(transaction::Transaction* transaction,
|
|
90
82
|
ParquetRelTableScanState& parquetRelScanState);
|
|
91
83
|
bool scanRowGroupForBoundNodes(transaction::Transaction* transaction,
|
|
@@ -96,9 +96,8 @@ void StorageManager::createNodeTable(NodeTableCatalogEntry* entry) {
|
|
|
96
96
|
void StorageManager::addRelTable(RelGroupCatalogEntry* entry, const RelTableCatalogInfo& info) {
|
|
97
97
|
if (!entry->getStorage().empty()) {
|
|
98
98
|
// Create parquet-backed rel table
|
|
99
|
-
std::string fromNodeTableName = tableNameCache.at(info.nodePair.srcTableID);
|
|
100
99
|
tables[info.oid] = std::make_unique<ParquetRelTable>(entry, info.nodePair.srcTableID,
|
|
101
|
-
info.nodePair.dstTableID, this, &memoryManager
|
|
100
|
+
info.nodePair.dstTableID, this, &memoryManager);
|
|
102
101
|
} else {
|
|
103
102
|
// Create regular rel table
|
|
104
103
|
tables[info.oid] = std::make_unique<RelTable>(entry, info.nodePair.srcTableID,
|
|
@@ -306,10 +305,8 @@ void StorageManager::deserialize(main::ClientContext* context, const Catalog* ca
|
|
|
306
305
|
KU_ASSERT(!tables.contains(info.oid));
|
|
307
306
|
if (!relGroupEntry->getStorage().empty()) {
|
|
308
307
|
// Create parquet-backed rel table
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
std::make_unique<ParquetRelTable>(relGroupEntry, info.nodePair.srcTableID,
|
|
312
|
-
info.nodePair.dstTableID, this, &memoryManager, fromNodeTableName);
|
|
308
|
+
tables[info.oid] = std::make_unique<ParquetRelTable>(relGroupEntry,
|
|
309
|
+
info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager);
|
|
313
310
|
} else {
|
|
314
311
|
// Create regular rel table
|
|
315
312
|
tables[info.oid] = std::make_unique<RelTable>(relGroupEntry,
|
|
@@ -42,8 +42,7 @@ void ParquetRelTableScanState::setToTable(const Transaction* transaction, Table*
|
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
ParquetRelTable::ParquetRelTable(RelGroupCatalogEntry* relGroupEntry, table_id_t fromTableID,
|
|
45
|
-
table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager
|
|
46
|
-
std::string fromNodeTableName)
|
|
45
|
+
table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager)
|
|
47
46
|
: RelTable{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager},
|
|
48
47
|
relGroupEntry{relGroupEntry} {
|
|
49
48
|
std::string storage = relGroupEntry->getStorage();
|
|
@@ -58,7 +57,6 @@ ParquetRelTable::ParquetRelTable(RelGroupCatalogEntry* relGroupEntry, table_id_t
|
|
|
58
57
|
// prefix_indices_{relName}.parquet, prefix_indptr_{relName}.parquet,
|
|
59
58
|
// prefix_metadata_{relName}.parquet
|
|
60
59
|
std::string prefix = storage;
|
|
61
|
-
nodeMappingFilePath = prefix + "_mapping_" + fromNodeTableName + ".parquet";
|
|
62
60
|
indicesFilePath = prefix + "_indices_" + relName + ".parquet";
|
|
63
61
|
indptrFilePath = prefix + "_indptr_" + relName + ".parquet";
|
|
64
62
|
}
|
|
@@ -75,12 +73,6 @@ void ParquetRelTable::initScanState(Transaction* transaction, TableScanState& sc
|
|
|
75
73
|
auto& parquetRelScanState = static_cast<ParquetRelTableScanState&>(relScanState);
|
|
76
74
|
|
|
77
75
|
// Initialize readers if not already done for this scan state
|
|
78
|
-
if (!parquetRelScanState.nodeMappingReader) {
|
|
79
|
-
std::vector<bool> columnSkips; // Read all columns
|
|
80
|
-
auto context = transaction->getClientContext();
|
|
81
|
-
parquetRelScanState.nodeMappingReader =
|
|
82
|
-
std::make_unique<ParquetReader>(nodeMappingFilePath, columnSkips, context);
|
|
83
|
-
}
|
|
84
76
|
if (!parquetRelScanState.indicesReader) {
|
|
85
77
|
std::vector<bool> columnSkips; // Read all columns
|
|
86
78
|
auto context = transaction->getClientContext();
|
|
@@ -94,8 +86,7 @@ void ParquetRelTable::initScanState(Transaction* transaction, TableScanState& sc
|
|
|
94
86
|
std::make_unique<ParquetReader>(indptrFilePath, columnSkips, context);
|
|
95
87
|
}
|
|
96
88
|
|
|
97
|
-
// Load shared data
|
|
98
|
-
loadNodeMappingData(transaction);
|
|
89
|
+
// Load shared indptr data - thread-safe to read
|
|
99
90
|
if (!indptrFilePath.empty()) {
|
|
100
91
|
loadIndptrData(transaction);
|
|
101
92
|
}
|
|
@@ -127,14 +118,8 @@ void ParquetRelTable::initScanState(Transaction* transaction, TableScanState& sc
|
|
|
127
118
|
}
|
|
128
119
|
|
|
129
120
|
void ParquetRelTable::initializeParquetReaders(Transaction* transaction) const {
|
|
130
|
-
if (!
|
|
121
|
+
if (!indicesReader) {
|
|
131
122
|
std::lock_guard lock(parquetReaderMutex);
|
|
132
|
-
if (!nodeMappingReader) {
|
|
133
|
-
std::vector<bool> columnSkips; // Read all columns
|
|
134
|
-
auto context = transaction->getClientContext();
|
|
135
|
-
nodeMappingReader =
|
|
136
|
-
std::make_unique<ParquetReader>(nodeMappingFilePath, columnSkips, context);
|
|
137
|
-
}
|
|
138
123
|
if (!indicesReader) {
|
|
139
124
|
std::vector<bool> columnSkips; // Read all columns
|
|
140
125
|
auto context = transaction->getClientContext();
|
|
@@ -154,71 +139,6 @@ void ParquetRelTable::initializeIndptrReader(Transaction* transaction) const {
|
|
|
154
139
|
}
|
|
155
140
|
}
|
|
156
141
|
|
|
157
|
-
void ParquetRelTable::loadNodeMappingData(Transaction* transaction) const {
|
|
158
|
-
if (nodeMapping.empty() && !nodeMappingFilePath.empty()) {
|
|
159
|
-
std::lock_guard lock(parquetReaderMutex);
|
|
160
|
-
if (nodeMapping.empty()) {
|
|
161
|
-
// Initialize node mapping reader if not already done
|
|
162
|
-
if (!nodeMappingReader) {
|
|
163
|
-
std::vector<bool> columnSkips; // Read all columns
|
|
164
|
-
auto context = transaction->getClientContext();
|
|
165
|
-
nodeMappingReader =
|
|
166
|
-
std::make_unique<ParquetReader>(nodeMappingFilePath, columnSkips, context);
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
// Initialize scan to populate column types
|
|
170
|
-
auto context = transaction->getClientContext();
|
|
171
|
-
auto vfs = VirtualFileSystem::GetUnsafe(*context);
|
|
172
|
-
std::vector<uint64_t> groupsToRead;
|
|
173
|
-
for (uint64_t i = 0; i < nodeMappingReader->getNumRowsGroups(); ++i) {
|
|
174
|
-
groupsToRead.push_back(i);
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
ParquetReaderScanState scanState;
|
|
178
|
-
nodeMappingReader->initializeScan(scanState, groupsToRead, vfs);
|
|
179
|
-
|
|
180
|
-
// Check if the node mapping file has columns
|
|
181
|
-
auto numColumns = nodeMappingReader->getNumColumns();
|
|
182
|
-
if (numColumns < 2) {
|
|
183
|
-
throw RuntimeException("Node mapping parquet file must have at least 2 columns");
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
// Validate column types for node mapping
|
|
187
|
-
const auto& csrNodeIdType = nodeMappingReader->getColumnType(0);
|
|
188
|
-
const auto& nodeTableIdType = nodeMappingReader->getColumnType(1);
|
|
189
|
-
if (!LogicalTypeUtils::isIntegral(csrNodeIdType.getLogicalTypeID()) ||
|
|
190
|
-
!LogicalTypeUtils::isIntegral(nodeTableIdType.getLogicalTypeID())) {
|
|
191
|
-
throw RuntimeException(
|
|
192
|
-
"Node mapping parquet file columns must be integer types (columns 0 and 1)");
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
// Read the node mapping data
|
|
196
|
-
DataChunk dataChunk(2);
|
|
197
|
-
|
|
198
|
-
// Get column types
|
|
199
|
-
for (uint32_t i = 0; i < 2 && i < numColumns; ++i) {
|
|
200
|
-
const auto& columnTypeRef = nodeMappingReader->getColumnType(i);
|
|
201
|
-
auto columnType = columnTypeRef.copy();
|
|
202
|
-
auto vector = std::make_shared<ValueVector>(std::move(columnType));
|
|
203
|
-
dataChunk.insert(i, vector);
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
// Read all node mapping values
|
|
207
|
-
while (nodeMappingReader->scanInternal(scanState, dataChunk)) {
|
|
208
|
-
auto selSize = dataChunk.state->getSelVector().getSelSize();
|
|
209
|
-
for (size_t i = 0; i < selSize; ++i) {
|
|
210
|
-
auto csrNodeId = dataChunk.getValueVector(0).getValue<common::offset_t>(i);
|
|
211
|
-
auto nodeTableId = dataChunk.getValueVector(1).getValue<common::offset_t>(i);
|
|
212
|
-
nodeMapping[common::internalID_t(nodeTableId, getFromNodeTableID())] =
|
|
213
|
-
csrNodeId;
|
|
214
|
-
// Also create reverse mapping for destination node lookups
|
|
215
|
-
csrToNodeTableIdMap[csrNodeId] = nodeTableId;
|
|
216
|
-
}
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
|
|
222
142
|
void ParquetRelTable::loadIndptrData(Transaction* transaction) const {
|
|
223
143
|
if (indptrData.empty() && !indptrFilePath.empty()) {
|
|
224
144
|
std::lock_guard lock(indptrDataMutex);
|
|
@@ -278,9 +198,7 @@ bool ParquetRelTable::scanInternal(Transaction* transaction, TableScanState& sca
|
|
|
278
198
|
// Get the ParquetRelTableScanState
|
|
279
199
|
auto& parquetRelScanState = static_cast<ParquetRelTableScanState&>(relScanState);
|
|
280
200
|
|
|
281
|
-
//
|
|
282
|
-
// Load shared data (node mapping and indptr) - these are thread-safe to read
|
|
283
|
-
loadNodeMappingData(transaction);
|
|
201
|
+
// Load shared indptr data - thread-safe to read
|
|
284
202
|
if (!indptrFilePath.empty()) {
|
|
285
203
|
loadIndptrData(transaction);
|
|
286
204
|
}
|
|
@@ -6,14 +6,11 @@
|
|
|
6
6
|
|
|
7
7
|
-LOG MatchUserLivesInCity
|
|
8
8
|
-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN u.name, u.age, c.name;
|
|
9
|
-
----
|
|
10
|
-
Adam|30|
|
|
11
|
-
Adam|30|Guelph
|
|
9
|
+
---- 4
|
|
10
|
+
Adam|30|Waterloo
|
|
12
11
|
Karissa|40|Waterloo
|
|
13
|
-
Noura|25|Kitchener
|
|
14
|
-
Noura|25|Waterloo
|
|
15
|
-
Noura|25|Waterloo
|
|
16
12
|
Zhang|50|Kitchener
|
|
13
|
+
Noura|25|Guelph
|
|
17
14
|
|
|
18
15
|
-LOG MatchSingleNodeLabel
|
|
19
16
|
-STATEMENT MATCH (a:user) RETURN a.name, a.age;
|
|
@@ -32,12 +29,49 @@ Guelph|75000
|
|
|
32
29
|
|
|
33
30
|
-LOG MatchFollowsRel
|
|
34
31
|
-STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN a.name, b.name, e.since;
|
|
35
|
-
----
|
|
36
|
-
Adam|
|
|
37
|
-
|
|
38
|
-
Karissa|Zhang|
|
|
39
|
-
Noura|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
32
|
+
---- 4
|
|
33
|
+
Adam|Karissa|2020
|
|
34
|
+
Adam|Zhang|2020
|
|
35
|
+
Karissa|Zhang|2021
|
|
36
|
+
Zhang|Noura|2022
|
|
37
|
+
|
|
38
|
+
-LOG MatchLivesInWithCityPopulation
|
|
39
|
+
-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN u.name, c.name, c.population ORDER BY c.population DESC;
|
|
40
|
+
---- 4
|
|
41
|
+
Zhang|Kitchener|200000
|
|
42
|
+
Adam|Waterloo|150000
|
|
43
|
+
Karissa|Waterloo|150000
|
|
44
|
+
Noura|Guelph|75000
|
|
45
|
+
|
|
46
|
+
-LOG MatchLivesInFilterByCity
|
|
47
|
+
-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) WHERE c.name = 'Waterloo' RETURN u.name, u.age;
|
|
48
|
+
---- 2
|
|
49
|
+
Adam|30
|
|
50
|
+
Karissa|40
|
|
51
|
+
|
|
52
|
+
-LOG MatchLivesInFilterByCityPopulation
|
|
53
|
+
-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) WHERE c.population > 100000 RETURN u.name, c.name ORDER BY u.name;
|
|
54
|
+
---- 3
|
|
55
|
+
Adam|Waterloo
|
|
56
|
+
Karissa|Waterloo
|
|
57
|
+
Zhang|Kitchener
|
|
58
|
+
|
|
59
|
+
-LOG CountUsersPerCity
|
|
60
|
+
-STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN c.name, COUNT(*) AS num_users ORDER BY num_users DESC;
|
|
61
|
+
---- 3
|
|
62
|
+
Waterloo|2
|
|
63
|
+
Guelph|1
|
|
64
|
+
Kitchener|1
|
|
65
|
+
|
|
66
|
+
-LOG MatchFollowsWithDestinationAge
|
|
67
|
+
-STATEMENT MATCH (a:user)-[e:follows]->(b:user) WHERE b.age > 30 RETURN a.name, b.name, b.age ORDER BY b.age DESC;
|
|
68
|
+
---- 3
|
|
69
|
+
Adam|Zhang|50
|
|
70
|
+
Karissa|Zhang|50
|
|
71
|
+
Adam|Karissa|40
|
|
72
|
+
|
|
73
|
+
-LOG MatchFollowsFilterBySourceAndDest
|
|
74
|
+
-STATEMENT MATCH (a:user)-[e:follows]->(b:user) WHERE a.age < 40 AND b.age >= 40 RETURN a.name, b.name;
|
|
75
|
+
---- 2
|
|
76
|
+
Adam|Karissa
|
|
77
|
+
Adam|Zhang
|
package/package.json
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|