lbug 0.12.3-dev.16 → 0.12.3-dev.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/lbug-source/CMakeLists.txt +1 -1
  2. package/lbug-source/dataset/demo-db/graph-std/demo_indices_follows.parquet +0 -0
  3. package/lbug-source/dataset/demo-db/graph-std/demo_indices_livesin.parquet +0 -0
  4. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_follows.parquet +0 -0
  5. package/lbug-source/dataset/demo-db/graph-std/demo_indptr_livesin.parquet +0 -0
  6. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_city.parquet +0 -0
  7. package/lbug-source/dataset/demo-db/graph-std/demo_mapping_user.parquet +0 -0
  8. package/lbug-source/dataset/demo-db/graph-std/demo_metadata.parquet +0 -0
  9. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_city.parquet +0 -0
  10. package/lbug-source/dataset/demo-db/graph-std/demo_nodes_user.parquet +0 -0
  11. package/lbug-source/dataset/demo-db/graph-std/schema.cypher +4 -0
  12. package/lbug-source/scripts/antlr4/Cypher.g4 +1 -1
  13. package/lbug-source/scripts/antlr4/hash.md5 +1 -1
  14. package/lbug-source/src/antlr4/Cypher.g4 +1 -1
  15. package/lbug-source/src/binder/bind/bind_ddl.cpp +23 -13
  16. package/lbug-source/src/catalog/catalog.cpp +5 -4
  17. package/lbug-source/src/catalog/catalog_entry/node_table_catalog_entry.cpp +8 -1
  18. package/lbug-source/src/catalog/catalog_entry/rel_group_catalog_entry.cpp +7 -0
  19. package/lbug-source/src/include/binder/ddl/bound_create_table_info.h +10 -6
  20. package/lbug-source/src/include/catalog/catalog_entry/node_table_catalog_entry.h +5 -3
  21. package/lbug-source/src/include/catalog/catalog_entry/rel_group_catalog_entry.h +4 -2
  22. package/lbug-source/src/include/common/constants.h +1 -0
  23. package/lbug-source/src/include/parser/ddl/create_table_info.h +3 -1
  24. package/lbug-source/src/include/processor/operator/scan/scan_node_table.h +2 -2
  25. package/lbug-source/src/include/storage/storage_manager.h +1 -0
  26. package/lbug-source/src/include/storage/table/node_table.h +6 -1
  27. package/lbug-source/src/include/storage/table/parquet_node_table.h +103 -0
  28. package/lbug-source/src/include/storage/table/parquet_rel_table.h +91 -0
  29. package/lbug-source/src/include/storage/table/rel_table.h +2 -2
  30. package/lbug-source/src/include/transaction/transaction.h +2 -0
  31. package/lbug-source/src/parser/transform/transform_ddl.cpp +6 -1
  32. package/lbug-source/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +4 -0
  33. package/lbug-source/src/processor/operator/scan/scan_multi_rel_tables.cpp +24 -2
  34. package/lbug-source/src/processor/operator/scan/scan_node_table.cpp +44 -8
  35. package/lbug-source/src/processor/operator/scan/scan_rel_table.cpp +12 -2
  36. package/lbug-source/src/storage/storage_manager.cpp +37 -6
  37. package/lbug-source/src/storage/table/CMakeLists.txt +2 -0
  38. package/lbug-source/src/storage/table/parquet_node_table.cpp +338 -0
  39. package/lbug-source/src/storage/table/parquet_rel_table.cpp +388 -0
  40. package/lbug-source/test/include/test_runner/test_group.h +11 -1
  41. package/lbug-source/test/runner/e2e_test.cpp +7 -1
  42. package/lbug-source/test/test_files/demo_db/demo_db_graph_std.test +77 -0
  43. package/lbug-source/test/test_helper/test_helper.cpp +24 -0
  44. package/lbug-source/test/test_runner/test_parser.cpp +3 -0
  45. package/lbug-source/third_party/antlr4_cypher/cypher_parser.cpp +2761 -2701
  46. package/lbug-source/third_party/antlr4_cypher/include/cypher_parser.h +2 -0
  47. package/package.json +1 -1
  48. package/prebuilt/lbugjs-darwin-arm64.node +0 -0
  49. package/prebuilt/lbugjs-linux-arm64.node +0 -0
  50. package/prebuilt/lbugjs-linux-x64.node +0 -0
  51. package/prebuilt/lbugjs-win32-x64.node +0 -0
@@ -0,0 +1,388 @@
+ #include "storage/table/parquet_rel_table.h"
+
+ #include <thread>
+
+ #include "catalog/catalog_entry/rel_group_catalog_entry.h"
+ #include "common/data_chunk/sel_vector.h"
+ #include "common/exception/runtime.h"
+ #include "common/file_system/virtual_file_system.h"
+ #include "main/client_context.h"
+ #include "processor/operator/persistent/reader/parquet/parquet_reader.h"
+ #include "storage/storage_manager.h"
+ #include "transaction/transaction.h"
+
+ using namespace lbug::catalog;
+ using namespace lbug::common;
+ using namespace lbug::processor;
+ using namespace lbug::transaction;
+
+ namespace lbug {
+ namespace storage {
+
+ void ParquetRelTableScanState::setToTable(const Transaction* transaction, Table* table_,
+     std::vector<column_id_t> columnIDs_, std::vector<ColumnPredicateSet> columnPredicateSets_,
+     RelDataDirection direction_) {
+     // Call base class implementation but skip local table setup
+     TableScanState::setToTable(transaction, table_, std::move(columnIDs_),
+         std::move(columnPredicateSets_));
+     columns.resize(columnIDs.size());
+     direction = direction_;
+     for (size_t i = 0; i < columnIDs.size(); ++i) {
+         auto columnID = columnIDs[i];
+         if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID) {
+             columns[i] = nullptr;
+         } else {
+             columns[i] = table->cast<RelTable>().getColumn(columnID, direction);
+         }
+     }
+     csrOffsetColumn = table->cast<RelTable>().getCSROffsetColumn(direction);
+     csrLengthColumn = table->cast<RelTable>().getCSRLengthColumn(direction);
+     nodeGroupIdx = INVALID_NODE_GROUP_IDX;
+     // ParquetRelTable does not support local storage, so we skip the local table initialization
+ }
+
+ ParquetRelTable::ParquetRelTable(RelGroupCatalogEntry* relGroupEntry, table_id_t fromTableID,
+     table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager)
+     : RelTable{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager},
+       relGroupEntry{relGroupEntry} {
+     std::string storage = relGroupEntry->getStorage();
+     if (storage.empty()) {
+         throw RuntimeException("Parquet file path is empty for parquet-backed rel table");
+     }
+
+     // Get the relationship name for multi-table directory support
+     std::string relName = relGroupEntry->getName();
+
+     // New prefix format with relationship name: "prefix" which expands to:
+     // prefix_indices_{relName}.parquet, prefix_indptr_{relName}.parquet,
+     // prefix_metadata_{relName}.parquet
+     std::string prefix = storage;
+     indicesFilePath = prefix + "_indices_" + relName + ".parquet";
+     indptrFilePath = prefix + "_indptr_" + relName + ".parquet";
+ }
+
+ void ParquetRelTable::initScanState(Transaction* transaction, TableScanState& scanState,
+     bool resetCachedBoundNodeSelVec) const {
+     // For parquet tables, we create our own scan state
+     auto& relScanState = scanState.cast<RelTableScanState>();
+     relScanState.source = TableScanSource::COMMITTED;
+     relScanState.nodeGroup = nullptr;
+     relScanState.nodeGroupIdx = INVALID_NODE_GROUP_IDX;
+
+     // Initialize ParquetReaders for this scan state (per-thread)
+     auto& parquetRelScanState = static_cast<ParquetRelTableScanState&>(relScanState);
+
+     // Initialize readers if not already done for this scan state
+     if (!parquetRelScanState.indicesReader) {
+         std::vector<bool> columnSkips; // Read all columns
+         auto context = transaction->getClientContext();
+         parquetRelScanState.indicesReader =
+             std::make_unique<ParquetReader>(indicesFilePath, columnSkips, context);
+     }
+     if (!indptrFilePath.empty() && !parquetRelScanState.indptrReader) {
+         std::vector<bool> columnSkips; // Read all columns
+         auto context = transaction->getClientContext();
+         parquetRelScanState.indptrReader =
+             std::make_unique<ParquetReader>(indptrFilePath, columnSkips, context);
+     }
+
+     // Load shared indptr data - thread-safe to read
+     if (!indptrFilePath.empty()) {
+         loadIndptrData(transaction);
+     }
+
+     // For morsel-driven parallelism, each scan state maintains its own bound node processing state
+     // No shared state needed between threads
+     if (resetCachedBoundNodeSelVec) {
+         // Copy the cached bound node selection vector from the scan state
+         if (relScanState.nodeIDVector->state->getSelVector().isUnfiltered()) {
+             relScanState.cachedBoundNodeSelVector.setToUnfiltered();
+         } else {
+             relScanState.cachedBoundNodeSelVector.setToFiltered();
+             memcpy(relScanState.cachedBoundNodeSelVector.getMutableBuffer().data(),
+                 relScanState.nodeIDVector->state->getSelVector().getMutableBuffer().data(),
+                 relScanState.nodeIDVector->state->getSelVector().getSelSize() * sizeof(sel_t));
+         }
+         relScanState.cachedBoundNodeSelVector.setSelSize(
+             relScanState.nodeIDVector->state->getSelVector().getSelSize());
+     }
+
+     // Initialize row group ranges for morsel-driven parallelism
+     // For now, assign all row groups to this scan state (will be partitioned by the scan operator)
+     parquetRelScanState.startRowGroup = 0;
+     parquetRelScanState.endRowGroup = parquetRelScanState.indicesReader ?
+                                           parquetRelScanState.indicesReader->getNumRowsGroups() :
+                                           0;
+     parquetRelScanState.currentRowGroup = parquetRelScanState.startRowGroup;
+     parquetRelScanState.nextRowToProcess = 0;
+ }
+
+ void ParquetRelTable::initializeParquetReaders(Transaction* transaction) const {
+     if (!indicesReader) {
+         std::lock_guard lock(parquetReaderMutex);
+         if (!indicesReader) {
+             std::vector<bool> columnSkips; // Read all columns
+             auto context = transaction->getClientContext();
+             indicesReader = std::make_unique<ParquetReader>(indicesFilePath, columnSkips, context);
+         }
+     }
+ }
+
+ void ParquetRelTable::initializeIndptrReader(Transaction* transaction) const {
+     if (!indptrFilePath.empty() && !indptrReader) {
+         std::lock_guard lock(parquetReaderMutex);
+         if (!indptrReader) {
+             std::vector<bool> columnSkips; // Read all columns
+             auto context = transaction->getClientContext();
+             indptrReader = std::make_unique<ParquetReader>(indptrFilePath, columnSkips, context);
+         }
+     }
+ }
+
+ void ParquetRelTable::loadIndptrData(Transaction* transaction) const {
+     if (indptrData.empty() && !indptrFilePath.empty()) {
+         std::lock_guard lock(indptrDataMutex);
+         if (indptrData.empty()) {
+             initializeIndptrReader(transaction);
+             if (!indptrReader)
+                 return;
+
+             // Initialize scan to populate column types
+             auto context = transaction->getClientContext();
+             auto vfs = VirtualFileSystem::GetUnsafe(*context);
+             std::vector<uint64_t> groupsToRead;
+             for (uint64_t i = 0; i < indptrReader->getNumRowsGroups(); ++i) {
+                 groupsToRead.push_back(i);
+             }
+
+             ParquetReaderScanState scanState;
+             indptrReader->initializeScan(scanState, groupsToRead, vfs);
+
+             // Check if the indptr file has any columns after scan initialization
+             auto numColumns = indptrReader->getNumColumns();
+             if (numColumns == 0) {
+                 throw RuntimeException("Indptr parquet file has no columns");
+             }
+
+             // Validate column type for indptr
+             const auto& indptrType = indptrReader->getColumnType(0);
+             if (!LogicalTypeUtils::isIntegral(indptrType.getLogicalTypeID())) {
+                 throw RuntimeException(
+                     "Indptr parquet file column must be integer type (column 0)");
+             }
+
+             // Read the indptr column
+             DataChunk dataChunk(1);
+
+             // Now get the column type after scan is initialized
+             const auto& columnTypeRef = indptrReader->getColumnType(0);
+             auto columnType = columnTypeRef.copy();
+             auto vector = std::make_shared<ValueVector>(std::move(columnType));
+             dataChunk.insert(0, vector);
+
+             // Read all indptr values
+             while (indptrReader->scanInternal(scanState, dataChunk)) {
+                 auto selSize = dataChunk.state->getSelVector().getSelSize();
+                 for (size_t i = 0; i < selSize; ++i) {
+                     auto value = dataChunk.getValueVector(0).getValue<common::offset_t>(i);
+                     indptrData.push_back(value);
+                 }
+             }
+         }
+     }
+ }
+
+ bool ParquetRelTable::scanInternal(Transaction* transaction, TableScanState& scanState) {
+     auto& relScanState = scanState.cast<RelTableScanState>();
+
+     // Get the ParquetRelTableScanState
+     auto& parquetRelScanState = static_cast<ParquetRelTableScanState&>(relScanState);
+
+     // Load shared indptr data - thread-safe to read
+     if (!indptrFilePath.empty()) {
+         loadIndptrData(transaction);
+     }
+
+     // True morsel-driven parallelism: each scan state processes its assigned row groups
+     // Process all row groups assigned to this scan state, collecting relationships for bound nodes
+     return scanInternalByRowGroups(transaction, parquetRelScanState);
+ }
+
+ bool ParquetRelTable::scanInternalByRowGroups(Transaction* transaction,
+     ParquetRelTableScanState& parquetRelScanState) {
+     // True morsel-driven parallelism: process assigned row groups and collect relationships for
+     // bound nodes
+
+     // Check if we have any row groups left to process
+     if (parquetRelScanState.currentRowGroup >= parquetRelScanState.endRowGroup) {
+         // No more row groups to process
+         auto newSelVector = std::make_shared<SelectionVector>(0);
+         parquetRelScanState.outState->setSelVector(newSelVector);
+         return false;
+     }
+
+     // Process the current row group
+     std::vector<uint64_t> rowGroupsToProcess = {parquetRelScanState.currentRowGroup};
+
+     // Create a set of bound node IDs for fast lookup
+     std::unordered_set<common::offset_t> boundNodeOffsets;
+     for (size_t i = 0; i < parquetRelScanState.cachedBoundNodeSelVector.getSelSize(); ++i) {
+         common::sel_t boundNodeIdx = parquetRelScanState.cachedBoundNodeSelVector[i];
+         const auto boundNodeID = parquetRelScanState.nodeIDVector->getValue<nodeID_t>(boundNodeIdx);
+         boundNodeOffsets.insert(boundNodeID.offset);
+     }
+
+     // Scan the current row group and collect relationships for bound nodes
+     bool hasData = scanRowGroupForBoundNodes(transaction, parquetRelScanState, rowGroupsToProcess,
+         boundNodeOffsets);
+
+     // Move to next row group for next call
+     parquetRelScanState.currentRowGroup++;
+
+     return hasData;
+ }
+
+ common::offset_t ParquetRelTable::findSourceNodeForRow(common::offset_t globalRowIdx) const {
+     // Binary search in indptrData to find which source node this row belongs to
+     // indptrData[i] gives the starting row index for source node i
+     // indptrData[i+1] gives the ending row index for source node i
+
+     if (indptrData.empty()) {
+         return common::INVALID_OFFSET;
+     }
+
+     // Binary search to find the source node
+     size_t left = 0;
+     size_t right = indptrData.size() - 2; // -2 because we compare with i+1
+
+     while (left <= right) {
+         size_t mid = left + (right - left) / 2;
+         if (globalRowIdx >= indptrData[mid] && globalRowIdx < indptrData[mid + 1]) {
+             return mid; // Found the source node
+         } else if (globalRowIdx < indptrData[mid]) {
+             if (mid == 0)
+                 break;
+             right = mid - 1;
+         } else {
+             left = mid + 1;
+         }
+     }
+
+     return common::INVALID_OFFSET; // Row not found in any range
+ }
+
+ bool ParquetRelTable::scanRowGroupForBoundNodes(Transaction* transaction,
+     ParquetRelTableScanState& parquetRelScanState, const std::vector<uint64_t>& rowGroupsToProcess,
+     const std::unordered_set<common::offset_t>& boundNodeOffsets) {
+
+     // Initialize readers if needed
+     initializeParquetReaders(transaction);
+
+     if (!parquetRelScanState.indicesReader) {
+         return false;
+     }
+
+     // Initialize scan state for the assigned row groups
+     auto context = transaction->getClientContext();
+     auto vfs = VirtualFileSystem::GetUnsafe(*context);
+     parquetRelScanState.indicesReader->initializeScan(*parquetRelScanState.parquetScanState,
+         rowGroupsToProcess, vfs);
+
+     // Create DataChunk matching the indices parquet file schema
+     auto numIndicesColumns = parquetRelScanState.indicesReader->getNumColumns();
+     DataChunk indicesChunk(numIndicesColumns);
+
+     // Insert value vectors for all columns in the parquet file
+     for (uint32_t colIdx = 0; colIdx < numIndicesColumns; ++colIdx) {
+         const auto& columnTypeRef = parquetRelScanState.indicesReader->getColumnType(colIdx);
+         auto columnType = columnTypeRef.copy();
+         auto vector = std::make_shared<ValueVector>(std::move(columnType));
+         indicesChunk.insert(colIdx, vector);
+     }
+
+     // Scan the row groups and collect relationships for bound nodes
+     uint64_t totalRowsCollected = 0;
+     const uint64_t maxRowsPerCall = DEFAULT_VECTOR_CAPACITY;
+     uint64_t currentGlobalRowIdx = 0;
+
+     // Calculate the starting global row index for the first row group
+     if (!rowGroupsToProcess.empty()) {
+         auto metadata = parquetRelScanState.indicesReader->getMetadata();
+         for (uint64_t rgIdx = 0; rgIdx < rowGroupsToProcess[0]; ++rgIdx) {
+             currentGlobalRowIdx += metadata->row_groups[rgIdx].num_rows;
+         }
+     }
+
+     while (totalRowsCollected < maxRowsPerCall &&
+            parquetRelScanState.indicesReader->scanInternal(*parquetRelScanState.parquetScanState,
+                indicesChunk)) {
+
+         auto selSize = indicesChunk.state->getSelVector().getSelSize();
+
+         for (size_t i = 0; i < selSize && totalRowsCollected < maxRowsPerCall;
+              ++i, ++currentGlobalRowIdx) {
+             // Find which source node this row belongs to
+             common::offset_t sourceNodeOffset = findSourceNodeForRow(currentGlobalRowIdx);
+             if (sourceNodeOffset == common::INVALID_OFFSET) {
+                 continue; // Invalid row
+             }
+
+             // Check if this source node is in our bound nodes
+             if (boundNodeOffsets.find(sourceNodeOffset) == boundNodeOffsets.end()) {
+                 continue; // Not a bound node, skip
+             }
+
+             // This row belongs to a bound node, collect the relationship
+
+             // Column 0 in indices file is the target/destination node ID
+             // Read as offset_t and convert to INTERNAL_ID
+             auto dstOffset = indicesChunk.getValueVector(0).getValue<common::offset_t>(i);
+             auto dstNodeID = internalID_t(dstOffset, getToNodeTableID());
+
+             // outputVectors[0] is the neighbor node ID (destination), if requested
+             if (!parquetRelScanState.outputVectors.empty()) {
+                 parquetRelScanState.outputVectors[0]->setValue(totalRowsCollected, dstNodeID);
+             }
+
+             // If there are additional columns (e.g., weight), copy them to subsequent output
+             // vectors These are property columns and should have matching types
+             for (uint32_t colIdx = 1;
+                  colIdx < numIndicesColumns && colIdx < parquetRelScanState.outputVectors.size();
+                  ++colIdx) {
+                 parquetRelScanState.outputVectors[colIdx]->copyFromVectorData(totalRowsCollected,
+                     &indicesChunk.getValueVector(colIdx), i);
+             }
+
+             totalRowsCollected++;
+         }
+     }
+
+     // Set up the output state
+     if (totalRowsCollected > 0) {
+         auto selVector = std::make_shared<SelectionVector>(totalRowsCollected);
+         selVector->setToFiltered(totalRowsCollected);
+         for (uint64_t i = 0; i < totalRowsCollected; ++i) {
+             (*selVector)[i] = i;
+         }
+         parquetRelScanState.outState->setSelVector(selVector);
+
+         return true;
+     } else {
+         // No data found
+         auto selVector = std::make_shared<SelectionVector>(0);
+         parquetRelScanState.outState->setSelVector(selVector);
+         return false;
+     }
+ }
+
+ row_idx_t ParquetRelTable::getNumTotalRows(const transaction::Transaction* transaction) {
+     initializeParquetReaders(const_cast<transaction::Transaction*>(transaction));
+     if (!indicesReader) {
+         return 0;
+     }
+     auto metadata = indicesReader->getMetadata();
+     return metadata ? metadata->num_rows : 0;
+ }
+
+ } // namespace storage
+ } // namespace lbug
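
Note on the layout this file assumes: the parquet-backed rel table reads a CSR-style pair of files. The indptr parquet file stores, per source node offset, where that node's neighbor list begins in the indices parquet file, and findSourceNodeForRow inverts that mapping with a binary search over the boundaries. The following is a minimal standalone sketch of the convention, not part of the package; the values are illustrative only (loosely shaped like the demo follows data) and the variable names are made up.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    // indptr[i] is the first row of the indices file that belongs to source node i;
    // indptr[i + 1] is one past its last row (the convention findSourceNodeForRow assumes).
    std::vector<uint64_t> indptr = {0, 2, 3, 4, 4}; // 4 source nodes, 4 edges in total
    std::vector<uint64_t> indices = {1, 2, 2, 3};   // destination node offsets (column 0 of indices)

    uint64_t src = 0; // enumerate the neighbors of source node 0
    for (uint64_t row = indptr[src]; row < indptr[src + 1]; ++row) {
        std::cout << src << " -> " << indices[row] << "\n"; // prints 0 -> 1 and 0 -> 2
    }
    return 0;
}

Scanning the indices file row group by row group and mapping each global row index back through these boundaries is what scanRowGroupForBoundNodes does above, which is why only the small indptr array is materialized in memory while the indices file is streamed.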
@@ -107,7 +107,17 @@ struct TestGroup {
      std::unordered_map<std::string, std::set<std::string>> testCasesConnNames;
      bool testFwdOnly;

-     enum class DatasetType { CSV, PARQUET, NPY, CSV_TO_PARQUET, TURTLE, LBUG, JSON, CSV_TO_JSON };
+     enum class DatasetType {
+         CSV,
+         PARQUET,
+         NPY,
+         CSV_TO_PARQUET,
+         TURTLE,
+         LBUG,
+         JSON,
+         CSV_TO_JSON,
+         GRAPH_STD
+     };
      DatasetType datasetType;

      bool isValid() const { return !group.empty() && !dataset.empty(); }
@@ -38,7 +38,13 @@ public:
          }
          createDB(checkpointWaitTimeout);
          createConns(connNames);
-         if (datasetType != TestGroup::DatasetType::LBUG && dataset != "empty") {
+         if (datasetType == TestGroup::DatasetType::GRAPH_STD) {
+             // For GRAPH_STD, only run schema.cypher (which contains WITH storage = ... clauses)
+             // No copy.cypher needed as data is in external parquet files
+             lbug::main::Connection* connection =
+                 conn ? conn.get() : (connMap.begin()->second).get();
+             TestHelper::executeScript(dataset + "/" + TestHelper::SCHEMA_FILE_NAME, *connection);
+         } else if (datasetType != TestGroup::DatasetType::LBUG && dataset != "empty") {
              initGraph();
          } else if (generateBinaryDemo && TestHelper::E2E_OVERRIDE_IMPORT_DIR.empty()) {
              initGraph(TestHelper::appendLbugRootPath("dataset/demo-db/parquet/"));
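
For GRAPH-STD datasets the harness therefore executes only the schema script. The actual 4-line schema.cypher added under dataset/demo-db/graph-std/ is not reproduced in this diff, so the following is only a hedged sketch of what its statements could look like, assuming the `WITH storage = '...'` clause mentioned in the comment above and the `demo_*.parquet` file prefix visible in the file list; the real column definitions and exact syntax may differ.

// Hypothetical sketch only; not the shipped schema.cypher.
CREATE NODE TABLE user(name STRING, age INT64, PRIMARY KEY(name)) WITH storage = 'dataset/demo-db/graph-std/demo';
CREATE NODE TABLE city(name STRING, population INT64, PRIMARY KEY(name)) WITH storage = 'dataset/demo-db/graph-std/demo';
CREATE REL TABLE follows(FROM user TO user, since INT64) WITH storage = 'dataset/demo-db/graph-std/demo';
CREATE REL TABLE livesin(FROM user TO city) WITH storage = 'dataset/demo-db/graph-std/demo';

Under this reading, the 'demo' prefix is what ParquetRelTable expands into demo_indices_follows.parquet, demo_indptr_follows.parquet, and so on.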
@@ -0,0 +1,77 @@
+ -DATASET GRAPH-STD demo-db/graph-std
+
+ --
+
+ -CASE DemoDBGraphStdTest
+
+ -LOG MatchUserLivesInCity
+ -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN u.name, u.age, c.name;
+ ---- 4
+ Adam|30|Waterloo
+ Karissa|40|Waterloo
+ Zhang|50|Kitchener
+ Noura|25|Guelph
+
+ -LOG MatchSingleNodeLabel
+ -STATEMENT MATCH (a:user) RETURN a.name, a.age;
+ ---- 4
+ Adam|30
+ Karissa|40
+ Zhang|50
+ Noura|25
+
+ -LOG MatchCityNodes
+ -STATEMENT MATCH (c:city) RETURN c.name, c.population;
+ ---- 3
+ Waterloo|150000
+ Kitchener|200000
+ Guelph|75000
+
+ -LOG MatchFollowsRel
+ -STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN a.name, b.name, e.since;
+ ---- 4
+ Adam|Karissa|2020
+ Adam|Zhang|2020
+ Karissa|Zhang|2021
+ Zhang|Noura|2022
+
+ -LOG MatchLivesInWithCityPopulation
+ -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN u.name, c.name, c.population ORDER BY c.population DESC;
+ ---- 4
+ Zhang|Kitchener|200000
+ Adam|Waterloo|150000
+ Karissa|Waterloo|150000
+ Noura|Guelph|75000
+
+ -LOG MatchLivesInFilterByCity
+ -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) WHERE c.name = 'Waterloo' RETURN u.name, u.age;
+ ---- 2
+ Adam|30
+ Karissa|40
+
+ -LOG MatchLivesInFilterByCityPopulation
+ -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) WHERE c.population > 100000 RETURN u.name, c.name ORDER BY u.name;
+ ---- 3
+ Adam|Waterloo
+ Karissa|Waterloo
+ Zhang|Kitchener
+
+ -LOG CountUsersPerCity
+ -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN c.name, COUNT(*) AS num_users ORDER BY num_users DESC;
+ ---- 3
+ Waterloo|2
+ Guelph|1
+ Kitchener|1
+
+ -LOG MatchFollowsWithDestinationAge
+ -STATEMENT MATCH (a:user)-[e:follows]->(b:user) WHERE b.age > 30 RETURN a.name, b.name, b.age ORDER BY b.age DESC;
+ ---- 3
+ Adam|Zhang|50
+ Karissa|Zhang|50
+ Adam|Karissa|40
+
+ -LOG MatchFollowsFilterBySourceAndDest
+ -STATEMENT MATCH (a:user)-[e:follows]->(b:user) WHERE a.age < 40 AND b.age >= 40 RETURN a.name, b.name;
+ ---- 2
+ Adam|Karissa
+ Adam|Zhang
@@ -71,6 +71,30 @@ void TestHelper::executeScript(const std::string& cypherScript, Connection& conn
              auto fullPath = appendLbugRootPath(csvFilePath);
              line.replace(line.find(csvFilePath), csvFilePath.length(), fullPath);
          }
+         // Also handle storage = 'path' for parquet tables
+         std::vector<std::string> storagePaths;
+         size_t storageIndex = 0;
+         while (true) {
+             size_t start = line.find("storage = '", storageIndex);
+             if (start == std::string::npos) {
+                 break;
+             }
+             start += 11; // length of "storage = '"
+             size_t end = line.find("'", start);
+             if (end == std::string::npos) {
+                 break;
+             }
+             std::string storagePath = line.substr(start, end - start);
+             storagePaths.push_back(storagePath);
+             storageIndex = end + 1;
+         }
+         for (auto& storagePath : storagePaths) {
+             auto fullPath = appendLbugRootPath(storagePath);
+             size_t pos = line.find(storagePath);
+             if (pos != std::string::npos) {
+                 line.replace(pos, storagePath.length(), fullPath);
+             }
+         }
  #ifdef __STATIC_LINK_EXTENSION_TEST__
          if (line.starts_with("load extension")) {
              continue;
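
The added block mirrors the existing CSV-path handling just above it: every storage = '<relative path>' occurrence in a script line is collected and then replaced with an absolute path rooted at the lbug checkout, so parquet-backed tables resolve regardless of the test's working directory. A self-contained sketch of the same substitution follows; appendLbugRootPath is stubbed out and the input line is made up, so treat both as assumptions rather than package code.

#include <iostream>
#include <string>
#include <vector>

// Stand-in for TestHelper::appendLbugRootPath; the real helper prepends the repository root.
static std::string appendRootPath(const std::string& path) {
    return "/home/user/lbug/" + path;
}

int main() {
    std::string line =
        "CREATE REL TABLE follows(FROM user TO user) WITH storage = 'dataset/demo-db/graph-std/demo';";
    // Collect every quoted path that follows "storage = '".
    std::vector<std::string> storagePaths;
    size_t searchFrom = 0;
    while (true) {
        size_t start = line.find("storage = '", searchFrom);
        if (start == std::string::npos) {
            break;
        }
        start += 11; // skip past "storage = '"
        size_t end = line.find('\'', start);
        if (end == std::string::npos) {
            break;
        }
        storagePaths.push_back(line.substr(start, end - start));
        searchFrom = end + 1;
    }
    // Rewrite each collected relative path to an absolute one.
    for (const auto& storagePath : storagePaths) {
        size_t pos = line.find(storagePath);
        if (pos != std::string::npos) {
            line.replace(pos, storagePath.length(), appendRootPath(storagePath));
        }
    }
    std::cout << line << "\n"; // the storage path is now absolute
    return 0;
}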
@@ -87,6 +87,9 @@ void TestParser::extractDataset() {
              testGroup->datasetType = TestGroup::DatasetType::JSON;
              testGroup->dataset = currentToken.params[2];
          }
+     } else if (datasetType == "GRAPH-STD") {
+         testGroup->datasetType = TestGroup::DatasetType::GRAPH_STD;
+         testGroup->dataset = currentToken.params[2];
      } else {
          throw TestException(
              "Invalid dataset type `" + currentToken.params[1] + "` [" + path + ":" + line + "].");