lbug 0.12.3-dev.17 → 0.12.3-dev.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.15)
2
2
 
3
- project(Lbug VERSION 0.12.3.17 LANGUAGES CXX C)
3
+ project(Lbug VERSION 0.12.3.19 LANGUAGES CXX C)
4
4
 
5
5
  option(SINGLE_THREADED "Single-threaded mode" FALSE)
6
6
  if(SINGLE_THREADED)
@@ -39,7 +39,17 @@ def serialize(lbug_exec_path, dataset_name, dataset_path, serialized_graph_path,
39
39
  with open(os.path.join(dataset_path, 'schema.cypher'), 'r') as f:
40
40
  serialize_queries += f.readlines()
41
41
  with open(os.path.join(dataset_path, 'copy.cypher'), 'r') as f:
42
- serialize_queries += f.readlines()
42
+ copy_lines = f.readlines()
43
+ # Fix relative paths in copy.cypher
44
+ for line in copy_lines:
45
+ # Replace quoted paths with absolute paths
46
+ def replace_path(match):
47
+ path = match.group(1)
48
+ if not os.path.isabs(path):
49
+ return '"' + os.path.join(dataset_path, path) + '"'
50
+ return match.group(0)
51
+ fixed_line = re.sub(r'"([^"]*)"', replace_path, line)
52
+ serialize_queries.append(fixed_line.strip())
43
53
  else:
44
54
  with open(os.path.join(base_dir, 'serialize.cypher'), 'r') as f:
45
55
  serialize_queries += f.readlines()
@@ -1,4 +1,4 @@
1
- COPY User From "dataset/demo-db/csv/user.csv"
2
- COPY City FROM "dataset/demo-db/csv/city.csv"
3
- COPY Follows FROM "dataset/demo-db/csv/follows.csv"
4
- COPY LivesIn FROM "dataset/demo-db/csv/lives-in.csv"
1
+ COPY User From "user.csv"
2
+ COPY City FROM "city.csv"
3
+ COPY Follows FROM "follows.csv"
4
+ COPY LivesIn FROM "lives-in.csv"
@@ -1,4 +1,4 @@
1
- COPY User From "dataset/demo-db/parquet/user.parquet";
2
- COPY City FROM "dataset/demo-db/parquet/city.parquet";
3
- COPY Follows FROM "dataset/demo-db/parquet/follows.parquet";
4
- COPY LivesIn FROM "dataset/demo-db/parquet/lives-in.parquet";
1
+ COPY User From "user.parquet";
2
+ COPY City FROM "city.parquet";
3
+ COPY Follows FROM "follows.parquet";
4
+ COPY LivesIn FROM "lives-in.parquet";
@@ -21,7 +21,6 @@ struct ParquetRelTableScanState final : RelTableScanState {
21
21
  uint64_t currentRowGroup = 0;
22
22
 
23
23
  // Per-scan-state readers for thread safety
24
- std::unique_ptr<processor::ParquetReader> nodeMappingReader;
25
24
  std::unique_ptr<processor::ParquetReader> indicesReader;
26
25
  std::unique_ptr<processor::ParquetReader> indptrReader;
27
26
 
@@ -42,7 +41,7 @@ class ParquetRelTable final : public RelTable {
42
41
  public:
43
42
  ParquetRelTable(catalog::RelGroupCatalogEntry* relGroupEntry, common::table_id_t fromTableID,
44
43
  common::table_id_t toTableID, const StorageManager* storageManager,
45
- MemoryManager* memoryManager, std::string fromNodeTableName);
44
+ MemoryManager* memoryManager);
46
45
 
47
46
  void initScanState(transaction::Transaction* transaction, TableScanState& scanState,
48
47
  bool resetCachedBoundNodeSelVec = true) const override;
@@ -68,24 +67,17 @@ public:
68
67
 
69
68
  private:
70
69
  catalog::RelGroupCatalogEntry* relGroupEntry; // Store reference to table schema
71
- std::string nodeMappingFilePath;
72
70
  std::string indicesFilePath;
73
71
  std::string indptrFilePath;
74
- mutable std::unique_ptr<processor::ParquetReader> nodeMappingReader;
75
72
  mutable std::unique_ptr<processor::ParquetReader> indicesReader;
76
73
  mutable std::unique_ptr<processor::ParquetReader> indptrReader;
77
74
  mutable std::mutex parquetReaderMutex;
78
75
  mutable std::mutex indptrDataMutex;
79
76
  mutable std::vector<common::offset_t> indptrData; // Cached indptr data for CSR format
80
- mutable common::internal_id_map_t<common::offset_t>
81
- nodeMapping; // Maps node IDs to CSR node IDs
82
- mutable std::unordered_map<common::offset_t, common::offset_t>
83
- csrToNodeTableIdMap; // Reverse mapping: CSR node ID to node table ID
84
77
 
85
78
  void initializeParquetReaders(transaction::Transaction* transaction) const;
86
79
  void initializeIndptrReader(transaction::Transaction* transaction) const;
87
80
  void loadIndptrData(transaction::Transaction* transaction) const;
88
- void loadNodeMappingData(transaction::Transaction* transaction) const;
89
81
  bool scanInternalByRowGroups(transaction::Transaction* transaction,
90
82
  ParquetRelTableScanState& parquetRelScanState);
91
83
  bool scanRowGroupForBoundNodes(transaction::Transaction* transaction,
@@ -96,9 +96,8 @@ void StorageManager::createNodeTable(NodeTableCatalogEntry* entry) {
96
96
  void StorageManager::addRelTable(RelGroupCatalogEntry* entry, const RelTableCatalogInfo& info) {
97
97
  if (!entry->getStorage().empty()) {
98
98
  // Create parquet-backed rel table
99
- std::string fromNodeTableName = tableNameCache.at(info.nodePair.srcTableID);
100
99
  tables[info.oid] = std::make_unique<ParquetRelTable>(entry, info.nodePair.srcTableID,
101
- info.nodePair.dstTableID, this, &memoryManager, fromNodeTableName);
100
+ info.nodePair.dstTableID, this, &memoryManager);
102
101
  } else {
103
102
  // Create regular rel table
104
103
  tables[info.oid] = std::make_unique<RelTable>(entry, info.nodePair.srcTableID,
@@ -306,10 +305,8 @@ void StorageManager::deserialize(main::ClientContext* context, const Catalog* ca
306
305
  KU_ASSERT(!tables.contains(info.oid));
307
306
  if (!relGroupEntry->getStorage().empty()) {
308
307
  // Create parquet-backed rel table
309
- std::string fromNodeTableName = tableNameCache.at(info.nodePair.srcTableID);
310
- tables[info.oid] =
311
- std::make_unique<ParquetRelTable>(relGroupEntry, info.nodePair.srcTableID,
312
- info.nodePair.dstTableID, this, &memoryManager, fromNodeTableName);
308
+ tables[info.oid] = std::make_unique<ParquetRelTable>(relGroupEntry,
309
+ info.nodePair.srcTableID, info.nodePair.dstTableID, this, &memoryManager);
313
310
  } else {
314
311
  // Create regular rel table
315
312
  tables[info.oid] = std::make_unique<RelTable>(relGroupEntry,
@@ -42,8 +42,7 @@ void ParquetRelTableScanState::setToTable(const Transaction* transaction, Table*
42
42
  }
43
43
 
44
44
  ParquetRelTable::ParquetRelTable(RelGroupCatalogEntry* relGroupEntry, table_id_t fromTableID,
45
- table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager,
46
- std::string fromNodeTableName)
45
+ table_id_t toTableID, const StorageManager* storageManager, MemoryManager* memoryManager)
47
46
  : RelTable{relGroupEntry, fromTableID, toTableID, storageManager, memoryManager},
48
47
  relGroupEntry{relGroupEntry} {
49
48
  std::string storage = relGroupEntry->getStorage();
@@ -58,7 +57,6 @@ ParquetRelTable::ParquetRelTable(RelGroupCatalogEntry* relGroupEntry, table_id_t
58
57
  // prefix_indices_{relName}.parquet, prefix_indptr_{relName}.parquet,
59
58
  // prefix_metadata_{relName}.parquet
60
59
  std::string prefix = storage;
61
- nodeMappingFilePath = prefix + "_mapping_" + fromNodeTableName + ".parquet";
62
60
  indicesFilePath = prefix + "_indices_" + relName + ".parquet";
63
61
  indptrFilePath = prefix + "_indptr_" + relName + ".parquet";
64
62
  }
@@ -75,12 +73,6 @@ void ParquetRelTable::initScanState(Transaction* transaction, TableScanState& sc
75
73
  auto& parquetRelScanState = static_cast<ParquetRelTableScanState&>(relScanState);
76
74
 
77
75
  // Initialize readers if not already done for this scan state
78
- if (!parquetRelScanState.nodeMappingReader) {
79
- std::vector<bool> columnSkips; // Read all columns
80
- auto context = transaction->getClientContext();
81
- parquetRelScanState.nodeMappingReader =
82
- std::make_unique<ParquetReader>(nodeMappingFilePath, columnSkips, context);
83
- }
84
76
  if (!parquetRelScanState.indicesReader) {
85
77
  std::vector<bool> columnSkips; // Read all columns
86
78
  auto context = transaction->getClientContext();
@@ -94,8 +86,7 @@ void ParquetRelTable::initScanState(Transaction* transaction, TableScanState& sc
94
86
  std::make_unique<ParquetReader>(indptrFilePath, columnSkips, context);
95
87
  }
96
88
 
97
- // Load shared data (node mapping and indptr) - these are thread-safe to read
98
- loadNodeMappingData(transaction);
89
+ // Load shared indptr data - thread-safe to read
99
90
  if (!indptrFilePath.empty()) {
100
91
  loadIndptrData(transaction);
101
92
  }
@@ -127,14 +118,8 @@ void ParquetRelTable::initScanState(Transaction* transaction, TableScanState& sc
127
118
  }
128
119
 
129
120
  void ParquetRelTable::initializeParquetReaders(Transaction* transaction) const {
130
- if (!nodeMappingReader || !indicesReader) {
121
+ if (!indicesReader) {
131
122
  std::lock_guard lock(parquetReaderMutex);
132
- if (!nodeMappingReader) {
133
- std::vector<bool> columnSkips; // Read all columns
134
- auto context = transaction->getClientContext();
135
- nodeMappingReader =
136
- std::make_unique<ParquetReader>(nodeMappingFilePath, columnSkips, context);
137
- }
138
123
  if (!indicesReader) {
139
124
  std::vector<bool> columnSkips; // Read all columns
140
125
  auto context = transaction->getClientContext();
@@ -154,71 +139,6 @@ void ParquetRelTable::initializeIndptrReader(Transaction* transaction) const {
154
139
  }
155
140
  }
156
141
 
157
- void ParquetRelTable::loadNodeMappingData(Transaction* transaction) const {
158
- if (nodeMapping.empty() && !nodeMappingFilePath.empty()) {
159
- std::lock_guard lock(parquetReaderMutex);
160
- if (nodeMapping.empty()) {
161
- // Initialize node mapping reader if not already done
162
- if (!nodeMappingReader) {
163
- std::vector<bool> columnSkips; // Read all columns
164
- auto context = transaction->getClientContext();
165
- nodeMappingReader =
166
- std::make_unique<ParquetReader>(nodeMappingFilePath, columnSkips, context);
167
- }
168
-
169
- // Initialize scan to populate column types
170
- auto context = transaction->getClientContext();
171
- auto vfs = VirtualFileSystem::GetUnsafe(*context);
172
- std::vector<uint64_t> groupsToRead;
173
- for (uint64_t i = 0; i < nodeMappingReader->getNumRowsGroups(); ++i) {
174
- groupsToRead.push_back(i);
175
- }
176
-
177
- ParquetReaderScanState scanState;
178
- nodeMappingReader->initializeScan(scanState, groupsToRead, vfs);
179
-
180
- // Check if the node mapping file has columns
181
- auto numColumns = nodeMappingReader->getNumColumns();
182
- if (numColumns < 2) {
183
- throw RuntimeException("Node mapping parquet file must have at least 2 columns");
184
- }
185
-
186
- // Validate column types for node mapping
187
- const auto& csrNodeIdType = nodeMappingReader->getColumnType(0);
188
- const auto& nodeTableIdType = nodeMappingReader->getColumnType(1);
189
- if (!LogicalTypeUtils::isIntegral(csrNodeIdType.getLogicalTypeID()) ||
190
- !LogicalTypeUtils::isIntegral(nodeTableIdType.getLogicalTypeID())) {
191
- throw RuntimeException(
192
- "Node mapping parquet file columns must be integer types (columns 0 and 1)");
193
- }
194
-
195
- // Read the node mapping data
196
- DataChunk dataChunk(2);
197
-
198
- // Get column types
199
- for (uint32_t i = 0; i < 2 && i < numColumns; ++i) {
200
- const auto& columnTypeRef = nodeMappingReader->getColumnType(i);
201
- auto columnType = columnTypeRef.copy();
202
- auto vector = std::make_shared<ValueVector>(std::move(columnType));
203
- dataChunk.insert(i, vector);
204
- }
205
-
206
- // Read all node mapping values
207
- while (nodeMappingReader->scanInternal(scanState, dataChunk)) {
208
- auto selSize = dataChunk.state->getSelVector().getSelSize();
209
- for (size_t i = 0; i < selSize; ++i) {
210
- auto csrNodeId = dataChunk.getValueVector(0).getValue<common::offset_t>(i);
211
- auto nodeTableId = dataChunk.getValueVector(1).getValue<common::offset_t>(i);
212
- nodeMapping[common::internalID_t(nodeTableId, getFromNodeTableID())] =
213
- csrNodeId;
214
- // Also create reverse mapping for destination node lookups
215
- csrToNodeTableIdMap[csrNodeId] = nodeTableId;
216
- }
217
- }
218
- }
219
- }
220
- }
221
-
222
142
  void ParquetRelTable::loadIndptrData(Transaction* transaction) const {
223
143
  if (indptrData.empty() && !indptrFilePath.empty()) {
224
144
  std::lock_guard lock(indptrDataMutex);
@@ -278,9 +198,7 @@ bool ParquetRelTable::scanInternal(Transaction* transaction, TableScanState& sca
278
198
  // Get the ParquetRelTableScanState
279
199
  auto& parquetRelScanState = static_cast<ParquetRelTableScanState&>(relScanState);
280
200
 
281
- // Readers are now initialized per scan state in initScanState
282
- // Load shared data (node mapping and indptr) - these are thread-safe to read
283
- loadNodeMappingData(transaction);
201
+ // Load shared indptr data - thread-safe to read
284
202
  if (!indptrFilePath.empty()) {
285
203
  loadIndptrData(transaction);
286
204
  }
@@ -6,14 +6,11 @@
6
6
 
7
7
  -LOG MatchUserLivesInCity
8
8
  -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN u.name, u.age, c.name;
9
- ---- 7
10
- Adam|30|Guelph
11
- Adam|30|Guelph
9
+ ---- 4
10
+ Adam|30|Waterloo
12
11
  Karissa|40|Waterloo
13
- Noura|25|Kitchener
14
- Noura|25|Waterloo
15
- Noura|25|Waterloo
16
12
  Zhang|50|Kitchener
13
+ Noura|25|Guelph
17
14
 
18
15
  -LOG MatchSingleNodeLabel
19
16
  -STATEMENT MATCH (a:user) RETURN a.name, a.age;
@@ -32,12 +29,49 @@ Guelph|75000
32
29
 
33
30
  -LOG MatchFollowsRel
34
31
  -STATEMENT MATCH (a:user)-[e:follows]->(b:user) RETURN a.name, b.name, e.since;
35
- ---- 8
36
- Adam|Zhang|2022
37
- Karissa|Noura|2020
38
- Karissa|Zhang|2020
39
- Noura|Karissa|2020
40
- Noura|Zhang|2021
41
- Zhang|Adam|2022
42
- Zhang|Karissa|2020
43
- Zhang|Noura|2021
32
+ ---- 4
33
+ Adam|Karissa|2020
34
+ Adam|Zhang|2020
35
+ Karissa|Zhang|2021
36
+ Zhang|Noura|2022
37
+
38
+ -LOG MatchLivesInWithCityPopulation
39
+ -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN u.name, c.name, c.population ORDER BY c.population DESC;
40
+ ---- 4
41
+ Zhang|Kitchener|200000
42
+ Adam|Waterloo|150000
43
+ Karissa|Waterloo|150000
44
+ Noura|Guelph|75000
45
+
46
+ -LOG MatchLivesInFilterByCity
47
+ -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) WHERE c.name = 'Waterloo' RETURN u.name, u.age;
48
+ ---- 2
49
+ Adam|30
50
+ Karissa|40
51
+
52
+ -LOG MatchLivesInFilterByCityPopulation
53
+ -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) WHERE c.population > 100000 RETURN u.name, c.name ORDER BY u.name;
54
+ ---- 3
55
+ Adam|Waterloo
56
+ Karissa|Waterloo
57
+ Zhang|Kitchener
58
+
59
+ -LOG CountUsersPerCity
60
+ -STATEMENT MATCH (u:user)-[l:livesin]->(c:city) RETURN c.name, COUNT(*) AS num_users ORDER BY num_users DESC;
61
+ ---- 3
62
+ Waterloo|2
63
+ Guelph|1
64
+ Kitchener|1
65
+
66
+ -LOG MatchFollowsWithDestinationAge
67
+ -STATEMENT MATCH (a:user)-[e:follows]->(b:user) WHERE b.age > 30 RETURN a.name, b.name, b.age ORDER BY b.age DESC;
68
+ ---- 3
69
+ Adam|Zhang|50
70
+ Karissa|Zhang|50
71
+ Adam|Karissa|40
72
+
73
+ -LOG MatchFollowsFilterBySourceAndDest
74
+ -STATEMENT MATCH (a:user)-[e:follows]->(b:user) WHERE a.age < 40 AND b.age >= 40 RETURN a.name, b.name;
75
+ ---- 2
76
+ Adam|Karissa
77
+ Adam|Zhang
@@ -32,6 +32,7 @@ void TestHelper::executeScript(const std::string& cypherScript, Connection& conn
32
32
  std::cout << "cypherScript: " << cypherScript << " doesn't exist. Skipping..." << std::endl;
33
33
  return;
34
34
  }
35
+ auto cypherDir = std::filesystem::path(cypherScript).parent_path();
35
36
  std::ifstream file(cypherScript);
36
37
  if (!file.is_open()) {
37
38
  throw Exception(stringFormat("Error opening file: {}, errno: {}.", cypherScript, errno));
@@ -68,7 +69,14 @@ void TestHelper::executeScript(const std::string& cypherScript, Connection& conn
68
69
  index = end + 1;
69
70
  }
70
71
  for (auto& csvFilePath : csvFilePaths) {
71
- auto fullPath = appendLbugRootPath(csvFilePath);
72
+ std::string fullPath = csvFilePath;
73
+ if (std::filesystem::path(csvFilePath).is_relative()) {
74
+ if (std::filesystem::path(csvFilePath).parent_path().empty()) {
75
+ fullPath = (cypherDir / csvFilePath).string();
76
+ } else {
77
+ fullPath = appendLbugRootPath(csvFilePath);
78
+ }
79
+ }
72
80
  line.replace(line.find(csvFilePath), csvFilePath.length(), fullPath);
73
81
  }
74
82
  // Also handle storage = 'path' for parquet tables
@@ -271,6 +271,22 @@ void completion(const char* buffer, linenoiseCompletions* lc) {
271
271
  return;
272
272
  }
273
273
 
274
+ // RETURN *; completion for MATCH and CALL queries.
275
+ // Trigger when buffer ends with ')' or ') ' after a MATCH pattern or CALL function.
276
+ if (regex_search(buf, std::regex(R"(\)\s*$)"))) {
277
+ // Check for MATCH pattern: MATCH(var:Table) or MATCH (var:Table)
278
+ bool isMatchQuery =
279
+ regex_search(buf, std::regex(R"(^\s*MATCH\s*\()", std::regex_constants::icase));
280
+ // Check for CALL function: CALL func_name(...) or CALL func_name (...)
281
+ bool isCallFunction =
282
+ regex_search(buf, std::regex(R"(^\s*CALL\s+\w+\s*\()", std::regex_constants::icase));
283
+ if (isMatchQuery || isCallFunction) {
284
+ std::string suffix = buf.back() == ')' ? " RETURN *;" : "RETURN *;";
285
+ linenoiseAddCompletion(lc, (buf + suffix).c_str());
286
+ return;
287
+ }
288
+ }
289
+
274
290
  // Node table name completion. Match patterns that include an open bracket `(` with no closing
275
291
  // bracket `)`, and a colon `:` sometime after the open bracket.
276
292
  if (regex_search(buf, std::regex("^[^]*\\([^\\)]*:[^\\)]*$"))) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lbug",
3
- "version": "0.12.3-dev.17",
3
+ "version": "0.12.3-dev.19",
4
4
  "description": "An in-process property graph database management system built for query speed and scalability.",
5
5
  "main": "index.js",
6
6
  "module": "./index.mjs",
Binary file
Binary file
Binary file
Binary file