gitnexus 1.6.8-rc.39 → 1.6.8-rc.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,19 +11,36 @@
11
11
  * - Double quotes within fields are escaped by doubling them ("")
12
12
  * - All fields are consistently quoted for safety with code content
13
13
  */
14
+ import type { GraphRelationship } from '../../_shared/index.js';
14
15
  import { KnowledgeGraph } from '../graph/types.js';
15
16
  import { NodeTableName } from './schema.js';
16
17
  export declare const sanitizeUTF8: (str: string) => string;
17
18
  export declare const escapeCSVField: (value: string | number | undefined | null) => string;
18
19
  export declare const escapeCSVNumber: (value: number | undefined | null, defaultValue?: number) => string;
19
20
  export declare const isBinaryContent: (content: string) => boolean;
21
+ /** Canonical relationship CSV header — shared by the emit pass and the
22
+ * `splitRelCsvByLabelPair` differential oracle. */
23
+ export declare const REL_CSV_HEADER = "from,to,type,confidence,reason,step";
24
+ /** Build the escaped CSV row (no trailing newline) for one relationship.
25
+ * Single source of the relationship row bytes — used by the emit pass and by
26
+ * the byte-identity differential test that feeds the legacy split oracle. */
27
+ export declare const buildRelRow: (rel: GraphRelationship) => string;
20
28
  export interface StreamedCSVResult {
21
29
  nodeFiles: Map<NodeTableName, {
22
30
  csvPath: string;
23
31
  rows: number;
24
32
  }>;
25
- relCsvPath: string;
26
- relRows: number;
33
+ /** pairKey (`From|To`) → per-FROM→TO-label-pair CSV file. */
34
+ relsByPair: Map<string, {
35
+ csvPath: string;
36
+ rows: number;
37
+ }>;
38
+ /** Header line shared by every per-pair file. */
39
+ relHeader: string;
40
+ /** Edges skipped because an endpoint label is not a valid node table. */
41
+ skippedRels: number;
42
+ /** Edges routed to a per-pair file. */
43
+ totalValidRels: number;
27
44
  }
28
45
  /**
29
46
  * Stream all CSV data directly to disk files.
@@ -14,6 +14,8 @@
14
14
  import fs from 'fs/promises';
15
15
  import { createWriteStream } from 'fs';
16
16
  import path from 'path';
17
+ import { NODE_TABLES } from './schema.js';
18
+ import { RelPairRouter } from './rel-pair-routing.js';
17
19
  import { parseTruthyEnv } from '../ingestion/utils/env.js';
18
20
  /**
19
21
  * Deterministic output ordering — optional (out-of-core / windowed-resolve
@@ -158,13 +160,20 @@ class BufferedCSVWriter {
158
160
  this.ws.setMaxListeners(50);
159
161
  this.buffer.push(header);
160
162
  }
163
+ /**
164
+ * Buffer a row. Returns a promise ONLY when the buffer crossed FLUSH_EVERY
165
+ * and a disk write was issued; otherwise returns `undefined` so the caller
166
+ * can skip awaiting (#2203 U3) — avoiding a microtask tick on every buffered
167
+ * row (millions at scale). The flush promise still resolves on drain, so
168
+ * backpressure is preserved on the rows that actually write.
169
+ */
161
170
  addRow(row) {
162
171
  this.buffer.push(row);
163
172
  this.rows++;
164
173
  if (this.buffer.length >= FLUSH_EVERY) {
165
174
  return this.flush();
166
175
  }
167
- return Promise.resolve();
176
+ return undefined;
168
177
  }
169
178
  flush() {
170
179
  if (this.buffer.length === 0)
@@ -194,6 +203,23 @@ class BufferedCSVWriter {
194
203
  });
195
204
  }
196
205
  }
206
+ // ============================================================================
207
+ // STREAMING CSV GENERATION — SINGLE PASS
208
+ // ============================================================================
209
+ /** Canonical relationship CSV header — shared by the emit pass and the
210
+ * `splitRelCsvByLabelPair` differential oracle. */
211
+ export const REL_CSV_HEADER = 'from,to,type,confidence,reason,step';
212
+ /** Build the escaped CSV row (no trailing newline) for one relationship.
213
+ * Single source of the relationship row bytes — used by the emit pass and by
214
+ * the byte-identity differential test that feeds the legacy split oracle. */
215
+ export const buildRelRow = (rel) => [
216
+ escapeCSVField(rel.sourceId),
217
+ escapeCSVField(rel.targetId),
218
+ escapeCSVField(rel.type),
219
+ escapeCSVNumber(rel.confidence, 1.0),
220
+ escapeCSVField(rel.reason),
221
+ escapeCSVNumber(rel.step, 0),
222
+ ].join(',');
197
223
  /**
198
224
  * Stream all CSV data directly to disk files.
199
225
  * Iterates graph nodes exactly ONCE — routes each node to the right writer.
@@ -213,191 +239,131 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
213
239
  // MaxListenersExceededWarning (restored after all streams finish).
214
240
  const prevMax = process.getMaxListeners();
215
241
  process.setMaxListeners(prevMax + 40);
216
- const contentCache = new FileContentCache(repoPath);
217
- // Create writers for every node type up-front
218
- const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content');
219
- const folderWriter = new BufferedCSVWriter(path.join(csvDir, 'folder.csv'), 'id,name,filePath');
220
- const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,description';
221
- const functionWriter = new BufferedCSVWriter(path.join(csvDir, 'function.csv'), codeElementHeader);
222
- const classWriter = new BufferedCSVWriter(path.join(csvDir, 'class.csv'), codeElementHeader);
223
- const interfaceWriter = new BufferedCSVWriter(path.join(csvDir, 'interface.csv'), codeElementHeader);
224
- const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,description,parameterCount,returnType';
225
- const methodWriter = new BufferedCSVWriter(path.join(csvDir, 'method.csv'), methodHeader);
226
- const codeElemWriter = new BufferedCSVWriter(path.join(csvDir, 'codeelement.csv'), codeElementHeader);
227
- const communityWriter = new BufferedCSVWriter(path.join(csvDir, 'community.csv'), 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
228
- const processWriter = new BufferedCSVWriter(path.join(csvDir, 'process.csv'), 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');
229
- // Section nodes have an extra 'level' column
230
- const sectionWriter = new BufferedCSVWriter(path.join(csvDir, 'section.csv'), 'id,name,filePath,startLine,endLine,level,content,description');
231
- // Route nodes for API endpoint mapping
232
- const routeWriter = new BufferedCSVWriter(path.join(csvDir, 'route.csv'), 'id,name,filePath,responseKeys,errorKeys,middleware');
233
- // Tool nodes for MCP tool definitions
234
- const toolWriter = new BufferedCSVWriter(path.join(csvDir, 'tool.csv'), 'id,name,filePath,description');
235
- // BasicBlock nodes — taint/PDG substrate (issue #2080). No `name` column;
236
- // blocks are identified by id + source span. Emitted by no phase yet.
237
- const basicBlockWriter = new BufferedCSVWriter(path.join(csvDir, 'basicblock.csv'), 'id,filePath,startLine,endLine,text');
238
- // Multi-language node types share the same CSV shape (no isExported column)
239
- const multiLangHeader = 'id,name,filePath,startLine,endLine,content,description';
240
- const MULTI_LANG_TYPES = [
241
- 'Struct',
242
- 'Enum',
243
- 'Macro',
244
- 'Typedef',
245
- 'Union',
246
- 'Namespace',
247
- 'Trait',
248
- 'Impl',
249
- 'TypeAlias',
250
- 'Const',
251
- 'Static',
252
- 'Variable',
253
- 'Property',
254
- 'Record',
255
- 'Delegate',
256
- 'Annotation',
257
- 'Constructor',
258
- 'Template',
259
- 'Module',
260
- ];
261
- const propertyHeader = 'id,name,filePath,startLine,endLine,content,description,declaredType';
262
- const multiLangWriters = new Map();
263
- for (const t of MULTI_LANG_TYPES) {
264
- multiLangWriters.set(t, new BufferedCSVWriter(path.join(csvDir, `${t.toLowerCase()}.csv`), t === 'Property' ? propertyHeader : multiLangHeader));
265
- }
266
- const codeWriterMap = {
267
- Function: functionWriter,
268
- Class: classWriter,
269
- Interface: interfaceWriter,
270
- CodeElement: codeElemWriter,
271
- };
272
- // Deduplicate all node types — the pipeline can produce duplicate IDs across
273
- // all symbol types (Class, Method, Function, etc.), not just File nodes.
274
- // A single Set covering every label prevents PK violations on COPY.
275
- const seenNodeIds = new Set();
276
- // --- SINGLE PASS over all nodes ---
277
- for (const node of orderedNodes(graph, sortOutput)) {
278
- if (seenNodeIds.has(node.id))
279
- continue;
280
- seenNodeIds.add(node.id);
281
- switch (node.label) {
282
- case 'File': {
283
- const content = await extractContent(node, contentCache);
284
- await fileWriter.addRow([
285
- escapeCSVField(node.id),
286
- escapeCSVField(node.properties.name || ''),
287
- escapeCSVField(node.properties.filePath || ''),
288
- escapeCSVField(content),
289
- ].join(','));
290
- break;
291
- }
292
- case 'Folder':
293
- await folderWriter.addRow([
294
- escapeCSVField(node.id),
295
- escapeCSVField(node.properties.name || ''),
296
- escapeCSVField(node.properties.filePath || ''),
297
- ].join(','));
298
- break;
299
- case 'Community': {
300
- const keywords = node.properties.keywords || [];
301
- const keywordsStr = `[${keywords.map((k) => `'${k.replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/,/g, '\\,')}'`).join(',')}]`;
302
- await communityWriter.addRow([
303
- escapeCSVField(node.id),
304
- escapeCSVField(node.properties.name || ''),
305
- escapeCSVField(node.properties.heuristicLabel || ''),
306
- keywordsStr,
307
- escapeCSVField(node.properties.description || ''),
308
- escapeCSVField(node.properties.enrichedBy || 'heuristic'),
309
- escapeCSVNumber(node.properties.cohesion, 0),
310
- escapeCSVNumber(node.properties.symbolCount, 0),
311
- ].join(','));
312
- break;
313
- }
314
- case 'Process': {
315
- const communities = node.properties.communities || [];
316
- const communitiesStr = `[${communities.map((c) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
317
- await processWriter.addRow([
318
- escapeCSVField(node.id),
319
- escapeCSVField(node.properties.name || ''),
320
- escapeCSVField(node.properties.heuristicLabel || ''),
321
- escapeCSVField(node.properties.processType || ''),
322
- escapeCSVNumber(node.properties.stepCount, 0),
323
- escapeCSVField(communitiesStr),
324
- escapeCSVField(node.properties.entryPointId || ''),
325
- escapeCSVField(node.properties.terminalId || ''),
326
- ].join(','));
327
- break;
328
- }
329
- case 'Method': {
330
- const content = await extractContent(node, contentCache);
331
- await methodWriter.addRow([
332
- escapeCSVField(node.id),
333
- escapeCSVField(node.properties.name || ''),
334
- escapeCSVField(node.properties.filePath || ''),
335
- escapeCSVNumber(node.properties.startLine, -1),
336
- escapeCSVNumber(node.properties.endLine, -1),
337
- node.properties.isExported ? 'true' : 'false',
338
- escapeCSVField(content),
339
- escapeCSVField(node.properties.description || ''),
340
- escapeCSVNumber(node.properties.parameterCount, 0),
341
- escapeCSVField(node.properties.returnType || ''),
342
- ].join(','));
343
- break;
344
- }
345
- case 'Section': {
346
- const content = await extractContent(node, contentCache);
347
- await sectionWriter.addRow([
348
- escapeCSVField(node.id),
349
- escapeCSVField(node.properties.name || ''),
350
- escapeCSVField(node.properties.filePath || ''),
351
- escapeCSVNumber(node.properties.startLine, -1),
352
- escapeCSVNumber(node.properties.endLine, -1),
353
- escapeCSVNumber(node.properties.level, 1),
354
- escapeCSVField(content),
355
- escapeCSVField(node.properties.description || ''),
356
- ].join(','));
357
- break;
358
- }
359
- case 'Route': {
360
- const responseKeys = node.properties.responseKeys || [];
361
- // LadybugDB array literal inside a quoted CSV field: escapeCSVField wraps in "..."
362
- // and the array uses single-quoted elements
363
- const keysStr = `[${responseKeys.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
364
- const errorKeys = node.properties.errorKeys || [];
365
- const errorKeysStr = `[${errorKeys.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
366
- const middleware = node.properties.middleware || [];
367
- const middlewareStr = `[${middleware.map((m) => `'${m.replace(/'/g, "''")}'`).join(',')}]`;
368
- await routeWriter.addRow([
369
- escapeCSVField(node.id),
370
- escapeCSVField(node.properties.name || ''),
371
- escapeCSVField(node.properties.filePath || ''),
372
- escapeCSVField(keysStr),
373
- escapeCSVField(errorKeysStr),
374
- escapeCSVField(middlewareStr),
375
- ].join(','));
376
- break;
377
- }
378
- case 'Tool':
379
- await toolWriter.addRow([
380
- escapeCSVField(node.id),
381
- escapeCSVField(node.properties.name || ''),
382
- escapeCSVField(node.properties.filePath || ''),
383
- escapeCSVField(node.properties.description || ''),
384
- ].join(','));
385
- break;
386
- case 'BasicBlock':
387
- await basicBlockWriter.addRow([
388
- escapeCSVField(node.id),
389
- escapeCSVField(node.properties.filePath || ''),
390
- escapeCSVNumber(node.properties.startLine, -1),
391
- escapeCSVNumber(node.properties.endLine, -1),
392
- escapeCSVField(node.properties.text || ''),
393
- ].join(','));
394
- break;
395
- default: {
396
- // Code element nodes (Function, Class, Interface, CodeElement)
397
- const writer = codeWriterMap[node.label];
398
- if (writer) {
242
+ // try/finally so the listener bump is ALWAYS restored — including the
243
+ // rel-routing throw path (#2203 U2) and any node-writer finish() rejection,
244
+ // not just the success path (avoids leaking +40 listeners across failed runs
245
+ // in long-lived hosts / the test suite).
246
+ try {
247
+ const contentCache = new FileContentCache(repoPath);
248
+ // Create writers for every node type up-front
249
+ const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content');
250
+ const folderWriter = new BufferedCSVWriter(path.join(csvDir, 'folder.csv'), 'id,name,filePath');
251
+ const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,description';
252
+ const functionWriter = new BufferedCSVWriter(path.join(csvDir, 'function.csv'), codeElementHeader);
253
+ const classWriter = new BufferedCSVWriter(path.join(csvDir, 'class.csv'), codeElementHeader);
254
+ const interfaceWriter = new BufferedCSVWriter(path.join(csvDir, 'interface.csv'), codeElementHeader);
255
+ const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,description,parameterCount,returnType';
256
+ const methodWriter = new BufferedCSVWriter(path.join(csvDir, 'method.csv'), methodHeader);
257
+ const codeElemWriter = new BufferedCSVWriter(path.join(csvDir, 'codeelement.csv'), codeElementHeader);
258
+ const communityWriter = new BufferedCSVWriter(path.join(csvDir, 'community.csv'), 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
259
+ const processWriter = new BufferedCSVWriter(path.join(csvDir, 'process.csv'), 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');
260
+ // Section nodes have an extra 'level' column
261
+ const sectionWriter = new BufferedCSVWriter(path.join(csvDir, 'section.csv'), 'id,name,filePath,startLine,endLine,level,content,description');
262
+ // Route nodes for API endpoint mapping
263
+ const routeWriter = new BufferedCSVWriter(path.join(csvDir, 'route.csv'), 'id,name,filePath,responseKeys,errorKeys,middleware');
264
+ // Tool nodes for MCP tool definitions
265
+ const toolWriter = new BufferedCSVWriter(path.join(csvDir, 'tool.csv'), 'id,name,filePath,description');
266
+ // BasicBlock nodes — taint/PDG substrate (issue #2080). No `name` column;
267
+ // blocks are identified by id + source span. Emitted by no phase yet.
268
+ const basicBlockWriter = new BufferedCSVWriter(path.join(csvDir, 'basicblock.csv'), 'id,filePath,startLine,endLine,text');
269
+ // Multi-language node types share the same CSV shape (no isExported column)
270
+ const multiLangHeader = 'id,name,filePath,startLine,endLine,content,description';
271
+ const MULTI_LANG_TYPES = [
272
+ 'Struct',
273
+ 'Enum',
274
+ 'Macro',
275
+ 'Typedef',
276
+ 'Union',
277
+ 'Namespace',
278
+ 'Trait',
279
+ 'Impl',
280
+ 'TypeAlias',
281
+ 'Const',
282
+ 'Static',
283
+ 'Variable',
284
+ 'Property',
285
+ 'Record',
286
+ 'Delegate',
287
+ 'Annotation',
288
+ 'Constructor',
289
+ 'Template',
290
+ 'Module',
291
+ ];
292
+ const propertyHeader = 'id,name,filePath,startLine,endLine,content,description,declaredType';
293
+ const multiLangWriters = new Map();
294
+ for (const t of MULTI_LANG_TYPES) {
295
+ multiLangWriters.set(t, new BufferedCSVWriter(path.join(csvDir, `${t.toLowerCase()}.csv`), t === 'Property' ? propertyHeader : multiLangHeader));
296
+ }
297
+ const codeWriterMap = {
298
+ Function: functionWriter,
299
+ Class: classWriter,
300
+ Interface: interfaceWriter,
301
+ CodeElement: codeElemWriter,
302
+ };
303
+ // Deduplicate all node types — the pipeline can produce duplicate IDs across
304
+ // all symbol types (Class, Method, Function, etc.), not just File nodes.
305
+ // A single Set covering every label prevents PK violations on COPY.
306
+ const seenNodeIds = new Set();
307
+ // --- SINGLE PASS over all nodes ---
308
+ for (const node of orderedNodes(graph, sortOutput)) {
309
+ if (seenNodeIds.has(node.id))
310
+ continue;
311
+ seenNodeIds.add(node.id);
312
+ // addRow returns a promise only when it flushes; awaiting it once after the
313
+ // switch (instead of `await`-ing every addRow) skips a per-row microtask
314
+ // tick on the ~FLUSH_EVERY-1 buffered rows between flushes (#2203 U3).
315
+ let pending;
316
+ switch (node.label) {
317
+ case 'File': {
399
318
  const content = await extractContent(node, contentCache);
400
- await writer.addRow([
319
+ pending = fileWriter.addRow([
320
+ escapeCSVField(node.id),
321
+ escapeCSVField(node.properties.name || ''),
322
+ escapeCSVField(node.properties.filePath || ''),
323
+ escapeCSVField(content),
324
+ ].join(','));
325
+ break;
326
+ }
327
+ case 'Folder':
328
+ pending = folderWriter.addRow([
329
+ escapeCSVField(node.id),
330
+ escapeCSVField(node.properties.name || ''),
331
+ escapeCSVField(node.properties.filePath || ''),
332
+ ].join(','));
333
+ break;
334
+ case 'Community': {
335
+ const keywords = node.properties.keywords || [];
336
+ const keywordsStr = `[${keywords.map((k) => `'${k.replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/,/g, '\\,')}'`).join(',')}]`;
337
+ pending = communityWriter.addRow([
338
+ escapeCSVField(node.id),
339
+ escapeCSVField(node.properties.name || ''),
340
+ escapeCSVField(node.properties.heuristicLabel || ''),
341
+ keywordsStr,
342
+ escapeCSVField(node.properties.description || ''),
343
+ escapeCSVField(node.properties.enrichedBy || 'heuristic'),
344
+ escapeCSVNumber(node.properties.cohesion, 0),
345
+ escapeCSVNumber(node.properties.symbolCount, 0),
346
+ ].join(','));
347
+ break;
348
+ }
349
+ case 'Process': {
350
+ const communities = node.properties.communities || [];
351
+ const communitiesStr = `[${communities.map((c) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
352
+ pending = processWriter.addRow([
353
+ escapeCSVField(node.id),
354
+ escapeCSVField(node.properties.name || ''),
355
+ escapeCSVField(node.properties.heuristicLabel || ''),
356
+ escapeCSVField(node.properties.processType || ''),
357
+ escapeCSVNumber(node.properties.stepCount, 0),
358
+ escapeCSVField(communitiesStr),
359
+ escapeCSVField(node.properties.entryPointId || ''),
360
+ escapeCSVField(node.properties.terminalId || ''),
361
+ ].join(','));
362
+ break;
363
+ }
364
+ case 'Method': {
365
+ const content = await extractContent(node, contentCache);
366
+ pending = methodWriter.addRow([
401
367
  escapeCSVField(node.id),
402
368
  escapeCSVField(node.properties.name || ''),
403
369
  escapeCSVField(node.properties.filePath || ''),
@@ -406,90 +372,186 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
406
372
  node.properties.isExported ? 'true' : 'false',
407
373
  escapeCSVField(content),
408
374
  escapeCSVField(node.properties.description || ''),
375
+ escapeCSVNumber(node.properties.parameterCount, 0),
376
+ escapeCSVField(node.properties.returnType || ''),
377
+ ].join(','));
378
+ break;
379
+ }
380
+ case 'Section': {
381
+ const content = await extractContent(node, contentCache);
382
+ pending = sectionWriter.addRow([
383
+ escapeCSVField(node.id),
384
+ escapeCSVField(node.properties.name || ''),
385
+ escapeCSVField(node.properties.filePath || ''),
386
+ escapeCSVNumber(node.properties.startLine, -1),
387
+ escapeCSVNumber(node.properties.endLine, -1),
388
+ escapeCSVNumber(node.properties.level, 1),
389
+ escapeCSVField(content),
390
+ escapeCSVField(node.properties.description || ''),
409
391
  ].join(','));
392
+ break;
410
393
  }
411
- else {
412
- // Multi-language node types (Struct, Impl, Trait, Macro, etc.)
413
- const mlWriter = multiLangWriters.get(node.label);
414
- if (mlWriter) {
394
+ case 'Route': {
395
+ const responseKeys = node.properties.responseKeys || [];
396
+ // LadybugDB array literal inside a quoted CSV field: escapeCSVField wraps in "..."
397
+ // and the array uses single-quoted elements
398
+ const keysStr = `[${responseKeys.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
399
+ const errorKeys = node.properties.errorKeys || [];
400
+ const errorKeysStr = `[${errorKeys.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
401
+ const middleware = node.properties.middleware || [];
402
+ const middlewareStr = `[${middleware.map((m) => `'${m.replace(/'/g, "''")}'`).join(',')}]`;
403
+ pending = routeWriter.addRow([
404
+ escapeCSVField(node.id),
405
+ escapeCSVField(node.properties.name || ''),
406
+ escapeCSVField(node.properties.filePath || ''),
407
+ escapeCSVField(keysStr),
408
+ escapeCSVField(errorKeysStr),
409
+ escapeCSVField(middlewareStr),
410
+ ].join(','));
411
+ break;
412
+ }
413
+ case 'Tool':
414
+ pending = toolWriter.addRow([
415
+ escapeCSVField(node.id),
416
+ escapeCSVField(node.properties.name || ''),
417
+ escapeCSVField(node.properties.filePath || ''),
418
+ escapeCSVField(node.properties.description || ''),
419
+ ].join(','));
420
+ break;
421
+ case 'BasicBlock':
422
+ pending = basicBlockWriter.addRow([
423
+ escapeCSVField(node.id),
424
+ escapeCSVField(node.properties.filePath || ''),
425
+ escapeCSVNumber(node.properties.startLine, -1),
426
+ escapeCSVNumber(node.properties.endLine, -1),
427
+ escapeCSVField(node.properties.text || ''),
428
+ ].join(','));
429
+ break;
430
+ default: {
431
+ // Code element nodes (Function, Class, Interface, CodeElement)
432
+ const writer = codeWriterMap[node.label];
433
+ if (writer) {
415
434
  const content = await extractContent(node, contentCache);
416
- await mlWriter.addRow([
435
+ pending = writer.addRow([
417
436
  escapeCSVField(node.id),
418
437
  escapeCSVField(node.properties.name || ''),
419
438
  escapeCSVField(node.properties.filePath || ''),
420
439
  escapeCSVNumber(node.properties.startLine, -1),
421
440
  escapeCSVNumber(node.properties.endLine, -1),
441
+ node.properties.isExported ? 'true' : 'false',
422
442
  escapeCSVField(content),
423
443
  escapeCSVField(node.properties.description || ''),
424
- ...(node.label === 'Property'
425
- ? [escapeCSVField(node.properties.declaredType || '')]
426
- : []),
427
444
  ].join(','));
428
445
  }
446
+ else {
447
+ // Multi-language node types (Struct, Impl, Trait, Macro, etc.)
448
+ const mlWriter = multiLangWriters.get(node.label);
449
+ if (mlWriter) {
450
+ const content = await extractContent(node, contentCache);
451
+ pending = mlWriter.addRow([
452
+ escapeCSVField(node.id),
453
+ escapeCSVField(node.properties.name || ''),
454
+ escapeCSVField(node.properties.filePath || ''),
455
+ escapeCSVNumber(node.properties.startLine, -1),
456
+ escapeCSVNumber(node.properties.endLine, -1),
457
+ escapeCSVField(content),
458
+ escapeCSVField(node.properties.description || ''),
459
+ ...(node.label === 'Property'
460
+ ? [escapeCSVField(node.properties.declaredType || '')]
461
+ : []),
462
+ ].join(','));
463
+ }
464
+ else {
465
+ // Unknown label: not in codeWriterMap or multiLangWriters, so there
466
+ // is no CSV table for it and it is intentionally NOT persisted —
467
+ // `pending` stays undefined, so the loop awaits nothing. Made
468
+ // explicit so a future node type isn't silently dropped here: wire
469
+ // it into one of the writer maps above (or this branch).
470
+ }
471
+ }
472
+ break;
429
473
  }
430
- break;
431
474
  }
475
+ if (pending)
476
+ await pending;
432
477
  }
433
- }
434
- // Finish all node writers
435
- const allWriters = [
436
- fileWriter,
437
- folderWriter,
438
- functionWriter,
439
- classWriter,
440
- interfaceWriter,
441
- methodWriter,
442
- codeElemWriter,
443
- communityWriter,
444
- processWriter,
445
- sectionWriter,
446
- routeWriter,
447
- toolWriter,
448
- basicBlockWriter,
449
- ...multiLangWriters.values(),
450
- ];
451
- await Promise.all(allWriters.map((w) => w.finish()));
452
- // --- Stream relationship CSV ---
453
- const relCsvPath = path.join(csvDir, 'relations.csv');
454
- const relWriter = new BufferedCSVWriter(relCsvPath, 'from,to,type,confidence,reason,step');
455
- for (const rel of orderedRelationships(graph, sortOutput)) {
456
- await relWriter.addRow([
457
- escapeCSVField(rel.sourceId),
458
- escapeCSVField(rel.targetId),
459
- escapeCSVField(rel.type),
460
- escapeCSVNumber(rel.confidence, 1.0),
461
- escapeCSVField(rel.reason),
462
- escapeCSVNumber(rel.step, 0),
463
- ].join(','));
464
- }
465
- await relWriter.finish();
466
- // Build result map — only include tables that have rows
467
- const nodeFiles = new Map();
468
- const tableMap = [
469
- ['File', fileWriter],
470
- ['Folder', folderWriter],
471
- ['Function', functionWriter],
472
- ['Class', classWriter],
473
- ['Interface', interfaceWriter],
474
- ['Method', methodWriter],
475
- ['CodeElement', codeElemWriter],
476
- ['Community', communityWriter],
477
- ['Process', processWriter],
478
- ['Section', sectionWriter],
479
- ['Route', routeWriter],
480
- ['Tool', toolWriter],
481
- ['BasicBlock', basicBlockWriter],
482
- ...Array.from(multiLangWriters.entries()).map(([name, w]) => [name, w]),
483
- ];
484
- for (const [name, writer] of tableMap) {
485
- if (writer.rows > 0) {
486
- nodeFiles.set(name, {
487
- csvPath: path.join(csvDir, `${name.toLowerCase()}.csv`),
488
- rows: writer.rows,
489
- });
478
+ // Finish all node writers
479
+ const allWriters = [
480
+ fileWriter,
481
+ folderWriter,
482
+ functionWriter,
483
+ classWriter,
484
+ interfaceWriter,
485
+ methodWriter,
486
+ codeElemWriter,
487
+ communityWriter,
488
+ processWriter,
489
+ sectionWriter,
490
+ routeWriter,
491
+ toolWriter,
492
+ basicBlockWriter,
493
+ ...multiLangWriters.values(),
494
+ ];
495
+ await Promise.all(allWriters.map((w) => w.finish()));
496
+ // --- Stream relationships directly to per-FROM→TO-label-pair files ---
497
+ // (#2203 U2) Route every edge to its pair file in this single pass. The old
498
+ // monolithic relations.csv — and its line-by-line re-read + per-edge regex
499
+ // re-split in loadGraphToLbug — are gone, so the ~1M-edge set is written and
500
+ // read once instead of twice. The router applies the SAME label-derivation +
501
+ // validTables filter as the legacy splitRelCsvByLabelPair, so the per-pair
502
+ // files are byte-identical (asserted by the differential test).
503
+ const relRouter = new RelPairRouter(csvDir, REL_CSV_HEADER, new Set(NODE_TABLES));
504
+ try {
505
+ for (const rel of orderedRelationships(graph, sortOutput)) {
506
+ const pending = relRouter.route(rel.sourceId, rel.targetId, buildRelRow(rel));
507
+ if (pending)
508
+ await pending;
509
+ }
510
+ await relRouter.close();
511
+ }
512
+ catch (err) {
513
+ relRouter.destroy();
514
+ // Rethrow the real stream error (EMFILE / disk-full) rather than the generic
515
+ // AbortError a pending drain-await rejects with — mirrors the retained
516
+ // splitRelCsvByLabelPair's `throw streamError ?? err`.
517
+ throw relRouter.lastError ?? err;
490
518
  }
519
+ // Build result map — only include tables that have rows
520
+ const nodeFiles = new Map();
521
+ const tableMap = [
522
+ ['File', fileWriter],
523
+ ['Folder', folderWriter],
524
+ ['Function', functionWriter],
525
+ ['Class', classWriter],
526
+ ['Interface', interfaceWriter],
527
+ ['Method', methodWriter],
528
+ ['CodeElement', codeElemWriter],
529
+ ['Community', communityWriter],
530
+ ['Process', processWriter],
531
+ ['Section', sectionWriter],
532
+ ['Route', routeWriter],
533
+ ['Tool', toolWriter],
534
+ ['BasicBlock', basicBlockWriter],
535
+ ...Array.from(multiLangWriters.entries()).map(([name, w]) => [name, w]),
536
+ ];
537
+ for (const [name, writer] of tableMap) {
538
+ if (writer.rows > 0) {
539
+ nodeFiles.set(name, {
540
+ csvPath: path.join(csvDir, `${name.toLowerCase()}.csv`),
541
+ rows: writer.rows,
542
+ });
543
+ }
544
+ }
545
+ return {
546
+ nodeFiles,
547
+ relsByPair: relRouter.byPair,
548
+ relHeader: REL_CSV_HEADER,
549
+ skippedRels: relRouter.skipped,
550
+ totalValidRels: relRouter.total,
551
+ };
552
+ }
553
+ finally {
554
+ // Restore original process listener limit on every path (success or throw).
555
+ process.setMaxListeners(prevMax);
491
556
  }
492
- // Restore original process listener limit
493
- process.setMaxListeners(prevMax);
494
- return { nodeFiles, relCsvPath, relRows: relWriter.rows };
495
557
  };
@@ -2,8 +2,6 @@ import lbug from '@ladybugdb/core';
2
2
  import { KnowledgeGraph } from '../graph/types.js';
3
3
  import type { CachedEmbedding } from '../embeddings/types.js';
4
4
  import { type ExtensionEnsureOptions } from './extension-loader.js';
5
- /** Factory for creating WriteStreams — injectable for testing. */
6
- export type WriteStreamFactory = (filePath: string) => import('fs').WriteStream;
7
5
  /** Result of splitting the relationship CSV into per-label-pair files. */
8
6
  export interface RelCsvSplitResult {
9
7
  relHeader: string;
@@ -15,21 +13,6 @@ export interface RelCsvSplitResult {
15
13
  skippedRels: number;
16
14
  totalValidRels: number;
17
15
  }
18
- /**
19
- * Split a relationship CSV into per-label-pair files on disk.
20
- *
21
- * Streams the CSV line-by-line, routing each relationship to a file named
22
- * `rel_{fromLabel}_{toLabel}.csv`. Handles backpressure correctly: only one
23
- * drain listener per stream at a time, and readline resumes only when ALL
24
- * backpressured streams have drained.
25
- *
26
- * @param csvPath Path to the combined relationship CSV
27
- * @param csvDir Directory to write per-pair CSV files
28
- * @param validTables Set of valid node table names
29
- * @param getNodeLabel Function to extract the label from a node ID
30
- * @param wsFactory Optional WriteStream factory (defaults to fs.createWriteStream)
31
- */
32
- export declare const splitRelCsvByLabelPair: (csvPath: string, csvDir: string, validTables: Set<string>, getNodeLabel: (id: string) => string, wsFactory?: WriteStreamFactory) => Promise<RelCsvSplitResult>;
33
16
  /** Expose the current Database for pool adapter reuse in tests. */
34
17
  export declare const getDatabase: () => lbug.Database | null;
35
18
  /**
@@ -10,6 +10,7 @@ import lbug from '@ladybugdb/core';
10
10
  import { closeQueryResults } from './query-result-utils.js';
11
11
  import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from './schema.js';
12
12
  import { streamAllCSVsToDisk } from './csv-generator.js';
13
+ import { getNodeLabel as deriveNodeLabel } from './rel-pair-routing.js';
13
14
  import { extensionManager } from './extension-loader.js';
14
15
  import { closeLbugConnection, isDbBusyError, isOpenRetryExhausted, isWalCorruptionError, openLbugConnection, toNativeSafePath, WAL_RECOVERY_SUGGESTION, waitForWindowsHandleRelease, } from './lbug-config.js';
15
16
  import { finalizeLbugSidecarsAfterClose, inspectLbugSidecars, isMissingShadowSidecarError, isReadOnlyShadowReplayError, preflightLbugSidecars, quarantineWalForMissingShadow, renameFailureMessage, shadowSidecarRecoveryMessage, } from './sidecar-recovery.js';
@@ -18,6 +19,15 @@ import { logger } from '../logger.js';
18
19
  /**
19
20
  * Split a relationship CSV into per-label-pair files on disk.
20
21
  *
22
+ * @internal RETAINED AS A DIFFERENTIAL ORACLE. As of #2203 U2, production emit
23
+ * routes relationships to per-pair files directly during the single pass (see
24
+ * RelPairRouter in `rel-pair-routing.ts`), so this function has NO production
25
+ * callers — it is kept ONLY so the byte-identity test in
26
+ * `test/integration/csv-pipeline.test.ts` ("direct per-pair emit matches the
27
+ * split oracle") can diff the direct-emit output against this proven path. Do
28
+ * NOT delete it as dead code without also removing that test and accepting the
29
+ * loss of the byte-identity guard (and likewise `test/unit/rel-csv-split.test.ts`).
30
+ *
21
31
  * Streams the CSV line-by-line, routing each relationship to a file named
22
32
  * `rel_{fromLabel}_{toLabel}.csv`. Handles backpressure correctly: only one
23
33
  * drain listener per stream at a time, and readline resumes only when ALL
@@ -737,6 +747,16 @@ export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress)
737
747
  throw new Error('LadybugDB not initialized. Call initLbug first.');
738
748
  }
739
749
  const log = onProgress || (() => { });
750
+ // ── #2203 persistence-path profiling ──────────────────────────────────
751
+ // Mirrors the PROF_SCOPE_RESOLUTION pattern (scope-resolution/pipeline/
752
+ // run.ts): zero-cost when off — process.hrtime.bigint() is only read under
753
+ // PROF_LBUG_LOAD=1, and the summary is logged behind the same gate. Fills
754
+ // the gap that the DB-persistence path is un-timed today (the analyze
755
+ // "emit" number is the scope-resolution emit bucket, not this COPY path).
756
+ const PROF = process.env.PROF_LBUG_LOAD === '1';
757
+ const mark = () => (PROF ? process.hrtime.bigint() : 0n);
758
+ const span = (a, b) => (Number(b - a) / 1e6).toFixed(1);
759
+ const tStart = mark();
740
760
  let csvDir;
741
761
  if (process.platform === 'win32' && /[^\x00-\x7F]/.test(storagePath)) {
742
762
  const hash = crypto.createHash('sha256').update(storagePath).digest('hex').slice(0, 16);
@@ -747,14 +767,8 @@ export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress)
747
767
  }
748
768
  log('Streaming CSVs to disk...');
749
769
  const csvResult = await streamAllCSVsToDisk(graph, repoPath, csvDir);
770
+ const tCsv = mark();
750
771
  const validTables = new Set(NODE_TABLES);
751
- const getNodeLabel = (nodeId) => {
752
- if (nodeId.startsWith('comm_'))
753
- return 'Community';
754
- if (nodeId.startsWith('proc_'))
755
- return 'Process';
756
- return nodeId.split(':')[0];
757
- };
758
772
  // Bulk COPY all node CSVs (sequential — LadybugDB allows only one write txn at a time)
759
773
  const nodeFiles = [...csvResult.nodeFiles.entries()];
760
774
  const totalSteps = nodeFiles.length + 1; // +1 for relationships
@@ -778,29 +792,27 @@ export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress)
778
792
  }
779
793
  }
780
794
  }
781
- // Bulk COPY relationships — split by FROM→TO label pair (LadybugDB requires it)
782
- const { relHeader, relsByPairMeta, pairWriteStreams, skippedRels, totalValidRels } = await splitRelCsvByLabelPair(csvResult.relCsvPath, csvDir, validTables, getNodeLabel);
783
- // Close all per-pair write streams before COPY. `stream/promises.finished`
784
- // resolves on the stream's 'finish' event and rejects on 'error' — replaces
785
- // a hand-rolled promisification with the stdlib primitive.
786
- await Promise.all(Array.from(pairWriteStreams.values()).map(async (ws) => {
787
- ws.end();
788
- await finished(ws);
789
- }));
795
+ const tCopyNodes = mark();
796
+ // Bulk COPY relationships. They were already routed to per-FROM→TO-label-pair
797
+ // files during the emit pass (#2203 U2) — there is no monolithic relations.csv
798
+ // to re-read/re-split here; we COPY each pair file directly.
799
+ const { relsByPair, relHeader, skippedRels, totalValidRels } = csvResult;
800
+ let tCopyRels = tCopyNodes;
801
+ let tFallback = tCopyNodes;
790
802
  const insertedRels = totalValidRels;
791
803
  const warnings = [];
792
804
  if (insertedRels > 0) {
793
- log(`Loading edges: ${insertedRels.toLocaleString()} across ${relsByPairMeta.size} types`);
805
+ log(`Loading edges: ${insertedRels.toLocaleString()} across ${relsByPair.size} types`);
794
806
  let pairIdx = 0;
795
807
  let failedPairEdges = 0;
796
808
  const failedPairCsvPaths = new Set();
797
- for (const [pairKey, { csvPath: pairCsvPath, rows }] of relsByPairMeta) {
809
+ for (const [pairKey, { csvPath: pairCsvPath, rows }] of relsByPair) {
798
810
  pairIdx++;
799
811
  const [fromLabel, toLabel] = pairKey.split('|');
800
812
  const normalizedPath = normalizeCopyPath(pairCsvPath);
801
813
  const copyQuery = `COPY ${REL_TABLE_NAME} FROM "${normalizedPath}" (from="${fromLabel}", to="${toLabel}", HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;
802
814
  if (pairIdx % 5 === 0 || rows > 1000) {
803
- log(`Loading edges: ${pairIdx}/${relsByPairMeta.size} types (${fromLabel} -> ${toLabel})`);
815
+ log(`Loading edges: ${pairIdx}/${relsByPair.size} types (${fromLabel} -> ${toLabel})`);
804
816
  }
805
817
  try {
806
818
  await queryAndDrain(conn, copyQuery);
@@ -825,6 +837,7 @@ export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress)
825
837
  catch { }
826
838
  }
827
839
  }
840
+ tCopyRels = mark();
828
841
  if (failedPairCsvPaths.size > 0) {
829
842
  log(`Inserting ${failedPairEdges} edges individually (missing schema pairs)`);
830
843
  // Read failed pair files and merge for fallback inserts
@@ -846,15 +859,13 @@ export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress)
846
859
  catch { }
847
860
  }
848
861
  if (allLines.length > 1) {
849
- await fallbackRelationshipInserts(allLines, validTables, getNodeLabel);
862
+ await fallbackRelationshipInserts(allLines, validTables, deriveNodeLabel);
850
863
  }
851
864
  }
865
+ tFallback = mark();
852
866
  }
853
- // Cleanup all CSVs
854
- try {
855
- await fs.unlink(csvResult.relCsvPath);
856
- }
857
- catch { }
867
+ // Cleanup all CSVs (per-pair rel files are unlinked in the COPY loop above;
868
+ // the remaining sweep below catches node CSVs + any leftover pair files).
858
869
  for (const [, { csvPath }] of csvResult.nodeFiles) {
859
870
  try {
860
871
  await fs.unlink(csvPath);
@@ -875,6 +886,16 @@ export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress)
875
886
  await fs.rmdir(csvDir);
876
887
  }
877
888
  catch { }
889
+ if (PROF) {
890
+ const tEnd = mark();
891
+ let totalNodeRows = 0;
892
+ for (const [, { rows }] of csvResult.nodeFiles)
893
+ totalNodeRows += rows;
894
+ logger.warn(`[lbug-load prof] csv-emit=${span(tStart, tCsv)}ms ` +
895
+ `copy-nodes=${span(tCsv, tCopyNodes)}ms copy-rels=${span(tCopyNodes, tCopyRels)}ms ` +
896
+ `fallback=${span(tCopyRels, tFallback)}ms total=${span(tStart, tEnd)}ms ` +
897
+ `(${totalNodeRows} nodes, ${insertedRels} rels)`);
898
+ }
878
899
  return { success: true, insertedRels, skippedRels, warnings };
879
900
  };
880
901
  // LadybugDB default ESCAPE is '\' (backslash), but our CSV uses RFC 4180 escaping ("" for literal quotes).
@@ -0,0 +1,54 @@
1
+ import { type WriteStream } from 'fs';
2
+ /** Injectable for tests (backpressure/error simulation), mirroring split. */
3
+ export type WriteStreamFactory = (filePath: string) => WriteStream;
4
+ /**
5
+ * Derive a node's table label from its graph id. Matches the legacy
6
+ * `getNodeLabel` that lived inline in `loadGraphToLbug`:
7
+ * - `comm_*` → Community
8
+ * - `proc_*` → Process
9
+ * - otherwise the prefix before the first `:` (e.g. `Function:…` → Function)
10
+ */
11
+ export declare const getNodeLabel: (nodeId: string) => string;
12
+ export interface RelPairMeta {
13
+ csvPath: string;
14
+ rows: number;
15
+ }
16
+ /**
17
+ * Routes already-escaped relationship CSV rows to per-FROM→TO-label-pair
18
+ * files. Filters edges whose endpoint labels are not valid node tables
19
+ * (counted as `skipped`), exactly as the legacy split did.
20
+ */
21
+ export declare class RelPairRouter {
22
+ private readonly csvDir;
23
+ private readonly header;
24
+ private readonly validTables;
25
+ private readonly wsFactory;
26
+ /** pairKey (`From|To`) → { csvPath, rows } */
27
+ readonly byPair: Map<string, RelPairMeta>;
28
+ private readonly streams;
29
+ skipped: number;
30
+ total: number;
31
+ private streamError;
32
+ private readonly abort;
33
+ constructor(csvDir: string, header: string, validTables: Set<string>, wsFactory?: WriteStreamFactory);
34
+ private markError;
35
+ /**
36
+ * The first stream error observed, if any. Lets the emit caller rethrow the
37
+ * real error (EMFILE / disk-full) instead of the generic `AbortError` that a
38
+ * pending `once(ws,'drain',{signal})` rejects with when the abort fires —
39
+ * mirroring the retained `splitRelCsvByLabelPair`'s `throw streamError ?? err`.
40
+ */
41
+ get lastError(): Error | null;
42
+ /**
43
+ * Route one already-escaped CSV row (no trailing newline) to its pair file.
44
+ * Returns `void` on the synchronous hot path; a `Promise<void>` only when a
45
+ * stream signals backpressure (or a new pair's header does) — the caller
46
+ * awaits the promise before routing the next edge.
47
+ */
48
+ route(fromId: string, toId: string, row: string): void | Promise<void>;
49
+ private openAndWrite;
50
+ /** Flush + close every pair stream. Rejects if any stream errored. */
51
+ close(): Promise<void>;
52
+ /** Tear down all streams (no flush) — used on the error path. */
53
+ destroy(): void;
54
+ }
@@ -0,0 +1,140 @@
1
+ /**
2
+ * Relationship per-label-pair routing (#2203 U2).
3
+ *
4
+ * LadybugDB's bulk `COPY` into the single `CodeRelation` rel table requires a
5
+ * separate CSV per FROM→TO node-label pair (the `from=`/`to=` COPY params).
6
+ * Historically the emit pass wrote one monolithic `relations.csv`, which
7
+ * `loadGraphToLbug` then RE-READ line-by-line (regex per edge) and re-split
8
+ * into per-pair files — writing and reading the entire ~1M-edge set twice.
9
+ *
10
+ * This router lets the single emit pass route each edge to its per-pair file
11
+ * directly, so the monolithic write + re-read + per-edge regex are all gone.
12
+ * The label-derivation + validTables filtering + per-pair-file format here match
13
+ * the legacy `splitRelCsvByLabelPair`, so the per-pair files are byte-identical
14
+ * for all quote-free ids — see the differential test in
15
+ * `test/integration/csv-pipeline.test.ts`. ONE intentional divergence: this
16
+ * router derives the label from the RAW id, while the oracle re-derives it via a
17
+ * regex over the ESCAPED row — so for an id containing a `"` the router is the
18
+ * more-correct path (it routes the edge to the right pair; the oracle's regex
19
+ * mis-buckets or drops it). `splitRelCsvByLabelPair` is retained as the
20
+ * differential oracle (the quote-in-id divergence is asserted explicitly).
21
+ *
22
+ * Backpressure: at most one stream is awaited at a time (the caller routes
23
+ * edges sequentially and awaits the returned drain promise before the next),
24
+ * mirroring the legacy split's `for await` invariant. The hot path (existing
25
+ * pair, no backpressure) returns `void` — no microtask per edge.
26
+ */
27
+ import path from 'path';
28
+ import { createWriteStream } from 'fs';
29
+ import { once } from 'events';
30
+ import { finished } from 'stream/promises';
31
/**
 * Derive a node's table label from its graph id. Matches the legacy
 * `getNodeLabel` that lived inline in `loadGraphToLbug`:
 *   - `comm_*`  → Community
 *   - `proc_*`  → Process
 *   - otherwise the prefix before the first `:` (e.g. `Function:…` → Function);
 *     an id with no `:` at all is returned unchanged.
 *
 * Called twice per relationship by `RelPairRouter.route`, so the prefix is
 * sliced out via `indexOf` rather than `split(':')[0]` — same result, without
 * allocating an array of every colon-separated segment for each edge.
 *
 * @param nodeId Graph node id.
 * @returns The node-table label the id maps to.
 */
export const getNodeLabel = (nodeId) => {
    if (nodeId.startsWith('comm_'))
        return 'Community';
    if (nodeId.startsWith('proc_'))
        return 'Process';
    const colonAt = nodeId.indexOf(':');
    // No colon: the whole id is the label (what `split(':')[0]` yields too).
    return colonAt === -1 ? nodeId : nodeId.slice(0, colonAt);
};
45
+ /**
46
+ * Routes already-escaped relationship CSV rows to per-FROM→TO-label-pair
47
+ * files. Filters edges whose endpoint labels are not valid node tables
48
+ * (counted as `skipped`), exactly as the legacy split did.
49
+ */
50
+ export class RelPairRouter {
51
+ csvDir;
52
+ header;
53
+ validTables;
54
+ wsFactory;
55
+ /** pairKey (`From|To`) → { csvPath, rows } */
56
+ byPair = new Map();
57
+ streams = new Map();
58
+ skipped = 0;
59
+ total = 0;
60
+ streamError = null;
61
+ abort = new AbortController();
62
+ constructor(csvDir, header, validTables, wsFactory = (p) => createWriteStream(p, 'utf-8')) {
63
+ this.csvDir = csvDir;
64
+ this.header = header;
65
+ this.validTables = validTables;
66
+ this.wsFactory = wsFactory;
67
+ }
68
+ markError = (err) => {
69
+ this.streamError ??= err;
70
+ this.abort.abort(err);
71
+ };
72
+ /**
73
+ * The first stream error observed, if any. Lets the emit caller rethrow the
74
+ * real error (EMFILE / disk-full) instead of the generic `AbortError` that a
75
+ * pending `once(ws,'drain',{signal})` rejects with when the abort fires —
76
+ * mirroring the retained `splitRelCsvByLabelPair`'s `throw streamError ?? err`.
77
+ */
78
+ get lastError() {
79
+ return this.streamError;
80
+ }
81
+ /**
82
+ * Route one already-escaped CSV row (no trailing newline) to its pair file.
83
+ * Returns `void` on the synchronous hot path; a `Promise<void>` only when a
84
+ * stream signals backpressure (or a new pair's header does) — the caller
85
+ * awaits the promise before routing the next edge.
86
+ */
87
+ route(fromId, toId, row) {
88
+ if (this.streamError)
89
+ throw this.streamError;
90
+ const fromLabel = getNodeLabel(fromId);
91
+ const toLabel = getNodeLabel(toId);
92
+ if (!this.validTables.has(fromLabel) || !this.validTables.has(toLabel)) {
93
+ this.skipped++;
94
+ return;
95
+ }
96
+ const pairKey = `${fromLabel}|${toLabel}`;
97
+ const ws = this.streams.get(pairKey);
98
+ if (ws === undefined) {
99
+ // First edge for this pair: open the stream, write header + row.
100
+ return this.openAndWrite(pairKey, fromLabel, toLabel, row);
101
+ }
102
+ this.byPair.get(pairKey).rows++;
103
+ this.total++;
104
+ if (!ws.write(row + '\n')) {
105
+ return once(ws, 'drain', { signal: this.abort.signal }).then(() => undefined);
106
+ }
107
+ }
108
+ async openAndWrite(pairKey, fromLabel, toLabel, row) {
109
+ const csvPath = path.join(this.csvDir, `rel_${fromLabel}_${toLabel}.csv`);
110
+ const ws = this.wsFactory(csvPath);
111
+ ws.on('error', this.markError);
112
+ this.streams.set(pairKey, ws);
113
+ this.byPair.set(pairKey, { csvPath, rows: 1 });
114
+ this.total++;
115
+ if (!ws.write(this.header + '\n')) {
116
+ await once(ws, 'drain', { signal: this.abort.signal });
117
+ }
118
+ if (!ws.write(row + '\n')) {
119
+ await once(ws, 'drain', { signal: this.abort.signal });
120
+ }
121
+ }
122
+ /** Flush + close every pair stream. Rejects if any stream errored. */
123
+ async close() {
124
+ if (this.streamError) {
125
+ this.destroy();
126
+ throw this.streamError;
127
+ }
128
+ await Promise.all(Array.from(this.streams.values()).map(async (ws) => {
129
+ ws.end();
130
+ await finished(ws);
131
+ }));
132
+ if (this.streamError)
133
+ throw this.streamError;
134
+ }
135
+ /** Tear down all streams (no flush) — used on the error path. */
136
+ destroy() {
137
+ for (const ws of this.streams.values())
138
+ ws.destroy();
139
+ }
140
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.8-rc.39",
3
+ "version": "1.6.8-rc.40",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",