gitnexus 1.2.8 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +194 -186
  2. package/dist/cli/ai-context.js +71 -71
  3. package/dist/cli/analyze.js +69 -28
  4. package/dist/cli/index.js +20 -0
  5. package/dist/cli/setup.js +8 -1
  6. package/dist/cli/view.d.ts +13 -0
  7. package/dist/cli/view.js +59 -0
  8. package/dist/core/augmentation/engine.js +20 -20
  9. package/dist/core/embeddings/embedding-pipeline.js +26 -26
  10. package/dist/core/graph/graph.js +5 -0
  11. package/dist/core/graph/html-graph-viewer.d.ts +15 -0
  12. package/dist/core/graph/html-graph-viewer.js +542 -0
  13. package/dist/core/graph/html-graph-viewer.test.d.ts +1 -0
  14. package/dist/core/graph/html-graph-viewer.test.js +67 -0
  15. package/dist/core/graph/types.d.ts +12 -1
  16. package/dist/core/ingestion/call-processor.js +52 -32
  17. package/dist/core/ingestion/cluster-enricher.js +16 -16
  18. package/dist/core/ingestion/community-processor.js +75 -40
  19. package/dist/core/ingestion/filesystem-walker.d.ts +23 -0
  20. package/dist/core/ingestion/filesystem-walker.js +38 -3
  21. package/dist/core/ingestion/import-processor.d.ts +11 -3
  22. package/dist/core/ingestion/import-processor.js +27 -11
  23. package/dist/core/ingestion/parsing-processor.js +2 -4
  24. package/dist/core/ingestion/pipeline.js +142 -135
  25. package/dist/core/ingestion/process-processor.js +12 -11
  26. package/dist/core/ingestion/workers/parse-worker.js +67 -6
  27. package/dist/core/ingestion/workers/worker-pool.d.ts +3 -9
  28. package/dist/core/ingestion/workers/worker-pool.js +39 -18
  29. package/dist/core/kuzu/csv-generator.d.ts +15 -8
  30. package/dist/core/kuzu/csv-generator.js +258 -196
  31. package/dist/core/kuzu/kuzu-adapter.d.ts +1 -4
  32. package/dist/core/kuzu/kuzu-adapter.js +84 -72
  33. package/dist/core/kuzu/schema.d.ts +1 -1
  34. package/dist/core/kuzu/schema.js +266 -256
  35. package/dist/core/search/bm25-index.js +5 -5
  36. package/dist/core/search/hybrid-search.js +3 -3
  37. package/dist/core/wiki/graph-queries.js +52 -52
  38. package/dist/core/wiki/html-viewer.js +192 -192
  39. package/dist/core/wiki/prompts.js +82 -82
  40. package/dist/mcp/core/embedder.js +8 -4
  41. package/dist/mcp/local/local-backend.d.ts +6 -0
  42. package/dist/mcp/local/local-backend.js +224 -117
  43. package/dist/mcp/resources.js +42 -42
  44. package/dist/mcp/server.js +16 -16
  45. package/dist/mcp/tools.js +86 -77
  46. package/dist/server/api.d.ts +4 -2
  47. package/dist/server/api.js +253 -83
  48. package/dist/types/pipeline.d.ts +6 -2
  49. package/dist/types/pipeline.js +6 -4
  50. package/hooks/claude/gitnexus-hook.cjs +135 -135
  51. package/hooks/claude/pre-tool-use.sh +78 -78
  52. package/hooks/claude/session-start.sh +42 -42
  53. package/package.json +82 -82
  54. package/skills/debugging.md +85 -85
  55. package/skills/exploring.md +75 -75
  56. package/skills/impact-analysis.md +94 -94
  57. package/skills/refactoring.md +113 -113
  58. package/vendor/leiden/index.cjs +355 -355
  59. package/vendor/leiden/utils.cjs +392 -392
@@ -1,59 +1,47 @@
1
1
  /**
2
2
  * CSV Generator for KuzuDB Hybrid Schema
3
3
  *
4
- * Generates separate CSV files for each node table and one relation CSV.
5
- * This enables efficient bulk loading via COPY FROM for hybrid schema.
4
+ * Streams CSV rows directly to disk files in a single pass over graph nodes.
5
+ * File contents are lazy-read from disk per-node to avoid holding the entire
6
+ * repo in RAM. Rows are buffered (FLUSH_EVERY) before writing to minimize
7
+ * per-row Promise overhead.
6
8
  *
7
9
  * RFC 4180 Compliant:
8
10
  * - Fields containing commas, double quotes, or newlines are enclosed in double quotes
9
11
  * - Double quotes within fields are escaped by doubling them ("")
10
12
  * - All fields are consistently quoted for safety with code content
11
13
  */
14
+ import fs from 'fs/promises';
15
+ import { createWriteStream } from 'fs';
16
+ import path from 'path';
17
+ /** Flush buffered rows to disk every N rows */
18
+ const FLUSH_EVERY = 500;
12
19
  // ============================================================================
13
20
  // CSV ESCAPE UTILITIES
14
21
  // ============================================================================
15
- /**
16
- * Sanitize string to ensure valid UTF-8 and safe CSV content for KuzuDB
17
- * Removes or replaces invalid characters that would break CSV parsing.
18
- *
19
- * Critical: KuzuDB's native CSV parser on Windows can misinterpret \r\n
20
- * inside quoted fields. We normalize all line endings to \n only.
21
- */
22
22
  const sanitizeUTF8 = (str) => {
23
23
  return str
24
- .replace(/\r\n/g, '\n') // Normalize Windows line endings first
25
- .replace(/\r/g, '\n') // Normalize remaining \r to \n
26
- .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '') // Remove control chars except \t \n
27
- .replace(/[\uD800-\uDFFF]/g, '') // Remove surrogate pairs (invalid standalone)
28
- .replace(/[\uFFFE\uFFFF]/g, ''); // Remove BOM and special chars
24
+ .replace(/\r\n/g, '\n')
25
+ .replace(/\r/g, '\n')
26
+ .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '')
27
+ .replace(/[\uD800-\uDFFF]/g, '')
28
+ .replace(/[\uFFFE\uFFFF]/g, '');
29
29
  };
30
- /**
31
- * RFC 4180 compliant CSV field escaping
32
- * ALWAYS wraps in double quotes for safety with code content
33
- */
34
30
  const escapeCSVField = (value) => {
35
- if (value === undefined || value === null) {
31
+ if (value === undefined || value === null)
36
32
  return '""';
37
- }
38
33
  let str = String(value);
39
34
  str = sanitizeUTF8(str);
40
35
  return `"${str.replace(/"/g, '""')}"`;
41
36
  };
42
- /**
43
- * Escape a numeric value (no quotes needed for numbers)
44
- */
45
37
  const escapeCSVNumber = (value, defaultValue = -1) => {
46
- if (value === undefined || value === null) {
38
+ if (value === undefined || value === null)
47
39
  return String(defaultValue);
48
- }
49
40
  return String(value);
50
41
  };
51
42
  // ============================================================================
52
- // CONTENT EXTRACTION
43
+ // CONTENT EXTRACTION (lazy — reads from disk on demand)
53
44
  // ============================================================================
54
- /**
55
- * Check if content looks like binary data
56
- */
57
45
  const isBinaryContent = (content) => {
58
46
  if (!content || content.length === 0)
59
47
  return false;
@@ -61,181 +49,261 @@ const isBinaryContent = (content) => {
61
49
  let nonPrintable = 0;
62
50
  for (let i = 0; i < sample.length; i++) {
63
51
  const code = sample.charCodeAt(i);
64
- if ((code < 9) || (code > 13 && code < 32) || code === 127) {
52
+ if ((code < 9) || (code > 13 && code < 32) || code === 127)
65
53
  nonPrintable++;
66
- }
67
54
  }
68
55
  return (nonPrintable / sample.length) > 0.1;
69
56
  };
70
57
  /**
71
- * Extract code content for a node
58
+ * LRU content cache — avoids re-reading the same source file for every
59
+ * symbol defined in it. Sized generously so most files stay cached during
60
+ * the single-pass node iteration.
72
61
  */
73
- const extractContent = (node, fileContents) => {
62
+ class FileContentCache {
63
+ cache = new Map();
64
+ accessOrder = [];
65
+ maxSize;
66
+ repoPath;
67
+ constructor(repoPath, maxSize = 3000) {
68
+ this.repoPath = repoPath;
69
+ this.maxSize = maxSize;
70
+ }
71
+ async get(relativePath) {
72
+ if (!relativePath)
73
+ return '';
74
+ const cached = this.cache.get(relativePath);
75
+ if (cached !== undefined)
76
+ return cached;
77
+ try {
78
+ const fullPath = path.join(this.repoPath, relativePath);
79
+ const content = await fs.readFile(fullPath, 'utf-8');
80
+ this.set(relativePath, content);
81
+ return content;
82
+ }
83
+ catch {
84
+ this.set(relativePath, '');
85
+ return '';
86
+ }
87
+ }
88
+ set(key, value) {
89
+ if (this.cache.size >= this.maxSize) {
90
+ const oldest = this.accessOrder.shift();
91
+ if (oldest)
92
+ this.cache.delete(oldest);
93
+ }
94
+ this.cache.set(key, value);
95
+ this.accessOrder.push(key);
96
+ }
97
+ }
98
+ const extractContent = async (node, contentCache) => {
74
99
  const filePath = node.properties.filePath;
75
- const content = fileContents.get(filePath);
100
+ const content = await contentCache.get(filePath);
76
101
  if (!content)
77
102
  return '';
78
103
  if (node.label === 'Folder')
79
104
  return '';
80
105
  if (isBinaryContent(content))
81
106
  return '[Binary file - content not stored]';
82
- // For File nodes, return content (limited)
83
107
  if (node.label === 'File') {
84
108
  const MAX_FILE_CONTENT = 10000;
85
- if (content.length > MAX_FILE_CONTENT) {
86
- return content.slice(0, MAX_FILE_CONTENT) + '\n... [truncated]';
87
- }
88
- return content;
109
+ return content.length > MAX_FILE_CONTENT
110
+ ? content.slice(0, MAX_FILE_CONTENT) + '\n... [truncated]'
111
+ : content;
89
112
  }
90
- // For code elements, extract the relevant lines with context
91
113
  const startLine = node.properties.startLine;
92
114
  const endLine = node.properties.endLine;
93
115
  if (startLine === undefined || endLine === undefined)
94
116
  return '';
95
117
  const lines = content.split('\n');
96
- const contextLines = 2;
97
- const start = Math.max(0, startLine - contextLines);
98
- const end = Math.min(lines.length - 1, endLine + contextLines);
118
+ const start = Math.max(0, startLine - 2);
119
+ const end = Math.min(lines.length - 1, endLine + 2);
99
120
  const snippet = lines.slice(start, end + 1).join('\n');
100
121
  const MAX_SNIPPET = 5000;
101
- if (snippet.length > MAX_SNIPPET) {
102
- return snippet.slice(0, MAX_SNIPPET) + '\n... [truncated]';
103
- }
104
- return snippet;
122
+ return snippet.length > MAX_SNIPPET
123
+ ? snippet.slice(0, MAX_SNIPPET) + '\n... [truncated]'
124
+ : snippet;
105
125
  };
106
126
  // ============================================================================
107
- // NODE CSV GENERATORS
127
+ // BUFFERED CSV WRITER
108
128
  // ============================================================================
109
- /**
110
- * Generate CSV for File nodes
111
- * Headers: id,name,filePath,content
112
- */
113
- const generateFileCSV = (nodes, fileContents) => {
114
- const headers = ['id', 'name', 'filePath', 'content'];
115
- const rows = [headers.join(',')];
116
- const seenIds = new Set();
117
- for (const node of nodes) {
118
- if (node.label !== 'File')
119
- continue;
120
- // Skip duplicates
121
- if (seenIds.has(node.id))
122
- continue;
123
- seenIds.add(node.id);
124
- const content = extractContent(node, fileContents);
125
- rows.push([
126
- escapeCSVField(node.id),
127
- escapeCSVField(node.properties.name || ''),
128
- escapeCSVField(node.properties.filePath || ''),
129
- escapeCSVField(content),
130
- ].join(','));
129
+ class BufferedCSVWriter {
130
+ ws;
131
+ buffer = [];
132
+ rows = 0;
133
+ constructor(filePath, header) {
134
+ this.ws = createWriteStream(filePath, 'utf-8');
135
+ // Large repos flush many times — raise listener cap to avoid MaxListenersExceededWarning
136
+ this.ws.setMaxListeners(50);
137
+ this.buffer.push(header);
131
138
  }
132
- return rows.join('\n');
133
- };
134
- /**
135
- * Generate CSV for Folder nodes
136
- * Headers: id,name,filePath
137
- */
138
- const generateFolderCSV = (nodes) => {
139
- const headers = ['id', 'name', 'filePath'];
140
- const rows = [headers.join(',')];
141
- for (const node of nodes) {
142
- if (node.label !== 'Folder')
143
- continue;
144
- rows.push([
145
- escapeCSVField(node.id),
146
- escapeCSVField(node.properties.name || ''),
147
- escapeCSVField(node.properties.filePath || ''),
148
- ].join(','));
139
+ addRow(row) {
140
+ this.buffer.push(row);
141
+ this.rows++;
142
+ if (this.buffer.length >= FLUSH_EVERY) {
143
+ return this.flush();
144
+ }
145
+ return Promise.resolve();
149
146
  }
150
- return rows.join('\n');
151
- };
152
- /**
153
- * Generate CSV for code element nodes (Function, Class, Interface, Method, CodeElement)
154
- * Headers: id,name,filePath,startLine,endLine,isExported,content
155
- */
156
- const generateCodeElementCSV = (nodes, label, fileContents) => {
157
- const headers = ['id', 'name', 'filePath', 'startLine', 'endLine', 'isExported', 'content'];
158
- const rows = [headers.join(',')];
159
- for (const node of nodes) {
160
- if (node.label !== label)
161
- continue;
162
- const content = extractContent(node, fileContents);
163
- rows.push([
164
- escapeCSVField(node.id),
165
- escapeCSVField(node.properties.name || ''),
166
- escapeCSVField(node.properties.filePath || ''),
167
- escapeCSVNumber(node.properties.startLine, -1),
168
- escapeCSVNumber(node.properties.endLine, -1),
169
- node.properties.isExported ? 'true' : 'false',
170
- escapeCSVField(content),
171
- ].join(','));
147
+ flush() {
148
+ if (this.buffer.length === 0)
149
+ return Promise.resolve();
150
+ const chunk = this.buffer.join('\n') + '\n';
151
+ this.buffer.length = 0;
152
+ return new Promise((resolve, reject) => {
153
+ const ok = this.ws.write(chunk);
154
+ if (ok)
155
+ resolve();
156
+ else
157
+ this.ws.once('drain', resolve);
158
+ });
172
159
  }
173
- return rows.join('\n');
174
- };
175
- /**
176
- * Generate CSV for Community nodes (from Leiden algorithm)
177
- * Headers: id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount
178
- */
179
- const generateCommunityCSV = (nodes) => {
180
- const headers = ['id', 'label', 'heuristicLabel', 'keywords', 'description', 'enrichedBy', 'cohesion', 'symbolCount'];
181
- const rows = [headers.join(',')];
182
- for (const node of nodes) {
183
- if (node.label !== 'Community')
184
- continue;
185
- // Handle keywords array - convert to KuzuDB array format
186
- const keywords = node.properties.keywords || [];
187
- const keywordsStr = `[${keywords.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
188
- rows.push([
189
- escapeCSVField(node.id),
190
- escapeCSVField(node.properties.name || ''), // label is stored in name
191
- escapeCSVField(node.properties.heuristicLabel || ''),
192
- keywordsStr, // Array format for KuzuDB
193
- escapeCSVField(node.properties.description || ''),
194
- escapeCSVField(node.properties.enrichedBy || 'heuristic'),
195
- escapeCSVNumber(node.properties.cohesion, 0),
196
- escapeCSVNumber(node.properties.symbolCount, 0),
197
- ].join(','));
160
+ async finish() {
161
+ await this.flush();
162
+ return new Promise((resolve, reject) => {
163
+ this.ws.end(() => resolve());
164
+ this.ws.on('error', reject);
165
+ });
198
166
  }
199
- return rows.join('\n');
200
- };
167
+ }
201
168
  /**
202
- * Generate CSV for Process nodes
203
- * Headers: id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId
169
+ * Stream all CSV data directly to disk files.
170
+ * Iterates graph nodes exactly ONCE — routes each node to the right writer.
171
+ * File contents are lazy-read from disk with a generous LRU cache.
204
172
  */
205
- const generateProcessCSV = (nodes) => {
206
- const headers = ['id', 'label', 'heuristicLabel', 'processType', 'stepCount', 'communities', 'entryPointId', 'terminalId'];
207
- const rows = [headers.join(',')];
208
- for (const node of nodes) {
209
- if (node.label !== 'Process')
210
- continue;
211
- // Handle communities array (string[])
212
- const communities = node.properties.communities || [];
213
- const communitiesStr = `[${communities.map((c) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
214
- rows.push([
215
- escapeCSVField(node.id),
216
- escapeCSVField(node.properties.name || ''), // label stores name
217
- escapeCSVField(node.properties.heuristicLabel || ''),
218
- escapeCSVField(node.properties.processType || ''),
219
- escapeCSVNumber(node.properties.stepCount, 0),
220
- escapeCSVField(communitiesStr), // Needs CSV escaping because it contains commas!
221
- escapeCSVField(node.properties.entryPointId || ''),
222
- escapeCSVField(node.properties.terminalId || ''),
223
- ].join(','));
173
+ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
174
+ // Remove stale CSVs from previous crashed runs, then recreate
175
+ try {
176
+ await fs.rm(csvDir, { recursive: true, force: true });
224
177
  }
225
- return rows.join('\n');
226
- };
227
- /**
228
- * Generate CSV for the single CodeRelation table
229
- * Headers: from,to,type,confidence,reason
230
- *
231
- * confidence: 0-1 score for CALLS edges (how sure are we about the target?)
232
- * reason: 'import-resolved' | 'same-file' | 'fuzzy-global' (or empty for non-CALLS)
233
- */
234
- const generateRelationCSV = (graph) => {
235
- const headers = ['from', 'to', 'type', 'confidence', 'reason', 'step'];
236
- const rows = [headers.join(',')];
237
- for (const rel of graph.relationships) {
238
- rows.push([
178
+ catch { }
179
+ await fs.mkdir(csvDir, { recursive: true });
180
+ // We open ~30 concurrent write-streams; raise process limit to suppress
181
+ // MaxListenersExceededWarning (restored after all streams finish).
182
+ const prevMax = process.getMaxListeners();
183
+ process.setMaxListeners(prevMax + 40);
184
+ const contentCache = new FileContentCache(repoPath);
185
+ // Create writers for every node type up-front
186
+ const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content');
187
+ const folderWriter = new BufferedCSVWriter(path.join(csvDir, 'folder.csv'), 'id,name,filePath');
188
+ const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content';
189
+ const functionWriter = new BufferedCSVWriter(path.join(csvDir, 'function.csv'), codeElementHeader);
190
+ const classWriter = new BufferedCSVWriter(path.join(csvDir, 'class.csv'), codeElementHeader);
191
+ const interfaceWriter = new BufferedCSVWriter(path.join(csvDir, 'interface.csv'), codeElementHeader);
192
+ const methodWriter = new BufferedCSVWriter(path.join(csvDir, 'method.csv'), codeElementHeader);
193
+ const codeElemWriter = new BufferedCSVWriter(path.join(csvDir, 'codeelement.csv'), codeElementHeader);
194
+ const communityWriter = new BufferedCSVWriter(path.join(csvDir, 'community.csv'), 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
195
+ const processWriter = new BufferedCSVWriter(path.join(csvDir, 'process.csv'), 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');
196
+ // Multi-language node types share the same CSV shape (no isExported column)
197
+ const multiLangHeader = 'id,name,filePath,startLine,endLine,content';
198
+ const MULTI_LANG_TYPES = ['Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl',
199
+ 'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation', 'Constructor', 'Template', 'Module'];
200
+ const multiLangWriters = new Map();
201
+ for (const t of MULTI_LANG_TYPES) {
202
+ multiLangWriters.set(t, new BufferedCSVWriter(path.join(csvDir, `${t.toLowerCase()}.csv`), multiLangHeader));
203
+ }
204
+ const codeWriterMap = {
205
+ 'Function': functionWriter,
206
+ 'Class': classWriter,
207
+ 'Interface': interfaceWriter,
208
+ 'Method': methodWriter,
209
+ 'CodeElement': codeElemWriter,
210
+ };
211
+ const seenFileIds = new Set();
212
+ // --- SINGLE PASS over all nodes ---
213
+ for (const node of graph.iterNodes()) {
214
+ switch (node.label) {
215
+ case 'File': {
216
+ if (seenFileIds.has(node.id))
217
+ break;
218
+ seenFileIds.add(node.id);
219
+ const content = await extractContent(node, contentCache);
220
+ await fileWriter.addRow([
221
+ escapeCSVField(node.id),
222
+ escapeCSVField(node.properties.name || ''),
223
+ escapeCSVField(node.properties.filePath || ''),
224
+ escapeCSVField(content),
225
+ ].join(','));
226
+ break;
227
+ }
228
+ case 'Folder':
229
+ await folderWriter.addRow([
230
+ escapeCSVField(node.id),
231
+ escapeCSVField(node.properties.name || ''),
232
+ escapeCSVField(node.properties.filePath || ''),
233
+ ].join(','));
234
+ break;
235
+ case 'Community': {
236
+ const keywords = node.properties.keywords || [];
237
+ const keywordsStr = `[${keywords.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
238
+ await communityWriter.addRow([
239
+ escapeCSVField(node.id),
240
+ escapeCSVField(node.properties.name || ''),
241
+ escapeCSVField(node.properties.heuristicLabel || ''),
242
+ keywordsStr,
243
+ escapeCSVField(node.properties.description || ''),
244
+ escapeCSVField(node.properties.enrichedBy || 'heuristic'),
245
+ escapeCSVNumber(node.properties.cohesion, 0),
246
+ escapeCSVNumber(node.properties.symbolCount, 0),
247
+ ].join(','));
248
+ break;
249
+ }
250
+ case 'Process': {
251
+ const communities = node.properties.communities || [];
252
+ const communitiesStr = `[${communities.map((c) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
253
+ await processWriter.addRow([
254
+ escapeCSVField(node.id),
255
+ escapeCSVField(node.properties.name || ''),
256
+ escapeCSVField(node.properties.heuristicLabel || ''),
257
+ escapeCSVField(node.properties.processType || ''),
258
+ escapeCSVNumber(node.properties.stepCount, 0),
259
+ escapeCSVField(communitiesStr),
260
+ escapeCSVField(node.properties.entryPointId || ''),
261
+ escapeCSVField(node.properties.terminalId || ''),
262
+ ].join(','));
263
+ break;
264
+ }
265
+ default: {
266
+ // Code element nodes (Function, Class, Interface, Method, CodeElement)
267
+ const writer = codeWriterMap[node.label];
268
+ if (writer) {
269
+ const content = await extractContent(node, contentCache);
270
+ await writer.addRow([
271
+ escapeCSVField(node.id),
272
+ escapeCSVField(node.properties.name || ''),
273
+ escapeCSVField(node.properties.filePath || ''),
274
+ escapeCSVNumber(node.properties.startLine, -1),
275
+ escapeCSVNumber(node.properties.endLine, -1),
276
+ node.properties.isExported ? 'true' : 'false',
277
+ escapeCSVField(content),
278
+ ].join(','));
279
+ }
280
+ else {
281
+ // Multi-language node types (Struct, Impl, Trait, Macro, etc.)
282
+ const mlWriter = multiLangWriters.get(node.label);
283
+ if (mlWriter) {
284
+ const content = await extractContent(node, contentCache);
285
+ await mlWriter.addRow([
286
+ escapeCSVField(node.id),
287
+ escapeCSVField(node.properties.name || ''),
288
+ escapeCSVField(node.properties.filePath || ''),
289
+ escapeCSVNumber(node.properties.startLine, -1),
290
+ escapeCSVNumber(node.properties.endLine, -1),
291
+ escapeCSVField(content),
292
+ ].join(','));
293
+ }
294
+ }
295
+ break;
296
+ }
297
+ }
298
+ }
299
+ // Finish all node writers
300
+ const allWriters = [fileWriter, folderWriter, functionWriter, classWriter, interfaceWriter, methodWriter, codeElemWriter, communityWriter, processWriter, ...multiLangWriters.values()];
301
+ await Promise.all(allWriters.map(w => w.finish()));
302
+ // --- Stream relationship CSV ---
303
+ const relCsvPath = path.join(csvDir, 'relations.csv');
304
+ const relWriter = new BufferedCSVWriter(relCsvPath, 'from,to,type,confidence,reason,step');
305
+ for (const rel of graph.iterRelationships()) {
306
+ await relWriter.addRow([
239
307
  escapeCSVField(rel.sourceId),
240
308
  escapeCSVField(rel.targetId),
241
309
  escapeCSVField(rel.type),
@@ -244,29 +312,23 @@ const generateRelationCSV = (graph) => {
244
312
  escapeCSVNumber(rel.step, 0),
245
313
  ].join(','));
246
314
  }
247
- return rows.join('\n');
248
- };
249
- // ============================================================================
250
- // MAIN CSV GENERATION FUNCTION
251
- // ============================================================================
252
- /**
253
- * Generate all CSV data for hybrid schema bulk loading
254
- * Returns Maps of node table name -> CSV content, and single relation CSV
255
- */
256
- export const generateAllCSVs = (graph, fileContents) => {
257
- const nodes = Array.from(graph.nodes);
258
- // Generate node CSVs
259
- const nodeCSVs = new Map();
260
- nodeCSVs.set('File', generateFileCSV(nodes, fileContents));
261
- nodeCSVs.set('Folder', generateFolderCSV(nodes));
262
- nodeCSVs.set('Function', generateCodeElementCSV(nodes, 'Function', fileContents));
263
- nodeCSVs.set('Class', generateCodeElementCSV(nodes, 'Class', fileContents));
264
- nodeCSVs.set('Interface', generateCodeElementCSV(nodes, 'Interface', fileContents));
265
- nodeCSVs.set('Method', generateCodeElementCSV(nodes, 'Method', fileContents));
266
- nodeCSVs.set('CodeElement', generateCodeElementCSV(nodes, 'CodeElement', fileContents));
267
- nodeCSVs.set('Community', generateCommunityCSV(nodes));
268
- nodeCSVs.set('Process', generateProcessCSV(nodes));
269
- // Generate single relation CSV
270
- const relCSV = generateRelationCSV(graph);
271
- return { nodes: nodeCSVs, relCSV };
315
+ await relWriter.finish();
316
+ // Build result map — only include tables that have rows
317
+ const nodeFiles = new Map();
318
+ const tableMap = [
319
+ ['File', fileWriter], ['Folder', folderWriter],
320
+ ['Function', functionWriter], ['Class', classWriter],
321
+ ['Interface', interfaceWriter], ['Method', methodWriter],
322
+ ['CodeElement', codeElemWriter],
323
+ ['Community', communityWriter], ['Process', processWriter],
324
+ ...Array.from(multiLangWriters.entries()).map(([name, w]) => [name, w]),
325
+ ];
326
+ for (const [name, writer] of tableMap) {
327
+ if (writer.rows > 0) {
328
+ nodeFiles.set(name, { csvPath: path.join(csvDir, `${name.toLowerCase()}.csv`), rows: writer.rows });
329
+ }
330
+ }
331
+ // Restore original process listener limit
332
+ process.setMaxListeners(prevMax);
333
+ return { nodeFiles, relCsvPath, relRows: relWriter.rows };
272
334
  };
@@ -5,7 +5,7 @@ export declare const initKuzu: (dbPath: string) => Promise<{
5
5
  conn: kuzu.Connection;
6
6
  }>;
7
7
  export type KuzuProgressCallback = (message: string) => void;
8
- export declare const loadGraphToKuzu: (graph: KnowledgeGraph, fileContents: Map<string, string>, storagePath: string, onProgress?: KuzuProgressCallback) => Promise<{
8
+ export declare const loadGraphToKuzu: (graph: KnowledgeGraph, repoPath: string, storagePath: string, onProgress?: KuzuProgressCallback) => Promise<{
9
9
  success: boolean;
10
10
  insertedRels: number;
11
11
  skippedRels: number;
@@ -61,9 +61,6 @@ export declare const deleteNodesForFile: (filePath: string, dbPath?: string) =>
61
61
  deletedNodes: number;
62
62
  }>;
63
63
  export declare const getEmbeddingTableName: () => string;
64
- /**
65
- * Load the FTS extension (required before using FTS functions)
66
- */
67
64
  export declare const loadFTSExtension: () => Promise<void>;
68
65
  /**
69
66
  * Create a full-text search index on a table