gitnexus 1.2.8 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +194 -186
- package/dist/cli/ai-context.js +71 -71
- package/dist/cli/analyze.js +69 -28
- package/dist/cli/index.js +20 -0
- package/dist/cli/setup.js +8 -1
- package/dist/cli/view.d.ts +13 -0
- package/dist/cli/view.js +59 -0
- package/dist/core/augmentation/engine.js +20 -20
- package/dist/core/embeddings/embedding-pipeline.js +26 -26
- package/dist/core/graph/graph.js +5 -0
- package/dist/core/graph/html-graph-viewer.d.ts +15 -0
- package/dist/core/graph/html-graph-viewer.js +542 -0
- package/dist/core/graph/html-graph-viewer.test.d.ts +1 -0
- package/dist/core/graph/html-graph-viewer.test.js +67 -0
- package/dist/core/graph/types.d.ts +12 -1
- package/dist/core/ingestion/call-processor.js +52 -32
- package/dist/core/ingestion/cluster-enricher.js +16 -16
- package/dist/core/ingestion/community-processor.js +75 -40
- package/dist/core/ingestion/filesystem-walker.d.ts +23 -0
- package/dist/core/ingestion/filesystem-walker.js +38 -3
- package/dist/core/ingestion/import-processor.d.ts +11 -3
- package/dist/core/ingestion/import-processor.js +27 -11
- package/dist/core/ingestion/parsing-processor.js +2 -4
- package/dist/core/ingestion/pipeline.js +142 -135
- package/dist/core/ingestion/process-processor.js +12 -11
- package/dist/core/ingestion/workers/parse-worker.js +67 -6
- package/dist/core/ingestion/workers/worker-pool.d.ts +3 -9
- package/dist/core/ingestion/workers/worker-pool.js +39 -18
- package/dist/core/kuzu/csv-generator.d.ts +15 -8
- package/dist/core/kuzu/csv-generator.js +258 -196
- package/dist/core/kuzu/kuzu-adapter.d.ts +1 -4
- package/dist/core/kuzu/kuzu-adapter.js +84 -72
- package/dist/core/kuzu/schema.d.ts +1 -1
- package/dist/core/kuzu/schema.js +266 -256
- package/dist/core/search/bm25-index.js +5 -5
- package/dist/core/search/hybrid-search.js +3 -3
- package/dist/core/wiki/graph-queries.js +52 -52
- package/dist/core/wiki/html-viewer.js +192 -192
- package/dist/core/wiki/prompts.js +82 -82
- package/dist/mcp/core/embedder.js +8 -4
- package/dist/mcp/local/local-backend.d.ts +6 -0
- package/dist/mcp/local/local-backend.js +224 -117
- package/dist/mcp/resources.js +42 -42
- package/dist/mcp/server.js +16 -16
- package/dist/mcp/tools.js +86 -77
- package/dist/server/api.d.ts +4 -2
- package/dist/server/api.js +253 -83
- package/dist/types/pipeline.d.ts +6 -2
- package/dist/types/pipeline.js +6 -4
- package/hooks/claude/gitnexus-hook.cjs +135 -135
- package/hooks/claude/pre-tool-use.sh +78 -78
- package/hooks/claude/session-start.sh +42 -42
- package/package.json +82 -82
- package/skills/debugging.md +85 -85
- package/skills/exploring.md +75 -75
- package/skills/impact-analysis.md +94 -94
- package/skills/refactoring.md +113 -113
- package/vendor/leiden/index.cjs +355 -355
- package/vendor/leiden/utils.cjs +392 -392
|
@@ -1,59 +1,47 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* CSV Generator for KuzuDB Hybrid Schema
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Streams CSV rows directly to disk files in a single pass over graph nodes.
|
|
5
|
+
* File contents are lazy-read from disk per-node to avoid holding the entire
|
|
6
|
+
* repo in RAM. Rows are buffered (FLUSH_EVERY) before writing to minimize
|
|
7
|
+
* per-row Promise overhead.
|
|
6
8
|
*
|
|
7
9
|
* RFC 4180 Compliant:
|
|
8
10
|
* - Fields containing commas, double quotes, or newlines are enclosed in double quotes
|
|
9
11
|
* - Double quotes within fields are escaped by doubling them ("")
|
|
10
12
|
* - All fields are consistently quoted for safety with code content
|
|
11
13
|
*/
|
|
14
|
+
import fs from 'fs/promises';
|
|
15
|
+
import { createWriteStream } from 'fs';
|
|
16
|
+
import path from 'path';
|
|
17
|
+
/** Flush buffered rows to disk every N rows */
|
|
18
|
+
const FLUSH_EVERY = 500;
|
|
12
19
|
// ============================================================================
|
|
13
20
|
// CSV ESCAPE UTILITIES
|
|
14
21
|
// ============================================================================
|
|
15
|
-
/**
|
|
16
|
-
* Sanitize string to ensure valid UTF-8 and safe CSV content for KuzuDB
|
|
17
|
-
* Removes or replaces invalid characters that would break CSV parsing.
|
|
18
|
-
*
|
|
19
|
-
* Critical: KuzuDB's native CSV parser on Windows can misinterpret \r\n
|
|
20
|
-
* inside quoted fields. We normalize all line endings to \n only.
|
|
21
|
-
*/
|
|
22
22
|
const sanitizeUTF8 = (str) => {
|
|
23
23
|
return str
|
|
24
|
-
.replace(/\r\n/g, '\n')
|
|
25
|
-
.replace(/\r/g, '\n')
|
|
26
|
-
.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '')
|
|
27
|
-
.replace(/[\uD800-\uDFFF]/g, '')
|
|
28
|
-
.replace(/[\uFFFE\uFFFF]/g, '');
|
|
24
|
+
.replace(/\r\n/g, '\n')
|
|
25
|
+
.replace(/\r/g, '\n')
|
|
26
|
+
.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '')
|
|
27
|
+
.replace(/[\uD800-\uDFFF]/g, '')
|
|
28
|
+
.replace(/[\uFFFE\uFFFF]/g, '');
|
|
29
29
|
};
|
|
30
|
-
/**
|
|
31
|
-
* RFC 4180 compliant CSV field escaping
|
|
32
|
-
* ALWAYS wraps in double quotes for safety with code content
|
|
33
|
-
*/
|
|
34
30
|
const escapeCSVField = (value) => {
|
|
35
|
-
if (value === undefined || value === null)
|
|
31
|
+
if (value === undefined || value === null)
|
|
36
32
|
return '""';
|
|
37
|
-
}
|
|
38
33
|
let str = String(value);
|
|
39
34
|
str = sanitizeUTF8(str);
|
|
40
35
|
return `"${str.replace(/"/g, '""')}"`;
|
|
41
36
|
};
|
|
42
|
-
/**
|
|
43
|
-
* Escape a numeric value (no quotes needed for numbers)
|
|
44
|
-
*/
|
|
45
37
|
const escapeCSVNumber = (value, defaultValue = -1) => {
|
|
46
|
-
if (value === undefined || value === null)
|
|
38
|
+
if (value === undefined || value === null)
|
|
47
39
|
return String(defaultValue);
|
|
48
|
-
}
|
|
49
40
|
return String(value);
|
|
50
41
|
};
|
|
51
42
|
// ============================================================================
|
|
52
|
-
// CONTENT EXTRACTION
|
|
43
|
+
// CONTENT EXTRACTION (lazy — reads from disk on demand)
|
|
53
44
|
// ============================================================================
|
|
54
|
-
/**
|
|
55
|
-
* Check if content looks like binary data
|
|
56
|
-
*/
|
|
57
45
|
const isBinaryContent = (content) => {
|
|
58
46
|
if (!content || content.length === 0)
|
|
59
47
|
return false;
|
|
@@ -61,181 +49,261 @@ const isBinaryContent = (content) => {
|
|
|
61
49
|
let nonPrintable = 0;
|
|
62
50
|
for (let i = 0; i < sample.length; i++) {
|
|
63
51
|
const code = sample.charCodeAt(i);
|
|
64
|
-
if ((code < 9) || (code > 13 && code < 32) || code === 127)
|
|
52
|
+
if ((code < 9) || (code > 13 && code < 32) || code === 127)
|
|
65
53
|
nonPrintable++;
|
|
66
|
-
}
|
|
67
54
|
}
|
|
68
55
|
return (nonPrintable / sample.length) > 0.1;
|
|
69
56
|
};
|
|
70
57
|
/**
|
|
71
|
-
*
|
|
58
|
+
* LRU content cache — avoids re-reading the same source file for every
|
|
59
|
+
* symbol defined in it. Sized generously so most files stay cached during
|
|
60
|
+
* the single-pass node iteration.
|
|
72
61
|
*/
|
|
73
|
-
|
|
62
|
+
class FileContentCache {
|
|
63
|
+
cache = new Map();
|
|
64
|
+
accessOrder = [];
|
|
65
|
+
maxSize;
|
|
66
|
+
repoPath;
|
|
67
|
+
constructor(repoPath, maxSize = 3000) {
|
|
68
|
+
this.repoPath = repoPath;
|
|
69
|
+
this.maxSize = maxSize;
|
|
70
|
+
}
|
|
71
|
+
async get(relativePath) {
|
|
72
|
+
if (!relativePath)
|
|
73
|
+
return '';
|
|
74
|
+
const cached = this.cache.get(relativePath);
|
|
75
|
+
if (cached !== undefined)
|
|
76
|
+
return cached;
|
|
77
|
+
try {
|
|
78
|
+
const fullPath = path.join(this.repoPath, relativePath);
|
|
79
|
+
const content = await fs.readFile(fullPath, 'utf-8');
|
|
80
|
+
this.set(relativePath, content);
|
|
81
|
+
return content;
|
|
82
|
+
}
|
|
83
|
+
catch {
|
|
84
|
+
this.set(relativePath, '');
|
|
85
|
+
return '';
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
set(key, value) {
|
|
89
|
+
if (this.cache.size >= this.maxSize) {
|
|
90
|
+
const oldest = this.accessOrder.shift();
|
|
91
|
+
if (oldest)
|
|
92
|
+
this.cache.delete(oldest);
|
|
93
|
+
}
|
|
94
|
+
this.cache.set(key, value);
|
|
95
|
+
this.accessOrder.push(key);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
const extractContent = async (node, contentCache) => {
|
|
74
99
|
const filePath = node.properties.filePath;
|
|
75
|
-
const content =
|
|
100
|
+
const content = await contentCache.get(filePath);
|
|
76
101
|
if (!content)
|
|
77
102
|
return '';
|
|
78
103
|
if (node.label === 'Folder')
|
|
79
104
|
return '';
|
|
80
105
|
if (isBinaryContent(content))
|
|
81
106
|
return '[Binary file - content not stored]';
|
|
82
|
-
// For File nodes, return content (limited)
|
|
83
107
|
if (node.label === 'File') {
|
|
84
108
|
const MAX_FILE_CONTENT = 10000;
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
return content;
|
|
109
|
+
return content.length > MAX_FILE_CONTENT
|
|
110
|
+
? content.slice(0, MAX_FILE_CONTENT) + '\n... [truncated]'
|
|
111
|
+
: content;
|
|
89
112
|
}
|
|
90
|
-
// For code elements, extract the relevant lines with context
|
|
91
113
|
const startLine = node.properties.startLine;
|
|
92
114
|
const endLine = node.properties.endLine;
|
|
93
115
|
if (startLine === undefined || endLine === undefined)
|
|
94
116
|
return '';
|
|
95
117
|
const lines = content.split('\n');
|
|
96
|
-
const
|
|
97
|
-
const
|
|
98
|
-
const end = Math.min(lines.length - 1, endLine + contextLines);
|
|
118
|
+
const start = Math.max(0, startLine - 2);
|
|
119
|
+
const end = Math.min(lines.length - 1, endLine + 2);
|
|
99
120
|
const snippet = lines.slice(start, end + 1).join('\n');
|
|
100
121
|
const MAX_SNIPPET = 5000;
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
return snippet;
|
|
122
|
+
return snippet.length > MAX_SNIPPET
|
|
123
|
+
? snippet.slice(0, MAX_SNIPPET) + '\n... [truncated]'
|
|
124
|
+
: snippet;
|
|
105
125
|
};
|
|
106
126
|
// ============================================================================
|
|
107
|
-
//
|
|
127
|
+
// BUFFERED CSV WRITER
|
|
108
128
|
// ============================================================================
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
if (node.label !== 'File')
|
|
119
|
-
continue;
|
|
120
|
-
// Skip duplicates
|
|
121
|
-
if (seenIds.has(node.id))
|
|
122
|
-
continue;
|
|
123
|
-
seenIds.add(node.id);
|
|
124
|
-
const content = extractContent(node, fileContents);
|
|
125
|
-
rows.push([
|
|
126
|
-
escapeCSVField(node.id),
|
|
127
|
-
escapeCSVField(node.properties.name || ''),
|
|
128
|
-
escapeCSVField(node.properties.filePath || ''),
|
|
129
|
-
escapeCSVField(content),
|
|
130
|
-
].join(','));
|
|
129
|
+
class BufferedCSVWriter {
|
|
130
|
+
ws;
|
|
131
|
+
buffer = [];
|
|
132
|
+
rows = 0;
|
|
133
|
+
constructor(filePath, header) {
|
|
134
|
+
this.ws = createWriteStream(filePath, 'utf-8');
|
|
135
|
+
// Large repos flush many times — raise listener cap to avoid MaxListenersExceededWarning
|
|
136
|
+
this.ws.setMaxListeners(50);
|
|
137
|
+
this.buffer.push(header);
|
|
131
138
|
}
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
const headers = ['id', 'name', 'filePath'];
|
|
140
|
-
const rows = [headers.join(',')];
|
|
141
|
-
for (const node of nodes) {
|
|
142
|
-
if (node.label !== 'Folder')
|
|
143
|
-
continue;
|
|
144
|
-
rows.push([
|
|
145
|
-
escapeCSVField(node.id),
|
|
146
|
-
escapeCSVField(node.properties.name || ''),
|
|
147
|
-
escapeCSVField(node.properties.filePath || ''),
|
|
148
|
-
].join(','));
|
|
139
|
+
addRow(row) {
|
|
140
|
+
this.buffer.push(row);
|
|
141
|
+
this.rows++;
|
|
142
|
+
if (this.buffer.length >= FLUSH_EVERY) {
|
|
143
|
+
return this.flush();
|
|
144
|
+
}
|
|
145
|
+
return Promise.resolve();
|
|
149
146
|
}
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
const
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
const content = extractContent(node, fileContents);
|
|
163
|
-
rows.push([
|
|
164
|
-
escapeCSVField(node.id),
|
|
165
|
-
escapeCSVField(node.properties.name || ''),
|
|
166
|
-
escapeCSVField(node.properties.filePath || ''),
|
|
167
|
-
escapeCSVNumber(node.properties.startLine, -1),
|
|
168
|
-
escapeCSVNumber(node.properties.endLine, -1),
|
|
169
|
-
node.properties.isExported ? 'true' : 'false',
|
|
170
|
-
escapeCSVField(content),
|
|
171
|
-
].join(','));
|
|
147
|
+
flush() {
|
|
148
|
+
if (this.buffer.length === 0)
|
|
149
|
+
return Promise.resolve();
|
|
150
|
+
const chunk = this.buffer.join('\n') + '\n';
|
|
151
|
+
this.buffer.length = 0;
|
|
152
|
+
return new Promise((resolve, reject) => {
|
|
153
|
+
const ok = this.ws.write(chunk);
|
|
154
|
+
if (ok)
|
|
155
|
+
resolve();
|
|
156
|
+
else
|
|
157
|
+
this.ws.once('drain', resolve);
|
|
158
|
+
});
|
|
172
159
|
}
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
const generateCommunityCSV = (nodes) => {
|
|
180
|
-
const headers = ['id', 'label', 'heuristicLabel', 'keywords', 'description', 'enrichedBy', 'cohesion', 'symbolCount'];
|
|
181
|
-
const rows = [headers.join(',')];
|
|
182
|
-
for (const node of nodes) {
|
|
183
|
-
if (node.label !== 'Community')
|
|
184
|
-
continue;
|
|
185
|
-
// Handle keywords array - convert to KuzuDB array format
|
|
186
|
-
const keywords = node.properties.keywords || [];
|
|
187
|
-
const keywordsStr = `[${keywords.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
|
|
188
|
-
rows.push([
|
|
189
|
-
escapeCSVField(node.id),
|
|
190
|
-
escapeCSVField(node.properties.name || ''), // label is stored in name
|
|
191
|
-
escapeCSVField(node.properties.heuristicLabel || ''),
|
|
192
|
-
keywordsStr, // Array format for KuzuDB
|
|
193
|
-
escapeCSVField(node.properties.description || ''),
|
|
194
|
-
escapeCSVField(node.properties.enrichedBy || 'heuristic'),
|
|
195
|
-
escapeCSVNumber(node.properties.cohesion, 0),
|
|
196
|
-
escapeCSVNumber(node.properties.symbolCount, 0),
|
|
197
|
-
].join(','));
|
|
160
|
+
async finish() {
|
|
161
|
+
await this.flush();
|
|
162
|
+
return new Promise((resolve, reject) => {
|
|
163
|
+
this.ws.end(() => resolve());
|
|
164
|
+
this.ws.on('error', reject);
|
|
165
|
+
});
|
|
198
166
|
}
|
|
199
|
-
|
|
200
|
-
};
|
|
167
|
+
}
|
|
201
168
|
/**
|
|
202
|
-
*
|
|
203
|
-
*
|
|
169
|
+
* Stream all CSV data directly to disk files.
|
|
170
|
+
* Iterates graph nodes exactly ONCE — routes each node to the right writer.
|
|
171
|
+
* File contents are lazy-read from disk with a generous LRU cache.
|
|
204
172
|
*/
|
|
205
|
-
const
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
if (node.label !== 'Process')
|
|
210
|
-
continue;
|
|
211
|
-
// Handle communities array (string[])
|
|
212
|
-
const communities = node.properties.communities || [];
|
|
213
|
-
const communitiesStr = `[${communities.map((c) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
|
|
214
|
-
rows.push([
|
|
215
|
-
escapeCSVField(node.id),
|
|
216
|
-
escapeCSVField(node.properties.name || ''), // label stores name
|
|
217
|
-
escapeCSVField(node.properties.heuristicLabel || ''),
|
|
218
|
-
escapeCSVField(node.properties.processType || ''),
|
|
219
|
-
escapeCSVNumber(node.properties.stepCount, 0),
|
|
220
|
-
escapeCSVField(communitiesStr), // Needs CSV escaping because it contains commas!
|
|
221
|
-
escapeCSVField(node.properties.entryPointId || ''),
|
|
222
|
-
escapeCSVField(node.properties.terminalId || ''),
|
|
223
|
-
].join(','));
|
|
173
|
+
export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
174
|
+
// Remove stale CSVs from previous crashed runs, then recreate
|
|
175
|
+
try {
|
|
176
|
+
await fs.rm(csvDir, { recursive: true, force: true });
|
|
224
177
|
}
|
|
225
|
-
|
|
226
|
-
};
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
const
|
|
235
|
-
const
|
|
236
|
-
const
|
|
237
|
-
|
|
238
|
-
|
|
178
|
+
catch { }
|
|
179
|
+
await fs.mkdir(csvDir, { recursive: true });
|
|
180
|
+
// We open ~30 concurrent write-streams; raise process limit to suppress
|
|
181
|
+
// MaxListenersExceededWarning (restored after all streams finish).
|
|
182
|
+
const prevMax = process.getMaxListeners();
|
|
183
|
+
process.setMaxListeners(prevMax + 40);
|
|
184
|
+
const contentCache = new FileContentCache(repoPath);
|
|
185
|
+
// Create writers for every node type up-front
|
|
186
|
+
const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content');
|
|
187
|
+
const folderWriter = new BufferedCSVWriter(path.join(csvDir, 'folder.csv'), 'id,name,filePath');
|
|
188
|
+
const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content';
|
|
189
|
+
const functionWriter = new BufferedCSVWriter(path.join(csvDir, 'function.csv'), codeElementHeader);
|
|
190
|
+
const classWriter = new BufferedCSVWriter(path.join(csvDir, 'class.csv'), codeElementHeader);
|
|
191
|
+
const interfaceWriter = new BufferedCSVWriter(path.join(csvDir, 'interface.csv'), codeElementHeader);
|
|
192
|
+
const methodWriter = new BufferedCSVWriter(path.join(csvDir, 'method.csv'), codeElementHeader);
|
|
193
|
+
const codeElemWriter = new BufferedCSVWriter(path.join(csvDir, 'codeelement.csv'), codeElementHeader);
|
|
194
|
+
const communityWriter = new BufferedCSVWriter(path.join(csvDir, 'community.csv'), 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
|
|
195
|
+
const processWriter = new BufferedCSVWriter(path.join(csvDir, 'process.csv'), 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');
|
|
196
|
+
// Multi-language node types share the same CSV shape (no isExported column)
|
|
197
|
+
const multiLangHeader = 'id,name,filePath,startLine,endLine,content';
|
|
198
|
+
const MULTI_LANG_TYPES = ['Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl',
|
|
199
|
+
'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation', 'Constructor', 'Template', 'Module'];
|
|
200
|
+
const multiLangWriters = new Map();
|
|
201
|
+
for (const t of MULTI_LANG_TYPES) {
|
|
202
|
+
multiLangWriters.set(t, new BufferedCSVWriter(path.join(csvDir, `${t.toLowerCase()}.csv`), multiLangHeader));
|
|
203
|
+
}
|
|
204
|
+
const codeWriterMap = {
|
|
205
|
+
'Function': functionWriter,
|
|
206
|
+
'Class': classWriter,
|
|
207
|
+
'Interface': interfaceWriter,
|
|
208
|
+
'Method': methodWriter,
|
|
209
|
+
'CodeElement': codeElemWriter,
|
|
210
|
+
};
|
|
211
|
+
const seenFileIds = new Set();
|
|
212
|
+
// --- SINGLE PASS over all nodes ---
|
|
213
|
+
for (const node of graph.iterNodes()) {
|
|
214
|
+
switch (node.label) {
|
|
215
|
+
case 'File': {
|
|
216
|
+
if (seenFileIds.has(node.id))
|
|
217
|
+
break;
|
|
218
|
+
seenFileIds.add(node.id);
|
|
219
|
+
const content = await extractContent(node, contentCache);
|
|
220
|
+
await fileWriter.addRow([
|
|
221
|
+
escapeCSVField(node.id),
|
|
222
|
+
escapeCSVField(node.properties.name || ''),
|
|
223
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
224
|
+
escapeCSVField(content),
|
|
225
|
+
].join(','));
|
|
226
|
+
break;
|
|
227
|
+
}
|
|
228
|
+
case 'Folder':
|
|
229
|
+
await folderWriter.addRow([
|
|
230
|
+
escapeCSVField(node.id),
|
|
231
|
+
escapeCSVField(node.properties.name || ''),
|
|
232
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
233
|
+
].join(','));
|
|
234
|
+
break;
|
|
235
|
+
case 'Community': {
|
|
236
|
+
const keywords = node.properties.keywords || [];
|
|
237
|
+
const keywordsStr = `[${keywords.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
|
|
238
|
+
await communityWriter.addRow([
|
|
239
|
+
escapeCSVField(node.id),
|
|
240
|
+
escapeCSVField(node.properties.name || ''),
|
|
241
|
+
escapeCSVField(node.properties.heuristicLabel || ''),
|
|
242
|
+
keywordsStr,
|
|
243
|
+
escapeCSVField(node.properties.description || ''),
|
|
244
|
+
escapeCSVField(node.properties.enrichedBy || 'heuristic'),
|
|
245
|
+
escapeCSVNumber(node.properties.cohesion, 0),
|
|
246
|
+
escapeCSVNumber(node.properties.symbolCount, 0),
|
|
247
|
+
].join(','));
|
|
248
|
+
break;
|
|
249
|
+
}
|
|
250
|
+
case 'Process': {
|
|
251
|
+
const communities = node.properties.communities || [];
|
|
252
|
+
const communitiesStr = `[${communities.map((c) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
|
|
253
|
+
await processWriter.addRow([
|
|
254
|
+
escapeCSVField(node.id),
|
|
255
|
+
escapeCSVField(node.properties.name || ''),
|
|
256
|
+
escapeCSVField(node.properties.heuristicLabel || ''),
|
|
257
|
+
escapeCSVField(node.properties.processType || ''),
|
|
258
|
+
escapeCSVNumber(node.properties.stepCount, 0),
|
|
259
|
+
escapeCSVField(communitiesStr),
|
|
260
|
+
escapeCSVField(node.properties.entryPointId || ''),
|
|
261
|
+
escapeCSVField(node.properties.terminalId || ''),
|
|
262
|
+
].join(','));
|
|
263
|
+
break;
|
|
264
|
+
}
|
|
265
|
+
default: {
|
|
266
|
+
// Code element nodes (Function, Class, Interface, Method, CodeElement)
|
|
267
|
+
const writer = codeWriterMap[node.label];
|
|
268
|
+
if (writer) {
|
|
269
|
+
const content = await extractContent(node, contentCache);
|
|
270
|
+
await writer.addRow([
|
|
271
|
+
escapeCSVField(node.id),
|
|
272
|
+
escapeCSVField(node.properties.name || ''),
|
|
273
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
274
|
+
escapeCSVNumber(node.properties.startLine, -1),
|
|
275
|
+
escapeCSVNumber(node.properties.endLine, -1),
|
|
276
|
+
node.properties.isExported ? 'true' : 'false',
|
|
277
|
+
escapeCSVField(content),
|
|
278
|
+
].join(','));
|
|
279
|
+
}
|
|
280
|
+
else {
|
|
281
|
+
// Multi-language node types (Struct, Impl, Trait, Macro, etc.)
|
|
282
|
+
const mlWriter = multiLangWriters.get(node.label);
|
|
283
|
+
if (mlWriter) {
|
|
284
|
+
const content = await extractContent(node, contentCache);
|
|
285
|
+
await mlWriter.addRow([
|
|
286
|
+
escapeCSVField(node.id),
|
|
287
|
+
escapeCSVField(node.properties.name || ''),
|
|
288
|
+
escapeCSVField(node.properties.filePath || ''),
|
|
289
|
+
escapeCSVNumber(node.properties.startLine, -1),
|
|
290
|
+
escapeCSVNumber(node.properties.endLine, -1),
|
|
291
|
+
escapeCSVField(content),
|
|
292
|
+
].join(','));
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
break;
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
// Finish all node writers
|
|
300
|
+
const allWriters = [fileWriter, folderWriter, functionWriter, classWriter, interfaceWriter, methodWriter, codeElemWriter, communityWriter, processWriter, ...multiLangWriters.values()];
|
|
301
|
+
await Promise.all(allWriters.map(w => w.finish()));
|
|
302
|
+
// --- Stream relationship CSV ---
|
|
303
|
+
const relCsvPath = path.join(csvDir, 'relations.csv');
|
|
304
|
+
const relWriter = new BufferedCSVWriter(relCsvPath, 'from,to,type,confidence,reason,step');
|
|
305
|
+
for (const rel of graph.iterRelationships()) {
|
|
306
|
+
await relWriter.addRow([
|
|
239
307
|
escapeCSVField(rel.sourceId),
|
|
240
308
|
escapeCSVField(rel.targetId),
|
|
241
309
|
escapeCSVField(rel.type),
|
|
@@ -244,29 +312,23 @@ const generateRelationCSV = (graph) => {
|
|
|
244
312
|
escapeCSVNumber(rel.step, 0),
|
|
245
313
|
].join(','));
|
|
246
314
|
}
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
nodeCSVs.set('CodeElement', generateCodeElementCSV(nodes, 'CodeElement', fileContents));
|
|
267
|
-
nodeCSVs.set('Community', generateCommunityCSV(nodes));
|
|
268
|
-
nodeCSVs.set('Process', generateProcessCSV(nodes));
|
|
269
|
-
// Generate single relation CSV
|
|
270
|
-
const relCSV = generateRelationCSV(graph);
|
|
271
|
-
return { nodes: nodeCSVs, relCSV };
|
|
315
|
+
await relWriter.finish();
|
|
316
|
+
// Build result map — only include tables that have rows
|
|
317
|
+
const nodeFiles = new Map();
|
|
318
|
+
const tableMap = [
|
|
319
|
+
['File', fileWriter], ['Folder', folderWriter],
|
|
320
|
+
['Function', functionWriter], ['Class', classWriter],
|
|
321
|
+
['Interface', interfaceWriter], ['Method', methodWriter],
|
|
322
|
+
['CodeElement', codeElemWriter],
|
|
323
|
+
['Community', communityWriter], ['Process', processWriter],
|
|
324
|
+
...Array.from(multiLangWriters.entries()).map(([name, w]) => [name, w]),
|
|
325
|
+
];
|
|
326
|
+
for (const [name, writer] of tableMap) {
|
|
327
|
+
if (writer.rows > 0) {
|
|
328
|
+
nodeFiles.set(name, { csvPath: path.join(csvDir, `${name.toLowerCase()}.csv`), rows: writer.rows });
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
// Restore original process listener limit
|
|
332
|
+
process.setMaxListeners(prevMax);
|
|
333
|
+
return { nodeFiles, relCsvPath, relRows: relWriter.rows };
|
|
272
334
|
};
|
|
@@ -5,7 +5,7 @@ export declare const initKuzu: (dbPath: string) => Promise<{
|
|
|
5
5
|
conn: kuzu.Connection;
|
|
6
6
|
}>;
|
|
7
7
|
export type KuzuProgressCallback = (message: string) => void;
|
|
8
|
-
export declare const loadGraphToKuzu: (graph: KnowledgeGraph,
|
|
8
|
+
export declare const loadGraphToKuzu: (graph: KnowledgeGraph, repoPath: string, storagePath: string, onProgress?: KuzuProgressCallback) => Promise<{
|
|
9
9
|
success: boolean;
|
|
10
10
|
insertedRels: number;
|
|
11
11
|
skippedRels: number;
|
|
@@ -61,9 +61,6 @@ export declare const deleteNodesForFile: (filePath: string, dbPath?: string) =>
|
|
|
61
61
|
deletedNodes: number;
|
|
62
62
|
}>;
|
|
63
63
|
export declare const getEmbeddingTableName: () => string;
|
|
64
|
-
/**
|
|
65
|
-
* Load the FTS extension (required before using FTS functions)
|
|
66
|
-
*/
|
|
67
64
|
export declare const loadFTSExtension: () => Promise<void>;
|
|
68
65
|
/**
|
|
69
66
|
* Create a full-text search index on a table
|