@codragraph/cli 1.6.4 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/README.md +34 -0
  2. package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
  3. package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
  4. package/dist/_shared/cgdb/schema-constants.js +67 -0
  5. package/dist/_shared/cgdb/schema-constants.js.map +1 -0
  6. package/dist/_shared/index.d.ts +2 -2
  7. package/dist/_shared/index.js +1 -1
  8. package/dist/cli/analyze.d.ts +22 -0
  9. package/dist/cli/analyze.js +109 -6
  10. package/dist/cli/compress-stats.d.ts +29 -0
  11. package/dist/cli/compress-stats.js +97 -0
  12. package/dist/cli/graphstore.d.ts +6 -2
  13. package/dist/cli/graphstore.js +45 -23
  14. package/dist/cli/index-repo.js +3 -3
  15. package/dist/cli/index.js +16 -2
  16. package/dist/cli/profile-heap.d.ts +35 -0
  17. package/dist/cli/profile-heap.js +126 -0
  18. package/dist/cli/setup.d.ts +13 -0
  19. package/dist/cli/setup.js +22 -11
  20. package/dist/cli/skill-gen.d.ts +14 -2
  21. package/dist/cli/skill-gen.js +52 -19
  22. package/dist/cli/tool.js +4 -0
  23. package/dist/cli/wiki.js +3 -3
  24. package/dist/core/augmentation/engine.js +7 -7
  25. package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
  26. package/dist/core/cgdb/cgdb-adapter.js +1320 -0
  27. package/dist/core/cgdb/content-read.d.ts +46 -0
  28. package/dist/core/cgdb/content-read.js +64 -0
  29. package/dist/core/cgdb/csv-generator.d.ts +29 -0
  30. package/dist/core/cgdb/csv-generator.js +492 -0
  31. package/dist/core/cgdb/pool-adapter.d.ts +93 -0
  32. package/dist/core/cgdb/pool-adapter.js +550 -0
  33. package/dist/core/cgdb/schema.d.ts +62 -0
  34. package/dist/core/cgdb/schema.js +502 -0
  35. package/dist/core/embeddings/embedding-pipeline.js +27 -10
  36. package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
  37. package/dist/core/graphstore/cgdb-row-source.js +141 -0
  38. package/dist/core/graphstore/index.d.ts +1 -1
  39. package/dist/core/graphstore/index.js +3 -3
  40. package/dist/core/group/bridge-db.d.ts +2 -2
  41. package/dist/core/group/bridge-db.js +123 -36
  42. package/dist/core/group/bridge-schema.d.ts +4 -4
  43. package/dist/core/group/bridge-schema.js +4 -4
  44. package/dist/core/group/cross-impact.js +3 -3
  45. package/dist/core/group/sync.js +4 -4
  46. package/dist/core/lbug/content-read.d.ts +46 -0
  47. package/dist/core/lbug/content-read.js +64 -0
  48. package/dist/core/lbug/csv-generator.d.ts +2 -6
  49. package/dist/core/lbug/csv-generator.js +45 -12
  50. package/dist/core/lbug/lbug-adapter.d.ts +4 -1
  51. package/dist/core/lbug/lbug-adapter.js +153 -21
  52. package/dist/core/lbug/schema.d.ts +7 -7
  53. package/dist/core/lbug/schema.js +18 -0
  54. package/dist/core/run-analyze.d.ts +13 -0
  55. package/dist/core/run-analyze.js +114 -27
  56. package/dist/core/search/bm25-index.d.ts +3 -3
  57. package/dist/core/search/bm25-index.js +75 -23
  58. package/dist/core/search/hybrid-search.js +2 -2
  59. package/dist/core/wiki/generator.d.ts +2 -2
  60. package/dist/core/wiki/generator.js +4 -4
  61. package/dist/core/wiki/graph-queries.d.ts +2 -2
  62. package/dist/core/wiki/graph-queries.js +5 -5
  63. package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
  64. package/dist/mcp/core/cgdb-adapter.js +5 -0
  65. package/dist/mcp/core/embedder.js +1 -1
  66. package/dist/mcp/local/local-backend.d.ts +2 -2
  67. package/dist/mcp/local/local-backend.js +36 -19
  68. package/dist/mcp/server.js +3 -3
  69. package/dist/mcp/tools.js +1 -1
  70. package/dist/server/analyze-worker.js +2 -2
  71. package/dist/server/api.js +34 -33
  72. package/dist/storage/repo-manager.d.ts +42 -3
  73. package/dist/storage/repo-manager.js +23 -4
  74. package/hooks/claude/codragraph-hook.cjs +98 -5
  75. package/package.json +4 -4
  76. package/scripts/build-tree-sitter-proto.cjs +15 -3
  77. package/scripts/build.js +8 -9
  78. package/scripts/patch-tree-sitter-swift.cjs +17 -4
  79. package/skills/codragraph-api-surface.md +110 -0
  80. package/skills/codragraph-config-audit.md +146 -0
  81. package/skills/codragraph-cross-repo-impact.md +135 -0
  82. package/skills/codragraph-data-lineage.md +137 -0
  83. package/skills/codragraph-dead-code.md +119 -0
  84. package/skills/codragraph-gh-actions-debug.md +162 -0
  85. package/skills/codragraph-gh-issue-workflow.md +178 -0
  86. package/skills/codragraph-gh-pr-workflow.md +176 -0
  87. package/skills/codragraph-gh-release-workflow.md +187 -0
  88. package/skills/codragraph-git-bisect.md +176 -0
  89. package/skills/codragraph-git-force-push.md +147 -0
  90. package/skills/codragraph-git-history-rewrite.md +174 -0
  91. package/skills/codragraph-git-rebase-vs-merge.md +138 -0
  92. package/skills/codragraph-git-recovery.md +181 -0
  93. package/skills/codragraph-git-worktree.md +145 -0
  94. package/skills/codragraph-migration-tracking.md +130 -0
  95. package/skills/codragraph-notebook-context.md +136 -0
  96. package/skills/codragraph-observability-coverage.md +125 -0
  97. package/skills/codragraph-onboarding.md +129 -0
  98. package/skills/codragraph-perf-hotspots.md +132 -0
  99. package/skills/codragraph-project-switcher.md +116 -0
  100. package/skills/codragraph-security-audit.md +144 -0
  101. package/skills/codragraph-sql-tracing.md +122 -0
  102. package/skills/codragraph-supply-chain-audit.md +153 -0
  103. package/skills/codragraph-test-coverage.md +97 -0
  104. package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
  105. package/vendor/tree-sitter-proto/src/node-types.json +1 -1
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Read-side decoder for `content` columns in cgdb node rows.
3
+ *
4
+ * RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
5
+ * every node table that has `content`. Default is `'none'` (passthrough)
6
+ * so existing reads keep working unchanged. When a writer opts into
7
+ * `--compress brotli|zstd`, the column carries the encoding tag and the
8
+ * `content` column carries base64-encoded compressed bytes — readers
9
+ * MUST run those bytes back through `decodeContent` before handing them
10
+ * to a consumer (MCP tool result, HTTP API response, embedding model,
11
+ * LLM input).
12
+ *
13
+ * Centralizing the decode in one helper has two benefits:
14
+ * 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
15
+ * contentEncoding` to the Cypher RETURN, and pipe the row through
16
+ * `decodeContentField` (or `decodeContentRow`) at the boundary.
17
+ * 2. Anyone hunting for "where does the read path decode compressed
18
+ * bytes" greps for `decodeContentField` and gets every site in one
19
+ * shot — no per-table feature detection scattered across files.
20
+ */
21
+ /**
22
+ * Decode a single (content, contentEncoding) pair from a Cypher row.
23
+ *
24
+ * Returns the input content unchanged when:
25
+ * - the encoding is missing / empty / `'none'` (the common case for
26
+ * 1.6.x – 1.7.x indexes, plus any 1.8+ index written without
27
+ * `--compress`);
28
+ * - content is null/undefined (`undefined` is returned; the caller decides whether that's an error);
29
+ * - content is not a string (pre-Phase-2 indexes never wrote non-string
30
+ * content, but defensive: don't crash a read path on a malformed row).
31
+ *
32
+ * Throws (via `decodeContent`) only when the row claims an encoding this
33
+ * CLI build can't decode — that's a forward-compat error and the right
34
+ * behavior is to fail loudly rather than return wrong content.
35
+ */
36
+ export declare function decodeContentField(content: unknown, encoding: unknown): string | undefined;
37
+ /**
38
+ * Apply `decodeContentField` to a row that carries `content` and
39
+ * `contentEncoding` keys (or their numeric column-index aliases).
40
+ *
41
+ * The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
42
+ * driver versions vary on whether named keys are populated, so existing
43
+ * read sites do `r.content ?? r[N]`. This helper accepts the same
44
+ * pattern. Never mutates the input: a NEW object is returned when a decode happens, otherwise the original row is returned as-is.
45
+ */
46
+ export declare function decodeContentRow<T extends Record<string, unknown>>(row: T, contentKey?: keyof T, encodingKey?: keyof T): T;
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Read-side decoder for `content` columns in cgdb node rows.
3
+ *
4
+ * RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
5
+ * every node table that has `content`. Default is `'none'` (passthrough)
6
+ * so existing reads keep working unchanged. When a writer opts into
7
+ * `--compress brotli|zstd`, the column carries the encoding tag and the
8
+ * `content` column carries base64-encoded compressed bytes — readers
9
+ * MUST run those bytes back through `decodeContent` before handing them
10
+ * to a consumer (MCP tool result, HTTP API response, embedding model,
11
+ * LLM input).
12
+ *
13
+ * Centralizing the decode in one helper has two benefits:
14
+ * 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
15
+ * contentEncoding` to the Cypher RETURN, and pipe the row through
16
+ * `decodeContentField` (or `decodeContentRow`) at the boundary.
17
+ * 2. Anyone hunting for "where does the read path decode compressed
18
+ * bytes" greps for `decodeContentField` and gets every site in one
19
+ * shot — no per-table feature detection scattered across files.
20
+ */
21
+ import { decodeContent } from '@codragraph/graphstore';
22
/**
 * Decode one (content, contentEncoding) pair taken from a Cypher row.
 *
 * Pass-through cases (no decoding is attempted):
 *   - content is null or undefined: `undefined` is returned and the caller
 *     decides whether that is an error;
 *   - content is not a string: the value is handed back untouched so a
 *     malformed row cannot crash a read path;
 *   - encoding is not a string, is empty, or is `'none'`: the content is
 *     returned exactly as stored.
 *
 * Every other combination is delegated to `decodeContent`; whatever that
 * helper throws propagates to the caller.
 *
 * @param {unknown} content  Raw value of the `content` column.
 * @param {unknown} encoding Raw value of the `contentEncoding` column.
 * @returns {string | undefined} Decoded (or passed-through) content.
 */
export function decodeContentField(content, encoding) {
    if (content == null) {
        return undefined;
    }
    const isEncoded = typeof encoding === 'string' && encoding !== '' && encoding !== 'none';
    if (typeof content !== 'string' || !isEncoded) {
        return content;
    }
    return decodeContent(content, encoding);
}
47
/**
 * Run `decodeContentField` over a row that carries a content column and an
 * encoding column.
 *
 * `contentKey` / `encodingKey` select the two columns, so callers whose
 * driver only populates numeric column aliases can pass those instead of
 * the default names.
 *
 * The input row is never mutated. When the content is null/undefined, or
 * the encoding is not a string, is empty, or is `'none'`, the SAME row
 * object is returned; otherwise a shallow copy with the decoded content is
 * returned.
 *
 * @param {object} row           Row as returned by the query driver.
 * @param {PropertyKey} [contentKey='content']          Key of the content column.
 * @param {PropertyKey} [encodingKey='contentEncoding'] Key of the encoding column.
 * @returns {object} The original row, or a copy with decoded content.
 */
export function decodeContentRow(row, contentKey = 'content', encodingKey = 'contentEncoding') {
    const rawContent = row[contentKey];
    if (rawContent == null) {
        return row;
    }
    const tag = row[encodingKey];
    const isEncoded = typeof tag === 'string' && tag.length > 0 && tag !== 'none';
    return isEncoded
        ? { ...row, [contentKey]: decodeContentField(rawContent, tag) }
        : row;
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * CSV Generator for LadybugDB Hybrid Schema
3
+ *
4
+ * Streams CSV rows directly to disk files in a single pass over graph nodes.
5
+ * File contents are lazy-read from disk per-node to avoid holding the entire
6
+ * repo in RAM. Rows are buffered (FLUSH_EVERY) before writing to minimize
7
+ * per-row Promise overhead.
8
+ *
9
+ * RFC 4180 Compliant:
10
+ * - Fields containing commas, double quotes, or newlines are enclosed in double quotes
11
+ * - Double quotes within fields are escaped by doubling them ("")
12
+ * - All fields are consistently quoted for safety with code content
13
+ */
14
+ import { KnowledgeGraph } from '../graph/types.js';
15
+ import { NodeTableName } from './schema.js';
16
+ import { type ContentEncoding } from '@codragraph/graphstore';
17
+ export declare const sanitizeUTF8: (str: string) => string;
18
+ export declare const escapeCSVField: (value: string | number | undefined | null) => string;
19
+ export declare const escapeCSVNumber: (value: number | undefined | null, defaultValue?: number) => string;
20
+ export declare const isBinaryContent: (content: string) => boolean;
21
+ export interface StreamedCSVResult {
22
+ nodeFiles: Map<NodeTableName, {
23
+ csvPath: string;
24
+ rows: number;
25
+ }>;
26
+ relCsvPath: string;
27
+ relRows: number;
28
+ }
29
+ export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, compress?: ContentEncoding) => Promise<StreamedCSVResult>;
@@ -0,0 +1,492 @@
1
+ /**
2
+ * CSV Generator for LadybugDB Hybrid Schema
3
+ *
4
+ * Streams CSV rows directly to disk files in a single pass over graph nodes.
5
+ * File contents are lazy-read from disk per-node to avoid holding the entire
6
+ * repo in RAM. Rows are buffered (FLUSH_EVERY) before writing to minimize
7
+ * per-row Promise overhead.
8
+ *
9
+ * RFC 4180 Compliant:
10
+ * - Fields containing commas, double quotes, or newlines are enclosed in double quotes
11
+ * - Double quotes within fields are escaped by doubling them ("")
12
+ * - All fields are consistently quoted for safety with code content
13
+ */
14
+ import fs from 'fs/promises';
15
+ import { createWriteStream } from 'fs';
16
+ import path from 'path';
17
+ import { encodeContent } from '@codragraph/graphstore';
18
+ /** Flush buffered rows to disk every N rows */
19
+ const FLUSH_EVERY = 500;
20
+ // ============================================================================
21
+ // CSV ESCAPE UTILITIES
22
+ // ============================================================================
23
/**
 * Normalize a string so it can be written safely into a CSV file.
 *
 * Steps, in order:
 *   1. CRLF and bare CR line endings become LF.
 *   2. UNPAIRED surrogate code units are removed. Well-formed surrogate
 *      pairs (emoji and every other astral-plane character) are kept:
 *      stripping every surrogate unit would silently delete valid text.
 *      This runs before control-character removal so that two halves
 *      separated by a control character are not glued into a fake pair.
 *   3. C0 control characters other than TAB / LF / CR, plus DEL, are removed.
 *   4. The noncharacters U+FFFE and U+FFFF are removed.
 *
 * @param {string} str Text to sanitize.
 * @returns {string} Sanitized text.
 */
export const sanitizeUTF8 = (str) => {
    return str
        .replace(/\r\n/g, '\n')
        .replace(/\r/g, '\n')
        .replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g, '')
        .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '')
        .replace(/[\uFFFE\uFFFF]/g, '');
};
31
/**
 * Render a value as an always-quoted CSV field.
 *
 * null / undefined become an empty quoted field. Anything else is
 * stringified, passed through `sanitizeUTF8`, and has embedded double
 * quotes doubled before being wrapped in quotes.
 *
 * @param {string | number | undefined | null} value Value to encode.
 * @returns {string} Quoted CSV field.
 */
export const escapeCSVField = (value) => {
    if (value == null) {
        return '""';
    }
    const cleaned = sanitizeUTF8(String(value));
    const doubled = cleaned.replace(/"/g, '""');
    return '"' + doubled + '"';
};
38
/**
 * Render a numeric column value as an unquoted CSV field.
 *
 * @param {number | undefined | null} value Number to write.
 * @param {number} [defaultValue=-1] Written instead when `value` is null or undefined.
 * @returns {string} String form of the chosen number.
 */
export const escapeCSVNumber = (value, defaultValue = -1) => String(value ?? defaultValue);
43
+ // ============================================================================
44
+ // CONTENT EXTRACTION (lazy — reads from disk on demand)
45
+ // ============================================================================
46
/**
 * Heuristic binary detector.
 *
 * Looks at the first 1000 UTF-16 code units and reports `true` when more
 * than 10% of them are control characters (codes below 9, codes 14-31, or
 * DEL). TAB, LF, VT, FF and CR do not count. Empty or missing content is
 * never considered binary.
 *
 * @param {string} content Text to inspect.
 * @returns {boolean} Whether the content looks binary.
 */
export const isBinaryContent = (content) => {
    if (!content || content.length === 0) {
        return false;
    }
    const head = content.slice(0, 1000);
    const isControl = (code) => code < 9 || (code > 13 && code < 32) || code === 127;
    let suspicious = 0;
    let idx = head.length;
    while (idx-- > 0) {
        if (isControl(head.charCodeAt(idx))) {
            suspicious += 1;
        }
    }
    return suspicious / head.length > 0.1;
};
58
/**
 * LRU content cache — avoids re-reading the same source file for every
 * symbol defined in it.
 *
 * Recency is tracked through the insertion order of the backing `Map`:
 * a hit deletes and re-inserts the key (moving it to the end) and eviction
 * removes the first key. Both operations are O(1), and a key can never be
 * tracked twice, so the cache cannot grow past `maxSize`.
 */
class FileContentCache {
    /** path (relative to the repo root) -> file content, oldest entry first */
    cache = new Map();
    maxSize;
    repoPath;
    /**
     * @param {string} repoPath Directory that relative paths are resolved against.
     * @param {number} [maxSize=3000] Maximum number of cached files.
     */
    constructor(repoPath, maxSize = 3000) {
        this.repoPath = repoPath;
        this.maxSize = maxSize;
    }
    /**
     * Return the UTF-8 content of a file, reading it from disk on a miss.
     *
     * @param {string} relativePath Path relative to `repoPath`; a falsy path yields ''.
     * @returns {Promise<string>} File content, or '' when the file cannot be read.
     */
    async get(relativePath) {
        if (!relativePath) {
            return '';
        }
        const cached = this.cache.get(relativePath);
        if (cached !== undefined) {
            // LRU promotion: re-insert so the key becomes the newest entry.
            this.cache.delete(relativePath);
            this.cache.set(relativePath, cached);
            return cached;
        }
        let content;
        try {
            content = await fs.readFile(path.join(this.repoPath, relativePath), 'utf-8');
        }
        catch {
            // Deliberate best-effort: an unreadable path is treated as empty
            // and cached, so it is not retried for every symbol that refers to it.
            content = '';
        }
        this.set(relativePath, content);
        return content;
    }
    /**
     * Store a value as the most recently used entry, evicting the least
     * recently used entry first when the cache is full.
     *
     * @param {string} key Cache key.
     * @param {string} value Content to store.
     */
    set(key, value) {
        if (this.cache.has(key)) {
            // Overwrite: drop the old slot so the key is tracked exactly once.
            this.cache.delete(key);
        }
        else if (this.cache.size >= this.maxSize) {
            const oldest = this.cache.keys().next().value;
            if (oldest !== undefined) {
                this.cache.delete(oldest);
            }
        }
        this.cache.set(key, value);
    }
}
106
/**
 * Produce the text stored in a node's `content` column.
 *
 *   - Folder nodes never carry content; they return '' without touching
 *     the cache, so no read is attempted for them.
 *   - An empty or unreadable file yields ''.
 *   - Content that looks binary is replaced by a fixed placeholder.
 *   - File nodes get the file text, truncated to 10000 characters.
 *   - Every other node gets the lines from `startLine - 2` through
 *     `endLine + 2` (clamped to the file), truncated to 5000 characters.
 *     A node without both line numbers (undefined or null) yields ''.
 *
 * @param {object} node Graph node with `label` and `properties`.
 * @param {{ get(relativePath: string): Promise<string> }} contentCache File content provider.
 * @returns {Promise<string>} Content to store for the node.
 */
const extractContent = async (node, contentCache) => {
    if (node.label === 'Folder') {
        return '';
    }
    const content = await contentCache.get(node.properties.filePath);
    if (!content) {
        return '';
    }
    if (isBinaryContent(content)) {
        return '[Binary file - content not stored]';
    }
    if (node.label === 'File') {
        const MAX_FILE_CONTENT = 10000;
        return content.length > MAX_FILE_CONTENT
            ? content.slice(0, MAX_FILE_CONTENT) + '\n... [truncated]'
            : content;
    }
    const { startLine, endLine } = node.properties;
    // `== null` also rejects null, which would otherwise be coerced to 0 below.
    if (startLine == null || endLine == null) {
        return '';
    }
    const lines = content.split('\n');
    const start = Math.max(0, startLine - 2);
    const end = Math.min(lines.length - 1, endLine + 2);
    const snippet = lines.slice(start, end + 1).join('\n');
    const MAX_SNIPPET = 5000;
    return snippet.length > MAX_SNIPPET
        ? snippet.slice(0, MAX_SNIPPET) + '\n... [truncated]'
        : snippet;
};
134
+ // ============================================================================
135
+ // BUFFERED CSV WRITER
136
+ // ============================================================================
137
/**
 * Append-only CSV file writer that batches rows in memory and writes them
 * to the underlying stream in chunks of FLUSH_EVERY rows.
 */
class BufferedCSVWriter {
    /** Underlying file write stream. */
    ws;
    /** Lines waiting to be written (the header is queued first). */
    buffer = [];
    /** Number of data rows added so far (the header is not counted). */
    rows = 0;
    /**
     * @param {string} filePath Destination file.
     * @param {string} header Header line, written before any row.
     */
    constructor(filePath, header) {
        this.ws = createWriteStream(filePath, 'utf-8');
        // Large repos flush many times — raise listener cap to avoid MaxListenersExceededWarning
        this.ws.setMaxListeners(50);
        this.buffer.push(header);
    }
    /**
     * Queue one already-formatted CSV line.
     *
     * @param {string} row CSV line without a trailing newline.
     * @returns {Promise<void>} Settles immediately unless the row triggers a flush.
     */
    addRow(row) {
        this.buffer.push(row);
        this.rows++;
        if (this.buffer.length >= FLUSH_EVERY) {
            return this.flush();
        }
        return Promise.resolve();
    }
    /**
     * Write all queued lines to the stream.
     *
     * Resolves right away when the stream accepts the chunk, otherwise once
     * it drains. Rejects if the stream errors while waiting. Whichever
     * outcome happens, the listener for the other outcome is removed so
     * nothing accumulates on the stream across flushes.
     *
     * @returns {Promise<void>}
     */
    flush() {
        if (this.buffer.length === 0) {
            return Promise.resolve();
        }
        const chunk = this.buffer.join('\n') + '\n';
        this.buffer.length = 0;
        return new Promise((resolve, reject) => {
            const onError = (err) => {
                this.ws.removeListener('drain', onDrain);
                reject(err);
            };
            const onDrain = () => {
                this.ws.removeListener('error', onError);
                resolve();
            };
            this.ws.once('error', onError);
            if (this.ws.write(chunk)) {
                this.ws.removeListener('error', onError);
                resolve();
            }
            else {
                this.ws.once('drain', onDrain);
            }
        });
    }
    /**
     * Flush remaining lines and close the stream.
     *
     * Rejects when the stream fails, whether the failure is reported through
     * the 'error' event or through the error argument of the `end()` callback.
     *
     * @returns {Promise<void>}
     */
    async finish() {
        await this.flush();
        return new Promise((resolve, reject) => {
            this.ws.once('error', reject);
            this.ws.end((err) => {
                if (err) {
                    // Keep the 'error' listener attached: the matching
                    // 'error' event may still be pending and must not be
                    // emitted without a handler.
                    reject(err);
                    return;
                }
                this.ws.removeListener('error', reject);
                resolve();
            });
        });
    }
}
183
+ /**
184
+ * Stream all CSV data directly to disk files.
185
+ * Iterates graph nodes exactly ONCE — routes each node to the right writer.
186
+ * File contents are lazy-read from disk with a generous LRU cache.
187
+ */
188
/**
 * Apply the optional content encoding to one `content` value.
 *
 * Returns the string to write into the `content` column together with the
 * tag to write into the per-row `contentEncoding` column.
 *
 * A falsy `compress` or `'none'` means pass-through: the content is
 * returned unchanged with the tag `'none'`. Any other value is handed to
 * `encodeContent` and echoed back as the tag.
 *
 * The tag is always produced, even for `'none'`, so every content-bearing
 * row has the same column layout regardless of compression mode.
 *
 * @param {string} content Plain content.
 * @param {string} [compress] Requested encoding.
 * @returns {{ wireContent: string, tag: string }}
 */
const applyEncoding = (content, compress) => {
    const tag = compress ? compress : 'none';
    if (tag === 'none') {
        return { wireContent: content, tag };
    }
    return { wireContent: encodeContent(content, tag), tag };
};
207
/**
 * Stream every node table and the relationship table of a graph to CSV
 * files inside `csvDir`.
 *
 * The directory is wiped and recreated first. Nodes are visited exactly
 * once; each node is deduplicated by id and routed to the writer for its
 * label (nodes with an unknown label are skipped). Content-bearing tables
 * get a `content` column plus a `contentEncoding` column produced by
 * `applyEncoding`. Relationships are written to `relations.csv` after all
 * node files are closed.
 *
 * The temporarily raised process listener limit is restored and any write
 * stream that is still open is destroyed even when the export fails.
 *
 * @param {object} graph Graph exposing `iterNodes()` and `iterRelationships()`.
 * @param {string} repoPath Repository root used to lazily read file contents.
 * @param {string} csvDir Output directory (removed and recreated).
 * @param {string} [compress] Content encoding passed to `applyEncoding`.
 * @returns {Promise<{ nodeFiles: Map<string, { csvPath: string, rows: number }>, relCsvPath: string, relRows: number }>}
 *   `nodeFiles` only lists tables that received at least one row.
 */
export const streamAllCSVsToDisk = async (graph, repoPath, csvDir, compress) => {
    // Remove stale CSVs from previous crashed runs, then recreate
    try {
        await fs.rm(csvDir, { recursive: true, force: true });
    }
    catch {
        // Best-effort cleanup: a failed removal must not abort the export.
    }
    await fs.mkdir(csvDir, { recursive: true });
    // We open ~30 concurrent write-streams; raise process limit to suppress
    // MaxListenersExceededWarning (restored in the `finally` below).
    const prevMax = process.getMaxListeners();
    process.setMaxListeners(prevMax + 40);
    // Every writer is tracked so its stream can be released on failure.
    const openWriters = [];
    const openWriter = (fileName, header) => {
        const writer = new BufferedCSVWriter(path.join(csvDir, fileName), header);
        openWriters.push(writer);
        return writer;
    };
    try {
        const contentCache = new FileContentCache(repoPath);
        // Content-bearing tables carry an extra `contentEncoding` column right
        // after `content`. Tables without `content` (Folder, Community,
        // Process, Route, Tool) do not.
        const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description';
        // Multi-language node types share the same CSV shape (no isExported column)
        const multiLangHeader = 'id,name,filePath,startLine,endLine,content,contentEncoding,description';
        const MULTI_LANG_TYPES = [
            'Struct',
            'Enum',
            'Macro',
            'Typedef',
            'Union',
            'Namespace',
            'Trait',
            'Impl',
            'TypeAlias',
            'Const',
            'Static',
            'Variable',
            'Property',
            'Record',
            'Delegate',
            'Annotation',
            'Constructor',
            'Template',
            'Module',
        ];
        // Table name -> header. Each file is named `<lowercased table>.csv`.
        const tableHeaders = [
            ['File', 'id,name,filePath,content,contentEncoding'],
            ['Folder', 'id,name,filePath'],
            ['Function', codeElementHeader],
            ['Class', codeElementHeader],
            ['Interface', codeElementHeader],
            ['Method', 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description,parameterCount,returnType'],
            ['CodeElement', codeElementHeader],
            ['Community', 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount'],
            ['Process', 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId'],
            // Section nodes have an extra 'level' column
            ['Section', 'id,name,filePath,startLine,endLine,level,content,contentEncoding,description'],
            ['Route', 'id,name,filePath,responseKeys,errorKeys,middleware'],
            ['Tool', 'id,name,filePath,description'],
            ...MULTI_LANG_TYPES.map((t) => [t, multiLangHeader]),
        ];
        const nodeWriters = new Map();
        for (const [table, header] of tableHeaders) {
            nodeWriters.set(table, openWriter(`${table.toLowerCase()}.csv`, header));
        }
        const codeElementLabels = new Set(['Function', 'Class', 'Interface', 'CodeElement']);
        // --- column helpers shared by the row builders ---
        const identityCols = (node) => [
            escapeCSVField(node.id),
            escapeCSVField(node.properties.name || ''),
            escapeCSVField(node.properties.filePath || ''),
        ];
        const lineCols = (node) => [
            escapeCSVNumber(node.properties.startLine, -1),
            escapeCSVNumber(node.properties.endLine, -1),
        ];
        const exportedCol = (node) => (node.properties.isExported ? 'true' : 'false');
        const descriptionCol = (node) => escapeCSVField(node.properties.description || '');
        const contentCols = async (node) => {
            const content = await extractContent(node, contentCache);
            const { wireContent, tag } = applyEncoding(content, compress);
            return [escapeCSVField(wireContent), escapeCSVField(tag)];
        };
        // Array literal with single-quoted elements; embedded quotes are doubled.
        const quotedList = (items) => `[${items.map((item) => `'${item.replace(/'/g, "''")}'`).join(',')}]`;
        // Build the ordered field list for one node whose label has a writer.
        const buildRow = async (node) => {
            const props = node.properties;
            switch (node.label) {
                case 'File':
                    return [...identityCols(node), ...(await contentCols(node))];
                case 'Folder':
                case 'Tool':
                    return node.label === 'Tool'
                        ? [...identityCols(node), descriptionCol(node)]
                        : identityCols(node);
                case 'Community': {
                    const keywords = props.keywords || [];
                    const keywordsStr = `[${keywords.map((k) => `'${k.replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/,/g, '\\,')}'`).join(',')}]`;
                    return [
                        escapeCSVField(node.id),
                        escapeCSVField(props.name || ''),
                        escapeCSVField(props.heuristicLabel || ''),
                        // NOTE(review): unlike the Process/Route list columns this
                        // one is written without escapeCSVField — confirm the
                        // loader expects the unquoted form.
                        keywordsStr,
                        descriptionCol(node),
                        escapeCSVField(props.enrichedBy || 'heuristic'),
                        escapeCSVNumber(props.cohesion, 0),
                        escapeCSVNumber(props.symbolCount, 0),
                    ];
                }
                case 'Process':
                    return [
                        escapeCSVField(node.id),
                        escapeCSVField(props.name || ''),
                        escapeCSVField(props.heuristicLabel || ''),
                        escapeCSVField(props.processType || ''),
                        escapeCSVNumber(props.stepCount, 0),
                        escapeCSVField(quotedList(props.communities || [])),
                        escapeCSVField(props.entryPointId || ''),
                        escapeCSVField(props.terminalId || ''),
                    ];
                case 'Method':
                    return [
                        ...identityCols(node),
                        ...lineCols(node),
                        exportedCol(node),
                        ...(await contentCols(node)),
                        descriptionCol(node),
                        escapeCSVNumber(props.parameterCount, 0),
                        escapeCSVField(props.returnType || ''),
                    ];
                case 'Section':
                    return [
                        ...identityCols(node),
                        ...lineCols(node),
                        escapeCSVNumber(props.level, 1),
                        ...(await contentCols(node)),
                        descriptionCol(node),
                    ];
                case 'Route':
                    // Array literals go inside a quoted CSV field: escapeCSVField
                    // wraps them in "..." and the elements are single-quoted.
                    return [
                        ...identityCols(node),
                        escapeCSVField(quotedList(props.responseKeys || [])),
                        escapeCSVField(quotedList(props.errorKeys || [])),
                        escapeCSVField(quotedList(props.middleware || [])),
                    ];
                default:
                    if (codeElementLabels.has(node.label)) {
                        // Code element nodes (Function, Class, Interface, CodeElement)
                        return [
                            ...identityCols(node),
                            ...lineCols(node),
                            exportedCol(node),
                            ...(await contentCols(node)),
                            descriptionCol(node),
                        ];
                    }
                    // Multi-language node types (Struct, Impl, Trait, Macro, etc.)
                    return [
                        ...identityCols(node),
                        ...lineCols(node),
                        ...(await contentCols(node)),
                        descriptionCol(node),
                    ];
            }
        };
        // Deduplicate all node types — the pipeline can produce duplicate IDs
        // across all symbol types, not just File nodes. A single Set covering
        // every label prevents PK violations on COPY.
        const seenNodeIds = new Set();
        // --- SINGLE PASS over all nodes ---
        for (const node of graph.iterNodes()) {
            if (seenNodeIds.has(node.id)) {
                continue;
            }
            seenNodeIds.add(node.id);
            const writer = nodeWriters.get(node.label);
            if (!writer) {
                continue;
            }
            const fields = await buildRow(node);
            await writer.addRow(fields.join(','));
        }
        // Finish all node writers
        await Promise.all([...nodeWriters.values()].map((w) => w.finish()));
        // --- Stream relationship CSV ---
        const relCsvPath = path.join(csvDir, 'relations.csv');
        const relWriter = openWriter('relations.csv', 'from,to,type,confidence,reason,step');
        for (const rel of graph.iterRelationships()) {
            await relWriter.addRow([
                escapeCSVField(rel.sourceId),
                escapeCSVField(rel.targetId),
                escapeCSVField(rel.type),
                escapeCSVNumber(rel.confidence, 1.0),
                escapeCSVField(rel.reason),
                escapeCSVNumber(rel.step, 0),
            ].join(','));
        }
        await relWriter.finish();
        // Build result map — only include tables that have rows
        const nodeFiles = new Map();
        for (const [name, writer] of nodeWriters) {
            if (writer.rows > 0) {
                nodeFiles.set(name, {
                    csvPath: path.join(csvDir, `${name.toLowerCase()}.csv`),
                    rows: writer.rows,
                });
            }
        }
        return { nodeFiles, relCsvPath, relRows: relWriter.rows };
    }
    catch (err) {
        // Release file descriptors of streams that were never finished.
        // Destroying an already-closed stream is a no-op.
        for (const writer of openWriters) {
            writer.ws.destroy();
        }
        throw err;
    }
    finally {
        // Restore original process listener limit
        process.setMaxListeners(prevMax);
    }
};