@codragraph/cli 2.1.0 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/README.md +62 -21
  2. package/dist/_shared/cgdb/schema-constants.d.ts +2 -2
  3. package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -1
  4. package/dist/_shared/cgdb/schema-constants.js +3 -0
  5. package/dist/_shared/cgdb/schema-constants.js.map +1 -1
  6. package/dist/_shared/feature-clusters.d.ts +99 -0
  7. package/dist/_shared/feature-clusters.d.ts.map +1 -0
  8. package/dist/_shared/feature-clusters.js +2 -0
  9. package/dist/_shared/feature-clusters.js.map +1 -0
  10. package/dist/_shared/graph/types.d.ts +16 -2
  11. package/dist/_shared/graph/types.d.ts.map +1 -1
  12. package/dist/_shared/index.d.ts +1 -0
  13. package/dist/_shared/index.d.ts.map +1 -1
  14. package/dist/_shared/index.js.map +1 -1
  15. package/dist/_shared/pipeline.d.ts +1 -1
  16. package/dist/_shared/pipeline.d.ts.map +1 -1
  17. package/dist/cli/ai-context.js +4 -0
  18. package/dist/cli/analyze.js +46 -26
  19. package/dist/cli/index.js +39 -1
  20. package/dist/cli/serve.d.ts +1 -0
  21. package/dist/cli/serve.js +3 -1
  22. package/dist/cli/setup.js +42 -21
  23. package/dist/cli/status.d.ts +13 -0
  24. package/dist/cli/status.js +99 -0
  25. package/dist/cli/tool.d.ts +25 -0
  26. package/dist/cli/tool.js +74 -0
  27. package/dist/config/ignore-service.js +2 -0
  28. package/dist/config/supported-languages.d.ts +3 -3
  29. package/dist/config/supported-languages.js +3 -3
  30. package/dist/core/cgdb/cgdb-adapter.js +19 -3
  31. package/dist/core/cgdb/csv-generator.js +33 -2
  32. package/dist/core/cgdb/schema.d.ts +2 -1
  33. package/dist/core/cgdb/schema.js +55 -0
  34. package/dist/core/embeddings/embedder.js +4 -2
  35. package/dist/core/graphstore/cgdb-row-source.js +3 -2
  36. package/dist/core/graphstore/index.d.ts +1 -1
  37. package/dist/core/graphstore/index.js +1 -1
  38. package/dist/core/group/bridge-db.js +42 -10
  39. package/dist/core/group/service.d.ts +16 -0
  40. package/dist/core/group/service.js +360 -0
  41. package/dist/core/ingestion/emit-references.d.ts +1 -1
  42. package/dist/core/ingestion/emit-references.js +1 -1
  43. package/dist/core/ingestion/feature-cluster-processor.d.ts +62 -0
  44. package/dist/core/ingestion/feature-cluster-processor.js +626 -0
  45. package/dist/core/ingestion/finalize-orchestrator.js +1 -1
  46. package/dist/core/ingestion/model/registration-table.js +1 -0
  47. package/dist/core/ingestion/model/resolve.d.ts +2 -2
  48. package/dist/core/ingestion/model/resolve.js +3 -3
  49. package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
  50. package/dist/core/ingestion/model/semantic-model.js +1 -1
  51. package/dist/core/ingestion/model/symbol-table.d.ts +1 -1
  52. package/dist/core/ingestion/model/symbol-table.js +1 -1
  53. package/dist/core/ingestion/pipeline-phases/feature-clusters.d.ts +17 -0
  54. package/dist/core/ingestion/pipeline-phases/feature-clusters.js +88 -0
  55. package/dist/core/ingestion/pipeline-phases/index.d.ts +1 -0
  56. package/dist/core/ingestion/pipeline-phases/index.js +1 -0
  57. package/dist/core/ingestion/pipeline.d.ts +4 -0
  58. package/dist/core/ingestion/pipeline.js +9 -5
  59. package/dist/core/run-analyze.d.ts +21 -0
  60. package/dist/core/run-analyze.js +213 -6
  61. package/dist/core/search/hybrid-search.js +11 -3
  62. package/dist/mcp/core/embedder.js +5 -2
  63. package/dist/mcp/local/local-backend.d.ts +12 -0
  64. package/dist/mcp/local/local-backend.js +381 -3
  65. package/dist/mcp/resources.js +139 -0
  66. package/dist/mcp/tools.js +174 -2
  67. package/dist/server/api.d.ts +14 -2
  68. package/dist/server/api.js +206 -7
  69. package/dist/server/mcp-http.d.ts +22 -0
  70. package/dist/server/mcp-http.js +21 -2
  71. package/dist/server/web-dashboard.d.ts +28 -0
  72. package/dist/server/web-dashboard.js +61 -0
  73. package/dist/storage/repo-manager.d.ts +6 -1
  74. package/dist/storage/repo-manager.js +5 -1
  75. package/dist/types/pipeline.d.ts +2 -0
  76. package/dist/web/assets/agent-D5lb0zXz.js +1089 -0
  77. package/dist/web/assets/architectureDiagram-EMZXCZ2Q-CZtc99v_.js +36 -0
  78. package/dist/web/assets/blockDiagram-IGV67L2C-BtoUp-6Y.js +132 -0
  79. package/dist/web/assets/c4Diagram-DFAF54RM-C4Hl3J2U.js +10 -0
  80. package/dist/web/assets/chunk-3GS5O3IE-DkUjU0WD.js +231 -0
  81. package/dist/web/assets/chunk-3YCYZ6SJ-CQkVgT_z.js +1 -0
  82. package/dist/web/assets/chunk-7RZVMHOQ-BitYcNVR.js +338 -0
  83. package/dist/web/assets/chunk-AEOMTBSW-BgTIXPsY.js +1 -0
  84. package/dist/web/assets/chunk-H3VCZNTA-Cx5XV_aC.js +13 -0
  85. package/dist/web/assets/chunk-HN6EAY2L-BBnyTNdB.js +1 -0
  86. package/dist/web/assets/chunk-KSICW3F5-BYzvDLNI.js +15 -0
  87. package/dist/web/assets/chunk-O5ABG6QK-dHwHzA6n.js +1 -0
  88. package/dist/web/assets/chunk-PK6DOVAG-CvsEnugt.js +206 -0
  89. package/dist/web/assets/chunk-RWUO3TPN-BgRTY0_k.js +1 -0
  90. package/dist/web/assets/chunk-TBF5ZNIQ-DL5stGM1.js +1 -0
  91. package/dist/web/assets/chunk-TU3PZOEN-RLyvLcv-.js +1 -0
  92. package/dist/web/assets/classDiagram-PPOCWD7C-DTr8QIOf.js +1 -0
  93. package/dist/web/assets/classDiagram-v2-23LJLIIU-DTr8QIOf.js +1 -0
  94. package/dist/web/assets/context-builder-22jU3V56.js +16 -0
  95. package/dist/web/assets/cose-bilkent-PNC4W37J-DVhePRYg.js +1 -0
  96. package/dist/web/assets/dagre-E77IOHMT-Dzx0A6ZU.js +4 -0
  97. package/dist/web/assets/diagram-H7BISOXX-CC9pRew1.js +43 -0
  98. package/dist/web/assets/diagram-JC5VWROH-Bau_i9tf.js +24 -0
  99. package/dist/web/assets/diagram-LXUTUG65-D9_FM2Gt.js +10 -0
  100. package/dist/web/assets/diagram-WEHSV5V5-BMlayouL.js +24 -0
  101. package/dist/web/assets/erDiagram-GCSMX5X6-C3dhDFA8.js +85 -0
  102. package/dist/web/assets/flowDiagram-OTCZ4VVT-CWSFWmhr.js +162 -0
  103. package/dist/web/assets/ganttDiagram-MUNLMDZQ-D3a67Yol.js +292 -0
  104. package/dist/web/assets/gitGraphDiagram-3HKGZ4G3-7jmry-vM.js +106 -0
  105. package/dist/web/assets/index-BgeqpYgd.js +1415 -0
  106. package/dist/web/assets/index-CT0GtFLZ.css +1 -0
  107. package/dist/web/assets/infoDiagram-MN7RKWGX-G7lhP0Ib.js +2 -0
  108. package/dist/web/assets/ishikawaDiagram-YMYX4NHK-DUoJvNP2.js +70 -0
  109. package/dist/web/assets/journeyDiagram-SO5T7YLQ-RMFPNNqz.js +139 -0
  110. package/dist/web/assets/kanban-definition-LJHFXRCJ-BzpDs1K9.js +89 -0
  111. package/dist/web/assets/katex-GD7MH7QM-DBQvrix-.js +261 -0
  112. package/dist/web/assets/mindmap-definition-2EUWGEK5-Bk0O4roa.js +96 -0
  113. package/dist/web/assets/pieDiagram-3IATQBI2-DKU7kpgS.js +30 -0
  114. package/dist/web/assets/quadrantDiagram-E256RVCF-BY0TGWCS.js +7 -0
  115. package/dist/web/assets/requirementDiagram-M5DCFWZL-DLHOVTSv.js +84 -0
  116. package/dist/web/assets/sankeyDiagram-L3NBLAOT-DVMj5rX2.js +10 -0
  117. package/dist/web/assets/sequenceDiagram-ZOUHS735-CJC73bV-.js +157 -0
  118. package/dist/web/assets/stateDiagram-MLPALWAM-BCFyESls.js +1 -0
  119. package/dist/web/assets/stateDiagram-v2-B5LQ5ZB2-DahzzIca.js +1 -0
  120. package/dist/web/assets/timeline-definition-5SPVSISX-TRSDRgPw.js +120 -0
  121. package/dist/web/assets/vennDiagram-IE5QUKF5-DNy7HRBM.js +34 -0
  122. package/dist/web/assets/wardley-RL74JXVD-BCRCBASE-B-eZEzf9.js +161 -0
  123. package/dist/web/assets/wardleyDiagram-XU3VSMPF-BP-r1xzR.js +20 -0
  124. package/dist/web/assets/xychartDiagram-ZHJ5623Y-Dr9r7a35.js +7 -0
  125. package/dist/web/codragraph-logo-512.png +0 -0
  126. package/dist/web/codragraph-logo.png +0 -0
  127. package/dist/web/favicon.png +0 -0
  128. package/dist/web/index.html +36 -0
  129. package/hooks/claude/codragraph-hook.cjs +24 -9
  130. package/hooks/claude/pre-tool-use.sh +6 -1
  131. package/package.json +15 -4
  132. package/scripts/build.js +75 -16
  133. package/scripts/patch-tree-sitter-swift.cjs +0 -1
  134. package/skills/codragraph-cli.md +17 -1
  135. package/skills/codragraph-guide.md +6 -2
  136. package/skills/codragraph-onboarding.md +2 -2
  137. package/vendor/leiden/index.cjs +272 -285
  138. package/vendor/leiden/utils.cjs +264 -274
  139. package/dist/_shared/lbug/schema-constants.d.ts +0 -16
  140. package/dist/_shared/lbug/schema-constants.d.ts.map +0 -1
  141. package/dist/_shared/lbug/schema-constants.js +0 -67
  142. package/dist/_shared/lbug/schema-constants.js.map +0 -1
  143. package/dist/core/graphstore/lbug-row-source.d.ts +0 -19
  144. package/dist/core/graphstore/lbug-row-source.js +0 -141
  145. package/dist/core/lbug/content-read.d.ts +0 -46
  146. package/dist/core/lbug/content-read.js +0 -64
  147. package/dist/core/lbug/csv-generator.d.ts +0 -29
  148. package/dist/core/lbug/csv-generator.js +0 -492
  149. package/dist/core/lbug/lbug-adapter.d.ts +0 -176
  150. package/dist/core/lbug/lbug-adapter.js +0 -1320
  151. package/dist/core/lbug/pool-adapter.d.ts +0 -93
  152. package/dist/core/lbug/pool-adapter.js +0 -550
  153. package/dist/core/lbug/schema.d.ts +0 -62
  154. package/dist/core/lbug/schema.js +0 -502
  155. package/dist/mcp/core/lbug-adapter.d.ts +0 -5
  156. package/dist/mcp/core/lbug-adapter.js +0 -5
@@ -1,492 +0,0 @@
1
- /**
2
- * CSV Generator for LadybugDB Hybrid Schema
3
- *
4
- * Streams CSV rows directly to disk files in a single pass over graph nodes.
5
- * File contents are lazy-read from disk per-node to avoid holding the entire
6
- * repo in RAM. Rows are buffered (FLUSH_EVERY) before writing to minimize
7
- * per-row Promise overhead.
8
- *
9
- * RFC 4180 Compliant:
10
- * - Fields containing commas, double quotes, or newlines are enclosed in double quotes
11
- * - Double quotes within fields are escaped by doubling them ("")
12
- * - All fields are consistently quoted for safety with code content
13
- */
14
- import fs from 'fs/promises';
15
- import { createWriteStream } from 'fs';
16
- import path from 'path';
17
- import { encodeContent } from '@codragraph/graphstore';
18
- /** Flush buffered rows to disk every N rows */
19
- const FLUSH_EVERY = 500;
20
- // ============================================================================
21
- // CSV ESCAPE UTILITIES
22
- // ============================================================================
23
- export const sanitizeUTF8 = (str) => {
24
- return str
25
- .replace(/\r\n/g, '\n')
26
- .replace(/\r/g, '\n')
27
- .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '')
28
- .replace(/[\uD800-\uDFFF]/g, '')
29
- .replace(/[\uFFFE\uFFFF]/g, '');
30
- };
31
- export const escapeCSVField = (value) => {
32
- if (value === undefined || value === null)
33
- return '""';
34
- let str = String(value);
35
- str = sanitizeUTF8(str);
36
- return `"${str.replace(/"/g, '""')}"`;
37
- };
38
- export const escapeCSVNumber = (value, defaultValue = -1) => {
39
- if (value === undefined || value === null)
40
- return String(defaultValue);
41
- return String(value);
42
- };
43
- // ============================================================================
44
- // CONTENT EXTRACTION (lazy — reads from disk on demand)
45
- // ============================================================================
46
- export const isBinaryContent = (content) => {
47
- if (!content || content.length === 0)
48
- return false;
49
- const sample = content.slice(0, 1000);
50
- let nonPrintable = 0;
51
- for (let i = 0; i < sample.length; i++) {
52
- const code = sample.charCodeAt(i);
53
- if (code < 9 || (code > 13 && code < 32) || code === 127)
54
- nonPrintable++;
55
- }
56
- return nonPrintable / sample.length > 0.1;
57
- };
58
- /**
59
- * LRU content cache — avoids re-reading the same source file for every
60
- * symbol defined in it. Sized generously so most files stay cached during
61
- * the single-pass node iteration.
62
- */
63
- class FileContentCache {
64
- cache = new Map();
65
- accessOrder = [];
66
- maxSize;
67
- repoPath;
68
- constructor(repoPath, maxSize = 3000) {
69
- this.repoPath = repoPath;
70
- this.maxSize = maxSize;
71
- }
72
- async get(relativePath) {
73
- if (!relativePath)
74
- return '';
75
- const cached = this.cache.get(relativePath);
76
- if (cached !== undefined) {
77
- // Move to end of accessOrder (LRU promotion)
78
- const idx = this.accessOrder.indexOf(relativePath);
79
- if (idx !== -1) {
80
- this.accessOrder.splice(idx, 1);
81
- this.accessOrder.push(relativePath);
82
- }
83
- return cached;
84
- }
85
- try {
86
- const fullPath = path.join(this.repoPath, relativePath);
87
- const content = await fs.readFile(fullPath, 'utf-8');
88
- this.set(relativePath, content);
89
- return content;
90
- }
91
- catch {
92
- this.set(relativePath, '');
93
- return '';
94
- }
95
- }
96
- set(key, value) {
97
- if (this.cache.size >= this.maxSize) {
98
- const oldest = this.accessOrder.shift();
99
- if (oldest)
100
- this.cache.delete(oldest);
101
- }
102
- this.cache.set(key, value);
103
- this.accessOrder.push(key);
104
- }
105
- }
106
- const extractContent = async (node, contentCache) => {
107
- const filePath = node.properties.filePath;
108
- const content = await contentCache.get(filePath);
109
- if (!content)
110
- return '';
111
- if (node.label === 'Folder')
112
- return '';
113
- if (isBinaryContent(content))
114
- return '[Binary file - content not stored]';
115
- if (node.label === 'File') {
116
- const MAX_FILE_CONTENT = 10000;
117
- return content.length > MAX_FILE_CONTENT
118
- ? content.slice(0, MAX_FILE_CONTENT) + '\n... [truncated]'
119
- : content;
120
- }
121
- const startLine = node.properties.startLine;
122
- const endLine = node.properties.endLine;
123
- if (startLine === undefined || endLine === undefined)
124
- return '';
125
- const lines = content.split('\n');
126
- const start = Math.max(0, startLine - 2);
127
- const end = Math.min(lines.length - 1, endLine + 2);
128
- const snippet = lines.slice(start, end + 1).join('\n');
129
- const MAX_SNIPPET = 5000;
130
- return snippet.length > MAX_SNIPPET
131
- ? snippet.slice(0, MAX_SNIPPET) + '\n... [truncated]'
132
- : snippet;
133
- };
134
- // ============================================================================
135
- // BUFFERED CSV WRITER
136
- // ============================================================================
137
- class BufferedCSVWriter {
138
- ws;
139
- buffer = [];
140
- rows = 0;
141
- constructor(filePath, header) {
142
- this.ws = createWriteStream(filePath, 'utf-8');
143
- // Large repos flush many times — raise listener cap to avoid MaxListenersExceededWarning
144
- this.ws.setMaxListeners(50);
145
- this.buffer.push(header);
146
- }
147
- addRow(row) {
148
- this.buffer.push(row);
149
- this.rows++;
150
- if (this.buffer.length >= FLUSH_EVERY) {
151
- return this.flush();
152
- }
153
- return Promise.resolve();
154
- }
155
- flush() {
156
- if (this.buffer.length === 0)
157
- return Promise.resolve();
158
- const chunk = this.buffer.join('\n') + '\n';
159
- this.buffer.length = 0;
160
- return new Promise((resolve, reject) => {
161
- this.ws.once('error', reject);
162
- const ok = this.ws.write(chunk);
163
- if (ok) {
164
- this.ws.removeListener('error', reject);
165
- resolve();
166
- }
167
- else {
168
- this.ws.once('drain', () => {
169
- this.ws.removeListener('error', reject);
170
- resolve();
171
- });
172
- }
173
- });
174
- }
175
- async finish() {
176
- await this.flush();
177
- return new Promise((resolve, reject) => {
178
- this.ws.end(() => resolve());
179
- this.ws.on('error', reject);
180
- });
181
- }
182
- }
183
- /**
184
- * Stream all CSV data directly to disk files.
185
- * Iterates graph nodes exactly ONCE — routes each node to the right writer.
186
- * File contents are lazy-read from disk with a generous LRU cache.
187
- */
188
- /**
189
- * Apply RFC 0001 Phase 2 content encoding. Returns the on-the-wire string
190
- * + the encoding tag to write into the per-row `contentEncoding` column.
191
- *
192
- * `compress: undefined | 'none'` is the default — content goes through
193
- * unchanged and the tag is `'none'` (matches the schema DEFAULT, so older
194
- * readers and the schema-default behavior stay in agreement).
195
- *
196
- * Always writing the tag column (even as 'none') keeps the CSV / COPY /
197
- * schema layouts uniform regardless of compression mode. The wasted bytes
198
- * are negligible — a few characters per row vs the kilobytes of content
199
- * the column is alongside.
200
- */
201
- const applyEncoding = (content, compress) => {
202
- if (!compress || compress === 'none') {
203
- return { wireContent: content, tag: 'none' };
204
- }
205
- return { wireContent: encodeContent(content, compress), tag: compress };
206
- };
207
- export const streamAllCSVsToDisk = async (graph, repoPath, csvDir, compress) => {
208
- // Remove stale CSVs from previous crashed runs, then recreate
209
- try {
210
- await fs.rm(csvDir, { recursive: true, force: true });
211
- }
212
- catch { }
213
- await fs.mkdir(csvDir, { recursive: true });
214
- // We open ~30 concurrent write-streams; raise process limit to suppress
215
- // MaxListenersExceededWarning (restored after all streams finish).
216
- const prevMax = process.getMaxListeners();
217
- process.setMaxListeners(prevMax + 40);
218
- const contentCache = new FileContentCache(repoPath);
219
- // Create writers for every node type up-front. Content-bearing tables
220
- // carry an extra `contentEncoding` column right after `content` to
221
- // match the Phase 2 schema layout. Tables without `content` (Folder,
222
- // Community, Process, Route, Tool) are unchanged.
223
- const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content,contentEncoding');
224
- const folderWriter = new BufferedCSVWriter(path.join(csvDir, 'folder.csv'), 'id,name,filePath');
225
- const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description';
226
- const functionWriter = new BufferedCSVWriter(path.join(csvDir, 'function.csv'), codeElementHeader);
227
- const classWriter = new BufferedCSVWriter(path.join(csvDir, 'class.csv'), codeElementHeader);
228
- const interfaceWriter = new BufferedCSVWriter(path.join(csvDir, 'interface.csv'), codeElementHeader);
229
- const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description,parameterCount,returnType';
230
- const methodWriter = new BufferedCSVWriter(path.join(csvDir, 'method.csv'), methodHeader);
231
- const codeElemWriter = new BufferedCSVWriter(path.join(csvDir, 'codeelement.csv'), codeElementHeader);
232
- const communityWriter = new BufferedCSVWriter(path.join(csvDir, 'community.csv'), 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
233
- const processWriter = new BufferedCSVWriter(path.join(csvDir, 'process.csv'), 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');
234
- // Section nodes have an extra 'level' column
235
- const sectionWriter = new BufferedCSVWriter(path.join(csvDir, 'section.csv'), 'id,name,filePath,startLine,endLine,level,content,contentEncoding,description');
236
- // Route nodes for API endpoint mapping
237
- const routeWriter = new BufferedCSVWriter(path.join(csvDir, 'route.csv'), 'id,name,filePath,responseKeys,errorKeys,middleware');
238
- // Tool nodes for MCP tool definitions
239
- const toolWriter = new BufferedCSVWriter(path.join(csvDir, 'tool.csv'), 'id,name,filePath,description');
240
- // Multi-language node types share the same CSV shape (no isExported column)
241
- const multiLangHeader = 'id,name,filePath,startLine,endLine,content,contentEncoding,description';
242
- const MULTI_LANG_TYPES = [
243
- 'Struct',
244
- 'Enum',
245
- 'Macro',
246
- 'Typedef',
247
- 'Union',
248
- 'Namespace',
249
- 'Trait',
250
- 'Impl',
251
- 'TypeAlias',
252
- 'Const',
253
- 'Static',
254
- 'Variable',
255
- 'Property',
256
- 'Record',
257
- 'Delegate',
258
- 'Annotation',
259
- 'Constructor',
260
- 'Template',
261
- 'Module',
262
- ];
263
- const multiLangWriters = new Map();
264
- for (const t of MULTI_LANG_TYPES) {
265
- multiLangWriters.set(t, new BufferedCSVWriter(path.join(csvDir, `${t.toLowerCase()}.csv`), multiLangHeader));
266
- }
267
- const codeWriterMap = {
268
- Function: functionWriter,
269
- Class: classWriter,
270
- Interface: interfaceWriter,
271
- CodeElement: codeElemWriter,
272
- };
273
- // Deduplicate all node types — the pipeline can produce duplicate IDs across
274
- // all symbol types (Class, Method, Function, etc.), not just File nodes.
275
- // A single Set covering every label prevents PK violations on COPY.
276
- const seenNodeIds = new Set();
277
- // --- SINGLE PASS over all nodes ---
278
- for (const node of graph.iterNodes()) {
279
- if (seenNodeIds.has(node.id))
280
- continue;
281
- seenNodeIds.add(node.id);
282
- switch (node.label) {
283
- case 'File': {
284
- const content = await extractContent(node, contentCache);
285
- const { wireContent, tag } = applyEncoding(content, compress);
286
- await fileWriter.addRow([
287
- escapeCSVField(node.id),
288
- escapeCSVField(node.properties.name || ''),
289
- escapeCSVField(node.properties.filePath || ''),
290
- escapeCSVField(wireContent),
291
- escapeCSVField(tag),
292
- ].join(','));
293
- break;
294
- }
295
- case 'Folder':
296
- await folderWriter.addRow([
297
- escapeCSVField(node.id),
298
- escapeCSVField(node.properties.name || ''),
299
- escapeCSVField(node.properties.filePath || ''),
300
- ].join(','));
301
- break;
302
- case 'Community': {
303
- const keywords = node.properties.keywords || [];
304
- const keywordsStr = `[${keywords.map((k) => `'${k.replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/,/g, '\\,')}'`).join(',')}]`;
305
- await communityWriter.addRow([
306
- escapeCSVField(node.id),
307
- escapeCSVField(node.properties.name || ''),
308
- escapeCSVField(node.properties.heuristicLabel || ''),
309
- keywordsStr,
310
- escapeCSVField(node.properties.description || ''),
311
- escapeCSVField(node.properties.enrichedBy || 'heuristic'),
312
- escapeCSVNumber(node.properties.cohesion, 0),
313
- escapeCSVNumber(node.properties.symbolCount, 0),
314
- ].join(','));
315
- break;
316
- }
317
- case 'Process': {
318
- const communities = node.properties.communities || [];
319
- const communitiesStr = `[${communities.map((c) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
320
- await processWriter.addRow([
321
- escapeCSVField(node.id),
322
- escapeCSVField(node.properties.name || ''),
323
- escapeCSVField(node.properties.heuristicLabel || ''),
324
- escapeCSVField(node.properties.processType || ''),
325
- escapeCSVNumber(node.properties.stepCount, 0),
326
- escapeCSVField(communitiesStr),
327
- escapeCSVField(node.properties.entryPointId || ''),
328
- escapeCSVField(node.properties.terminalId || ''),
329
- ].join(','));
330
- break;
331
- }
332
- case 'Method': {
333
- const content = await extractContent(node, contentCache);
334
- const { wireContent, tag } = applyEncoding(content, compress);
335
- await methodWriter.addRow([
336
- escapeCSVField(node.id),
337
- escapeCSVField(node.properties.name || ''),
338
- escapeCSVField(node.properties.filePath || ''),
339
- escapeCSVNumber(node.properties.startLine, -1),
340
- escapeCSVNumber(node.properties.endLine, -1),
341
- node.properties.isExported ? 'true' : 'false',
342
- escapeCSVField(wireContent),
343
- escapeCSVField(tag),
344
- escapeCSVField(node.properties.description || ''),
345
- escapeCSVNumber(node.properties.parameterCount, 0),
346
- escapeCSVField(node.properties.returnType || ''),
347
- ].join(','));
348
- break;
349
- }
350
- case 'Section': {
351
- const content = await extractContent(node, contentCache);
352
- const { wireContent, tag } = applyEncoding(content, compress);
353
- await sectionWriter.addRow([
354
- escapeCSVField(node.id),
355
- escapeCSVField(node.properties.name || ''),
356
- escapeCSVField(node.properties.filePath || ''),
357
- escapeCSVNumber(node.properties.startLine, -1),
358
- escapeCSVNumber(node.properties.endLine, -1),
359
- escapeCSVNumber(node.properties.level, 1),
360
- escapeCSVField(wireContent),
361
- escapeCSVField(tag),
362
- escapeCSVField(node.properties.description || ''),
363
- ].join(','));
364
- break;
365
- }
366
- case 'Route': {
367
- const responseKeys = node.properties.responseKeys || [];
368
- // LadybugDB array literal inside a quoted CSV field: escapeCSVField wraps in "..."
369
- // and the array uses single-quoted elements
370
- const keysStr = `[${responseKeys.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
371
- const errorKeys = node.properties.errorKeys || [];
372
- const errorKeysStr = `[${errorKeys.map((k) => `'${k.replace(/'/g, "''")}'`).join(',')}]`;
373
- const middleware = node.properties.middleware || [];
374
- const middlewareStr = `[${middleware.map((m) => `'${m.replace(/'/g, "''")}'`).join(',')}]`;
375
- await routeWriter.addRow([
376
- escapeCSVField(node.id),
377
- escapeCSVField(node.properties.name || ''),
378
- escapeCSVField(node.properties.filePath || ''),
379
- escapeCSVField(keysStr),
380
- escapeCSVField(errorKeysStr),
381
- escapeCSVField(middlewareStr),
382
- ].join(','));
383
- break;
384
- }
385
- case 'Tool':
386
- await toolWriter.addRow([
387
- escapeCSVField(node.id),
388
- escapeCSVField(node.properties.name || ''),
389
- escapeCSVField(node.properties.filePath || ''),
390
- escapeCSVField(node.properties.description || ''),
391
- ].join(','));
392
- break;
393
- default: {
394
- // Code element nodes (Function, Class, Interface, CodeElement)
395
- const writer = codeWriterMap[node.label];
396
- if (writer) {
397
- const content = await extractContent(node, contentCache);
398
- const { wireContent, tag } = applyEncoding(content, compress);
399
- await writer.addRow([
400
- escapeCSVField(node.id),
401
- escapeCSVField(node.properties.name || ''),
402
- escapeCSVField(node.properties.filePath || ''),
403
- escapeCSVNumber(node.properties.startLine, -1),
404
- escapeCSVNumber(node.properties.endLine, -1),
405
- node.properties.isExported ? 'true' : 'false',
406
- escapeCSVField(wireContent),
407
- escapeCSVField(tag),
408
- escapeCSVField(node.properties.description || ''),
409
- ].join(','));
410
- }
411
- else {
412
- // Multi-language node types (Struct, Impl, Trait, Macro, etc.)
413
- const mlWriter = multiLangWriters.get(node.label);
414
- if (mlWriter) {
415
- const content = await extractContent(node, contentCache);
416
- const { wireContent, tag } = applyEncoding(content, compress);
417
- await mlWriter.addRow([
418
- escapeCSVField(node.id),
419
- escapeCSVField(node.properties.name || ''),
420
- escapeCSVField(node.properties.filePath || ''),
421
- escapeCSVNumber(node.properties.startLine, -1),
422
- escapeCSVNumber(node.properties.endLine, -1),
423
- escapeCSVField(wireContent),
424
- escapeCSVField(tag),
425
- escapeCSVField(node.properties.description || ''),
426
- ].join(','));
427
- }
428
- }
429
- break;
430
- }
431
- }
432
- }
433
- // Finish all node writers
434
- const allWriters = [
435
- fileWriter,
436
- folderWriter,
437
- functionWriter,
438
- classWriter,
439
- interfaceWriter,
440
- methodWriter,
441
- codeElemWriter,
442
- communityWriter,
443
- processWriter,
444
- sectionWriter,
445
- routeWriter,
446
- toolWriter,
447
- ...multiLangWriters.values(),
448
- ];
449
- await Promise.all(allWriters.map((w) => w.finish()));
450
- // --- Stream relationship CSV ---
451
- const relCsvPath = path.join(csvDir, 'relations.csv');
452
- const relWriter = new BufferedCSVWriter(relCsvPath, 'from,to,type,confidence,reason,step');
453
- for (const rel of graph.iterRelationships()) {
454
- await relWriter.addRow([
455
- escapeCSVField(rel.sourceId),
456
- escapeCSVField(rel.targetId),
457
- escapeCSVField(rel.type),
458
- escapeCSVNumber(rel.confidence, 1.0),
459
- escapeCSVField(rel.reason),
460
- escapeCSVNumber(rel.step, 0),
461
- ].join(','));
462
- }
463
- await relWriter.finish();
464
- // Build result map — only include tables that have rows
465
- const nodeFiles = new Map();
466
- const tableMap = [
467
- ['File', fileWriter],
468
- ['Folder', folderWriter],
469
- ['Function', functionWriter],
470
- ['Class', classWriter],
471
- ['Interface', interfaceWriter],
472
- ['Method', methodWriter],
473
- ['CodeElement', codeElemWriter],
474
- ['Community', communityWriter],
475
- ['Process', processWriter],
476
- ['Section', sectionWriter],
477
- ['Route', routeWriter],
478
- ['Tool', toolWriter],
479
- ...Array.from(multiLangWriters.entries()).map(([name, w]) => [name, w]),
480
- ];
481
- for (const [name, writer] of tableMap) {
482
- if (writer.rows > 0) {
483
- nodeFiles.set(name, {
484
- csvPath: path.join(csvDir, `${name.toLowerCase()}.csv`),
485
- rows: writer.rows,
486
- });
487
- }
488
- }
489
- // Restore original process listener limit
490
- process.setMaxListeners(prevMax);
491
- return { nodeFiles, relCsvPath, relRows: relWriter.rows };
492
- };
@@ -1,176 +0,0 @@
1
- import lbug from '@ladybugdb/core';
2
- import { KnowledgeGraph } from '../graph/types.js';
3
- import type { CachedEmbedding } from '../embeddings/types.js';
4
- import type { ContentEncoding } from '@codragraph/graphstore';
5
- /** Factory for creating WriteStreams — injectable for testing. */
6
- export type WriteStreamFactory = (filePath: string) => import('fs').WriteStream;
7
- /** Result of splitting the relationship CSV into per-label-pair files. */
8
- export interface RelCsvSplitResult {
9
- relHeader: string;
10
- relsByPairMeta: Map<string, {
11
- csvPath: string;
12
- rows: number;
13
- }>;
14
- pairWriteStreams: Map<string, import('fs').WriteStream>;
15
- skippedRels: number;
16
- totalValidRels: number;
17
- }
18
- /**
19
- * Split a relationship CSV into per-label-pair files on disk.
20
- *
21
- * Streams the CSV line-by-line, routing each relationship to a file named
22
- * `rel_{fromLabel}_{toLabel}.csv`. Handles backpressure correctly: only one
23
- * drain listener per stream at a time, and readline resumes only when ALL
24
- * backpressured streams have drained.
25
- *
26
- * @param csvPath Path to the combined relationship CSV
27
- * @param csvDir Directory to write per-pair CSV files
28
- * @param validTables Set of valid node table names
29
- * @param getNodeLabel Function to extract the label from a node ID
30
- * @param wsFactory Optional WriteStream factory (defaults to fs.createWriteStream)
31
- */
32
- export declare const splitRelCsvByLabelPair: (csvPath: string, csvDir: string, validTables: Set<string>, getNodeLabel: (id: string) => string, wsFactory?: WriteStreamFactory) => Promise<RelCsvSplitResult>;
33
- /** Expose the current Database for pool adapter reuse in tests. */
34
- export declare const getDatabase: () => lbug.Database | null;
35
- /**
36
- * Return true when the error message indicates that another process holds
37
- * an exclusive lock on the LadybugDB file (e.g. `codragraph analyze` or
38
- * `codragraph serve` running at the same time).
39
- */
40
- export declare const isDbBusyError: (err: unknown) => boolean;
41
- export declare const initLbug: (dbPath: string) => Promise<{
42
- db: lbug.Database;
43
- conn: lbug.Connection;
44
- }>;
45
- /**
46
- * Execute multiple queries against one repo DB atomically.
47
- * While the callback runs, no other request can switch the active DB.
48
- *
49
- * Automatically retries up to DB_LOCK_RETRY_ATTEMPTS times when the
50
- * database is busy (e.g. `codragraph analyze` holds the write lock).
51
- * Each retry waits DB_LOCK_RETRY_DELAY_MS * attempt milliseconds.
52
- */
53
- export declare const withLbugDb: <T>(dbPath: string, operation: () => Promise<T>) => Promise<T>;
54
- export type LbugProgressCallback = (message: string) => void;
55
- export declare const loadGraphToLbug: (graph: KnowledgeGraph, repoPath: string, storagePath: string, onProgress?: LbugProgressCallback, options?: {
56
- compress?: ContentEncoding;
57
- }) => Promise<{
58
- success: boolean;
59
- insertedRels: number;
60
- skippedRels: number;
61
- warnings: string[];
62
- }>;
63
- /**
64
- * Insert a single node to LadybugDB
65
- * @param label - Node type (File, Function, Class, etc.)
66
- * @param properties - Node properties
67
- * @param dbPath - Path to LadybugDB database (optional if already initialized)
68
- */
69
- export declare const insertNodeToLbug: (label: string, properties: Record<string, any>, dbPath?: string) => Promise<boolean>;
70
- /**
71
- * Batch insert multiple nodes to LadybugDB using a single connection
72
- * @param nodes - Array of {label, properties} to insert
73
- * @param dbPath - Path to LadybugDB database
74
- * @returns Object with success count and error count
75
- */
76
- export declare const batchInsertNodesToLbug: (nodes: Array<{
77
- label: string;
78
- properties: Record<string, any>;
79
- }>, dbPath: string) => Promise<{
80
- inserted: number;
81
- failed: number;
82
- }>;
83
- export declare const executeQuery: (cypher: string) => Promise<any[]>;
84
- export declare const streamQuery: (cypher: string, onRow: (row: any) => void | Promise<void>) => Promise<number>;
85
- /**
86
- * Execute a single parameterized query (prepare/execute pattern).
87
- * Prevents Cypher injection by binding values as parameters.
88
- */
89
- export declare const executePrepared: (cypher: string, params: Record<string, any>) => Promise<any[]>;
90
- export declare const executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>;
91
- export declare const getLbugStats: () => Promise<{
92
- nodes: number;
93
- edges: number;
94
- }>;
95
- /**
96
- * Load cached embeddings from LadybugDB before a rebuild.
97
- * Returns all embedding vectors so they can be re-inserted after the graph is reloaded,
98
- * avoiding expensive re-embedding of unchanged nodes.
99
- *
100
- * Detects old schema (no chunkIndex column) and returns empty cache to trigger rebuild.
101
- */
102
- export declare const loadCachedEmbeddings: () => Promise<{
103
- embeddingNodeIds: Set<string>;
104
- embeddings: CachedEmbedding[];
105
- }>;
106
- /**
107
- * Fetch existing embedding hashes from CodeEmbedding table for incremental embedding.
108
- * Returns a Map<nodeId, contentHash> suitable for passing to `runEmbeddingPipeline`.
109
- * Handles legacy DBs without the `contentHash` column (all rows treated as stale with empty hash).
110
- * Returns undefined if the CodeEmbedding table does not exist.
111
- *
112
- * @param execQuery - Cypher query executor (typically pool-adapter's `executeQuery`)
113
- */
114
- export declare const fetchExistingEmbeddingHashes: (execQuery: (cypher: string) => Promise<any[]>) => Promise<Map<string, string> | undefined>;
115
- export declare const closeLbug: () => Promise<void>;
116
- export declare const isLbugReady: () => boolean;
117
- /**
118
- * Delete all nodes (and their relationships) for a specific file from LadybugDB
119
- * @param filePath - The file path to delete nodes for
120
- * @param dbPath - Optional path to LadybugDB for per-query connection
121
- * @returns Object with counts of deleted nodes
122
- */
123
- export declare const deleteNodesForFile: (filePath: string, dbPath?: string) => Promise<{
124
- deletedNodes: number;
125
- }>;
126
- export declare const getEmbeddingTableName: () => string;
127
- /**
128
- * Load the FTS extension (required before using FTS functions).
129
- * Safe to call multiple times — tracks loaded state via module-level ftsLoaded.
130
- */
131
- export declare const loadFTSExtension: () => Promise<void>;
132
- /**
133
- * Load the VECTOR extension (required before using QUERY_VECTOR_INDEX).
134
- * Safe to call multiple times -- tracks loaded state via module-level vectorExtensionLoaded.
135
- */
136
- export declare const loadVectorExtension: () => Promise<void>;
137
- /**
138
- * Create a full-text search index on a table
139
- * @param tableName - The node table name (e.g., 'File', 'CodeSymbol')
140
- * @param indexName - Name for the FTS index
141
- * @param properties - List of properties to index (e.g., ['name', 'code'])
142
- * @param stemmer - Stemming algorithm (default: 'porter')
143
- */
144
- export declare const createFTSIndex: (tableName: string, indexName: string, properties: string[], stemmer?: string) => Promise<void>;
145
- /**
146
- * Lazy-create an FTS index, caching the fact in-process.
147
- *
148
- * Used by `queryFTS` so that `analyze` doesn't pay the ~440 ms × 5 fixed
149
- * LadybugDB cost up-front (it dominates analyze on small repos). Instead,
150
- * the cost is moved to the first `query`/`context` call in a session,
151
- * where it's amortised across many lookups.
152
- *
153
- * Safe to call repeatedly — the in-process Set guarantees only the first
154
- * call hits LadybugDB. `closeLbug` clears the cache so re-init starts fresh.
155
- */
156
- export declare const ensureFTSIndex: (tableName: string, indexName: string, properties: string[], stemmer?: string) => Promise<void>;
157
- /**
158
- * Query a full-text search index
159
- * @param tableName - The node table name
160
- * @param indexName - FTS index name
161
- * @param query - Search query string
162
- * @param limit - Maximum results
163
- * @param conjunctive - If true, all terms must match (AND); if false, any term matches (OR)
164
- * @returns Array of { node properties, score }
165
- */
166
- export declare const queryFTS: (tableName: string, indexName: string, query: string, limit?: number, conjunctive?: boolean) => Promise<Array<{
167
- nodeId: string;
168
- name: string;
169
- filePath: string;
170
- score: number;
171
- [key: string]: any;
172
- }>>;
173
- /**
174
- * Drop an FTS index
175
- */
176
- export declare const dropFTSIndex: (tableName: string, indexName: string) => Promise<void>;