@veewo/gitnexus 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. package/README.md +234 -0
  2. package/dist/benchmark/agent-context/evaluators.d.ts +9 -0
  3. package/dist/benchmark/agent-context/evaluators.js +196 -0
  4. package/dist/benchmark/agent-context/evaluators.test.d.ts +1 -0
  5. package/dist/benchmark/agent-context/evaluators.test.js +39 -0
  6. package/dist/benchmark/agent-context/io.d.ts +2 -0
  7. package/dist/benchmark/agent-context/io.js +23 -0
  8. package/dist/benchmark/agent-context/io.test.d.ts +1 -0
  9. package/dist/benchmark/agent-context/io.test.js +19 -0
  10. package/dist/benchmark/agent-context/report.d.ts +2 -0
  11. package/dist/benchmark/agent-context/report.js +59 -0
  12. package/dist/benchmark/agent-context/report.test.d.ts +1 -0
  13. package/dist/benchmark/agent-context/report.test.js +85 -0
  14. package/dist/benchmark/agent-context/runner.d.ts +46 -0
  15. package/dist/benchmark/agent-context/runner.js +111 -0
  16. package/dist/benchmark/agent-context/runner.test.d.ts +1 -0
  17. package/dist/benchmark/agent-context/runner.test.js +79 -0
  18. package/dist/benchmark/agent-context/tool-runner.d.ts +7 -0
  19. package/dist/benchmark/agent-context/tool-runner.js +18 -0
  20. package/dist/benchmark/agent-context/tool-runner.test.d.ts +1 -0
  21. package/dist/benchmark/agent-context/tool-runner.test.js +11 -0
  22. package/dist/benchmark/agent-context/types.d.ts +40 -0
  23. package/dist/benchmark/agent-context/types.js +1 -0
  24. package/dist/benchmark/analyze-runner.d.ts +16 -0
  25. package/dist/benchmark/analyze-runner.js +51 -0
  26. package/dist/benchmark/analyze-runner.test.d.ts +1 -0
  27. package/dist/benchmark/analyze-runner.test.js +37 -0
  28. package/dist/benchmark/evaluators.d.ts +6 -0
  29. package/dist/benchmark/evaluators.js +10 -0
  30. package/dist/benchmark/evaluators.test.d.ts +1 -0
  31. package/dist/benchmark/evaluators.test.js +12 -0
  32. package/dist/benchmark/io.d.ts +7 -0
  33. package/dist/benchmark/io.js +25 -0
  34. package/dist/benchmark/io.test.d.ts +1 -0
  35. package/dist/benchmark/io.test.js +35 -0
  36. package/dist/benchmark/neonspark-candidates.d.ts +19 -0
  37. package/dist/benchmark/neonspark-candidates.js +94 -0
  38. package/dist/benchmark/neonspark-candidates.test.d.ts +1 -0
  39. package/dist/benchmark/neonspark-candidates.test.js +43 -0
  40. package/dist/benchmark/neonspark-materialize.d.ts +19 -0
  41. package/dist/benchmark/neonspark-materialize.js +111 -0
  42. package/dist/benchmark/neonspark-materialize.test.d.ts +1 -0
  43. package/dist/benchmark/neonspark-materialize.test.js +124 -0
  44. package/dist/benchmark/neonspark-sync.d.ts +3 -0
  45. package/dist/benchmark/neonspark-sync.js +53 -0
  46. package/dist/benchmark/neonspark-sync.test.d.ts +1 -0
  47. package/dist/benchmark/neonspark-sync.test.js +20 -0
  48. package/dist/benchmark/report.d.ts +1 -0
  49. package/dist/benchmark/report.js +7 -0
  50. package/dist/benchmark/runner.d.ts +48 -0
  51. package/dist/benchmark/runner.js +302 -0
  52. package/dist/benchmark/runner.test.d.ts +1 -0
  53. package/dist/benchmark/runner.test.js +50 -0
  54. package/dist/benchmark/scoring.d.ts +16 -0
  55. package/dist/benchmark/scoring.js +27 -0
  56. package/dist/benchmark/scoring.test.d.ts +1 -0
  57. package/dist/benchmark/scoring.test.js +24 -0
  58. package/dist/benchmark/tool-runner.d.ts +6 -0
  59. package/dist/benchmark/tool-runner.js +17 -0
  60. package/dist/benchmark/types.d.ts +36 -0
  61. package/dist/benchmark/types.js +1 -0
  62. package/dist/cli/ai-context.d.ts +22 -0
  63. package/dist/cli/ai-context.js +184 -0
  64. package/dist/cli/ai-context.test.d.ts +1 -0
  65. package/dist/cli/ai-context.test.js +30 -0
  66. package/dist/cli/analyze-multi-scope-regression.test.d.ts +1 -0
  67. package/dist/cli/analyze-multi-scope-regression.test.js +22 -0
  68. package/dist/cli/analyze-options.d.ts +7 -0
  69. package/dist/cli/analyze-options.js +56 -0
  70. package/dist/cli/analyze-options.test.d.ts +1 -0
  71. package/dist/cli/analyze-options.test.js +36 -0
  72. package/dist/cli/analyze.d.ts +14 -0
  73. package/dist/cli/analyze.js +384 -0
  74. package/dist/cli/augment.d.ts +13 -0
  75. package/dist/cli/augment.js +33 -0
  76. package/dist/cli/benchmark-agent-context.d.ts +29 -0
  77. package/dist/cli/benchmark-agent-context.js +61 -0
  78. package/dist/cli/benchmark-agent-context.test.d.ts +1 -0
  79. package/dist/cli/benchmark-agent-context.test.js +80 -0
  80. package/dist/cli/benchmark-unity.d.ts +15 -0
  81. package/dist/cli/benchmark-unity.js +31 -0
  82. package/dist/cli/benchmark-unity.test.d.ts +1 -0
  83. package/dist/cli/benchmark-unity.test.js +18 -0
  84. package/dist/cli/claude-hooks.d.ts +22 -0
  85. package/dist/cli/claude-hooks.js +97 -0
  86. package/dist/cli/clean.d.ts +10 -0
  87. package/dist/cli/clean.js +60 -0
  88. package/dist/cli/eval-server.d.ts +30 -0
  89. package/dist/cli/eval-server.js +372 -0
  90. package/dist/cli/index.d.ts +2 -0
  91. package/dist/cli/index.js +182 -0
  92. package/dist/cli/list.d.ts +6 -0
  93. package/dist/cli/list.js +33 -0
  94. package/dist/cli/mcp.d.ts +8 -0
  95. package/dist/cli/mcp.js +34 -0
  96. package/dist/cli/repo-manager-alias.test.d.ts +1 -0
  97. package/dist/cli/repo-manager-alias.test.js +40 -0
  98. package/dist/cli/scope-filter.test.d.ts +1 -0
  99. package/dist/cli/scope-filter.test.js +49 -0
  100. package/dist/cli/serve.d.ts +4 -0
  101. package/dist/cli/serve.js +6 -0
  102. package/dist/cli/setup.d.ts +8 -0
  103. package/dist/cli/setup.js +311 -0
  104. package/dist/cli/setup.test.d.ts +1 -0
  105. package/dist/cli/setup.test.js +31 -0
  106. package/dist/cli/status.d.ts +6 -0
  107. package/dist/cli/status.js +27 -0
  108. package/dist/cli/tool.d.ts +40 -0
  109. package/dist/cli/tool.js +94 -0
  110. package/dist/cli/version.test.d.ts +1 -0
  111. package/dist/cli/version.test.js +19 -0
  112. package/dist/cli/wiki.d.ts +15 -0
  113. package/dist/cli/wiki.js +361 -0
  114. package/dist/config/ignore-service.d.ts +1 -0
  115. package/dist/config/ignore-service.js +210 -0
  116. package/dist/config/supported-languages.d.ts +12 -0
  117. package/dist/config/supported-languages.js +15 -0
  118. package/dist/core/augmentation/engine.d.ts +26 -0
  119. package/dist/core/augmentation/engine.js +213 -0
  120. package/dist/core/embeddings/embedder.d.ts +60 -0
  121. package/dist/core/embeddings/embedder.js +251 -0
  122. package/dist/core/embeddings/embedding-pipeline.d.ts +51 -0
  123. package/dist/core/embeddings/embedding-pipeline.js +329 -0
  124. package/dist/core/embeddings/index.d.ts +9 -0
  125. package/dist/core/embeddings/index.js +9 -0
  126. package/dist/core/embeddings/text-generator.d.ts +24 -0
  127. package/dist/core/embeddings/text-generator.js +182 -0
  128. package/dist/core/embeddings/types.d.ts +87 -0
  129. package/dist/core/embeddings/types.js +32 -0
  130. package/dist/core/graph/graph.d.ts +2 -0
  131. package/dist/core/graph/graph.js +66 -0
  132. package/dist/core/graph/types.d.ts +61 -0
  133. package/dist/core/graph/types.js +1 -0
  134. package/dist/core/ingestion/ast-cache.d.ts +11 -0
  135. package/dist/core/ingestion/ast-cache.js +34 -0
  136. package/dist/core/ingestion/call-processor.d.ts +15 -0
  137. package/dist/core/ingestion/call-processor.js +327 -0
  138. package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
  139. package/dist/core/ingestion/cluster-enricher.js +170 -0
  140. package/dist/core/ingestion/community-processor.d.ts +39 -0
  141. package/dist/core/ingestion/community-processor.js +312 -0
  142. package/dist/core/ingestion/entry-point-scoring.d.ts +39 -0
  143. package/dist/core/ingestion/entry-point-scoring.js +260 -0
  144. package/dist/core/ingestion/filesystem-walker.d.ts +28 -0
  145. package/dist/core/ingestion/filesystem-walker.js +80 -0
  146. package/dist/core/ingestion/framework-detection.d.ts +39 -0
  147. package/dist/core/ingestion/framework-detection.js +235 -0
  148. package/dist/core/ingestion/heritage-processor.d.ts +20 -0
  149. package/dist/core/ingestion/heritage-processor.js +197 -0
  150. package/dist/core/ingestion/import-processor.d.ts +38 -0
  151. package/dist/core/ingestion/import-processor.js +778 -0
  152. package/dist/core/ingestion/parsing-processor.d.ts +15 -0
  153. package/dist/core/ingestion/parsing-processor.js +291 -0
  154. package/dist/core/ingestion/pipeline.d.ts +5 -0
  155. package/dist/core/ingestion/pipeline.js +323 -0
  156. package/dist/core/ingestion/process-processor.d.ts +51 -0
  157. package/dist/core/ingestion/process-processor.js +309 -0
  158. package/dist/core/ingestion/scope-filter.d.ts +25 -0
  159. package/dist/core/ingestion/scope-filter.js +100 -0
  160. package/dist/core/ingestion/structure-processor.d.ts +2 -0
  161. package/dist/core/ingestion/structure-processor.js +36 -0
  162. package/dist/core/ingestion/symbol-table.d.ts +33 -0
  163. package/dist/core/ingestion/symbol-table.js +38 -0
  164. package/dist/core/ingestion/tree-sitter-queries.d.ts +12 -0
  165. package/dist/core/ingestion/tree-sitter-queries.js +398 -0
  166. package/dist/core/ingestion/utils.d.ts +10 -0
  167. package/dist/core/ingestion/utils.js +50 -0
  168. package/dist/core/ingestion/workers/parse-worker.d.ts +59 -0
  169. package/dist/core/ingestion/workers/parse-worker.js +672 -0
  170. package/dist/core/ingestion/workers/worker-pool.d.ts +16 -0
  171. package/dist/core/ingestion/workers/worker-pool.js +120 -0
  172. package/dist/core/kuzu/csv-generator.d.ts +29 -0
  173. package/dist/core/kuzu/csv-generator.js +336 -0
  174. package/dist/core/kuzu/kuzu-adapter.d.ts +101 -0
  175. package/dist/core/kuzu/kuzu-adapter.js +753 -0
  176. package/dist/core/kuzu/schema.d.ts +53 -0
  177. package/dist/core/kuzu/schema.js +407 -0
  178. package/dist/core/search/bm25-index.d.ts +23 -0
  179. package/dist/core/search/bm25-index.js +95 -0
  180. package/dist/core/search/hybrid-search.d.ts +49 -0
  181. package/dist/core/search/hybrid-search.js +118 -0
  182. package/dist/core/tree-sitter/parser-loader.d.ts +4 -0
  183. package/dist/core/tree-sitter/parser-loader.js +44 -0
  184. package/dist/core/wiki/generator.d.ts +110 -0
  185. package/dist/core/wiki/generator.js +786 -0
  186. package/dist/core/wiki/graph-queries.d.ts +80 -0
  187. package/dist/core/wiki/graph-queries.js +238 -0
  188. package/dist/core/wiki/html-viewer.d.ts +10 -0
  189. package/dist/core/wiki/html-viewer.js +297 -0
  190. package/dist/core/wiki/llm-client.d.ts +40 -0
  191. package/dist/core/wiki/llm-client.js +162 -0
  192. package/dist/core/wiki/prompts.d.ts +53 -0
  193. package/dist/core/wiki/prompts.js +174 -0
  194. package/dist/lib/utils.d.ts +1 -0
  195. package/dist/lib/utils.js +3 -0
  196. package/dist/mcp/core/embedder.d.ts +27 -0
  197. package/dist/mcp/core/embedder.js +108 -0
  198. package/dist/mcp/core/kuzu-adapter.d.ts +34 -0
  199. package/dist/mcp/core/kuzu-adapter.js +231 -0
  200. package/dist/mcp/local/local-backend.d.ts +160 -0
  201. package/dist/mcp/local/local-backend.js +1646 -0
  202. package/dist/mcp/resources.d.ts +31 -0
  203. package/dist/mcp/resources.js +407 -0
  204. package/dist/mcp/server.d.ts +23 -0
  205. package/dist/mcp/server.js +251 -0
  206. package/dist/mcp/staleness.d.ts +15 -0
  207. package/dist/mcp/staleness.js +29 -0
  208. package/dist/mcp/tools.d.ts +24 -0
  209. package/dist/mcp/tools.js +195 -0
  210. package/dist/server/api.d.ts +10 -0
  211. package/dist/server/api.js +344 -0
  212. package/dist/server/mcp-http.d.ts +13 -0
  213. package/dist/server/mcp-http.js +100 -0
  214. package/dist/storage/git.d.ts +6 -0
  215. package/dist/storage/git.js +32 -0
  216. package/dist/storage/repo-manager.d.ts +125 -0
  217. package/dist/storage/repo-manager.js +257 -0
  218. package/dist/types/pipeline.d.ts +34 -0
  219. package/dist/types/pipeline.js +18 -0
  220. package/hooks/claude/gitnexus-hook.cjs +135 -0
  221. package/hooks/claude/pre-tool-use.sh +78 -0
  222. package/hooks/claude/session-start.sh +42 -0
  223. package/package.json +92 -0
  224. package/skills/gitnexus-cli.md +82 -0
  225. package/skills/gitnexus-debugging.md +89 -0
  226. package/skills/gitnexus-exploring.md +78 -0
  227. package/skills/gitnexus-guide.md +64 -0
  228. package/skills/gitnexus-impact-analysis.md +97 -0
  229. package/skills/gitnexus-refactoring.md +121 -0
  230. package/vendor/leiden/index.cjs +355 -0
  231. package/vendor/leiden/utils.cjs +392 -0
@@ -0,0 +1,15 @@
1
import { KnowledgeGraph } from '../graph/types.js';
import { SymbolTable } from './symbol-table.js';
import { ASTCache } from './ast-cache.js';
import { WorkerPool } from './workers/worker-pool.js';
import type { ExtractedImport, ExtractedCall, ExtractedHeritage } from './workers/parse-worker.js';
/** Per-file progress callback: (current, total, filePath). */
export type FileProgressCallback = (current: number, total: number, filePath: string) => void;
/** Raw import/call/heritage records extracted by parse workers, to be resolved by later pipeline stages. */
export interface WorkerExtractedData {
    imports: ExtractedImport[];
    calls: ExtractedCall[];
    heritage: ExtractedHeritage[];
}
/**
 * Parse the given files into the knowledge graph and symbol table.
 * When a worker pool is supplied and succeeds, resolves to the workers'
 * pre-extracted records; resolves to null when the sequential fallback
 * ran (it produces no pre-extracted data).
 */
export declare const processParsing: (graph: KnowledgeGraph, files: {
    path: string;
    content: string;
}[], symbolTable: SymbolTable, astCache: ASTCache, onFileProgress?: FileProgressCallback, workerPool?: WorkerPool) => Promise<WorkerExtractedData | null>;
@@ -0,0 +1,291 @@
1
+ import Parser from 'tree-sitter';
2
+ import { loadParser, loadLanguage } from '../tree-sitter/parser-loader.js';
3
+ import { LANGUAGE_QUERIES } from './tree-sitter-queries.js';
4
+ import { generateId } from '../../lib/utils.js';
5
+ import { getLanguageFromFilename, yieldToEventLoop } from './utils.js';
6
// ============================================================================
// EXPORT DETECTION - Language-specific visibility detection
// ============================================================================
/**
 * Check if a symbol (function, class, etc.) is exported/public.
 * Handles all 9 supported languages with explicit logic.
 *
 * @param node - The AST node for the symbol name
 * @param name - The symbol name
 * @param language - The programming language
 * @returns true if the symbol is exported/public
 */
const isNodeExported = (node, name, language) => {
    // Tree-sitter attaches modifier nodes as SIBLINGS of the name node
    // (children of the declaration), never as its ancestors. Scan every
    // child of `parent` for a node of one of `types` whose text contains
    // `keyword`.
    const hasModifierChild = (parent, types, keyword) => {
        for (let i = 0; i < parent.childCount; i++) {
            const child = parent.child(i);
            if (child && types.includes(child.type) && child.text?.includes(keyword)) {
                return true;
            }
        }
        return false;
    };
    let current = node;
    switch (language) {
        // JavaScript/TypeScript: look for an export keyword in the ancestors.
        case 'javascript':
        case 'typescript':
            while (current) {
                const type = current.type;
                if (type === 'export_statement' ||
                    type === 'export_specifier' ||
                    (type === 'lexical_declaration' && current.parent?.type === 'export_statement')) {
                    return true;
                }
                // Fallback: the ancestor's source text starts with 'export '.
                if (current.text?.startsWith('export ')) {
                    return true;
                }
                current = current.parent;
            }
            return false;
        // Python: public unless the name has a leading underscore (convention).
        case 'python':
            return !name.startsWith('_');
        // Java: modifiers are siblings of the name node, not parents — check
        // each ancestor's children for a 'modifiers' node containing 'public'.
        case 'java':
            while (current) {
                const parent = current.parent;
                if (parent) {
                    if (hasModifierChild(parent, ['modifiers'], 'public')) {
                        return true;
                    }
                    // Fallback: the declaration's text literally starts with 'public'.
                    if ((parent.type === 'method_declaration' || parent.type === 'constructor_declaration') &&
                        parent.text?.trimStart().startsWith('public')) {
                        return true;
                    }
                }
                current = parent;
            }
            return false;
        // C#: 'modifier' nodes are also siblings of the name (children of the
        // declaration), so check each ancestor's children — walking the
        // ancestor chain alone would never encounter a modifier node.
        case 'csharp':
            while (current) {
                const parent = current.parent;
                if (parent && hasModifierChild(parent, ['modifier', 'modifiers'], 'public')) {
                    return true;
                }
                current = parent;
            }
            return false;
        // Go: exported iff the first character is an uppercase letter.
        case 'go': {
            if (name.length === 0) {
                return false;
            }
            const first = name[0];
            // Must be a cased letter in uppercase form (not a digit or symbol).
            return first === first.toUpperCase() && first !== first.toLowerCase();
        }
        // Rust: 'visibility_modifier' is a sibling of the name inside the item
        // node, so inspect each ancestor's children for it.
        case 'rust':
            while (current) {
                const parent = current.parent;
                if (parent && hasModifierChild(parent, ['visibility_modifier'], 'pub')) {
                    return true;
                }
                current = parent;
            }
            return false;
        // C/C++: no native export concept at language level.
        // Entry points will be detected via name patterns (main, etc.).
        case 'c':
        case 'cpp':
            return false;
        default:
            return false;
    }
};
101
// ============================================================================
// Worker-based parallel parsing
// ============================================================================
/**
 * Parse files through the worker pool. The pool owns chunking and
 * sub-batching; this merges every worker's nodes/relationships/symbols into
 * the graph and symbol table and returns the raw import/call/heritage
 * records for later resolution.
 */
const processParsingWithWorkers = async (graph, files, symbolTable, astCache, workerPool, onFileProgress) => {
    // Keep only files whose name maps to a supported language.
    const candidates = files
        .filter((f) => getLanguageFromFilename(f.path))
        .map((f) => ({ path: f.path, content: f.content }));
    if (candidates.length === 0) {
        return { imports: [], calls: [], heritage: [] };
    }
    const totalCount = files.length;
    // Forward progress ticks from the pool, clamped to the file total.
    const results = await workerPool.dispatch(candidates, (filesDone) => {
        onFileProgress?.(Math.min(filesDone, totalCount), totalCount, 'Parsing...');
    });
    // Fold every worker result into the graph/symbol table and accumulate
    // the pre-extracted records.
    const merged = { imports: [], calls: [], heritage: [] };
    for (const result of results) {
        result.nodes.forEach((n) => graph.addNode({
            id: n.id,
            label: n.label,
            properties: n.properties,
        }));
        result.relationships.forEach((rel) => graph.addRelationship(rel));
        result.symbols.forEach((s) => symbolTable.add(s.filePath, s.name, s.nodeId, s.type));
        merged.imports.push(...result.imports);
        merged.calls.push(...result.calls);
        merged.heritage.push(...result.heritage);
    }
    // Final progress tick.
    onFileProgress?.(totalCount, totalCount, 'done');
    return merged;
};
145
// ============================================================================
// Sequential fallback (original implementation)
// ============================================================================
/** Ordered capture-name → graph-label mapping; first match wins. */
const CAPTURE_LABELS = [
    ['definition.function', 'Function'],
    ['definition.class', 'Class'],
    ['definition.interface', 'Interface'],
    ['definition.method', 'Method'],
    ['definition.struct', 'Struct'],
    ['definition.enum', 'Enum'],
    ['definition.namespace', 'Namespace'],
    ['definition.module', 'Module'],
    ['definition.trait', 'Trait'],
    ['definition.impl', 'Impl'],
    ['definition.type', 'TypeAlias'],
    ['definition.const', 'Const'],
    ['definition.static', 'Static'],
    ['definition.typedef', 'Typedef'],
    ['definition.macro', 'Macro'],
    ['definition.union', 'Union'],
    ['definition.property', 'Property'],
    ['definition.record', 'Record'],
    ['definition.delegate', 'Delegate'],
    ['definition.annotation', 'Annotation'],
    ['definition.constructor', 'Constructor'],
    ['definition.template', 'Template'],
];
/**
 * Parse files one-by-one on the main thread (fallback when no worker pool
 * is available). For each parseable file: parse it, cache the AST, run the
 * language's definition query, and add a graph node + DEFINES relationship
 * per captured symbol. Imports and calls are skipped here — they are
 * resolved by later pipeline stages from the cached ASTs.
 */
const processParsingSequential = async (graph, files, symbolTable, astCache, onFileProgress) => {
    const parser = await loadParser();
    const total = files.length;
    for (let i = 0; i < files.length; i++) {
        const file = files[i];
        onFileProgress?.(i + 1, total, file.path);
        // Yield periodically so long parse runs don't starve the event loop.
        if (i % 20 === 0)
            await yieldToEventLoop();
        const language = getLanguageFromFilename(file.path);
        if (!language)
            continue;
        // Skip very large files — they can crash tree-sitter or cause OOM.
        if (file.content.length > 512 * 1024)
            continue;
        await loadLanguage(language, file.path);
        let tree;
        try {
            tree = parser.parse(file.content, undefined, { bufferSize: 1024 * 256 });
        }
        catch (parseError) {
            console.warn(`Skipping unparseable file: ${file.path}`);
            continue;
        }
        astCache.set(file.path, tree);
        const queryString = LANGUAGE_QUERIES[language];
        if (!queryString) {
            continue;
        }
        let matches;
        try {
            // `grammar` is the tree-sitter language object (previously a
            // confusing shadow of the filename-derived `language` string).
            const grammar = parser.getLanguage();
            const query = new Parser.Query(grammar, queryString);
            matches = query.matches(tree.rootNode);
        }
        catch (queryError) {
            console.warn(`Query error for ${file.path}:`, queryError);
            continue;
        }
        matches.forEach(match => {
            const captureMap = {};
            match.captures.forEach(c => {
                captureMap[c.name] = c.node;
            });
            // Imports and calls are handled by later processors.
            if (captureMap['import'] || captureMap['call']) {
                return;
            }
            const nameNode = captureMap['name'];
            if (!nameNode)
                return;
            const nodeName = nameNode.text;
            // First matching definition capture decides the node label.
            const hit = CAPTURE_LABELS.find(([capture]) => captureMap[capture]);
            const nodeLabel = hit ? hit[1] : 'CodeElement';
            const nodeId = generateId(nodeLabel, `${file.path}:${nodeName}`);
            const node = {
                id: nodeId,
                label: nodeLabel,
                properties: {
                    name: nodeName,
                    filePath: file.path,
                    startLine: nameNode.startPosition.row + 1,
                    endLine: nameNode.endPosition.row + 1,
                    language: language,
                    isExported: isNodeExported(nameNode, nodeName, language),
                }
            };
            graph.addNode(node);
            symbolTable.add(file.path, nodeName, nodeId, nodeLabel);
            // Link the file to the symbol it defines.
            const fileId = generateId('File', file.path);
            const relId = generateId('DEFINES', `${fileId}->${nodeId}`);
            const relationship = {
                id: relId,
                sourceId: fileId,
                targetId: nodeId,
                type: 'DEFINES',
                confidence: 1.0,
                reason: '',
            };
            graph.addRelationship(relationship);
        });
    }
};
276
// ============================================================================
// Public API
// ============================================================================
/**
 * Parse a set of files into the graph, preferring the worker pool and
 * degrading to sequential parsing when no pool is given or the pool fails.
 * Resolves to the workers' extracted records, or null after the sequential
 * path (which produces no pre-extracted data).
 */
export const processParsing = async (graph, files, symbolTable, astCache, onFileProgress, workerPool) => {
    if (workerPool) {
        try {
            const extracted = await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, onFileProgress);
            return extracted;
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : err;
            console.warn('Worker pool parsing failed, falling back to sequential:', reason);
        }
    }
    // Fallback: sequential parsing (no pre-extracted data)
    await processParsingSequential(graph, files, symbolTable, astCache, onFileProgress);
    return null;
};
@@ -0,0 +1,5 @@
1
import { PipelineProgress, PipelineResult } from '../../types/pipeline.js';
/**
 * Run the full ingestion pipeline over a repository on disk
 * (scan → structure → parse → resolve), reporting progress via `onProgress`.
 *
 * Options:
 * - `includeExtensions`: restrict processing to files with these extensions.
 * - `scopeRules`: restrict processing to files matched by scope-filter rules.
 */
export declare const runPipelineFromRepo: (repoPath: string, onProgress: (progress: PipelineProgress) => void, options?: {
    includeExtensions?: string[];
    scopeRules?: string[];
}) => Promise<PipelineResult>;
@@ -0,0 +1,323 @@
1
+ import { createKnowledgeGraph } from '../graph/graph.js';
2
+ import { processStructure } from './structure-processor.js';
3
+ import { processParsing } from './parsing-processor.js';
4
+ import { processImports, processImportsFromExtracted, createImportMap, buildImportResolutionContext } from './import-processor.js';
5
+ import { processCalls, processCallsFromExtracted } from './call-processor.js';
6
+ import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
7
+ import { processCommunities } from './community-processor.js';
8
+ import { processProcesses } from './process-processor.js';
9
+ import { createSymbolTable } from './symbol-table.js';
10
+ import { createASTCache } from './ast-cache.js';
11
+ import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
12
+ import { getLanguageFromFilename } from './utils.js';
13
+ import { createWorkerPool } from './workers/worker-pool.js';
14
+ import { selectEntriesByScopeRules } from './scope-filter.js';
15
+ import path from 'path';
16
// Extra diagnostic logging is gated on NODE_ENV=development.
const isDev = process.env.NODE_ENV === 'development';
/** Max bytes of source content to load per parse chunk. Each chunk's source +
 * parsed ASTs + extracted records + worker serialization overhead all live in
 * memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
 * peak working memory per chunk after parse expansion. */
const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
/** Initial max AST trees to keep in the LRU cache (the pipeline later
 * re-creates the cache sized to the largest chunk). */
const AST_CACHE_CAP = 50;
24
+ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
25
+ const graph = createKnowledgeGraph();
26
+ const symbolTable = createSymbolTable();
27
+ let astCache = createASTCache(AST_CACHE_CAP);
28
+ const importMap = createImportMap();
29
+ const cleanup = () => {
30
+ astCache.clear();
31
+ symbolTable.clear();
32
+ };
33
+ try {
34
+ // ── Phase 1: Scan paths only (no content read) ─────────────────────
35
+ onProgress({
36
+ phase: 'extracting',
37
+ percent: 0,
38
+ message: 'Scanning repository...',
39
+ });
40
+ const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
41
+ const scanProgress = Math.round((current / total) * 15);
42
+ onProgress({
43
+ phase: 'extracting',
44
+ percent: scanProgress,
45
+ message: 'Scanning repository...',
46
+ detail: filePath,
47
+ stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
48
+ });
49
+ });
50
+ const scopeSelection = selectEntriesByScopeRules(scannedFiles, options?.scopeRules || []);
51
+ const scopedFiles = scopeSelection.selected;
52
+ if (scopeSelection.diagnostics.appliedRuleCount > 0 && scopedFiles.length === 0) {
53
+ throw new Error('Scope filters matched zero files. Check --scope-manifest/--scope-prefix.');
54
+ }
55
+ const includeExtensions = new Set((options?.includeExtensions || [])
56
+ .map(ext => ext.trim().toLowerCase())
57
+ .filter(Boolean)
58
+ .map(ext => (ext.startsWith('.') ? ext : `.${ext}`)));
59
+ const extensionFiltered = includeExtensions.size > 0
60
+ ? scopedFiles.filter(f => includeExtensions.has(path.extname(f.path).toLowerCase()))
61
+ : scopedFiles;
62
+ const totalFiles = extensionFiltered.length;
63
+ onProgress({
64
+ phase: 'extracting',
65
+ percent: 15,
66
+ message: 'Repository scanned successfully',
67
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
68
+ });
69
+ // ── Phase 2: Structure (paths only — no content needed) ────────────
70
+ onProgress({
71
+ phase: 'structure',
72
+ percent: 15,
73
+ message: 'Analyzing project structure...',
74
+ stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
75
+ });
76
+ const allPaths = extensionFiltered.map(f => f.path);
77
+ processStructure(graph, allPaths);
78
+ onProgress({
79
+ phase: 'structure',
80
+ percent: 20,
81
+ message: 'Project structure analyzed',
82
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
83
+ });
84
+ // ── Phase 3+4: Chunked read + parse ────────────────────────────────
85
+ // Group parseable files into byte-budget chunks so only ~20MB of source
86
+ // is in memory at a time. Each chunk is: read → parse → extract → free.
87
+ const parseableScanned = extensionFiltered.filter(f => getLanguageFromFilename(f.path));
88
+ const totalParseable = parseableScanned.length;
89
+ // Build byte-budget chunks
90
+ const chunks = [];
91
+ let currentChunk = [];
92
+ let currentBytes = 0;
93
+ for (const file of parseableScanned) {
94
+ if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
95
+ chunks.push(currentChunk);
96
+ currentChunk = [];
97
+ currentBytes = 0;
98
+ }
99
+ currentChunk.push(file.path);
100
+ currentBytes += file.size;
101
+ }
102
+ if (currentChunk.length > 0)
103
+ chunks.push(currentChunk);
104
+ const numChunks = chunks.length;
105
+ if (isDev) {
106
+ const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
107
+ console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
108
+ }
109
+ onProgress({
110
+ phase: 'parsing',
111
+ percent: 20,
112
+ message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
113
+ stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
114
+ });
115
+ // Create worker pool once, reuse across chunks
116
+ let workerPool;
117
+ try {
118
+ const workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
119
+ workerPool = createWorkerPool(workerUrl);
120
+ }
121
+ catch (err) {
122
+ // Worker pool creation failed — sequential fallback
123
+ }
124
+ let filesParsedSoFar = 0;
125
+ // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
126
+ const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
127
+ astCache = createASTCache(maxChunkFiles);
128
+ // Build import resolution context once — suffix index, file lists, resolve cache.
129
+ // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
130
+ const importCtx = buildImportResolutionContext(allPaths);
131
+ const allPathObjects = allPaths.map(p => ({ path: p }));
132
+ // Single-pass: parse + resolve imports/calls/heritage per chunk.
133
+ // Calls/heritage use the symbol table built so far (symbols from earlier chunks
134
+ // are already registered). This trades ~5% cross-chunk resolution accuracy for
135
+ // 200-400MB less memory — critical for Linux-kernel-scale repos.
136
+ const sequentialChunkPaths = [];
137
+ try {
138
+ for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
139
+ const chunkPaths = chunks[chunkIdx];
140
+ // Read content for this chunk only
141
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
142
+ const chunkFiles = chunkPaths
143
+ .filter(p => chunkContents.has(p))
144
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
145
+ // Parse this chunk (workers or sequential fallback)
146
+ const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
147
+ const globalCurrent = filesParsedSoFar + current;
148
+ const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
149
+ onProgress({
150
+ phase: 'parsing',
151
+ percent: Math.round(parsingProgress),
152
+ message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
153
+ detail: filePath,
154
+ stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
155
+ });
156
+ }, workerPool);
157
+ if (chunkWorkerData) {
158
+ // Imports
159
+ await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, importMap, undefined, repoPath, importCtx);
160
+ // Calls — resolve immediately, then free the array
161
+ if (chunkWorkerData.calls.length > 0) {
162
+ await processCallsFromExtracted(graph, chunkWorkerData.calls, symbolTable, importMap);
163
+ }
164
+ // Heritage — resolve immediately, then free
165
+ if (chunkWorkerData.heritage.length > 0) {
166
+ await processHeritageFromExtracted(graph, chunkWorkerData.heritage, symbolTable);
167
+ }
168
+ }
169
+ else {
170
+ await processImports(graph, chunkFiles, astCache, importMap, undefined, repoPath, allPaths);
171
+ sequentialChunkPaths.push(chunkPaths);
172
+ }
173
+ filesParsedSoFar += chunkFiles.length;
174
+ // Clear AST cache between chunks to free memory
175
+ astCache.clear();
176
+ // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
177
+ }
178
+ }
179
+ finally {
180
+ await workerPool?.terminate();
181
+ }
182
+ // Sequential fallback chunks: re-read source for call/heritage resolution
183
+ for (const chunkPaths of sequentialChunkPaths) {
184
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
185
+ const chunkFiles = chunkPaths
186
+ .filter(p => chunkContents.has(p))
187
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
188
+ astCache = createASTCache(chunkFiles.length);
189
+ await processCalls(graph, chunkFiles, astCache, symbolTable, importMap);
190
+ await processHeritage(graph, chunkFiles, astCache, symbolTable);
191
+ astCache.clear();
192
+ }
193
+ // Free import resolution context — suffix index + resolve cache no longer needed
194
+ // (allPathObjects and importCtx hold ~94MB+ for large repos)
195
+ allPathObjects.length = 0;
196
+ importCtx.resolveCache.clear();
197
+ importCtx.suffixIndex = null;
198
+ importCtx.normalizedFileList = null;
199
+ if (isDev) {
200
+ let importsCount = 0;
201
+ for (const r of graph.iterRelationships()) {
202
+ if (r.type === 'IMPORTS')
203
+ importsCount++;
204
+ }
205
+ console.log(`📊 Pipeline: graph has ${importsCount} IMPORTS, ${graph.relationshipCount} total relationships`);
206
+ }
207
+ // ── Phase 5: Communities ───────────────────────────────────────────
208
+ onProgress({
209
+ phase: 'communities',
210
+ percent: 82,
211
+ message: 'Detecting code communities...',
212
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
213
+ });
214
+ const communityResult = await processCommunities(graph, (message, progress) => {
215
+ const communityProgress = 82 + (progress * 0.10);
216
+ onProgress({
217
+ phase: 'communities',
218
+ percent: Math.round(communityProgress),
219
+ message,
220
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
221
+ });
222
+ });
223
+ if (isDev) {
224
+ console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
225
+ }
226
+ communityResult.communities.forEach(comm => {
227
+ graph.addNode({
228
+ id: comm.id,
229
+ label: 'Community',
230
+ properties: {
231
+ name: comm.label,
232
+ filePath: '',
233
+ heuristicLabel: comm.heuristicLabel,
234
+ cohesion: comm.cohesion,
235
+ symbolCount: comm.symbolCount,
236
+ }
237
+ });
238
+ });
239
+ communityResult.memberships.forEach(membership => {
240
+ graph.addRelationship({
241
+ id: `${membership.nodeId}_member_of_${membership.communityId}`,
242
+ type: 'MEMBER_OF',
243
+ sourceId: membership.nodeId,
244
+ targetId: membership.communityId,
245
+ confidence: 1.0,
246
+ reason: 'leiden-algorithm',
247
+ });
248
+ });
249
+ // ── Phase 6: Processes ─────────────────────────────────────────────
250
+ onProgress({
251
+ phase: 'processes',
252
+ percent: 94,
253
+ message: 'Detecting execution flows...',
254
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
255
+ });
256
+ let symbolCount = 0;
257
+ graph.forEachNode(n => { if (n.label !== 'File')
258
+ symbolCount++; });
259
+ const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
260
+ const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
261
+ const processProgress = 94 + (progress * 0.05);
262
+ onProgress({
263
+ phase: 'processes',
264
+ percent: Math.round(processProgress),
265
+ message,
266
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
267
+ });
268
+ }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
269
+ if (isDev) {
270
+ console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
271
+ }
272
+ processResult.processes.forEach(proc => {
273
+ graph.addNode({
274
+ id: proc.id,
275
+ label: 'Process',
276
+ properties: {
277
+ name: proc.label,
278
+ filePath: '',
279
+ heuristicLabel: proc.heuristicLabel,
280
+ processType: proc.processType,
281
+ stepCount: proc.stepCount,
282
+ communities: proc.communities,
283
+ entryPointId: proc.entryPointId,
284
+ terminalId: proc.terminalId,
285
+ }
286
+ });
287
+ });
288
+ processResult.steps.forEach(step => {
289
+ graph.addRelationship({
290
+ id: `${step.nodeId}_step_${step.step}_${step.processId}`,
291
+ type: 'STEP_IN_PROCESS',
292
+ sourceId: step.nodeId,
293
+ targetId: step.processId,
294
+ confidence: 1.0,
295
+ reason: 'trace-detection',
296
+ step: step.step,
297
+ });
298
+ });
299
+ onProgress({
300
+ phase: 'complete',
301
+ percent: 100,
302
+ message: `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`,
303
+ stats: {
304
+ filesProcessed: totalFiles,
305
+ totalFiles,
306
+ nodesCreated: graph.nodeCount
307
+ },
308
+ });
309
+ astCache.clear();
310
+ return {
311
+ graph,
312
+ repoPath,
313
+ totalFileCount: totalFiles,
314
+ communityResult,
315
+ processResult,
316
+ scopeDiagnostics: scopeSelection.diagnostics,
317
+ };
318
+ }
319
+ catch (error) {
320
+ cleanup();
321
+ throw error;
322
+ }
323
+ };