@zuvia-software-solutions/code-mapper 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. package/README.md +215 -0
  2. package/dist/cli/ai-context.d.ts +19 -0
  3. package/dist/cli/ai-context.js +168 -0
  4. package/dist/cli/analyze.d.ts +7 -0
  5. package/dist/cli/analyze.js +325 -0
  6. package/dist/cli/augment.d.ts +7 -0
  7. package/dist/cli/augment.js +27 -0
  8. package/dist/cli/clean.d.ts +5 -0
  9. package/dist/cli/clean.js +56 -0
  10. package/dist/cli/eval-server.d.ts +25 -0
  11. package/dist/cli/eval-server.js +365 -0
  12. package/dist/cli/index.d.ts +6 -0
  13. package/dist/cli/index.js +102 -0
  14. package/dist/cli/lazy-action.d.ts +6 -0
  15. package/dist/cli/lazy-action.js +19 -0
  16. package/dist/cli/list.d.ts +2 -0
  17. package/dist/cli/list.js +27 -0
  18. package/dist/cli/mcp.d.ts +8 -0
  19. package/dist/cli/mcp.js +35 -0
  20. package/dist/cli/refresh.d.ts +12 -0
  21. package/dist/cli/refresh.js +165 -0
  22. package/dist/cli/serve.d.ts +5 -0
  23. package/dist/cli/serve.js +8 -0
  24. package/dist/cli/setup.d.ts +6 -0
  25. package/dist/cli/setup.js +218 -0
  26. package/dist/cli/status.d.ts +2 -0
  27. package/dist/cli/status.js +33 -0
  28. package/dist/cli/tool.d.ts +28 -0
  29. package/dist/cli/tool.js +87 -0
  30. package/dist/config/ignore-service.d.ts +32 -0
  31. package/dist/config/ignore-service.js +282 -0
  32. package/dist/config/supported-languages.d.ts +23 -0
  33. package/dist/config/supported-languages.js +52 -0
  34. package/dist/core/augmentation/engine.d.ts +22 -0
  35. package/dist/core/augmentation/engine.js +232 -0
  36. package/dist/core/embeddings/embedder.d.ts +35 -0
  37. package/dist/core/embeddings/embedder.js +171 -0
  38. package/dist/core/embeddings/embedding-pipeline.d.ts +41 -0
  39. package/dist/core/embeddings/embedding-pipeline.js +402 -0
  40. package/dist/core/embeddings/index.d.ts +5 -0
  41. package/dist/core/embeddings/index.js +6 -0
  42. package/dist/core/embeddings/text-generator.d.ts +20 -0
  43. package/dist/core/embeddings/text-generator.js +159 -0
  44. package/dist/core/embeddings/types.d.ts +60 -0
  45. package/dist/core/embeddings/types.js +23 -0
  46. package/dist/core/graph/graph.d.ts +4 -0
  47. package/dist/core/graph/graph.js +65 -0
  48. package/dist/core/graph/types.d.ts +69 -0
  49. package/dist/core/graph/types.js +3 -0
  50. package/dist/core/incremental/child-process.d.ts +8 -0
  51. package/dist/core/incremental/child-process.js +649 -0
  52. package/dist/core/incremental/refresh-coordinator.d.ts +32 -0
  53. package/dist/core/incremental/refresh-coordinator.js +147 -0
  54. package/dist/core/incremental/types.d.ts +78 -0
  55. package/dist/core/incremental/types.js +153 -0
  56. package/dist/core/incremental/watcher.d.ts +63 -0
  57. package/dist/core/incremental/watcher.js +338 -0
  58. package/dist/core/ingestion/ast-cache.d.ts +12 -0
  59. package/dist/core/ingestion/ast-cache.js +34 -0
  60. package/dist/core/ingestion/call-processor.d.ts +34 -0
  61. package/dist/core/ingestion/call-processor.js +937 -0
  62. package/dist/core/ingestion/call-routing.d.ts +40 -0
  63. package/dist/core/ingestion/call-routing.js +97 -0
  64. package/dist/core/ingestion/cluster-enricher.d.ts +30 -0
  65. package/dist/core/ingestion/cluster-enricher.js +151 -0
  66. package/dist/core/ingestion/community-processor.d.ts +26 -0
  67. package/dist/core/ingestion/community-processor.js +272 -0
  68. package/dist/core/ingestion/constants.d.ts +5 -0
  69. package/dist/core/ingestion/constants.js +8 -0
  70. package/dist/core/ingestion/entry-point-scoring.d.ts +23 -0
  71. package/dist/core/ingestion/entry-point-scoring.js +317 -0
  72. package/dist/core/ingestion/export-detection.d.ts +11 -0
  73. package/dist/core/ingestion/export-detection.js +203 -0
  74. package/dist/core/ingestion/filesystem-walker.d.ts +18 -0
  75. package/dist/core/ingestion/filesystem-walker.js +64 -0
  76. package/dist/core/ingestion/framework-detection.d.ts +42 -0
  77. package/dist/core/ingestion/framework-detection.js +405 -0
  78. package/dist/core/ingestion/heritage-processor.d.ts +15 -0
  79. package/dist/core/ingestion/heritage-processor.js +237 -0
  80. package/dist/core/ingestion/import-processor.d.ts +31 -0
  81. package/dist/core/ingestion/import-processor.js +416 -0
  82. package/dist/core/ingestion/language-config.d.ts +32 -0
  83. package/dist/core/ingestion/language-config.js +161 -0
  84. package/dist/core/ingestion/mro-processor.d.ts +32 -0
  85. package/dist/core/ingestion/mro-processor.js +343 -0
  86. package/dist/core/ingestion/named-binding-extraction.d.ts +51 -0
  87. package/dist/core/ingestion/named-binding-extraction.js +343 -0
  88. package/dist/core/ingestion/parsing-processor.d.ts +20 -0
  89. package/dist/core/ingestion/parsing-processor.js +282 -0
  90. package/dist/core/ingestion/pipeline.d.ts +3 -0
  91. package/dist/core/ingestion/pipeline.js +416 -0
  92. package/dist/core/ingestion/process-processor.d.ts +42 -0
  93. package/dist/core/ingestion/process-processor.js +357 -0
  94. package/dist/core/ingestion/resolution-context.d.ts +40 -0
  95. package/dist/core/ingestion/resolution-context.js +171 -0
  96. package/dist/core/ingestion/resolvers/csharp.d.ts +10 -0
  97. package/dist/core/ingestion/resolvers/csharp.js +101 -0
  98. package/dist/core/ingestion/resolvers/go.d.ts +8 -0
  99. package/dist/core/ingestion/resolvers/go.js +33 -0
  100. package/dist/core/ingestion/resolvers/index.d.ts +14 -0
  101. package/dist/core/ingestion/resolvers/index.js +10 -0
  102. package/dist/core/ingestion/resolvers/jvm.d.ts +9 -0
  103. package/dist/core/ingestion/resolvers/jvm.js +74 -0
  104. package/dist/core/ingestion/resolvers/php.d.ts +7 -0
  105. package/dist/core/ingestion/resolvers/php.js +30 -0
  106. package/dist/core/ingestion/resolvers/ruby.d.ts +9 -0
  107. package/dist/core/ingestion/resolvers/ruby.js +13 -0
  108. package/dist/core/ingestion/resolvers/rust.d.ts +5 -0
  109. package/dist/core/ingestion/resolvers/rust.js +62 -0
  110. package/dist/core/ingestion/resolvers/standard.d.ts +16 -0
  111. package/dist/core/ingestion/resolvers/standard.js +144 -0
  112. package/dist/core/ingestion/resolvers/utils.d.ts +18 -0
  113. package/dist/core/ingestion/resolvers/utils.js +113 -0
  114. package/dist/core/ingestion/structure-processor.d.ts +4 -0
  115. package/dist/core/ingestion/structure-processor.js +39 -0
  116. package/dist/core/ingestion/symbol-table.d.ts +34 -0
  117. package/dist/core/ingestion/symbol-table.js +48 -0
  118. package/dist/core/ingestion/tree-sitter-queries.d.ts +20 -0
  119. package/dist/core/ingestion/tree-sitter-queries.js +691 -0
  120. package/dist/core/ingestion/type-env.d.ts +52 -0
  121. package/dist/core/ingestion/type-env.js +349 -0
  122. package/dist/core/ingestion/type-extractors/c-cpp.d.ts +4 -0
  123. package/dist/core/ingestion/type-extractors/c-cpp.js +214 -0
  124. package/dist/core/ingestion/type-extractors/csharp.d.ts +4 -0
  125. package/dist/core/ingestion/type-extractors/csharp.js +224 -0
  126. package/dist/core/ingestion/type-extractors/go.d.ts +4 -0
  127. package/dist/core/ingestion/type-extractors/go.js +261 -0
  128. package/dist/core/ingestion/type-extractors/index.d.ts +20 -0
  129. package/dist/core/ingestion/type-extractors/index.js +30 -0
  130. package/dist/core/ingestion/type-extractors/jvm.d.ts +5 -0
  131. package/dist/core/ingestion/type-extractors/jvm.js +386 -0
  132. package/dist/core/ingestion/type-extractors/php.d.ts +4 -0
  133. package/dist/core/ingestion/type-extractors/php.js +280 -0
  134. package/dist/core/ingestion/type-extractors/python.d.ts +4 -0
  135. package/dist/core/ingestion/type-extractors/python.js +175 -0
  136. package/dist/core/ingestion/type-extractors/ruby.d.ts +12 -0
  137. package/dist/core/ingestion/type-extractors/ruby.js +218 -0
  138. package/dist/core/ingestion/type-extractors/rust.d.ts +4 -0
  139. package/dist/core/ingestion/type-extractors/rust.js +290 -0
  140. package/dist/core/ingestion/type-extractors/shared.d.ts +81 -0
  141. package/dist/core/ingestion/type-extractors/shared.js +322 -0
  142. package/dist/core/ingestion/type-extractors/swift.d.ts +4 -0
  143. package/dist/core/ingestion/type-extractors/swift.js +140 -0
  144. package/dist/core/ingestion/type-extractors/types.d.ts +111 -0
  145. package/dist/core/ingestion/type-extractors/types.js +4 -0
  146. package/dist/core/ingestion/type-extractors/typescript.d.ts +4 -0
  147. package/dist/core/ingestion/type-extractors/typescript.js +227 -0
  148. package/dist/core/ingestion/utils.d.ts +73 -0
  149. package/dist/core/ingestion/utils.js +992 -0
  150. package/dist/core/ingestion/workers/parse-worker.d.ts +99 -0
  151. package/dist/core/ingestion/workers/parse-worker.js +1055 -0
  152. package/dist/core/ingestion/workers/worker-pool.d.ts +15 -0
  153. package/dist/core/ingestion/workers/worker-pool.js +123 -0
  154. package/dist/core/lbug/csv-generator.d.ts +28 -0
  155. package/dist/core/lbug/csv-generator.js +355 -0
  156. package/dist/core/lbug/lbug-adapter.d.ts +96 -0
  157. package/dist/core/lbug/lbug-adapter.js +753 -0
  158. package/dist/core/lbug/schema.d.ts +46 -0
  159. package/dist/core/lbug/schema.js +402 -0
  160. package/dist/core/search/bm25-index.d.ts +20 -0
  161. package/dist/core/search/bm25-index.js +123 -0
  162. package/dist/core/search/hybrid-search.d.ts +32 -0
  163. package/dist/core/search/hybrid-search.js +131 -0
  164. package/dist/core/search/query-cache.d.ts +18 -0
  165. package/dist/core/search/query-cache.js +47 -0
  166. package/dist/core/search/query-expansion.d.ts +19 -0
  167. package/dist/core/search/query-expansion.js +75 -0
  168. package/dist/core/search/reranker.d.ts +29 -0
  169. package/dist/core/search/reranker.js +122 -0
  170. package/dist/core/search/types.d.ts +154 -0
  171. package/dist/core/search/types.js +51 -0
  172. package/dist/core/semantic/tsgo-service.d.ts +67 -0
  173. package/dist/core/semantic/tsgo-service.js +355 -0
  174. package/dist/core/tree-sitter/parser-loader.d.ts +12 -0
  175. package/dist/core/tree-sitter/parser-loader.js +71 -0
  176. package/dist/lib/memory-guard.d.ts +35 -0
  177. package/dist/lib/memory-guard.js +70 -0
  178. package/dist/lib/utils.d.ts +3 -0
  179. package/dist/lib/utils.js +6 -0
  180. package/dist/mcp/compatible-stdio-transport.d.ts +32 -0
  181. package/dist/mcp/compatible-stdio-transport.js +209 -0
  182. package/dist/mcp/core/embedder.d.ts +24 -0
  183. package/dist/mcp/core/embedder.js +168 -0
  184. package/dist/mcp/core/lbug-adapter.d.ts +29 -0
  185. package/dist/mcp/core/lbug-adapter.js +330 -0
  186. package/dist/mcp/local/local-backend.d.ts +188 -0
  187. package/dist/mcp/local/local-backend.js +2759 -0
  188. package/dist/mcp/resources.d.ts +22 -0
  189. package/dist/mcp/resources.js +379 -0
  190. package/dist/mcp/server.d.ts +10 -0
  191. package/dist/mcp/server.js +217 -0
  192. package/dist/mcp/staleness.d.ts +10 -0
  193. package/dist/mcp/staleness.js +25 -0
  194. package/dist/mcp/tools.d.ts +21 -0
  195. package/dist/mcp/tools.js +202 -0
  196. package/dist/server/api.d.ts +5 -0
  197. package/dist/server/api.js +340 -0
  198. package/dist/server/mcp-http.d.ts +7 -0
  199. package/dist/server/mcp-http.js +95 -0
  200. package/dist/storage/git.d.ts +6 -0
  201. package/dist/storage/git.js +35 -0
  202. package/dist/storage/repo-manager.d.ts +87 -0
  203. package/dist/storage/repo-manager.js +249 -0
  204. package/dist/types/pipeline.d.ts +35 -0
  205. package/dist/types/pipeline.js +20 -0
  206. package/hooks/claude/code-mapper-hook.cjs +238 -0
  207. package/hooks/claude/pre-tool-use.sh +79 -0
  208. package/hooks/claude/session-start.sh +42 -0
  209. package/models/mlx-embedder.py +185 -0
  210. package/package.json +100 -0
  211. package/scripts/patch-tree-sitter-swift.cjs +74 -0
  212. package/vendor/leiden/index.cjs +355 -0
  213. package/vendor/leiden/utils.cjs +392 -0
@@ -0,0 +1,40 @@
1
+ /** @file call-routing.ts @description Per-language call routing dispatch, primarily for Ruby where imports, heritage, and property definitions are expressed as method calls */
2
+ import { SupportedLanguages } from '../../config/supported-languages.js';
3
/**
 * Semantic classification of a single Ruby method call.
 *
 * - `import`: a `require` / `require_relative` call; `isRelative` is true for `require_relative`
 * - `heritage`: an `include` / `extend` / `prepend` call, one item per mixin argument
 * - `properties`: an `attr_accessor` / `attr_reader` / `attr_writer` call, one item per symbol argument
 * - `call`: any other method name, to be processed as a regular call
 * - `skip`: a routed call whose payload could not be extracted
 */
export type RubyCallRouting =
    | { kind: 'import'; importPath: string; isRelative: boolean }
    | { kind: 'heritage'; items: RubyHeritageItem[] }
    | { kind: 'properties'; items: RubyPropertyItem[] }
    | { kind: 'call' }
    | { kind: 'skip' };
/** Outcome of a language router; `null` means the call is passed through to normal call processing */
export type CallRoutingResult = RubyCallRouting | null;
/** Router signature: receives the called method name and the call AST node */
export type CallRouter = (calledName: string, callNode: any) => CallRoutingResult;
/** One router per supported language */
export declare const callRouters: Record<SupportedLanguages, CallRouter>;
/** A single mixin relationship extracted from an `include` / `extend` / `prepend` call */
export interface RubyHeritageItem {
    /** Name of the nearest enclosing class or module */
    enclosingClass: string;
    /** Source text of the mixin argument (a constant or scope resolution) */
    mixinName: string;
    /** The method that introduced the mixin */
    heritageKind: 'include' | 'extend' | 'prepend';
}
/** Ruby accessor macros that are routed to property definitions */
export type RubyAccessorType = 'attr_accessor' | 'attr_reader' | 'attr_writer';
/** A single property declared through an accessor macro */
export interface RubyPropertyItem {
    /** Symbol text without its leading colon */
    propName: string;
    /** The accessor macro that declared the property */
    accessorType: RubyAccessorType;
    /** Row of the symbol argument's start position */
    startLine: number;
    /** Row of the symbol argument's end position */
    endLine: number;
}
/**
 * Classify a Ruby call node and extract its semantic payload
 *
 * @param calledName - The method name (e.g. 'require', 'include', 'attr_accessor')
 * @param callNode - The tree-sitter call AST node
 * @returns Discriminated union describing the call's semantic role
 */
export declare function routeRubyCall(calledName: string, callNode: any): RubyCallRouting;
@@ -0,0 +1,97 @@
1
+ // code-mapper/src/core/ingestion/call-routing.ts
2
+ /** @file call-routing.ts @description Per-language call routing dispatch, primarily for Ruby where imports, heritage, and property definitions are expressed as method calls */
3
+ import { SupportedLanguages } from '../../config/supported-languages.js';
4
// Passthrough router: answers null for every call so the caller falls back to
// normal call processing.
function noRouting() {
    return null;
}
// Router lookup keyed by language. Ruby is the only entry with a dedicated
// router; every other language uses the passthrough.
export const callRouters = {
    [SupportedLanguages.JavaScript]: noRouting,
    [SupportedLanguages.TypeScript]: noRouting,
    [SupportedLanguages.Python]: noRouting,
    [SupportedLanguages.Java]: noRouting,
    [SupportedLanguages.Kotlin]: noRouting,
    [SupportedLanguages.Go]: noRouting,
    [SupportedLanguages.Rust]: noRouting,
    [SupportedLanguages.CSharp]: noRouting,
    [SupportedLanguages.PHP]: noRouting,
    [SupportedLanguages.Swift]: noRouting,
    [SupportedLanguages.CPlusPlus]: noRouting,
    [SupportedLanguages.C]: noRouting,
    [SupportedLanguages.Ruby]: routeRubyCall,
};
22
+ // Pre-allocated singletons for common return values
23
+ const CALL_RESULT = { kind: 'call' };
24
+ const SKIP_RESULT = { kind: 'skip' };
25
+ // Max depth for parent-walking loops to prevent pathological AST traversals
26
+ const MAX_PARENT_DEPTH = 50;
27
+ /**
28
+ * Classify a Ruby call node and extract its semantic payload
29
+ *
30
+ * @param calledName - The method name (e.g. 'require', 'include', 'attr_accessor')
31
+ * @param callNode - The tree-sitter call AST node
32
+ * @returns Discriminated union describing the call's semantic role
33
+ */
34
+ export function routeRubyCall(calledName, callNode) {
35
+ // require / require_relative -> import
36
+ if (calledName === 'require' || calledName === 'require_relative') {
37
+ const argList = callNode.childForFieldName?.('arguments');
38
+ const stringNode = argList?.children?.find((c) => c.type === 'string');
39
+ const contentNode = stringNode?.children?.find((c) => c.type === 'string_content');
40
+ if (!contentNode)
41
+ return SKIP_RESULT;
42
+ let importPath = contentNode.text;
43
+ // Validate: reject null bytes, control chars, excessively long paths
44
+ if (!importPath || importPath.length > 1024 || /[\x00-\x1f]/.test(importPath)) {
45
+ return SKIP_RESULT;
46
+ }
47
+ const isRelative = calledName === 'require_relative';
48
+ if (isRelative && !importPath.startsWith('.')) {
49
+ importPath = './' + importPath;
50
+ }
51
+ return { kind: 'import', importPath, isRelative };
52
+ }
53
+ // include / extend / prepend -> heritage (mixin)
54
+ if (calledName === 'include' || calledName === 'extend' || calledName === 'prepend') {
55
+ let enclosingClass = null;
56
+ let current = callNode.parent;
57
+ let depth = 0;
58
+ while (current && ++depth <= MAX_PARENT_DEPTH) {
59
+ if (current.type === 'class' || current.type === 'module') {
60
+ const nameNode = current.childForFieldName?.('name');
61
+ if (nameNode) {
62
+ enclosingClass = nameNode.text;
63
+ break;
64
+ }
65
+ }
66
+ current = current.parent;
67
+ }
68
+ if (!enclosingClass)
69
+ return SKIP_RESULT;
70
+ const items = [];
71
+ const argList = callNode.childForFieldName?.('arguments');
72
+ for (const arg of (argList?.children ?? [])) {
73
+ if (arg.type === 'constant' || arg.type === 'scope_resolution') {
74
+ items.push({ enclosingClass, mixinName: arg.text, heritageKind: calledName });
75
+ }
76
+ }
77
+ return items.length > 0 ? { kind: 'heritage', items } : SKIP_RESULT;
78
+ }
79
+ // attr_accessor / attr_reader / attr_writer -> property definitions
80
+ if (calledName === 'attr_accessor' || calledName === 'attr_reader' || calledName === 'attr_writer') {
81
+ const items = [];
82
+ const argList = callNode.childForFieldName?.('arguments');
83
+ for (const arg of (argList?.children ?? [])) {
84
+ if (arg.type === 'simple_symbol') {
85
+ items.push({
86
+ propName: arg.text.startsWith(':') ? arg.text.slice(1) : arg.text,
87
+ accessorType: calledName,
88
+ startLine: arg.startPosition.row,
89
+ endLine: arg.endPosition.row,
90
+ });
91
+ }
92
+ }
93
+ return items.length > 0 ? { kind: 'properties', items } : SKIP_RESULT;
94
+ }
95
+ // Everything else -> regular call
96
+ return CALL_RESULT;
97
+ }
@@ -0,0 +1,30 @@
1
+ /** @file cluster-enricher.ts @description LLM-based enrichment for community clusters, generating semantic names, keywords, and descriptions */
2
+ import { CommunityNode } from './community-processor.js';
3
/** Semantic metadata attached to one community cluster */
export interface ClusterEnrichment {
    /** Display name; the community's heuristic label is used when no better name is available */
    name: string;
    /** Keywords for the cluster; may be empty */
    keywords: string[];
    /** Description of the cluster; may be an empty string */
    description: string;
}
/** Outcome of an enrichment run */
export interface EnrichmentResult {
    /** Enrichment per community id */
    enrichments: Map<string, ClusterEnrichment>;
    /** Rough token estimate (about 4 characters per token over prompts and responses) */
    tokensUsed: number;
}
/** Minimal text-generation client the enricher depends on */
export interface LLMClient {
    generate: (prompt: string) => Promise<string>;
}
/** Description of one symbol that belongs to a cluster */
export interface ClusterMemberInfo {
    name: string;
    filePath: string;
    type: string;
}
/**
 * Enrich clusters with LLM-generated names, keywords, and descriptions (one LLM call per cluster)
 *
 * @param communities - Community nodes to enrich
 * @param memberMap - Map of communityId -> member info
 * @param llmClient - LLM client for generation
 * @param onProgress - Progress callback
 */
export declare const enrichClusters: (
    communities: CommunityNode[],
    memberMap: Map<string, ClusterMemberInfo[]>,
    llmClient: LLMClient,
    onProgress?: (current: number, total: number) => void,
) => Promise<EnrichmentResult>;
/** Enrich multiple clusters in a single LLM call (batch mode, more token-efficient) */
export declare const enrichClustersBatch: (
    communities: CommunityNode[],
    memberMap: Map<string, ClusterMemberInfo[]>,
    llmClient: LLMClient,
    batchSize?: number,
    onProgress?: (current: number, total: number) => void,
) => Promise<EnrichmentResult>;
@@ -0,0 +1,151 @@
1
+ // code-mapper/src/core/ingestion/cluster-enricher.ts
2
+ /** @file cluster-enricher.ts @description LLM-based enrichment for community clusters, generating semantic names, keywords, and descriptions */
3
/**
 * Build the single-cluster enrichment prompt.
 *
 * The prompt asks for `name`, `keywords`, and `description` so that the reply
 * carries every field the response parser reads; without a `keywords` request
 * the keyword list would stay empty on this code path.
 *
 * @param members - Cluster members; only `name` and `type` are put in the prompt
 * @param heuristicLabel - Heuristic label given to the model as a hint
 * @returns Prompt text requesting a JSON-only reply
 */
const buildEnrichmentPrompt = (members, heuristicLabel) => {
    // Cap the listed members to control token usage
    const MAX_LISTED_MEMBERS = 20;
    const memberList = members
        .slice(0, MAX_LISTED_MEMBERS)
        .map(m => `${m.name} (${m.type})`)
        .join(', ');
    const hiddenCount = members.length - MAX_LISTED_MEMBERS;
    const overflowNote = hiddenCount > 0 ? ` (+${hiddenCount} more)` : '';
    return `Analyze this code cluster and provide a semantic name, keywords, and a short description.

Heuristic: "${heuristicLabel}"
Members: ${memberList}${overflowNote}

Reply with JSON only:
{"name": "2-4 word semantic name", "keywords": ["3-5 short keywords"], "description": "One sentence describing purpose"}`;
};
17
/**
 * Parse a single-cluster LLM reply into an enrichment record.
 *
 * The first `{` through the last `}` of the reply is treated as JSON, which
 * tolerates markdown code fences and surrounding chatter. Every field is
 * type-checked because the model's output is untrusted: a non-string or blank
 * name falls back to the heuristic label, non-string keyword entries are
 * dropped, and a non-string description becomes an empty string.
 *
 * @param response - Raw text returned by the LLM
 * @param fallbackLabel - Heuristic label used when no usable name is found
 * @returns An object with `name`, `keywords`, and `description`; never throws
 */
const parseEnrichmentResponse = (response, fallbackLabel) => {
    const fallback = { name: fallbackLabel, keywords: [], description: '' };
    if (typeof response !== 'string') {
        return fallback;
    }
    const jsonMatch = response.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        return fallback;
    }
    let parsed;
    try {
        parsed = JSON.parse(jsonMatch[0]);
    }
    catch {
        // Malformed JSON: fall back to the heuristic label
        return fallback;
    }
    if (parsed === null || typeof parsed !== 'object') {
        return fallback;
    }
    const hasName = typeof parsed.name === 'string' && parsed.name.trim() !== '';
    const keywords = Array.isArray(parsed.keywords)
        ? parsed.keywords.filter((k) => typeof k === 'string' && k.trim() !== '')
        : [];
    return {
        name: hasName ? parsed.name.trim() : fallbackLabel,
        keywords,
        description: typeof parsed.description === 'string' ? parsed.description : '',
    };
};
40
/**
 * Enrich clusters one at a time, issuing one LLM call per community that has members
 *
 * Communities without members, and communities whose LLM call fails, keep
 * their heuristic label with empty keywords and description.
 *
 * @param communities - Community nodes to enrich
 * @param memberMap - Map of communityId -> member info
 * @param llmClient - LLM client for generation
 * @param onProgress - Progress callback, invoked with (1-based position, total) before each community is processed
 * @returns Enrichments keyed by community id plus a rough token estimate
 */
export const enrichClusters = async (communities, memberMap, llmClient, onProgress) => {
    const enrichments = new Map();
    let tokensUsed = 0;
    // Enrichment used whenever the LLM is not consulted or fails
    const heuristicOnly = (community) => ({
        name: community.heuristicLabel,
        keywords: [],
        description: '',
    });
    let position = 0;
    for (const community of communities) {
        position += 1;
        const members = memberMap.get(community.id) || [];
        onProgress?.(position, communities.length);
        if (members.length === 0) {
            // Nothing to describe to the model
            enrichments.set(community.id, heuristicOnly(community));
            continue;
        }
        try {
            const prompt = buildEnrichmentPrompt(members, community.heuristicLabel);
            const response = await llmClient.generate(prompt);
            // Rough token estimate (~4 chars per token)
            tokensUsed += prompt.length / 4 + response.length / 4;
            enrichments.set(community.id, parseEnrichmentResponse(response, community.heuristicLabel));
        }
        catch (error) {
            // Generation failed: keep the heuristic label
            console.warn(`Failed to enrich cluster ${community.id}:`, error);
            enrichments.set(community.id, heuristicOnly(community));
        }
    }
    return { enrichments, tokensUsed };
};
84
+ /** Enrich multiple clusters in a single LLM call (batch mode, more token-efficient) */
85
+ export const enrichClustersBatch = async (communities, memberMap, llmClient, batchSize = 5, onProgress) => {
86
+ const enrichments = new Map();
87
+ let tokensUsed = 0;
88
+ // Process communities in batches
89
+ for (let i = 0; i < communities.length; i += batchSize) {
90
+ // Report batch progress
91
+ onProgress?.(Math.min(i + batchSize, communities.length), communities.length);
92
+ const batch = communities.slice(i, i + batchSize);
93
+ const batchPrompt = batch.map((community, idx) => {
94
+ const members = memberMap.get(community.id) || [];
95
+ const limitedMembers = members.slice(0, 15);
96
+ const memberList = limitedMembers
97
+ .map(m => `${m.name} (${m.type})`)
98
+ .join(', ');
99
+ return `Cluster ${idx + 1} (id: ${community.id}):
100
+ Heuristic: "${community.heuristicLabel}"
101
+ Members: ${memberList}`;
102
+ }).join('\n\n');
103
+ const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
104
+
105
+ ${batchPrompt}
106
+
107
+ Output JSON array:
108
+ [
109
+ {"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
110
+ ...
111
+ ]`;
112
+ try {
113
+ const response = await llmClient.generate(prompt);
114
+ tokensUsed += prompt.length / 4 + response.length / 4;
115
+ // Parse batch JSON response
116
+ const jsonMatch = response.match(/\[[\s\S]*\]/);
117
+ if (jsonMatch) {
118
+ const parsed = JSON.parse(jsonMatch[0]);
119
+ for (const item of parsed) {
120
+ enrichments.set(item.id, {
121
+ name: item.name,
122
+ keywords: item.keywords || [],
123
+ description: item.description || '',
124
+ });
125
+ }
126
+ }
127
+ }
128
+ catch (error) {
129
+ console.warn('Batch enrichment failed, falling back to heuristics:', error);
130
+ // Fall back to heuristics for this batch
131
+ for (const community of batch) {
132
+ enrichments.set(community.id, {
133
+ name: community.heuristicLabel,
134
+ keywords: [],
135
+ description: '',
136
+ });
137
+ }
138
+ }
139
+ }
140
+ // Fill in missing communities with heuristic labels
141
+ for (const community of communities) {
142
+ if (!enrichments.has(community.id)) {
143
+ enrichments.set(community.id, {
144
+ name: community.heuristicLabel,
145
+ keywords: [],
146
+ description: '',
147
+ });
148
+ }
149
+ }
150
+ return { enrichments, tokensUsed };
151
+ };
@@ -0,0 +1,26 @@
1
+ /** @file community-processor.ts @description Detects code communities using the Leiden algorithm on CALLS/EXTENDS/IMPLEMENTS edges, grouping symbols by functional area */
2
+ import { KnowledgeGraph } from '../graph/types.js';
3
/** A detected community of symbols */
export interface CommunityNode {
    /** Identifier of the form `comm_<number>` */
    id: string;
    /** Display label; initialised to the heuristic label */
    label: string;
    /** Auto-generated label for the community */
    heuristicLabel: string;
    /** Cohesion score for the community's members */
    cohesion: number;
    /** Number of member symbols */
    symbolCount: number;
}
/** Assignment of one graph node to one community */
export interface CommunityMembership {
    nodeId: string;
    /** Identifier of the form `comm_<number>` */
    communityId: string;
}
/** Full outcome of a community detection run */
export interface CommunityDetectionResult {
    communities: CommunityNode[];
    memberships: CommunityMembership[];
    stats: {
        /** Community count reported by the detection algorithm */
        totalCommunities: number;
        /** Modularity reported by the detection algorithm (0 when the single-community fallback is used) */
        modularity: number;
        /** Number of nodes in the graph that was clustered */
        nodesProcessed: number;
    };
}
/** Palette of hex colors used to visualise communities */
export declare const COMMUNITY_COLORS: string[];
/** Color for a community index, cycling through the palette */
export declare const getCommunityColor: (communityIndex: number) => string;
/** Detect communities in the knowledge graph using the Leiden algorithm on CALLS/EXTENDS/IMPLEMENTS edges */
export declare const processCommunities: (
    knowledgeGraph: KnowledgeGraph,
    onProgress?: (message: string, progress: number) => void,
) => Promise<CommunityDetectionResult>;
@@ -0,0 +1,272 @@
1
+ // code-mapper/src/core/ingestion/community-processor.ts
2
+ /** @file community-processor.ts @description Detects code communities using the Leiden algorithm on CALLS/EXTENDS/IMPLEMENTS edges, grouping symbols by functional area */
3
+ // Leiden algorithm is vendored from graphology (never published to npm);
4
+ // loaded via createRequire for ESM compatibility
5
+ import Graph from 'graphology';
6
+ import { createRequire } from 'node:module';
7
+ import { fileURLToPath } from 'node:url';
8
+ import { dirname, resolve } from 'node:path';
9
+ const __filename = fileURLToPath(import.meta.url);
10
+ const __dirname = dirname(__filename);
11
+ // Navigate to package root, works from both src/ and dist/
12
+ const leidenPath = resolve(__dirname, '..', '..', '..', 'vendor', 'leiden', 'index.cjs');
13
+ const _require = createRequire(import.meta.url);
14
+ const leiden = _require(leidenPath);
15
// Community colors for visualization
export const COMMUNITY_COLORS = [
    '#ef4444', // red
    '#f97316', // orange
    '#eab308', // yellow
    '#22c55e', // green
    '#06b6d4', // cyan
    '#3b82f6', // blue
    '#8b5cf6', // violet
    '#d946ef', // fuchsia
    '#ec4899', // pink
    '#f43f5e', // rose
    '#14b8a6', // teal
    '#84cc16', // lime
];
/**
 * Pick the palette color for a community index, cycling through the palette.
 *
 * Always returns a color: negative indices wrap around from the end of the
 * palette (JavaScript's `%` keeps the sign of the dividend, so a plain modulo
 * would index out of range), fractional indices are truncated, and non-finite
 * values map to the first color.
 *
 * @param communityIndex - Community number
 * @returns Hex color string from COMMUNITY_COLORS
 */
export const getCommunityColor = (communityIndex) => {
    const paletteSize = COMMUNITY_COLORS.length;
    const index = Number.isFinite(communityIndex) ? Math.trunc(communityIndex) : 0;
    return COMMUNITY_COLORS[((index % paletteSize) + paletteSize) % paletteSize];
};
33
/**
 * Detect communities in the knowledge graph using the Leiden algorithm on CALLS/EXTENDS/IMPLEMENTS edges
 *
 * Steps: count symbols to choose large-graph mode, build the clustering graph,
 * run Leiden under a timeout guard, then derive community nodes and
 * node-to-community memberships.
 *
 * @param knowledgeGraph - Source graph of symbols and relationships
 * @param onProgress - Optional callback receiving a status message and a 0-100 progress value
 * @returns Community nodes, memberships, and summary stats; all empty when the clustering graph has no nodes
 * @throws Re-throws any Leiden failure other than the timeout
 */
export const processCommunities = async (knowledgeGraph, onProgress) => {
    onProgress?.('Building graph for community detection...', 0);
    // Pre-check symbol count to determine large-graph mode
    let symbolCount = 0;
    knowledgeGraph.forEachNode(node => {
        if (node.label === 'Function' || node.label === 'Class' || node.label === 'Method' || node.label === 'Interface') {
            symbolCount++;
        }
    });
    const isLarge = symbolCount > 10_000;
    const graph = buildGraphologyGraph(knowledgeGraph, isLarge);
    if (graph.order === 0) {
        return {
            communities: [],
            memberships: [],
            stats: { totalCommunities: 0, modularity: 0, nodesProcessed: 0 }
        };
    }
    const nodeCount = graph.order;
    const edgeCount = graph.size;
    onProgress?.(`Running Leiden on ${nodeCount} nodes, ${edgeCount} edges${isLarge ? ` (filtered from ${symbolCount} symbols)` : ''}...`, 30);
    // Large graphs: higher resolution + capped iterations (~95%+ modularity in 2-3 iterations)
    // Timeout: abort after 60s for pathological graph structures
    const LEIDEN_TIMEOUT_MS = 60_000;
    // Sentinel compared by identity so unrelated errors are never mistaken for the timeout
    const timeoutError = new Error('Leiden timeout');
    let timeoutHandle;
    let details;
    try {
        // NOTE(review): leiden.detailed is invoked inline while the race array is built; if it
        // runs synchronously, the timer below is only created after it returns and cannot
        // interrupt it — confirm whether the vendored implementation is async.
        details = await Promise.race([
            Promise.resolve(leiden.detailed(graph, {
                resolution: isLarge ? 2.0 : 1.0,
                maxIterations: isLarge ? 3 : 0,
            })),
            new Promise((_, reject) => {
                timeoutHandle = setTimeout(() => reject(timeoutError), LEIDEN_TIMEOUT_MS);
            }),
        ]);
    }
    catch (e) {
        if (e === timeoutError) {
            onProgress?.('Community detection timed out, using fallback...', 60);
            // Fallback: assign all nodes to a single community
            const communities = {};
            graph.forEachNode((node) => { communities[node] = 0; });
            details = { communities, count: 1, modularity: 0 };
        }
        else {
            throw e;
        }
    }
    finally {
        // Without this the pending timer keeps the event loop alive for up to 60s
        // after detection has finished; clearTimeout(undefined) is a no-op.
        clearTimeout(timeoutHandle);
    }
    onProgress?.(`Found ${details.count} communities...`, 60);
    // Create community nodes with heuristic labels
    const communityNodes = createCommunityNodes(details.communities, details.count, graph, knowledgeGraph);
    onProgress?.('Creating membership edges...', 80);
    // Create membership mappings: one entry per clustered node
    const memberships = Object.entries(details.communities).map(([nodeId, communityNum]) => ({
        nodeId,
        communityId: `comm_${communityNum}`,
    }));
    onProgress?.('Community detection complete!', 100);
    return {
        communities: communityNodes,
        memberships,
        stats: {
            totalCommunities: details.count,
            modularity: details.modularity,
            nodesProcessed: graph.order,
        }
    };
};
103
// Edges below this confidence are ignored in large-graph mode
const MIN_CONFIDENCE_LARGE = 0.5;
/**
 * Build the undirected, self-loop-free graphology graph used for clustering
 *
 * Only Function/Class/Method/Interface nodes that take part in at least one
 * CALLS/EXTENDS/IMPLEMENTS relationship are added. In large-graph mode,
 * relationships with confidence below MIN_CONFIDENCE_LARGE are ignored and a
 * node needs at least two qualifying relationship endpoints to be kept, which
 * reduces noise and Leiden runtime.
 *
 * @param knowledgeGraph - Source graph providing forEachNode / forEachRelationship
 * @param isLarge - Whether to apply the large-graph filters
 * @returns Graph whose nodes carry `name`, `filePath`, and `type` attributes
 */
const buildGraphologyGraph = (knowledgeGraph, isLarge) => {
    const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
    const symbolTypes = new Set(['Function', 'Class', 'Method', 'Interface']);
    const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
    // A relationship counts when it has a clustering type, is not a self-loop,
    // and (large mode only) meets the confidence floor
    const qualifies = (rel) => {
        if (!clusteringRelTypes.has(rel.type)) {
            return false;
        }
        if (rel.sourceId === rel.targetId) {
            return false;
        }
        return !(isLarge && rel.confidence < MIN_CONFIDENCE_LARGE);
    };
    // Pass 1: count qualifying relationship endpoints per node id
    // (a node is "connected" exactly when it has an entry here)
    const endpointCount = new Map();
    const bump = (id) => endpointCount.set(id, (endpointCount.get(id) || 0) + 1);
    knowledgeGraph.forEachRelationship(rel => {
        if (qualifies(rel)) {
            bump(rel.sourceId);
            bump(rel.targetId);
        }
    });
    // Pass 2: add connected symbol nodes; large graphs also drop degree-1 nodes
    const minEndpoints = isLarge ? 2 : 1;
    knowledgeGraph.forEachNode(node => {
        const isSymbol = symbolTypes.has(node.label);
        const endpoints = endpointCount.get(node.id) || 0;
        if (isSymbol && endpoints >= minEndpoints) {
            graph.addNode(node.id, {
                name: node.properties.name,
                filePath: node.properties.filePath,
                type: node.label,
            });
        }
    });
    // Pass 3: add one edge per connected pair of retained nodes
    knowledgeGraph.forEachRelationship(rel => {
        if (!qualifies(rel)) {
            return;
        }
        const bothPresent = graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId);
        if (bothPresent && !graph.hasEdge(rel.sourceId, rel.targetId)) {
            graph.addEdge(rel.sourceId, rel.targetId);
        }
    });
    return graph;
};
151
/**
 * Turn a node -> community assignment into Community node records.
 *
 * Communities with fewer than two members are dropped. Each remaining one gets
 * a heuristic label, a cohesion score and its member count; the result is
 * ordered by member count, largest first.
 *
 * @param communities - Object mapping node ID to community number
 * @param communityCount - Total number of communities (not read by this function)
 * @param graph - Clustering graph passed on to label and cohesion helpers
 * @param knowledgeGraph - Source of node file paths via `iterNodes()`
 * @returns Array of `{ id, label, heuristicLabel, cohesion, symbolCount }`
 */
const createCommunityNodes = (communities, communityCount, graph, knowledgeGraph) => {
    // Bucket member IDs under their community number
    const membersByCommunity = new Map();
    for (const [nodeId, commNum] of Object.entries(communities)) {
        const bucket = membersByCommunity.get(commNum);
        if (bucket) {
            bucket.push(nodeId);
        }
        else {
            membersByCommunity.set(commNum, [nodeId]);
        }
    }
    // Node ID -> file path, only for nodes that actually carry a path
    const filePathById = new Map();
    for (const node of knowledgeGraph.iterNodes()) {
        const { filePath } = node.properties;
        if (filePath) {
            filePathById.set(node.id, filePath);
        }
    }
    const result = [];
    for (const [commNum, memberIds] of membersByCommunity) {
        // Singleton communities carry no grouping information
        if (memberIds.length < 2) {
            continue;
        }
        const heuristicLabel = generateHeuristicLabel(memberIds, filePathById, graph, commNum);
        const cohesion = calculateCohesion(memberIds, graph);
        result.push({
            id: `comm_${commNum}`,
            label: heuristicLabel,
            heuristicLabel,
            cohesion,
            symbolCount: memberIds.length,
        });
    }
    // Largest communities first
    return result.sort((a, b) => b.symbolCount - a.symbolCount);
};
187
/**
 * Derive a readable label for a community.
 *
 * Strategy, in order:
 *  1. The most frequent parent folder among member file paths, ignoring
 *     generic folder names (src, lib, core, ...), capitalized.
 *  2. A common prefix of member names when there are more than two names and
 *     the prefix is longer than two characters, capitalized.
 *  3. `Cluster_<commNum>` as a last resort.
 *
 * @param memberIds - Node IDs belonging to the community
 * @param nodePathMap - Map from node ID to file path ('/'-separated)
 * @param graph - Graph providing the `name` attribute of each member
 * @param commNum - Community number used in the fallback label
 * @returns The chosen label
 */
const generateHeuristicLabel = (memberIds, nodePathMap, graph, commNum) => {
    const genericFolders = new Set(['src', 'lib', 'core', 'utils', 'common', 'shared', 'helpers']);
    const capitalize = (text) => text.charAt(0).toUpperCase() + text.slice(1);
    // Tally the immediate parent folder of every member's file
    const tally = new Map();
    for (const memberId of memberIds) {
        const segments = (nodePathMap.get(memberId) || '').split('/').filter(Boolean);
        if (segments.length < 2) {
            continue;
        }
        const parent = segments[segments.length - 2];
        if (genericFolders.has(parent.toLowerCase())) {
            continue;
        }
        tally.set(parent, (tally.get(parent) || 0) + 1);
    }
    // The first folder to reach the highest count wins
    let winner = '';
    let winnerCount = 0;
    for (const [folder, count] of tally) {
        if (count > winnerCount) {
            winner = folder;
            winnerCount = count;
        }
    }
    if (winner) {
        return capitalize(winner);
    }
    // No usable folder: try a shared name prefix instead
    const names = memberIds
        .map((memberId) => graph.getNodeAttribute(memberId, 'name'))
        .filter(Boolean);
    if (names.length > 2) {
        const prefix = findCommonPrefix(names);
        if (prefix.length > 2) {
            return capitalize(prefix);
        }
    }
    return `Cluster_${commNum}`;
};
233
/**
 * Longest prefix shared by every string in the list.
 *
 * Only the lexicographically smallest and largest entries need comparing:
 * whatever prefix those two share is shared by everything between them.
 *
 * @param strings - Strings to compare; the array is not modified
 * @returns The shared prefix, or '' for an empty list or no common prefix
 */
const findCommonPrefix = (strings) => {
    if (strings.length === 0) {
        return '';
    }
    let lowest = strings[0];
    let highest = strings[0];
    for (const candidate of strings) {
        if (candidate < lowest) {
            lowest = candidate;
        }
        if (candidate > highest) {
            highest = candidate;
        }
    }
    let length = 0;
    while (length < lowest.length && lowest[length] === highest[length]) {
        length += 1;
    }
    return lowest.slice(0, length);
};
246
+ /** Estimate cohesion score (0-1) based on internal edge density, sampling for large communities */
247
+ const calculateCohesion = (memberIds, graph) => {
248
+ if (memberIds.length <= 1)
249
+ return 1.0;
250
+ const memberSet = new Set(memberIds);
251
+ // Sample up to 50 members to avoid O(N^2) cost
252
+ const SAMPLE_SIZE = 50;
253
+ const sample = memberIds.length <= SAMPLE_SIZE
254
+ ? memberIds
255
+ : memberIds.slice(0, SAMPLE_SIZE);
256
+ let internalEdges = 0;
257
+ let totalEdges = 0;
258
+ for (const nodeId of sample) {
259
+ if (!graph.hasNode(nodeId))
260
+ continue;
261
+ graph.forEachNeighbor(nodeId, (neighbor) => {
262
+ totalEdges++;
263
+ if (memberSet.has(neighbor)) {
264
+ internalEdges++;
265
+ }
266
+ });
267
+ }
268
+ // Cohesion = fraction of edges that are internal
269
+ if (totalEdges === 0)
270
+ return 1.0;
271
+ return Math.min(1.0, internalEdges / totalEdges);
272
+ };
@@ -0,0 +1,5 @@
1
+ /** @file constants.ts @description Tree-sitter parsing buffer size constants and adaptive sizing */
2
/** Lower bound of the tree-sitter parsing buffer (512 KB) */
export declare const TREE_SITTER_BUFFER_SIZE: number;
/** Upper bound of the tree-sitter parsing buffer (32 MB) */
export declare const TREE_SITTER_MAX_BUFFER: number;
/**
 * Adaptive buffer size for a file: twice its content length, kept within
 * the 512 KB to 32 MB range.
 *
 * @param contentLength - Length of the content to be parsed
 * @returns Buffer size to use
 */
export declare const getTreeSitterBufferSize: (contentLength: number) => number;
@@ -0,0 +1,8 @@
1
+ // code-mapper/src/core/ingestion/constants.ts
2
+ /** @file constants.ts @description Tree-sitter parsing buffer size constants and adaptive sizing */
3
/** Smallest buffer handed to tree-sitter parsing: 512 KB */
export const TREE_SITTER_BUFFER_SIZE = 512 * 1024;
/**
 * Largest buffer allowed: 32 MB. Per the original note this value also serves
 * as the file-size skip threshold (that usage is not visible in this module).
 */
export const TREE_SITTER_MAX_BUFFER = 32 * 1024 * 1024;
/**
 * Pick a parsing buffer size proportional to the file: twice the content
 * length, never below TREE_SITTER_BUFFER_SIZE and never above
 * TREE_SITTER_MAX_BUFFER.
 *
 * @param contentLength - Length of the content to be parsed
 * @returns Buffer size to use
 */
export const getTreeSitterBufferSize = (contentLength) => {
    const doubled = contentLength * 2;
    if (doubled >= TREE_SITTER_MAX_BUFFER) {
        return TREE_SITTER_MAX_BUFFER;
    }
    if (doubled <= TREE_SITTER_BUFFER_SIZE) {
        return TREE_SITTER_BUFFER_SIZE;
    }
    return doubled;
};
@@ -0,0 +1,23 @@
1
+ /** @file entry-point-scoring.ts @description Scores functions as entry point candidates using call ratio, export status, name patterns, and framework detection */
2
+ import { SupportedLanguages } from '../../config/supported-languages.js';
3
/** Outcome of scoring a function/method as an entry point candidate */
export interface EntryPointScoreResult {
    /** Computed entry point score */
    score: number;
    /** Reasons explaining the score */
    reasons: string[];
}
7
/**
 * Calculate an entry point score for a function/method
 *
 * Score = baseScore * exportMultiplier * nameMultiplier * frameworkMultiplier
 *
 * @param name - Function/method name
 * @param language - Programming language
 * @param isExported - Whether the function is exported/public
 * @param callerCount - Number of callers
 * @param calleeCount - Number of callees
 * @param filePath - Optional path of the file containing the function.
 *   NOTE(review): presumably feeds the framework detection mentioned in the
 *   file header; confirm against the implementation.
 * @returns Score and reasons explaining the score
 */
export declare function calculateEntryPointScore(name: string, language: SupportedLanguages, isExported: boolean, callerCount: number, calleeCount: number, filePath?: string): EntryPointScoreResult;
20
/**
 * Tell whether a path points at a test file; such files are excluded from
 * entry points.
 *
 * @param filePath - Path to inspect
 * @returns `true` when the path is considered a test file
 */
export declare function isTestFile(filePath: string): boolean;
22
/**
 * Tell whether a path points at a utility/helper file; such files get lower
 * entry point priority.
 *
 * @param filePath - Path to inspect
 * @returns `true` when the path is considered a utility/helper file
 */
export declare function isUtilityFile(filePath: string): boolean;