@zuvia-software-solutions/code-mapper 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +215 -0
- package/dist/cli/ai-context.d.ts +19 -0
- package/dist/cli/ai-context.js +168 -0
- package/dist/cli/analyze.d.ts +7 -0
- package/dist/cli/analyze.js +325 -0
- package/dist/cli/augment.d.ts +7 -0
- package/dist/cli/augment.js +27 -0
- package/dist/cli/clean.d.ts +5 -0
- package/dist/cli/clean.js +56 -0
- package/dist/cli/eval-server.d.ts +25 -0
- package/dist/cli/eval-server.js +365 -0
- package/dist/cli/index.d.ts +6 -0
- package/dist/cli/index.js +102 -0
- package/dist/cli/lazy-action.d.ts +6 -0
- package/dist/cli/lazy-action.js +19 -0
- package/dist/cli/list.d.ts +2 -0
- package/dist/cli/list.js +27 -0
- package/dist/cli/mcp.d.ts +8 -0
- package/dist/cli/mcp.js +35 -0
- package/dist/cli/refresh.d.ts +12 -0
- package/dist/cli/refresh.js +165 -0
- package/dist/cli/serve.d.ts +5 -0
- package/dist/cli/serve.js +8 -0
- package/dist/cli/setup.d.ts +6 -0
- package/dist/cli/setup.js +218 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.js +33 -0
- package/dist/cli/tool.d.ts +28 -0
- package/dist/cli/tool.js +87 -0
- package/dist/config/ignore-service.d.ts +32 -0
- package/dist/config/ignore-service.js +282 -0
- package/dist/config/supported-languages.d.ts +23 -0
- package/dist/config/supported-languages.js +52 -0
- package/dist/core/augmentation/engine.d.ts +22 -0
- package/dist/core/augmentation/engine.js +232 -0
- package/dist/core/embeddings/embedder.d.ts +35 -0
- package/dist/core/embeddings/embedder.js +171 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +41 -0
- package/dist/core/embeddings/embedding-pipeline.js +402 -0
- package/dist/core/embeddings/index.d.ts +5 -0
- package/dist/core/embeddings/index.js +6 -0
- package/dist/core/embeddings/text-generator.d.ts +20 -0
- package/dist/core/embeddings/text-generator.js +159 -0
- package/dist/core/embeddings/types.d.ts +60 -0
- package/dist/core/embeddings/types.js +23 -0
- package/dist/core/graph/graph.d.ts +4 -0
- package/dist/core/graph/graph.js +65 -0
- package/dist/core/graph/types.d.ts +69 -0
- package/dist/core/graph/types.js +3 -0
- package/dist/core/incremental/child-process.d.ts +8 -0
- package/dist/core/incremental/child-process.js +649 -0
- package/dist/core/incremental/refresh-coordinator.d.ts +32 -0
- package/dist/core/incremental/refresh-coordinator.js +147 -0
- package/dist/core/incremental/types.d.ts +78 -0
- package/dist/core/incremental/types.js +153 -0
- package/dist/core/incremental/watcher.d.ts +63 -0
- package/dist/core/incremental/watcher.js +338 -0
- package/dist/core/ingestion/ast-cache.d.ts +12 -0
- package/dist/core/ingestion/ast-cache.js +34 -0
- package/dist/core/ingestion/call-processor.d.ts +34 -0
- package/dist/core/ingestion/call-processor.js +937 -0
- package/dist/core/ingestion/call-routing.d.ts +40 -0
- package/dist/core/ingestion/call-routing.js +97 -0
- package/dist/core/ingestion/cluster-enricher.d.ts +30 -0
- package/dist/core/ingestion/cluster-enricher.js +151 -0
- package/dist/core/ingestion/community-processor.d.ts +26 -0
- package/dist/core/ingestion/community-processor.js +272 -0
- package/dist/core/ingestion/constants.d.ts +5 -0
- package/dist/core/ingestion/constants.js +8 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +23 -0
- package/dist/core/ingestion/entry-point-scoring.js +317 -0
- package/dist/core/ingestion/export-detection.d.ts +11 -0
- package/dist/core/ingestion/export-detection.js +203 -0
- package/dist/core/ingestion/filesystem-walker.d.ts +18 -0
- package/dist/core/ingestion/filesystem-walker.js +64 -0
- package/dist/core/ingestion/framework-detection.d.ts +42 -0
- package/dist/core/ingestion/framework-detection.js +405 -0
- package/dist/core/ingestion/heritage-processor.d.ts +15 -0
- package/dist/core/ingestion/heritage-processor.js +237 -0
- package/dist/core/ingestion/import-processor.d.ts +31 -0
- package/dist/core/ingestion/import-processor.js +416 -0
- package/dist/core/ingestion/language-config.d.ts +32 -0
- package/dist/core/ingestion/language-config.js +161 -0
- package/dist/core/ingestion/mro-processor.d.ts +32 -0
- package/dist/core/ingestion/mro-processor.js +343 -0
- package/dist/core/ingestion/named-binding-extraction.d.ts +51 -0
- package/dist/core/ingestion/named-binding-extraction.js +343 -0
- package/dist/core/ingestion/parsing-processor.d.ts +20 -0
- package/dist/core/ingestion/parsing-processor.js +282 -0
- package/dist/core/ingestion/pipeline.d.ts +3 -0
- package/dist/core/ingestion/pipeline.js +416 -0
- package/dist/core/ingestion/process-processor.d.ts +42 -0
- package/dist/core/ingestion/process-processor.js +357 -0
- package/dist/core/ingestion/resolution-context.d.ts +40 -0
- package/dist/core/ingestion/resolution-context.js +171 -0
- package/dist/core/ingestion/resolvers/csharp.d.ts +10 -0
- package/dist/core/ingestion/resolvers/csharp.js +101 -0
- package/dist/core/ingestion/resolvers/go.d.ts +8 -0
- package/dist/core/ingestion/resolvers/go.js +33 -0
- package/dist/core/ingestion/resolvers/index.d.ts +14 -0
- package/dist/core/ingestion/resolvers/index.js +10 -0
- package/dist/core/ingestion/resolvers/jvm.d.ts +9 -0
- package/dist/core/ingestion/resolvers/jvm.js +74 -0
- package/dist/core/ingestion/resolvers/php.d.ts +7 -0
- package/dist/core/ingestion/resolvers/php.js +30 -0
- package/dist/core/ingestion/resolvers/ruby.d.ts +9 -0
- package/dist/core/ingestion/resolvers/ruby.js +13 -0
- package/dist/core/ingestion/resolvers/rust.d.ts +5 -0
- package/dist/core/ingestion/resolvers/rust.js +62 -0
- package/dist/core/ingestion/resolvers/standard.d.ts +16 -0
- package/dist/core/ingestion/resolvers/standard.js +144 -0
- package/dist/core/ingestion/resolvers/utils.d.ts +18 -0
- package/dist/core/ingestion/resolvers/utils.js +113 -0
- package/dist/core/ingestion/structure-processor.d.ts +4 -0
- package/dist/core/ingestion/structure-processor.js +39 -0
- package/dist/core/ingestion/symbol-table.d.ts +34 -0
- package/dist/core/ingestion/symbol-table.js +48 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +20 -0
- package/dist/core/ingestion/tree-sitter-queries.js +691 -0
- package/dist/core/ingestion/type-env.d.ts +52 -0
- package/dist/core/ingestion/type-env.js +349 -0
- package/dist/core/ingestion/type-extractors/c-cpp.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/c-cpp.js +214 -0
- package/dist/core/ingestion/type-extractors/csharp.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/csharp.js +224 -0
- package/dist/core/ingestion/type-extractors/go.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/go.js +261 -0
- package/dist/core/ingestion/type-extractors/index.d.ts +20 -0
- package/dist/core/ingestion/type-extractors/index.js +30 -0
- package/dist/core/ingestion/type-extractors/jvm.d.ts +5 -0
- package/dist/core/ingestion/type-extractors/jvm.js +386 -0
- package/dist/core/ingestion/type-extractors/php.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/php.js +280 -0
- package/dist/core/ingestion/type-extractors/python.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/python.js +175 -0
- package/dist/core/ingestion/type-extractors/ruby.d.ts +12 -0
- package/dist/core/ingestion/type-extractors/ruby.js +218 -0
- package/dist/core/ingestion/type-extractors/rust.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/rust.js +290 -0
- package/dist/core/ingestion/type-extractors/shared.d.ts +81 -0
- package/dist/core/ingestion/type-extractors/shared.js +322 -0
- package/dist/core/ingestion/type-extractors/swift.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/swift.js +140 -0
- package/dist/core/ingestion/type-extractors/types.d.ts +111 -0
- package/dist/core/ingestion/type-extractors/types.js +4 -0
- package/dist/core/ingestion/type-extractors/typescript.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/typescript.js +227 -0
- package/dist/core/ingestion/utils.d.ts +73 -0
- package/dist/core/ingestion/utils.js +992 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +99 -0
- package/dist/core/ingestion/workers/parse-worker.js +1055 -0
- package/dist/core/ingestion/workers/worker-pool.d.ts +15 -0
- package/dist/core/ingestion/workers/worker-pool.js +123 -0
- package/dist/core/lbug/csv-generator.d.ts +28 -0
- package/dist/core/lbug/csv-generator.js +355 -0
- package/dist/core/lbug/lbug-adapter.d.ts +96 -0
- package/dist/core/lbug/lbug-adapter.js +753 -0
- package/dist/core/lbug/schema.d.ts +46 -0
- package/dist/core/lbug/schema.js +402 -0
- package/dist/core/search/bm25-index.d.ts +20 -0
- package/dist/core/search/bm25-index.js +123 -0
- package/dist/core/search/hybrid-search.d.ts +32 -0
- package/dist/core/search/hybrid-search.js +131 -0
- package/dist/core/search/query-cache.d.ts +18 -0
- package/dist/core/search/query-cache.js +47 -0
- package/dist/core/search/query-expansion.d.ts +19 -0
- package/dist/core/search/query-expansion.js +75 -0
- package/dist/core/search/reranker.d.ts +29 -0
- package/dist/core/search/reranker.js +122 -0
- package/dist/core/search/types.d.ts +154 -0
- package/dist/core/search/types.js +51 -0
- package/dist/core/semantic/tsgo-service.d.ts +67 -0
- package/dist/core/semantic/tsgo-service.js +355 -0
- package/dist/core/tree-sitter/parser-loader.d.ts +12 -0
- package/dist/core/tree-sitter/parser-loader.js +71 -0
- package/dist/lib/memory-guard.d.ts +35 -0
- package/dist/lib/memory-guard.js +70 -0
- package/dist/lib/utils.d.ts +3 -0
- package/dist/lib/utils.js +6 -0
- package/dist/mcp/compatible-stdio-transport.d.ts +32 -0
- package/dist/mcp/compatible-stdio-transport.js +209 -0
- package/dist/mcp/core/embedder.d.ts +24 -0
- package/dist/mcp/core/embedder.js +168 -0
- package/dist/mcp/core/lbug-adapter.d.ts +29 -0
- package/dist/mcp/core/lbug-adapter.js +330 -0
- package/dist/mcp/local/local-backend.d.ts +188 -0
- package/dist/mcp/local/local-backend.js +2759 -0
- package/dist/mcp/resources.d.ts +22 -0
- package/dist/mcp/resources.js +379 -0
- package/dist/mcp/server.d.ts +10 -0
- package/dist/mcp/server.js +217 -0
- package/dist/mcp/staleness.d.ts +10 -0
- package/dist/mcp/staleness.js +25 -0
- package/dist/mcp/tools.d.ts +21 -0
- package/dist/mcp/tools.js +202 -0
- package/dist/server/api.d.ts +5 -0
- package/dist/server/api.js +340 -0
- package/dist/server/mcp-http.d.ts +7 -0
- package/dist/server/mcp-http.js +95 -0
- package/dist/storage/git.d.ts +6 -0
- package/dist/storage/git.js +35 -0
- package/dist/storage/repo-manager.d.ts +87 -0
- package/dist/storage/repo-manager.js +249 -0
- package/dist/types/pipeline.d.ts +35 -0
- package/dist/types/pipeline.js +20 -0
- package/hooks/claude/code-mapper-hook.cjs +238 -0
- package/hooks/claude/pre-tool-use.sh +79 -0
- package/hooks/claude/session-start.sh +42 -0
- package/models/mlx-embedder.py +185 -0
- package/package.json +100 -0
- package/scripts/patch-tree-sitter-swift.cjs +74 -0
- package/vendor/leiden/index.cjs +355 -0
- package/vendor/leiden/utils.cjs +392 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/** @file call-routing.ts @description Per-language call routing dispatch, primarily for Ruby where imports, heritage, and property definitions are expressed as method calls */
|
|
2
|
+
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
3
|
+
export type CallRoutingResult = RubyCallRouting | null;
|
|
4
|
+
export type CallRouter = (calledName: string, callNode: any) => CallRoutingResult;
|
|
5
|
+
export declare const callRouters: Record<SupportedLanguages, CallRouter>;
|
|
6
|
+
/**
 * Result of classifying a Ruby call node, discriminated by `kind`:
 *
 * - `import`     — a `require` / `require_relative` call whose string argument could be read.
 *                  `isRelative` is true for `require_relative`.
 * - `heritage`   — an `include` / `extend` / `prepend` call inside a class or module.
 * - `properties` — an `attr_accessor` / `attr_reader` / `attr_writer` call with symbol arguments.
 * - `call`       — any other method name; handled as a regular call.
 * - `skip`       — one of the special method names above whose payload could not be extracted.
 */
export type RubyCallRouting = {
    kind: 'import';
    importPath: string;
    isRelative: boolean;
} | {
    kind: 'heritage';
    items: RubyHeritageItem[];
} | {
    kind: 'properties';
    items: RubyPropertyItem[];
} | {
    kind: 'call';
} | {
    kind: 'skip';
};
|
|
21
|
+
/** One mixin relationship extracted from an `include` / `extend` / `prepend` call. */
export interface RubyHeritageItem {
    /** Name of the nearest enclosing `class` or `module` node that has a name. */
    enclosingClass: string;
    /** Source text of the mixin argument (a constant or a scope-resolution expression). */
    mixinName: string;
    /** The method name that was called to mix the module in. */
    heritageKind: 'include' | 'extend' | 'prepend';
}
|
|
26
|
+
export type RubyAccessorType = 'attr_accessor' | 'attr_reader' | 'attr_writer';
|
|
27
|
+
/** One property declared through an `attr_accessor` / `attr_reader` / `attr_writer` call. */
export interface RubyPropertyItem {
    /** Symbol text with the leading `:` removed. */
    propName: string;
    /** The accessor method that declared the property. */
    accessorType: RubyAccessorType;
    /** `startPosition.row` of the symbol argument node. */
    startLine: number;
    /** `endPosition.row` of the symbol argument node. */
    endLine: number;
}
|
|
33
|
+
/**
|
|
34
|
+
* Classify a Ruby call node and extract its semantic payload
|
|
35
|
+
*
|
|
36
|
+
* @param calledName - The method name (e.g. 'require', 'include', 'attr_accessor')
|
|
37
|
+
* @param callNode - The tree-sitter call AST node
|
|
38
|
+
* @returns Discriminated union describing the call's semantic role
|
|
39
|
+
*/
|
|
40
|
+
export declare function routeRubyCall(calledName: string, callNode: any): RubyCallRouting;
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
// code-mapper/src/core/ingestion/call-routing.ts
|
|
2
|
+
/** @file call-routing.ts @description Per-language call routing dispatch, primarily for Ruby where imports, heritage, and property definitions are expressed as method calls */
|
|
3
|
+
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
4
|
+
// No-op router: returns null for every call (passthrough to normal processing)
const noRouting = () => null;
/**
 * Per-language call routing table, keyed by SupportedLanguages member.
 *
 * Every language except Ruby maps to `noRouting` (always null, i.e. the call
 * goes through normal call processing). Ruby maps to `routeRubyCall`, which is
 * a hoisted function declaration defined further down in this module.
 */
export const callRouters = {
    [SupportedLanguages.JavaScript]: noRouting,
    [SupportedLanguages.TypeScript]: noRouting,
    [SupportedLanguages.Python]: noRouting,
    [SupportedLanguages.Java]: noRouting,
    [SupportedLanguages.Kotlin]: noRouting,
    [SupportedLanguages.Go]: noRouting,
    [SupportedLanguages.Rust]: noRouting,
    [SupportedLanguages.CSharp]: noRouting,
    [SupportedLanguages.PHP]: noRouting,
    [SupportedLanguages.Swift]: noRouting,
    [SupportedLanguages.CPlusPlus]: noRouting,
    [SupportedLanguages.C]: noRouting,
    [SupportedLanguages.Ruby]: routeRubyCall,
};
|
|
22
|
+
// Pre-allocated singletons for common return values
|
|
23
|
+
const CALL_RESULT = { kind: 'call' };
|
|
24
|
+
const SKIP_RESULT = { kind: 'skip' };
|
|
25
|
+
// Max depth for parent-walking loops to prevent pathological AST traversals
|
|
26
|
+
const MAX_PARENT_DEPTH = 50;
|
|
27
|
+
/**
|
|
28
|
+
* Classify a Ruby call node and extract its semantic payload
|
|
29
|
+
*
|
|
30
|
+
* @param calledName - The method name (e.g. 'require', 'include', 'attr_accessor')
|
|
31
|
+
* @param callNode - The tree-sitter call AST node
|
|
32
|
+
* @returns Discriminated union describing the call's semantic role
|
|
33
|
+
*/
|
|
34
|
+
export function routeRubyCall(calledName, callNode) {
|
|
35
|
+
// require / require_relative -> import
|
|
36
|
+
if (calledName === 'require' || calledName === 'require_relative') {
|
|
37
|
+
const argList = callNode.childForFieldName?.('arguments');
|
|
38
|
+
const stringNode = argList?.children?.find((c) => c.type === 'string');
|
|
39
|
+
const contentNode = stringNode?.children?.find((c) => c.type === 'string_content');
|
|
40
|
+
if (!contentNode)
|
|
41
|
+
return SKIP_RESULT;
|
|
42
|
+
let importPath = contentNode.text;
|
|
43
|
+
// Validate: reject null bytes, control chars, excessively long paths
|
|
44
|
+
if (!importPath || importPath.length > 1024 || /[\x00-\x1f]/.test(importPath)) {
|
|
45
|
+
return SKIP_RESULT;
|
|
46
|
+
}
|
|
47
|
+
const isRelative = calledName === 'require_relative';
|
|
48
|
+
if (isRelative && !importPath.startsWith('.')) {
|
|
49
|
+
importPath = './' + importPath;
|
|
50
|
+
}
|
|
51
|
+
return { kind: 'import', importPath, isRelative };
|
|
52
|
+
}
|
|
53
|
+
// include / extend / prepend -> heritage (mixin)
|
|
54
|
+
if (calledName === 'include' || calledName === 'extend' || calledName === 'prepend') {
|
|
55
|
+
let enclosingClass = null;
|
|
56
|
+
let current = callNode.parent;
|
|
57
|
+
let depth = 0;
|
|
58
|
+
while (current && ++depth <= MAX_PARENT_DEPTH) {
|
|
59
|
+
if (current.type === 'class' || current.type === 'module') {
|
|
60
|
+
const nameNode = current.childForFieldName?.('name');
|
|
61
|
+
if (nameNode) {
|
|
62
|
+
enclosingClass = nameNode.text;
|
|
63
|
+
break;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
current = current.parent;
|
|
67
|
+
}
|
|
68
|
+
if (!enclosingClass)
|
|
69
|
+
return SKIP_RESULT;
|
|
70
|
+
const items = [];
|
|
71
|
+
const argList = callNode.childForFieldName?.('arguments');
|
|
72
|
+
for (const arg of (argList?.children ?? [])) {
|
|
73
|
+
if (arg.type === 'constant' || arg.type === 'scope_resolution') {
|
|
74
|
+
items.push({ enclosingClass, mixinName: arg.text, heritageKind: calledName });
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
return items.length > 0 ? { kind: 'heritage', items } : SKIP_RESULT;
|
|
78
|
+
}
|
|
79
|
+
// attr_accessor / attr_reader / attr_writer -> property definitions
|
|
80
|
+
if (calledName === 'attr_accessor' || calledName === 'attr_reader' || calledName === 'attr_writer') {
|
|
81
|
+
const items = [];
|
|
82
|
+
const argList = callNode.childForFieldName?.('arguments');
|
|
83
|
+
for (const arg of (argList?.children ?? [])) {
|
|
84
|
+
if (arg.type === 'simple_symbol') {
|
|
85
|
+
items.push({
|
|
86
|
+
propName: arg.text.startsWith(':') ? arg.text.slice(1) : arg.text,
|
|
87
|
+
accessorType: calledName,
|
|
88
|
+
startLine: arg.startPosition.row,
|
|
89
|
+
endLine: arg.endPosition.row,
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
return items.length > 0 ? { kind: 'properties', items } : SKIP_RESULT;
|
|
94
|
+
}
|
|
95
|
+
// Everything else -> regular call
|
|
96
|
+
return CALL_RESULT;
|
|
97
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/** @file cluster-enricher.ts @description LLM-based enrichment for community clusters, generating semantic names, keywords, and descriptions */
|
|
2
|
+
import { CommunityNode } from './community-processor.js';
|
|
3
|
+
/** Semantic metadata attached to one community cluster. */
export interface ClusterEnrichment {
    /** Semantic cluster name; the enricher uses the community's heuristic label when no LLM result is available. */
    name: string;
    /** Keywords for the cluster; an empty array when none were produced. */
    keywords: string[];
    /** Short description of the cluster; an empty string when none was produced. */
    description: string;
}
|
|
8
|
+
/** Outcome of an enrichment run. */
export interface EnrichmentResult {
    /** Enrichment per community, keyed by community id. */
    enrichments: Map<string, ClusterEnrichment>;
    /** Rough token estimate (about 4 characters per token over prompts and responses); not an exact count. */
    tokensUsed: number;
}
|
|
12
|
+
/** Minimal LLM abstraction used by the cluster enricher. */
export interface LLMClient {
    /** Sends a prompt and resolves with the model's raw text response. */
    generate: (prompt: string) => Promise<string>;
}
|
|
15
|
+
/** Description of one symbol belonging to a cluster. */
export interface ClusterMemberInfo {
    /** Symbol name; rendered into the enrichment prompt as `name (type)`. */
    name: string;
    /** Path of the file containing the symbol. */
    filePath: string;
    /** Symbol type; rendered into the enrichment prompt as `name (type)`. */
    type: string;
}
|
|
20
|
+
/**
|
|
21
|
+
* Enrich clusters with LLM-generated names, keywords, and descriptions
|
|
22
|
+
*
|
|
23
|
+
* @param communities - Community nodes to enrich
|
|
24
|
+
* @param memberMap - Map of communityId -> member info
|
|
25
|
+
* @param llmClient - LLM client for generation
|
|
26
|
+
* @param onProgress - Progress callback
|
|
27
|
+
*/
|
|
28
|
+
export declare const enrichClusters: (communities: CommunityNode[], memberMap: Map<string, ClusterMemberInfo[]>, llmClient: LLMClient, onProgress?: (current: number, total: number) => void) => Promise<EnrichmentResult>;
|
|
29
|
+
/** Enrich multiple clusters in a single LLM call (batch mode, more token-efficient) */
|
|
30
|
+
export declare const enrichClustersBatch: (communities: CommunityNode[], memberMap: Map<string, ClusterMemberInfo[]>, llmClient: LLMClient, batchSize?: number, onProgress?: (current: number, total: number) => void) => Promise<EnrichmentResult>;
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
// code-mapper/src/core/ingestion/cluster-enricher.ts
|
|
2
|
+
/** @file cluster-enricher.ts @description LLM-based enrichment for community clusters, generating semantic names, keywords, and descriptions */
|
|
3
|
+
/**
 * Build the prompt used to enrich a single cluster.
 *
 * The reply template asks for `name`, `keywords` and `description`, the three
 * fields that parseEnrichmentResponse reads from the reply. Without `keywords`
 * in the template, single-cluster enrichment would always produce an empty
 * keyword list.
 *
 * @param members - Cluster members; only the first 20 are listed by name
 * @param heuristicLabel - Heuristic label given to the model as a hint
 * @returns Prompt text to send to the LLM
 */
const buildEnrichmentPrompt = (members, heuristicLabel) => {
    // Limit to 20 members to control token usage
    const MAX_PROMPT_MEMBERS = 20;
    const memberList = members
        .slice(0, MAX_PROMPT_MEMBERS)
        .map(m => `${m.name} (${m.type})`)
        .join(', ');
    // Tell the model how many members were left out so it knows the list is partial
    const overflowNote = members.length > MAX_PROMPT_MEMBERS
        ? ` (+${members.length - MAX_PROMPT_MEMBERS} more)`
        : '';
    return `Analyze this code cluster and provide a semantic name, keywords, and short description.

Heuristic: "${heuristicLabel}"
Members: ${memberList}${overflowNote}

Reply with JSON only:
{"name": "2-4 word semantic name", "keywords": ["3-5 short keywords"], "description": "One sentence describing purpose"}`;
};
|
|
17
|
+
/**
 * Parse an LLM reply into a cluster enrichment, never throwing.
 *
 * The first `{` through the last `}` of the reply is treated as JSON, which
 * tolerates surrounding prose or markdown code fences. Each field is
 * type-checked so that malformed model output (e.g. a numeric name, an object
 * description, non-string keywords) cannot leak into the result.
 *
 * @param response - Raw text returned by the LLM
 * @param fallbackLabel - Name to use when the reply has no usable name or cannot be parsed
 * @returns `{ name, keywords, description }`; the fallback is
 *          `{ name: fallbackLabel, keywords: [], description: '' }`
 */
const parseEnrichmentResponse = (response, fallbackLabel) => {
    const fallback = { name: fallbackLabel, keywords: [], description: '' };
    try {
        // Extract JSON from response, handles markdown code blocks
        const jsonMatch = typeof response === 'string' ? response.match(/\{[\s\S]*\}/) : null;
        if (!jsonMatch) {
            return fallback;
        }
        const parsed = JSON.parse(jsonMatch[0]);
        // Blank or non-string names fall back to the heuristic label
        const name = typeof parsed.name === 'string' ? parsed.name.trim() : '';
        return {
            name: name || fallbackLabel,
            keywords: Array.isArray(parsed.keywords)
                ? parsed.keywords.filter(k => typeof k === 'string')
                : [],
            description: typeof parsed.description === 'string' ? parsed.description : '',
        };
    }
    catch {
        // Fallback to heuristic label if parsing fails
        return fallback;
    }
};
|
|
40
|
+
/**
 * Enrich communities one at a time, issuing one LLM request per non-empty cluster
 *
 * Progress is reported before each community is processed. A community with no
 * members, or whose LLM request throws, receives its heuristic label with empty
 * keywords and description; a failed request is also logged via console.warn.
 *
 * @param communities - Community nodes to enrich
 * @param memberMap - Map of communityId -> member info
 * @param llmClient - LLM client for generation
 * @param onProgress - Optional callback receiving (1-based position, total)
 * @returns The enrichment map keyed by community id and a rough token estimate
 */
export const enrichClusters = async (communities, memberMap, llmClient, onProgress) => {
    const enrichments = new Map();
    let tokensUsed = 0;
    // Enrichment used whenever no LLM result is available
    const heuristicOnly = (community) => ({
        name: community.heuristicLabel,
        keywords: [],
        description: '',
    });
    let position = 0;
    for (const community of communities) {
        position += 1;
        const members = memberMap.get(community.id) || [];
        onProgress?.(position, communities.length);
        if (members.length !== 0) {
            try {
                const prompt = buildEnrichmentPrompt(members, community.heuristicLabel);
                const response = await llmClient.generate(prompt);
                // Rough token estimate (~4 chars per token)
                tokensUsed += prompt.length / 4 + response.length / 4;
                enrichments.set(community.id, parseEnrichmentResponse(response, community.heuristicLabel));
            }
            catch (error) {
                // Request failed: keep the heuristic label
                console.warn(`Failed to enrich cluster ${community.id}:`, error);
                enrichments.set(community.id, heuristicOnly(community));
            }
        }
        else {
            // Nothing to describe to the model: keep the heuristic label
            enrichments.set(community.id, heuristicOnly(community));
        }
    }
    return { enrichments, tokensUsed };
};
|
|
84
|
+
/** Enrich multiple clusters in a single LLM call (batch mode, more token-efficient) */
|
|
85
|
+
export const enrichClustersBatch = async (communities, memberMap, llmClient, batchSize = 5, onProgress) => {
|
|
86
|
+
const enrichments = new Map();
|
|
87
|
+
let tokensUsed = 0;
|
|
88
|
+
// Process communities in batches
|
|
89
|
+
for (let i = 0; i < communities.length; i += batchSize) {
|
|
90
|
+
// Report batch progress
|
|
91
|
+
onProgress?.(Math.min(i + batchSize, communities.length), communities.length);
|
|
92
|
+
const batch = communities.slice(i, i + batchSize);
|
|
93
|
+
const batchPrompt = batch.map((community, idx) => {
|
|
94
|
+
const members = memberMap.get(community.id) || [];
|
|
95
|
+
const limitedMembers = members.slice(0, 15);
|
|
96
|
+
const memberList = limitedMembers
|
|
97
|
+
.map(m => `${m.name} (${m.type})`)
|
|
98
|
+
.join(', ');
|
|
99
|
+
return `Cluster ${idx + 1} (id: ${community.id}):
|
|
100
|
+
Heuristic: "${community.heuristicLabel}"
|
|
101
|
+
Members: ${memberList}`;
|
|
102
|
+
}).join('\n\n');
|
|
103
|
+
const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
|
|
104
|
+
|
|
105
|
+
${batchPrompt}
|
|
106
|
+
|
|
107
|
+
Output JSON array:
|
|
108
|
+
[
|
|
109
|
+
{"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
|
|
110
|
+
...
|
|
111
|
+
]`;
|
|
112
|
+
try {
|
|
113
|
+
const response = await llmClient.generate(prompt);
|
|
114
|
+
tokensUsed += prompt.length / 4 + response.length / 4;
|
|
115
|
+
// Parse batch JSON response
|
|
116
|
+
const jsonMatch = response.match(/\[[\s\S]*\]/);
|
|
117
|
+
if (jsonMatch) {
|
|
118
|
+
const parsed = JSON.parse(jsonMatch[0]);
|
|
119
|
+
for (const item of parsed) {
|
|
120
|
+
enrichments.set(item.id, {
|
|
121
|
+
name: item.name,
|
|
122
|
+
keywords: item.keywords || [],
|
|
123
|
+
description: item.description || '',
|
|
124
|
+
});
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
catch (error) {
|
|
129
|
+
console.warn('Batch enrichment failed, falling back to heuristics:', error);
|
|
130
|
+
// Fall back to heuristics for this batch
|
|
131
|
+
for (const community of batch) {
|
|
132
|
+
enrichments.set(community.id, {
|
|
133
|
+
name: community.heuristicLabel,
|
|
134
|
+
keywords: [],
|
|
135
|
+
description: '',
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
// Fill in missing communities with heuristic labels
|
|
141
|
+
for (const community of communities) {
|
|
142
|
+
if (!enrichments.has(community.id)) {
|
|
143
|
+
enrichments.set(community.id, {
|
|
144
|
+
name: community.heuristicLabel,
|
|
145
|
+
keywords: [],
|
|
146
|
+
description: '',
|
|
147
|
+
});
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
return { enrichments, tokensUsed };
|
|
151
|
+
};
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/** @file community-processor.ts @description Detects code communities using the Leiden algorithm on CALLS/EXTENDS/IMPLEMENTS edges, grouping symbols by functional area */
|
|
2
|
+
import { KnowledgeGraph } from '../graph/types.js';
|
|
3
|
+
/** A detected community (cluster) of symbols. */
export interface CommunityNode {
    /** Community identifier. NOTE(review): memberships use the form `comm_<n>`; presumably this matches — confirm against createCommunityNodes. */
    id: string;
    label: string;
    /** Heuristically derived label; the cluster enricher uses it as the name when no LLM result is available. */
    heuristicLabel: string;
    cohesion: number;
    symbolCount: number;
}
|
|
10
|
+
/** Assignment of one graph node to one community. */
export interface CommunityMembership {
    /** Id of the node in the clustering graph. */
    nodeId: string;
    /** Community id in the form `comm_<n>`, where `<n>` is the community number assigned by detection. */
    communityId: string;
}
|
|
14
|
+
/** Full output of community detection. */
export interface CommunityDetectionResult {
    /** One node per detected community. */
    communities: CommunityNode[];
    /** One entry per clustered graph node. */
    memberships: CommunityMembership[];
    stats: {
        /** Number of communities reported by the detection run (0 for an empty graph). */
        totalCommunities: number;
        /** Modularity reported by the detection run (0 for an empty graph or the timeout fallback). */
        modularity: number;
        /** Number of nodes in the graph that was clustered. */
        nodesProcessed: number;
    };
}
|
|
23
|
+
export declare const COMMUNITY_COLORS: string[];
|
|
24
|
+
export declare const getCommunityColor: (communityIndex: number) => string;
|
|
25
|
+
/** Detect communities in the knowledge graph using the Leiden algorithm on CALLS/EXTENDS/IMPLEMENTS edges */
|
|
26
|
+
export declare const processCommunities: (knowledgeGraph: KnowledgeGraph, onProgress?: (message: string, progress: number) => void) => Promise<CommunityDetectionResult>;
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
// code-mapper/src/core/ingestion/community-processor.ts
|
|
2
|
+
/** @file community-processor.ts @description Detects code communities using the Leiden algorithm on CALLS/EXTENDS/IMPLEMENTS edges, grouping symbols by functional area */
|
|
3
|
+
// Leiden algorithm is vendored from graphology (never published to npm);
|
|
4
|
+
// loaded via createRequire for ESM compatibility
|
|
5
|
+
import Graph from 'graphology';
|
|
6
|
+
import { createRequire } from 'node:module';
|
|
7
|
+
import { fileURLToPath } from 'node:url';
|
|
8
|
+
import { dirname, resolve } from 'node:path';
|
|
9
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
10
|
+
const __dirname = dirname(__filename);
|
|
11
|
+
// Navigate to package root, works from both src/ and dist/
|
|
12
|
+
const leidenPath = resolve(__dirname, '..', '..', '..', 'vendor', 'leiden', 'index.cjs');
|
|
13
|
+
const _require = createRequire(import.meta.url);
|
|
14
|
+
const leiden = _require(leidenPath);
|
|
15
|
+
// Community colors for visualization
export const COMMUNITY_COLORS = [
    '#ef4444', // red
    '#f97316', // orange
    '#eab308', // yellow
    '#22c55e', // green
    '#06b6d4', // cyan
    '#3b82f6', // blue
    '#8b5cf6', // violet
    '#d946ef', // fuchsia
    '#ec4899', // pink
    '#f43f5e', // rose
    '#14b8a6', // teal
    '#84cc16', // lime
];
/**
 * Pick the palette color for a community, cycling through COMMUNITY_COLORS.
 *
 * Always returns a palette entry: fractional indices are truncated, negative
 * indices wrap around from the end of the palette, and non-finite values
 * (NaN, Infinity) map to the first color.
 *
 * @param communityIndex - Index of the community
 * @returns Hex color string from COMMUNITY_COLORS
 */
export const getCommunityColor = (communityIndex) => {
    const paletteSize = COMMUNITY_COLORS.length;
    const whole = Number.isFinite(communityIndex) ? Math.trunc(communityIndex) : 0;
    // JS `%` keeps the sign of the dividend, so shift into the [0, paletteSize) range
    return COMMUNITY_COLORS[((whole % paletteSize) + paletteSize) % paletteSize];
};
|
|
33
|
+
/**
 * Detect communities in the knowledge graph using the Leiden algorithm on CALLS/EXTENDS/IMPLEMENTS edges
 *
 * Steps: count symbol nodes to decide on large-graph mode, build the clustering
 * graph, run Leiden guarded by a timeout, then derive community nodes and
 * node -> community memberships (`comm_<n>`).
 *
 * @param knowledgeGraph - Graph whose symbols and relationships are clustered
 * @param onProgress - Optional callback receiving (message, percent)
 * @returns Communities, memberships and summary stats; all empty/zero when the clustering graph has no nodes
 * @throws Re-throws any error raised by the Leiden run other than the timeout
 */
export const processCommunities = async (knowledgeGraph, onProgress) => {
    onProgress?.('Building graph for community detection...', 0);
    // Pre-check symbol count to determine large-graph mode
    let symbolCount = 0;
    knowledgeGraph.forEachNode(node => {
        if (node.label === 'Function' || node.label === 'Class' || node.label === 'Method' || node.label === 'Interface') {
            symbolCount++;
        }
    });
    const isLarge = symbolCount > 10_000;
    const graph = buildGraphologyGraph(knowledgeGraph, isLarge);
    if (graph.order === 0) {
        return {
            communities: [],
            memberships: [],
            stats: { totalCommunities: 0, modularity: 0, nodesProcessed: 0 }
        };
    }
    const nodeCount = graph.order;
    const edgeCount = graph.size;
    onProgress?.(`Running Leiden on ${nodeCount} nodes, ${edgeCount} edges${isLarge ? ` (filtered from ${symbolCount} symbols)` : ''}...`, 30);
    // Large graphs: higher resolution + capped iterations (~95%+ modularity in 2-3 iterations)
    // Timeout: abort after 60s for pathological graph structures
    // NOTE(review): if leiden.detailed runs synchronously it cannot be pre-empted by this
    // timer; the race only helps when the call yields to the event loop — confirm.
    const LEIDEN_TIMEOUT_MS = 60_000;
    // Sentinel compared by identity so an unrelated error with the same message is never mistaken for the timeout
    const timeoutError = new Error('Leiden timeout');
    let timeoutHandle;
    let details;
    try {
        details = await Promise.race([
            Promise.resolve(leiden.detailed(graph, {
                resolution: isLarge ? 2.0 : 1.0,
                maxIterations: isLarge ? 3 : 0,
            })),
            new Promise((_, reject) => {
                timeoutHandle = setTimeout(() => reject(timeoutError), LEIDEN_TIMEOUT_MS);
            }),
        ]);
    }
    catch (e) {
        if (e === timeoutError) {
            onProgress?.('Community detection timed out, using fallback...', 60);
            // Fallback: assign all nodes to a single community
            const communities = {};
            graph.forEachNode((node) => { communities[node] = 0; });
            details = { communities, count: 1, modularity: 0 };
        }
        else {
            throw e;
        }
    }
    finally {
        // Always cancel the timer; a pending 60s timeout would otherwise keep the event loop alive
        clearTimeout(timeoutHandle);
    }
    onProgress?.(`Found ${details.count} communities...`, 60);
    // Create community nodes with heuristic labels
    const communityNodes = createCommunityNodes(details.communities, details.count, graph, knowledgeGraph);
    onProgress?.('Creating membership edges...', 80);
    // Create membership mappings
    const memberships = Object.entries(details.communities).map(([nodeId, communityNum]) => ({
        nodeId,
        communityId: `comm_${communityNum}`,
    }));
    onProgress?.('Community detection complete!', 100);
    return {
        communities: communityNodes,
        memberships,
        stats: {
            totalCommunities: details.count,
            modularity: details.modularity,
            nodesProcessed: graph.order,
        }
    };
};
|
|
103
|
+
/** Relationships with a confidence below this value are ignored when the graph is flagged as large */
const MIN_CONFIDENCE_LARGE = 0.5;
/**
 * Project the knowledge graph onto an undirected graphology graph used for clustering
 *
 * Only Function/Class/Method/Interface nodes are kept, and only CALLS/EXTENDS/IMPLEMENTS
 * relationships produce edges. Self-referencing relationships are ignored and repeated
 * relationships between the same pair of nodes collapse into a single edge.
 *
 * When `isLarge` is true, relationships whose confidence is below MIN_CONFIDENCE_LARGE
 * are dropped, and nodes that appear as an endpoint of fewer than two remaining
 * relationships are left out of the graph.
 *
 * @param knowledgeGraph - source graph exposing forEachRelationship() and forEachNode()
 * @param isLarge - enables the confidence and endpoint-count filters
 * @returns the graphology graph; each node carries `name`, `filePath` and `type` attributes
 */
const buildGraphologyGraph = (knowledgeGraph, isLarge) => {
    const symbolLabels = new Set(['Function', 'Class', 'Method', 'Interface']);
    const edgeTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
    const result = new Graph({ type: 'undirected', allowSelfLoops: false });
    // A relationship may contribute an edge only if it passes all of these checks
    const isEligible = (rel) => {
        if (!edgeTypes.has(rel.type)) {
            return false;
        }
        if (rel.sourceId === rel.targetId) {
            return false;
        }
        return !(isLarge && rel.confidence < MIN_CONFIDENCE_LARGE);
    };
    // Pass 1: count how often each node id is an endpoint of an eligible relationship
    const endpointHits = new Map();
    const bump = (id) => {
        endpointHits.set(id, (endpointHits.get(id) || 0) + 1);
    };
    knowledgeGraph.forEachRelationship((rel) => {
        if (isEligible(rel)) {
            bump(rel.sourceId);
            bump(rel.targetId);
        }
    });
    // Pass 2: add symbol nodes that are connected (and, for large graphs, hit at least twice)
    const requiredHits = isLarge ? 2 : 1;
    knowledgeGraph.forEachNode((node) => {
        const isSymbol = symbolLabels.has(node.label);
        if (isSymbol && (endpointHits.get(node.id) || 0) >= requiredHits) {
            result.addNode(node.id, {
                name: node.properties.name,
                filePath: node.properties.filePath,
                type: node.label,
            });
        }
    });
    // Pass 3: connect the surviving nodes, at most one edge per pair
    knowledgeGraph.forEachRelationship((rel) => {
        if (!isEligible(rel)) {
            return;
        }
        const { sourceId, targetId } = rel;
        const bothPresent = result.hasNode(sourceId) && result.hasNode(targetId);
        if (bothPresent && !result.hasEdge(sourceId, targetId)) {
            result.addEdge(sourceId, targetId);
        }
    });
    return result;
};
|
|
151
|
+
/**
 * Turn a node-to-community assignment into community descriptors
 *
 * Members are grouped by community number, communities with fewer than two members
 * are dropped, and each remaining community gets a heuristic label and a cohesion score.
 *
 * @param communities - object mapping node id to community number
 * @param communityCount - community count reported by the detector (not read by this function)
 * @param graph - clustering graph, forwarded to the label and cohesion helpers
 * @param knowledgeGraph - source of node file paths, read through iterNodes()
 * @returns descriptors `{ id, label, heuristicLabel, cohesion, symbolCount }`, largest community first
 */
const createCommunityNodes = (communities, communityCount, graph, knowledgeGraph) => {
    // Bucket node ids under their community number
    const membersByCommunity = new Map();
    for (const [nodeId, commNum] of Object.entries(communities)) {
        const bucket = membersByCommunity.get(commNum);
        if (bucket) {
            bucket.push(nodeId);
        }
        else {
            membersByCommunity.set(commNum, [nodeId]);
        }
    }
    // Index file paths by node id for the labelling heuristic
    const nodePathMap = new Map();
    for (const { id, properties } of knowledgeGraph.iterNodes()) {
        if (properties.filePath) {
            nodePathMap.set(id, properties.filePath);
        }
    }
    const communityNodes = [];
    for (const [commNum, memberIds] of membersByCommunity) {
        // Singleton communities are not reported
        if (memberIds.length < 2) {
            continue;
        }
        const heuristicLabel = generateHeuristicLabel(memberIds, nodePathMap, graph, commNum);
        const cohesion = calculateCohesion(memberIds, graph);
        communityNodes.push({
            id: `comm_${commNum}`,
            label: heuristicLabel,
            heuristicLabel,
            cohesion,
            symbolCount: memberIds.length,
        });
    }
    // Largest communities first
    return communityNodes.sort((a, b) => b.symbolCount - a.symbolCount);
};
|
|
187
|
+
/** Generate a human-readable label from the most common folder name in the community */
|
|
188
|
+
const generateHeuristicLabel = (memberIds, nodePathMap, graph, commNum) => {
|
|
189
|
+
// Count folder occurrences from member file paths
|
|
190
|
+
const folderCounts = new Map();
|
|
191
|
+
memberIds.forEach(nodeId => {
|
|
192
|
+
const filePath = nodePathMap.get(nodeId) || '';
|
|
193
|
+
const parts = filePath.split('/').filter(Boolean);
|
|
194
|
+
// Use parent directory as the most specific folder
|
|
195
|
+
if (parts.length >= 2) {
|
|
196
|
+
const folder = parts[parts.length - 2];
|
|
197
|
+
// Skip generic folders
|
|
198
|
+
if (!['src', 'lib', 'core', 'utils', 'common', 'shared', 'helpers'].includes(folder.toLowerCase())) {
|
|
199
|
+
folderCounts.set(folder, (folderCounts.get(folder) || 0) + 1);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
});
|
|
203
|
+
// Pick the most common folder
|
|
204
|
+
let maxCount = 0;
|
|
205
|
+
let bestFolder = '';
|
|
206
|
+
folderCounts.forEach((count, folder) => {
|
|
207
|
+
if (count > maxCount) {
|
|
208
|
+
maxCount = count;
|
|
209
|
+
bestFolder = folder;
|
|
210
|
+
}
|
|
211
|
+
});
|
|
212
|
+
if (bestFolder) {
|
|
213
|
+
// Capitalize and return
|
|
214
|
+
return bestFolder.charAt(0).toUpperCase() + bestFolder.slice(1);
|
|
215
|
+
}
|
|
216
|
+
// Fallback: detect common prefix in function names
|
|
217
|
+
const names = [];
|
|
218
|
+
memberIds.forEach(nodeId => {
|
|
219
|
+
const name = graph.getNodeAttribute(nodeId, 'name');
|
|
220
|
+
if (name)
|
|
221
|
+
names.push(name);
|
|
222
|
+
});
|
|
223
|
+
// Use common prefix if long enough
|
|
224
|
+
if (names.length > 2) {
|
|
225
|
+
const commonPrefix = findCommonPrefix(names);
|
|
226
|
+
if (commonPrefix.length > 2) {
|
|
227
|
+
return commonPrefix.charAt(0).toUpperCase() + commonPrefix.slice(1);
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
// Last resort: generic name with community ID
|
|
231
|
+
return `Cluster_${commNum}`;
|
|
232
|
+
};
|
|
233
|
+
/**
 * Return the longest prefix shared by every string in the list
 *
 * After sorting, the first and last entries are the two that differ the most,
 * so comparing only that pair is enough.
 *
 * @param strings - strings to compare; the input array is not modified
 * @returns the shared prefix, or '' when the list is empty or nothing is shared
 */
const findCommonPrefix = (strings) => {
    if (strings.length === 0) {
        return '';
    }
    const ordered = [...strings].sort();
    const lowest = ordered[0];
    const highest = ordered[ordered.length - 1];
    // Advance until the two extremes diverge or the shorter one runs out
    let matched = 0;
    for (; matched < lowest.length; matched++) {
        if (lowest[matched] !== highest[matched]) {
            break;
        }
    }
    return lowest.slice(0, matched);
};
|
|
246
|
+
/** Estimate cohesion score (0-1) based on internal edge density, sampling for large communities */
|
|
247
|
+
const calculateCohesion = (memberIds, graph) => {
|
|
248
|
+
if (memberIds.length <= 1)
|
|
249
|
+
return 1.0;
|
|
250
|
+
const memberSet = new Set(memberIds);
|
|
251
|
+
// Sample up to 50 members to avoid O(N^2) cost
|
|
252
|
+
const SAMPLE_SIZE = 50;
|
|
253
|
+
const sample = memberIds.length <= SAMPLE_SIZE
|
|
254
|
+
? memberIds
|
|
255
|
+
: memberIds.slice(0, SAMPLE_SIZE);
|
|
256
|
+
let internalEdges = 0;
|
|
257
|
+
let totalEdges = 0;
|
|
258
|
+
for (const nodeId of sample) {
|
|
259
|
+
if (!graph.hasNode(nodeId))
|
|
260
|
+
continue;
|
|
261
|
+
graph.forEachNeighbor(nodeId, (neighbor) => {
|
|
262
|
+
totalEdges++;
|
|
263
|
+
if (memberSet.has(neighbor)) {
|
|
264
|
+
internalEdges++;
|
|
265
|
+
}
|
|
266
|
+
});
|
|
267
|
+
}
|
|
268
|
+
// Cohesion = fraction of edges that are internal
|
|
269
|
+
if (totalEdges === 0)
|
|
270
|
+
return 1.0;
|
|
271
|
+
return Math.min(1.0, internalEdges / totalEdges);
|
|
272
|
+
};
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
/** @file constants.ts @description Tree-sitter parsing buffer size constants and adaptive sizing */
|
|
2
|
+
/** Default minimum buffer size for tree-sitter parsing (512 KB) */
export declare const TREE_SITTER_BUFFER_SIZE: number;
|
|
3
|
+
/** Maximum buffer size cap (32 MB), also the file-size skip threshold */
export declare const TREE_SITTER_MAX_BUFFER: number;
|
|
4
|
+
/** Compute adaptive buffer size: 2x file size, clamped between 512 KB and 32 MB */
|
|
5
|
+
export declare const getTreeSitterBufferSize: (contentLength: number) => number;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// code-mapper/src/core/ingestion/constants.ts
|
|
2
|
+
/** @file constants.ts @description Tree-sitter parsing buffer size constants and adaptive sizing */
|
|
3
|
+
/** Smallest buffer handed to tree-sitter when parsing: 512 KB */
export const TREE_SITTER_BUFFER_SIZE = 512 * 1024;
/** Largest buffer allowed: 32 MB (this cap doubles as the file-size skip threshold) */
export const TREE_SITTER_MAX_BUFFER = 32 * 1024 * 1024;
/**
 * Pick a parse buffer size for a file of the given length
 *
 * The buffer is twice the content length, never smaller than
 * TREE_SITTER_BUFFER_SIZE and never larger than TREE_SITTER_MAX_BUFFER.
 *
 * @param contentLength - length of the content to be parsed
 * @returns the clamped buffer size
 */
export const getTreeSitterBufferSize = (contentLength) => {
    const doubled = contentLength * 2;
    if (doubled > TREE_SITTER_MAX_BUFFER) {
        return TREE_SITTER_MAX_BUFFER;
    }
    if (doubled < TREE_SITTER_BUFFER_SIZE) {
        return TREE_SITTER_BUFFER_SIZE;
    }
    return doubled;
};
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/** @file entry-point-scoring.ts @description Scores functions as entry point candidates using call ratio, export status, name patterns, and framework detection */
|
|
2
|
+
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
3
|
+
/** Result returned by calculateEntryPointScore: a numeric score plus the reasons behind it */
export interface EntryPointScoreResult {
|
|
4
|
+
/** Entry point score (product of the base score and the export, name and framework multipliers) */
score: number;
|
|
5
|
+
/** Reasons explaining the score */
reasons: string[];
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Calculate an entry point score for a function/method
|
|
9
|
+
*
|
|
10
|
+
* Score = baseScore * exportMultiplier * nameMultiplier * frameworkMultiplier
|
|
11
|
+
*
|
|
12
|
+
* @param name - Function/method name
|
|
13
|
+
* @param language - Programming language
|
|
14
|
+
* @param isExported - Whether the function is exported/public
|
|
15
|
+
* @param callerCount - Number of callers
|
|
16
|
+
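* @param calleeCount - Number of callees
* @param filePath - Optional path of the file containing the function (its exact use is not visible in this declaration; presumably it feeds framework detection, confirm against the implementation)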
|
|
17
|
+
* @returns Score and reasons explaining the score
|
|
18
|
+
*/
|
|
19
|
+
export declare function calculateEntryPointScore(name: string, language: SupportedLanguages, isExported: boolean, callerCount: number, calleeCount: number, filePath?: string): EntryPointScoreResult;
|
|
20
|
+
/** Check if a file path is a test file (excluded from entry points) */
|
|
21
|
+
export declare function isTestFile(filePath: string): boolean;
|
|
22
|
+
/** Check if a file path is a utility/helper file (lower entry point priority) */
|
|
23
|
+
export declare function isUtilityFile(filePath: string): boolean;
|