@getmikk/core 1.9.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/src/contract/lock-reader.ts +6 -1
- package/src/error-handler.ts +4 -2
- package/src/graph/cluster-detector.ts +7 -2
- package/src/graph/confidence-engine.ts +57 -32
- package/src/graph/impact-analyzer.ts +5 -3
- package/src/graph/memory-manager.ts +98 -257
- package/src/graph/query-engine.ts +42 -45
- package/src/index.ts +2 -0
- package/src/parser/index.ts +5 -5
- package/src/parser/oxc-parser.ts +36 -3
- package/src/search/bm25.ts +11 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@getmikk/core",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -28,11 +28,11 @@
|
|
|
28
28
|
"oxc-resolver": "^11.19.1",
|
|
29
29
|
"tree-sitter-wasms": "^0.1.13",
|
|
30
30
|
"web-tree-sitter": "0.20.8",
|
|
31
|
-
"zod": "^3.22.0"
|
|
31
|
+
"zod": "^3.22.0",
|
|
32
|
+
"typescript": "^5.7.0"
|
|
32
33
|
},
|
|
33
34
|
"devDependencies": {
|
|
34
35
|
"@types/bun": "^1.3.10",
|
|
35
|
-
"@types/node": "^22.0.0"
|
|
36
|
-
"typescript": "^5.7.0"
|
|
36
|
+
"@types/node": "^22.0.0"
|
|
37
37
|
}
|
|
38
38
|
}
|
|
@@ -77,6 +77,10 @@ function compactifyLock(lock: MikkLock): any {
|
|
|
77
77
|
// P4: no hash, P6: no moduleId
|
|
78
78
|
}
|
|
79
79
|
// P7: integer calls/calledBy referencing fnIndex positions
|
|
80
|
+
const { name: parsedName } = parseEntityKey(fn.id, 'fn:')
|
|
81
|
+
if (fn.name && fn.name !== parsedName) {
|
|
82
|
+
c.name = fn.name
|
|
83
|
+
}
|
|
80
84
|
if (fn.calls.length > 0) c.calls = fn.calls.map(id => fnIndexMap.get(id) ?? -1).filter((n: number) => n >= 0)
|
|
81
85
|
if (fn.calledBy.length > 0) c.calledBy = fn.calledBy.map(id => fnIndexMap.get(id) ?? -1).filter((n: number) => n >= 0)
|
|
82
86
|
if (fn.params && fn.params.length > 0) c.params = fn.params
|
|
@@ -193,7 +197,8 @@ function hydrateLock(raw: any): any {
|
|
|
193
197
|
for (const [key, c] of Object.entries(raw.functions || {}) as [string, any][]) {
|
|
194
198
|
// P7: key is integer index -> look up full ID via fnIndex
|
|
195
199
|
const fullId = hasFnIndex ? (fnIndex[parseInt(key)] || key) : key
|
|
196
|
-
const { name, file } = parseEntityKey(fullId, 'fn:')
|
|
200
|
+
const { name: parsedName, file } = parseEntityKey(fullId, 'fn:')
|
|
201
|
+
const name = c.name || parsedName
|
|
197
202
|
const lines = c.lines || [c.startLine || 0, c.endLine || 0]
|
|
198
203
|
// P7: integer calls/calledBy -> resolve to full string IDs (backward compat: strings pass through)
|
|
199
204
|
const calls = (c.calls || []).map((v: any) => typeof v === 'number' ? (fnIndex[v] ?? null) : v).filter(Boolean)
|
package/src/error-handler.ts
CHANGED
|
@@ -426,5 +426,7 @@ export function createDefaultErrorListener(): (error: MikkError) => void {
|
|
|
426
426
|
}
|
|
427
427
|
}
|
|
428
428
|
|
|
429
|
-
//
|
|
430
|
-
|
|
429
|
+
// NOTE: Do NOT register listeners at module load time - every import would
|
|
430
|
+
// add a duplicate listener that is never cleaned up. Instead, call:
|
|
431
|
+
// ErrorHandler.getInstance().addListener(createDefaultErrorListener())
|
|
432
|
+
// once during application bootstrap (CLI entry-point, MCP server startup).
|
|
@@ -194,14 +194,19 @@ export class ClusterDetector {
|
|
|
194
194
|
}
|
|
195
195
|
for (const [name, dupes] of nameCount) {
|
|
196
196
|
if (dupes.length <= 1) continue
|
|
197
|
-
for (
|
|
197
|
+
for (let i = 0; i < dupes.length; i++) {
|
|
198
|
+
const cluster = dupes[i]
|
|
198
199
|
const segments = cluster.id.split('-')
|
|
199
200
|
.filter(s => s !== 'packages' && s !== 'apps' && s !== 'src')
|
|
200
201
|
const suffix = segments
|
|
201
202
|
.map(s => s.charAt(0).toUpperCase() + s.slice(1))
|
|
202
203
|
.join(' ')
|
|
203
|
-
|
|
204
|
+
|
|
205
|
+
if (suffix && suffix.toLowerCase() !== name.toLowerCase()) {
|
|
204
206
|
cluster.suggestedName = `${name} (${suffix})`
|
|
207
|
+
} else {
|
|
208
|
+
// Force disambiguation using the already-deduplicated cluster.id
|
|
209
|
+
cluster.suggestedName = `${name} (${cluster.id})`
|
|
205
210
|
}
|
|
206
211
|
}
|
|
207
212
|
}
|
|
@@ -1,60 +1,85 @@
|
|
|
1
1
|
import type { DependencyGraph } from './types.js'
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
4
|
+
* ConfidenceEngine — computes path-confidence for impact analysis results.
|
|
5
|
+
*
|
|
6
|
+
* ImpactAnalyzer builds paths by walking BACKWARDS through `inEdges`
|
|
7
|
+
* (dependent → dependency direction). After the BFS the paths are
|
|
8
|
+
* stored in forward-traversal order (changed-node → impacted-node).
|
|
9
|
+
*
|
|
10
|
+
* To find the edge between two consecutive path nodes we must therefore
|
|
11
|
+
* look in `inEdges[next]` for an edge whose `.from === current`, which is
|
|
12
|
+
* the same as looking in `outEdges[current]` for an edge whose `.to === next`.
|
|
13
|
+
* We prefer `outEdges` because it gives O(out-degree) scans instead of
|
|
14
|
+
* O(in-degree), but we fall back to `inEdges` so the engine is correct
|
|
15
|
+
* regardless of traversal direction stored in the path.
|
|
8
16
|
*/
|
|
9
17
|
export class ConfidenceEngine {
|
|
10
18
|
constructor(private graph: DependencyGraph) {}
|
|
11
19
|
|
|
12
20
|
/**
|
|
13
|
-
* Compute confidence
|
|
21
|
+
* Compute confidence along a specific ordered path of node IDs.
|
|
22
|
+
*
|
|
14
23
|
* @param pathIds Array of node IDs forming a path (e.g. ['A', 'B', 'C'])
|
|
15
|
-
*
|
|
24
|
+
* in forward (caller → callee) order.
|
|
25
|
+
* @returns Cumulative confidence from 0.0 to 1.0; 1.0 for trivial paths.
|
|
16
26
|
*/
|
|
17
|
-
|
|
18
|
-
if (pathIds.length < 2) return 1.0
|
|
27
|
+
calculatePathConfidence(pathIds: string[]): number {
|
|
28
|
+
if (pathIds.length < 2) return 1.0
|
|
19
29
|
|
|
20
|
-
let totalConfidence = 1.0
|
|
30
|
+
let totalConfidence = 1.0
|
|
21
31
|
|
|
22
32
|
for (let i = 0; i < pathIds.length - 1; i++) {
|
|
23
|
-
const current = pathIds[i]
|
|
24
|
-
const next
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
const current = pathIds[i]
|
|
34
|
+
const next = pathIds[i + 1]
|
|
35
|
+
|
|
36
|
+
// Prefer outEdges[current] for O(out-degree) look-up
|
|
37
|
+
const edges = this.graph.outEdges.get(current)
|
|
38
|
+
?? this.graph.inEdges.get(next) // fallback: scan inEdges of the next node
|
|
39
|
+
?? []
|
|
40
|
+
|
|
41
|
+
let maxEdgeConfidence = 0.0
|
|
42
|
+
for (const edge of edges) {
|
|
43
|
+
// outEdges: edge.from === current, edge.to === next
|
|
44
|
+
// inEdges: edge.to === next, edge.from === current
|
|
45
|
+
if (edge.to === next && edge.from === current) {
|
|
46
|
+
if ((edge.confidence ?? 1.0) > maxEdgeConfidence) {
|
|
47
|
+
maxEdgeConfidence = edge.confidence ?? 1.0
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if (maxEdgeConfidence === 0.0) {
|
|
53
|
+
// Try inEdges[next] if outEdges produced no match
|
|
54
|
+
const inbound = this.graph.inEdges.get(next) ?? []
|
|
55
|
+
for (const edge of inbound) {
|
|
56
|
+
if (edge.from === current) {
|
|
57
|
+
if ((edge.confidence ?? 1.0) > maxEdgeConfidence) {
|
|
58
|
+
maxEdgeConfidence = edge.confidence ?? 1.0
|
|
59
|
+
}
|
|
34
60
|
}
|
|
35
61
|
}
|
|
36
62
|
}
|
|
37
63
|
|
|
38
64
|
if (maxEdgeConfidence === 0.0) {
|
|
39
|
-
|
|
65
|
+
// No edge found in either direction — path is broken or unresolvable
|
|
66
|
+
return 0.0
|
|
40
67
|
}
|
|
41
68
|
|
|
42
|
-
totalConfidence *= maxEdgeConfidence
|
|
69
|
+
totalConfidence *= maxEdgeConfidence
|
|
43
70
|
}
|
|
44
71
|
|
|
45
|
-
return totalConfidence
|
|
72
|
+
return totalConfidence
|
|
46
73
|
}
|
|
47
74
|
|
|
48
75
|
/**
|
|
49
|
-
*
|
|
50
|
-
* by averaging the confidence of all paths leading to it.
|
|
76
|
+
* Average confidence across all paths leading to a target node.
|
|
51
77
|
*/
|
|
52
|
-
|
|
53
|
-
if (paths.length === 0) return 1.0
|
|
54
|
-
|
|
55
|
-
const pathConfidences = paths.map(
|
|
56
|
-
const sum = pathConfidences.reduce((a, b) => a + b, 0)
|
|
57
|
-
|
|
58
|
-
return Number((sum / paths.length).toFixed(3));
|
|
78
|
+
calculateNodeAggregatedConfidence(paths: string[][]): number {
|
|
79
|
+
if (paths.length === 0) return 1.0
|
|
80
|
+
|
|
81
|
+
const pathConfidences = paths.map(p => this.calculatePathConfidence(p))
|
|
82
|
+
const sum = pathConfidences.reduce((a, b) => a + b, 0)
|
|
83
|
+
return Number((sum / paths.length).toFixed(3))
|
|
59
84
|
}
|
|
60
85
|
}
|
|
@@ -50,8 +50,8 @@ export class ImpactAnalyzer {
|
|
|
50
50
|
|
|
51
51
|
const dependents = this.graph.inEdges.get(current) || [];
|
|
52
52
|
for (const edge of dependents) {
|
|
53
|
-
|
|
54
|
-
//
|
|
53
|
+
// Allow 'contains' edges so if a function is changed, the file it belongs to is impacted,
|
|
54
|
+
// which then allows traversing 'imports' edges from other files.
|
|
55
55
|
if (!pathSet.has(edge.from)) {
|
|
56
56
|
const newPathSet = new Set(pathSet);
|
|
57
57
|
newPathSet.add(edge.from);
|
|
@@ -65,7 +65,9 @@ export class ImpactAnalyzer {
|
|
|
65
65
|
}
|
|
66
66
|
}
|
|
67
67
|
|
|
68
|
-
const impactedIds = Array.from(visited.keys()).filter(id =>
|
|
68
|
+
const impactedIds = Array.from(visited.keys()).filter(id =>
|
|
69
|
+
!changedNodeIds.includes(id) && id.startsWith('fn:')
|
|
70
|
+
);
|
|
69
71
|
|
|
70
72
|
let totalRisk = 0;
|
|
71
73
|
let totalConfidence = 0;
|
|
@@ -1,213 +1,136 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
2
|
+
* MemoryManager — monitors and limits heap usage during graph operations.
|
|
3
|
+
*
|
|
4
|
+
* Design notes:
|
|
5
|
+
* - No console.log/warn in production paths. All diagnostics are exposed
|
|
6
|
+
* via getMemoryStats() so callers can decide how to surface them.
|
|
7
|
+
* - The auto-GC timer is ref-unref'd so it doesn't keep the Node process alive.
|
|
8
|
+
* - dispose() must be called when the manager is no longer needed.
|
|
6
9
|
*/
|
|
7
10
|
|
|
8
|
-
// Memory thresholds in bytes
|
|
9
11
|
const MEMORY_THRESHOLDS = {
|
|
10
|
-
WARNING:
|
|
11
|
-
CRITICAL:
|
|
12
|
-
EMERGENCY: 400 * 1024 * 1024,
|
|
13
|
-
}
|
|
12
|
+
WARNING: 100 * 1024 * 1024, // 100 MB
|
|
13
|
+
CRITICAL: 200 * 1024 * 1024, // 200 MB
|
|
14
|
+
EMERGENCY: 400 * 1024 * 1024, // 400 MB
|
|
15
|
+
} as const
|
|
14
16
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
}
|
|
17
|
+
const DEFAULT_CONFIG = {
|
|
18
|
+
maxAge: 30 * 60 * 1000, // 30 minutes
|
|
19
|
+
maxNodes: 10_000,
|
|
20
|
+
gcInterval: 60 * 1000, // 1 minute
|
|
21
|
+
} as const
|
|
21
22
|
|
|
22
23
|
export interface MemoryStats {
|
|
23
|
-
heapUsed:
|
|
24
|
-
heapTotal:
|
|
25
|
-
external:
|
|
26
|
-
rss:
|
|
24
|
+
heapUsed: number
|
|
25
|
+
heapTotal: number
|
|
26
|
+
external: number
|
|
27
|
+
rss: number
|
|
27
28
|
percentage: number
|
|
28
29
|
status: 'normal' | 'warning' | 'critical' | 'emergency'
|
|
29
30
|
}
|
|
30
31
|
|
|
31
32
|
export interface MemoryManagerConfig {
|
|
32
|
-
maxAge?:
|
|
33
|
-
maxNodes?:
|
|
34
|
-
gcInterval?:
|
|
33
|
+
maxAge?: number
|
|
34
|
+
maxNodes?: number
|
|
35
|
+
gcInterval?: number
|
|
35
36
|
enableAutoGC?: boolean
|
|
36
37
|
}
|
|
37
38
|
|
|
38
|
-
/**
|
|
39
|
-
* Memory Manager for graph operations
|
|
40
|
-
*/
|
|
41
39
|
export class MemoryManager {
|
|
42
|
-
private
|
|
43
|
-
private
|
|
44
|
-
private
|
|
45
|
-
private
|
|
40
|
+
private readonly maxAge: number
|
|
41
|
+
private readonly maxNodes: number
|
|
42
|
+
private readonly gcInterval: number
|
|
43
|
+
private nodeCache = new Map<string, { data: unknown; timestamp: number }>()
|
|
44
|
+
private gcTimer?: ReturnType<typeof setInterval>
|
|
46
45
|
|
|
47
46
|
constructor(config: MemoryManagerConfig = {}) {
|
|
48
|
-
this.config
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
gcInterval: config.gcInterval ?? DEFAULT_CLEANUP_CONFIG.gcInterval,
|
|
52
|
-
enableAutoGC: config.enableAutoGC ?? true,
|
|
53
|
-
}
|
|
47
|
+
this.maxAge = config.maxAge ?? DEFAULT_CONFIG.maxAge
|
|
48
|
+
this.maxNodes = config.maxNodes ?? DEFAULT_CONFIG.maxNodes
|
|
49
|
+
this.gcInterval = config.gcInterval ?? DEFAULT_CONFIG.gcInterval
|
|
54
50
|
|
|
55
|
-
if (
|
|
56
|
-
this.startAutoGC()
|
|
57
|
-
}
|
|
51
|
+
if (config.enableAutoGC !== false) this.startAutoGC()
|
|
58
52
|
}
|
|
59
53
|
|
|
60
|
-
/**
|
|
61
|
-
* Get current memory statistics
|
|
62
|
-
*/
|
|
63
54
|
getMemoryStats(): MemoryStats {
|
|
64
|
-
const
|
|
65
|
-
const percentage = (
|
|
55
|
+
const u = process.memoryUsage()
|
|
56
|
+
const percentage = (u.heapUsed / u.heapTotal) * 100
|
|
66
57
|
|
|
67
58
|
let status: MemoryStats['status'] = 'normal'
|
|
68
|
-
if
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
status = 'critical'
|
|
72
|
-
} else if (usage.heapUsed > MEMORY_THRESHOLDS.WARNING) {
|
|
73
|
-
status = 'warning'
|
|
74
|
-
}
|
|
59
|
+
if (u.heapUsed > MEMORY_THRESHOLDS.EMERGENCY) status = 'emergency'
|
|
60
|
+
else if (u.heapUsed > MEMORY_THRESHOLDS.CRITICAL) status = 'critical'
|
|
61
|
+
else if (u.heapUsed > MEMORY_THRESHOLDS.WARNING) status = 'warning'
|
|
75
62
|
|
|
76
|
-
return {
|
|
77
|
-
heapUsed: usage.heapUsed,
|
|
78
|
-
heapTotal: usage.heapTotal,
|
|
79
|
-
external: usage.external,
|
|
80
|
-
rss: usage.rss,
|
|
81
|
-
percentage,
|
|
82
|
-
status,
|
|
83
|
-
}
|
|
63
|
+
return { heapUsed: u.heapUsed, heapTotal: u.heapTotal, external: u.external, rss: u.rss, percentage, status }
|
|
84
64
|
}
|
|
85
65
|
|
|
86
|
-
/**
|
|
87
|
-
* Check if memory usage is critical
|
|
88
|
-
*/
|
|
89
66
|
isMemoryCritical(): boolean {
|
|
90
|
-
const
|
|
91
|
-
return
|
|
67
|
+
const { status } = this.getMemoryStats()
|
|
68
|
+
return status === 'critical' || status === 'emergency'
|
|
92
69
|
}
|
|
93
70
|
|
|
94
|
-
/**
|
|
95
|
-
* Force garbage collection if available
|
|
96
|
-
*/
|
|
97
71
|
forceGC(): void {
|
|
98
|
-
if (global.gc)
|
|
99
|
-
global.gc()
|
|
100
|
-
this.lastGC = Date.now()
|
|
101
|
-
}
|
|
72
|
+
if (typeof global.gc === 'function') global.gc()
|
|
102
73
|
}
|
|
103
74
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
cacheNode(id: string, data: any): void {
|
|
108
|
-
// If we're at the node limit, remove oldest entries
|
|
109
|
-
if (this.nodeCache.size >= this.config.maxNodes) {
|
|
110
|
-
this.evictOldestNodes(Math.floor(this.config.maxNodes * 0.1)) // Remove 10%
|
|
75
|
+
cacheNode(id: string, data: unknown): void {
|
|
76
|
+
if (this.nodeCache.size >= this.maxNodes) {
|
|
77
|
+
this.evictOldest(Math.ceil(this.maxNodes * 0.1))
|
|
111
78
|
}
|
|
112
|
-
|
|
113
|
-
this.nodeCache.set(id, {
|
|
114
|
-
data,
|
|
115
|
-
timestamp: Date.now(),
|
|
116
|
-
})
|
|
79
|
+
this.nodeCache.set(id, { data, timestamp: Date.now() })
|
|
117
80
|
}
|
|
118
81
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
const cached = this.nodeCache.get(id)
|
|
124
|
-
if (!cached) return null
|
|
125
|
-
|
|
126
|
-
// Check if expired
|
|
127
|
-
if (Date.now() - cached.timestamp > this.config.maxAge) {
|
|
82
|
+
getCachedNode(id: string): unknown | null {
|
|
83
|
+
const entry = this.nodeCache.get(id)
|
|
84
|
+
if (!entry) return null
|
|
85
|
+
if (Date.now() - entry.timestamp > this.maxAge) {
|
|
128
86
|
this.nodeCache.delete(id)
|
|
129
87
|
return null
|
|
130
88
|
}
|
|
131
|
-
|
|
132
|
-
return cached.data
|
|
89
|
+
return entry.data
|
|
133
90
|
}
|
|
134
91
|
|
|
135
|
-
/**
|
|
136
|
-
* Clear node cache
|
|
137
|
-
*/
|
|
138
92
|
clearCache(): void {
|
|
139
93
|
this.nodeCache.clear()
|
|
140
94
|
}
|
|
141
95
|
|
|
142
|
-
/**
|
|
143
|
-
* Perform comprehensive memory cleanup
|
|
144
|
-
*/
|
|
145
96
|
cleanup(): void {
|
|
146
|
-
// Clear expired cache entries
|
|
147
97
|
const now = Date.now()
|
|
148
|
-
for (const [id,
|
|
149
|
-
if (now -
|
|
150
|
-
this.nodeCache.delete(id)
|
|
151
|
-
}
|
|
98
|
+
for (const [id, e] of this.nodeCache) {
|
|
99
|
+
if (now - e.timestamp > this.maxAge) this.nodeCache.delete(id)
|
|
152
100
|
}
|
|
153
|
-
|
|
154
|
-
// Force garbage collection
|
|
155
101
|
this.forceGC()
|
|
156
102
|
}
|
|
157
103
|
|
|
158
|
-
/**
|
|
159
|
-
* Evict oldest nodes from cache
|
|
160
|
-
*/
|
|
161
|
-
private evictOldestNodes(count: number): void {
|
|
162
|
-
const entries = Array.from(this.nodeCache.entries())
|
|
163
|
-
.sort((a, b) => a[1].timestamp - b[1].timestamp)
|
|
164
|
-
|
|
165
|
-
for (let i = 0; i < Math.min(count, entries.length); i++) {
|
|
166
|
-
this.nodeCache.delete(entries[i][0])
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
/**
|
|
171
|
-
* Start automatic garbage collection
|
|
172
|
-
*/
|
|
173
|
-
private startAutoGC(): void {
|
|
174
|
-
this.gcTimer = setInterval(() => {
|
|
175
|
-
const stats = this.getMemoryStats()
|
|
176
|
-
|
|
177
|
-
// If memory usage is high, perform cleanup
|
|
178
|
-
if (stats.status !== 'normal') {
|
|
179
|
-
this.cleanup()
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
// Periodic cleanup regardless of memory pressure
|
|
183
|
-
if (Date.now() - this.lastGC > this.config.gcInterval) {
|
|
184
|
-
this.cleanup()
|
|
185
|
-
}
|
|
186
|
-
}, this.config.gcInterval)
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
/**
|
|
190
|
-
* Stop automatic garbage collection
|
|
191
|
-
*/
|
|
192
104
|
stopAutoGC(): void {
|
|
193
|
-
if (this.gcTimer) {
|
|
194
|
-
clearInterval(this.gcTimer)
|
|
195
|
-
this.gcTimer = undefined
|
|
196
|
-
}
|
|
105
|
+
if (this.gcTimer) { clearInterval(this.gcTimer); this.gcTimer = undefined }
|
|
197
106
|
}
|
|
198
107
|
|
|
199
|
-
/**
|
|
200
|
-
* Dispose of memory manager
|
|
201
|
-
*/
|
|
202
108
|
dispose(): void {
|
|
203
109
|
this.stopAutoGC()
|
|
204
110
|
this.clearCache()
|
|
205
111
|
this.forceGC()
|
|
206
112
|
}
|
|
113
|
+
|
|
114
|
+
private evictOldest(count: number): void {
|
|
115
|
+
const sorted = [...this.nodeCache.entries()].sort((a, b) => a[1].timestamp - b[1].timestamp)
|
|
116
|
+
for (let i = 0; i < Math.min(count, sorted.length); i++) {
|
|
117
|
+
this.nodeCache.delete(sorted[i][0])
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
private startAutoGC(): void {
|
|
122
|
+
this.gcTimer = setInterval(() => {
|
|
123
|
+
if (this.isMemoryCritical()) this.cleanup()
|
|
124
|
+
}, this.gcInterval)
|
|
125
|
+
|
|
126
|
+
// Don't keep the Node process alive just for GC checks
|
|
127
|
+
if (this.gcTimer.unref) this.gcTimer.unref()
|
|
128
|
+
}
|
|
207
129
|
}
|
|
208
130
|
|
|
209
131
|
/**
|
|
210
|
-
*
|
|
132
|
+
* MemoryAwareGraphBuilder — builds a graph from a lock with memory monitoring.
|
|
133
|
+
* Builds purely from the in-memory lock; does NOT re-parse source files.
|
|
211
134
|
*/
|
|
212
135
|
export class MemoryAwareGraphBuilder {
|
|
213
136
|
private memoryManager: MemoryManager
|
|
@@ -216,130 +139,48 @@ export class MemoryAwareGraphBuilder {
|
|
|
216
139
|
this.memoryManager = new MemoryManager(config)
|
|
217
140
|
}
|
|
218
141
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
const stats = this.memoryManager.getMemoryStats()
|
|
224
|
-
|
|
225
|
-
// Check memory before starting
|
|
226
|
-
if (this.memoryManager.isMemoryCritical()) {
|
|
227
|
-
console.warn('Memory usage is critical, performing cleanup before graph build')
|
|
228
|
-
this.memoryManager.cleanup()
|
|
229
|
-
}
|
|
142
|
+
buildGraph(lock: {
|
|
143
|
+
functions?: Record<string, { name: string; file: string; moduleId: string; isExported?: boolean; isAsync?: boolean; calls?: string[] }>
|
|
144
|
+
}) {
|
|
145
|
+
if (this.memoryManager.isMemoryCritical()) this.memoryManager.cleanup()
|
|
230
146
|
|
|
231
147
|
try {
|
|
232
|
-
|
|
233
|
-
return this.buildGraphInternal(lock)
|
|
148
|
+
return this.buildInternal(lock)
|
|
234
149
|
} finally {
|
|
235
|
-
// Cleanup after build
|
|
236
150
|
this.memoryManager.cleanup()
|
|
237
151
|
}
|
|
238
152
|
}
|
|
239
153
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
*/
|
|
243
|
-
private buildGraphInternal(lock: any): any {
|
|
244
|
-
const nodes = new Map<string, any>()
|
|
245
|
-
const edges: any[] = []
|
|
246
|
-
const outEdges = new Map<string, any[]>()
|
|
247
|
-
const inEdges = new Map<string, any[]>()
|
|
248
|
-
|
|
249
|
-
// Process functions with memory monitoring
|
|
250
|
-
for (const [id, fn] of Object.entries(lock.functions || {})) {
|
|
251
|
-
// Check memory periodically
|
|
252
|
-
if (nodes.size % 1000 === 0) {
|
|
253
|
-
if (this.memoryManager.isMemoryCritical()) {
|
|
254
|
-
console.warn('Memory pressure detected during graph build, forcing cleanup')
|
|
255
|
-
this.memoryManager.cleanup()
|
|
256
|
-
}
|
|
257
|
-
}
|
|
154
|
+
getMemoryStats(): MemoryStats { return this.memoryManager.getMemoryStats() }
|
|
155
|
+
dispose(): void { this.memoryManager.dispose() }
|
|
258
156
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
isExported: (fn as any).isExported,
|
|
267
|
-
isAsync: (fn as any).isAsync,
|
|
268
|
-
},
|
|
269
|
-
}
|
|
157
|
+
private buildInternal(lock: {
|
|
158
|
+
functions?: Record<string, { name: string; file: string; moduleId: string; isExported?: boolean; isAsync?: boolean; calls?: string[] }>
|
|
159
|
+
}) {
|
|
160
|
+
const nodes = new Map<string, unknown>()
|
|
161
|
+
const edges: unknown[] = []
|
|
162
|
+
const outEdges = new Map<string, unknown[]>()
|
|
163
|
+
const inEdges = new Map<string, unknown[]>()
|
|
270
164
|
|
|
271
|
-
|
|
165
|
+
for (const [id, fn] of Object.entries(lock.functions ?? {})) {
|
|
166
|
+
nodes.set(id, {
|
|
167
|
+
id, name: fn.name, file: fn.file, type: 'function', moduleId: fn.moduleId,
|
|
168
|
+
metadata: { isExported: fn.isExported, isAsync: fn.isAsync },
|
|
169
|
+
})
|
|
272
170
|
outEdges.set(id, [])
|
|
273
171
|
inEdges.set(id, [])
|
|
274
172
|
}
|
|
275
173
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
to: targetId,
|
|
284
|
-
type: 'calls',
|
|
285
|
-
}
|
|
286
|
-
edges.push(edge)
|
|
287
|
-
outEdges.get(id)?.push(edge)
|
|
288
|
-
inEdges.get(targetId)?.push(edge)
|
|
289
|
-
}
|
|
174
|
+
for (const [id, fn] of Object.entries(lock.functions ?? {})) {
|
|
175
|
+
for (const targetId of fn.calls ?? []) {
|
|
176
|
+
if (!nodes.has(targetId)) continue
|
|
177
|
+
const edge = { from: id, to: targetId, type: 'calls', confidence: 1.0 }
|
|
178
|
+
edges.push(edge)
|
|
179
|
+
outEdges.get(id)!.push(edge)
|
|
180
|
+
inEdges.get(targetId)!.push(edge)
|
|
290
181
|
}
|
|
291
182
|
}
|
|
292
183
|
|
|
293
|
-
return {
|
|
294
|
-
nodes,
|
|
295
|
-
edges,
|
|
296
|
-
outEdges,
|
|
297
|
-
inEdges,
|
|
298
|
-
}
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
/**
|
|
302
|
-
* Get memory statistics
|
|
303
|
-
*/
|
|
304
|
-
getMemoryStats(): MemoryStats {
|
|
305
|
-
return this.memoryManager.getMemoryStats()
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
/**
|
|
309
|
-
* Dispose of the graph builder
|
|
310
|
-
*/
|
|
311
|
-
dispose(): void {
|
|
312
|
-
this.memoryManager.dispose()
|
|
313
|
-
}
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
/**
|
|
317
|
-
* Utility function to monitor memory usage during operations
|
|
318
|
-
*/
|
|
319
|
-
export function withMemoryMonitoring<T>(
|
|
320
|
-
operation: () => T,
|
|
321
|
-
memoryManager?: MemoryManager
|
|
322
|
-
): T {
|
|
323
|
-
const manager = memoryManager || new MemoryManager({ enableAutoGC: false })
|
|
324
|
-
|
|
325
|
-
const initialStats = manager.getMemoryStats()
|
|
326
|
-
console.log(`Memory before operation: ${(initialStats.heapUsed / 1024 / 1024).toFixed(1)}MB`)
|
|
327
|
-
|
|
328
|
-
try {
|
|
329
|
-
const result = operation()
|
|
330
|
-
|
|
331
|
-
const finalStats = manager.getMemoryStats()
|
|
332
|
-
const delta = finalStats.heapUsed - initialStats.heapUsed
|
|
333
|
-
console.log(`Memory after operation: ${(finalStats.heapUsed / 1024 / 1024).toFixed(1)}MB (${delta >= 0 ? '+' : ''}${(delta / 1024 / 1024).toFixed(1)}MB)`)
|
|
334
|
-
|
|
335
|
-
if (finalStats.status !== 'normal') {
|
|
336
|
-
console.warn(`Memory status: ${finalStats.status}`)
|
|
337
|
-
}
|
|
338
|
-
|
|
339
|
-
return result
|
|
340
|
-
} finally {
|
|
341
|
-
if (!memoryManager) {
|
|
342
|
-
manager.dispose()
|
|
343
|
-
}
|
|
184
|
+
return { nodes, edges, outEdges, inEdges }
|
|
344
185
|
}
|
|
345
186
|
}
|
|
@@ -1,79 +1,76 @@
|
|
|
1
1
|
import type { DependencyGraph } from './types.js'
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* QueryEngine — high-performance graph traversal and path-finding.
|
|
5
|
+
*
|
|
6
|
+
* All BFS loops use an index pointer instead of Array.shift() to avoid
|
|
7
|
+
* the O(n) cost of shifting the underlying array on each dequeue.
|
|
7
8
|
*/
|
|
8
9
|
export class QueryEngine {
|
|
9
10
|
constructor(private graph: DependencyGraph) {}
|
|
10
11
|
|
|
11
|
-
/** Find all direct dependents (who calls
|
|
12
|
-
|
|
13
|
-
return (this.graph.inEdges.get(nodeId)
|
|
12
|
+
/** Find all direct dependents (who calls this node?) */
|
|
13
|
+
getDependents(nodeId: string): string[] {
|
|
14
|
+
return (this.graph.inEdges.get(nodeId) ?? [])
|
|
14
15
|
.filter(e => e.type !== 'contains')
|
|
15
|
-
.map(e => e.from)
|
|
16
|
+
.map(e => e.from)
|
|
16
17
|
}
|
|
17
18
|
|
|
18
|
-
/** Find all direct dependencies (
|
|
19
|
-
|
|
20
|
-
return (this.graph.outEdges.get(nodeId)
|
|
19
|
+
/** Find all direct dependencies (what does this node call?) */
|
|
20
|
+
getDependencies(nodeId: string): string[] {
|
|
21
|
+
return (this.graph.outEdges.get(nodeId) ?? [])
|
|
21
22
|
.filter(e => e.type !== 'contains')
|
|
22
|
-
.map(e => e.to)
|
|
23
|
+
.map(e => e.to)
|
|
23
24
|
}
|
|
24
25
|
|
|
25
|
-
/**
|
|
26
|
+
/**
|
|
26
27
|
* Find the shortest path between two nodes using BFS.
|
|
27
|
-
* Returns an array of node IDs or null if no path exists.
|
|
28
|
+
* Returns an ordered array of node IDs, or null if no path exists.
|
|
28
29
|
*/
|
|
29
|
-
|
|
30
|
-
if (!this.graph.nodes.has(start) || !this.graph.nodes.has(end)) return null
|
|
31
|
-
if (start === end) return [start]
|
|
30
|
+
findPath(start: string, end: string): string[] | null {
|
|
31
|
+
if (!this.graph.nodes.has(start) || !this.graph.nodes.has(end)) return null
|
|
32
|
+
if (start === end) return [start]
|
|
32
33
|
|
|
33
|
-
const
|
|
34
|
-
|
|
34
|
+
const visited = new Set<string>([start])
|
|
35
|
+
// Each entry: [nodeId, pathSoFar]
|
|
36
|
+
const queue: Array<[string, string[]]> = [[start, [start]]]
|
|
37
|
+
let head = 0
|
|
35
38
|
|
|
36
|
-
while (queue.length
|
|
37
|
-
const
|
|
38
|
-
|
|
39
|
-
const outwardEdges = this.graph.outEdges.get(id) || [];
|
|
40
|
-
for (const edge of outwardEdges) {
|
|
41
|
-
if (edge.type === 'contains') continue;
|
|
42
|
-
|
|
43
|
-
if (edge.to === end) {
|
|
44
|
-
return [...path, end];
|
|
45
|
-
}
|
|
39
|
+
while (head < queue.length) {
|
|
40
|
+
const [id, path] = queue[head++]
|
|
46
41
|
|
|
42
|
+
for (const edge of this.graph.outEdges.get(id) ?? []) {
|
|
43
|
+
if (edge.type === 'contains') continue
|
|
44
|
+
if (edge.to === end) return [...path, end]
|
|
47
45
|
if (!visited.has(edge.to)) {
|
|
48
|
-
visited.add(edge.to)
|
|
49
|
-
queue.push(
|
|
46
|
+
visited.add(edge.to)
|
|
47
|
+
queue.push([edge.to, [...path, edge.to]])
|
|
50
48
|
}
|
|
51
49
|
}
|
|
52
50
|
}
|
|
53
51
|
|
|
54
|
-
return null
|
|
52
|
+
return null
|
|
55
53
|
}
|
|
56
54
|
|
|
57
|
-
/**
|
|
58
|
-
* Get the full downstream
|
|
59
|
-
*
|
|
55
|
+
/**
|
|
56
|
+
* Get the full downstream (transitive dependents) of a node.
|
|
57
|
+
* Answers "What would break if I change X?"
|
|
60
58
|
*/
|
|
61
|
-
|
|
62
|
-
const visited = new Set<string>()
|
|
63
|
-
const queue: string[] = [nodeId]
|
|
59
|
+
getDownstreamImpact(nodeId: string): string[] {
|
|
60
|
+
const visited = new Set<string>()
|
|
61
|
+
const queue: string[] = [nodeId]
|
|
62
|
+
let head = 0
|
|
64
63
|
|
|
65
|
-
while (queue.length
|
|
66
|
-
const current = queue
|
|
67
|
-
const
|
|
68
|
-
|
|
69
|
-
for (const dep of dependents) {
|
|
64
|
+
while (head < queue.length) {
|
|
65
|
+
const current = queue[head++]
|
|
66
|
+
for (const dep of this.getDependents(current)) {
|
|
70
67
|
if (!visited.has(dep) && dep !== nodeId) {
|
|
71
|
-
visited.add(dep)
|
|
72
|
-
queue.push(dep)
|
|
68
|
+
visited.add(dep)
|
|
69
|
+
queue.push(dep)
|
|
73
70
|
}
|
|
74
71
|
}
|
|
75
72
|
}
|
|
76
73
|
|
|
77
|
-
return
|
|
74
|
+
return [...visited]
|
|
78
75
|
}
|
|
79
76
|
}
|
package/src/index.ts
CHANGED
|
@@ -8,6 +8,8 @@ export * from './hash/index.js'
|
|
|
8
8
|
export * from './search/index.js'
|
|
9
9
|
export * from './utils/errors.js'
|
|
10
10
|
export * from './utils/logger.js'
|
|
11
|
+
export { MikkError, ErrorHandler, ErrorBuilder, ErrorCategory, FileSystemError, ModuleLoadError, GraphError, TokenBudgetError, ValidationError, createDefaultErrorListener, createFileNotFoundError, createFileTooLargeError, createPermissionDeniedError, createModuleNotFoundError, createModuleLoadFailedError, createGraphBuildFailedError, createNodeNotFoundError, createTokenBudgetExceededError, createValidationError, isMikkError, getRootCause, toMikkError } from './error-handler.js'
|
|
12
|
+
export type { } from './error-handler.js'
|
|
11
13
|
export { discoverFiles, discoverContextFiles, readFileContent, writeFileContent, fileExists, setupMikkDirectory, readMikkIgnore, parseMikkIgnore, detectProjectLanguage, getDiscoveryPatterns, generateMikkIgnore, updateGitIgnore, cleanupGitIgnore } from './utils/fs.js'
|
|
12
14
|
export type { ContextFile, ContextFileType, ProjectLanguage } from './utils/fs.js'
|
|
13
15
|
export { minimatch } from './utils/minimatch.js'
|
package/src/parser/index.ts
CHANGED
|
@@ -100,7 +100,9 @@ export async function parseFiles(
|
|
|
100
100
|
const goFiles: ParsedFile[] = []
|
|
101
101
|
const treeFiles: ParsedFile[] = []
|
|
102
102
|
|
|
103
|
-
|
|
103
|
+
// Parse sequentially to avoid races in parser implementations that keep
|
|
104
|
+
// mutable per-instance state (e.g. language switching/counters).
|
|
105
|
+
for (const fp of filePaths) {
|
|
104
106
|
const ext = nodePath.extname(fp).toLowerCase()
|
|
105
107
|
|
|
106
108
|
// Build absolute posix path — this is the single source of truth for all IDs
|
|
@@ -111,7 +113,7 @@ export async function parseFiles(
|
|
|
111
113
|
content = await readFile(absoluteFp)
|
|
112
114
|
} catch {
|
|
113
115
|
// File unreadable — skip silently (deleted, permission error, binary)
|
|
114
|
-
|
|
116
|
+
continue
|
|
115
117
|
}
|
|
116
118
|
|
|
117
119
|
try {
|
|
@@ -129,9 +131,7 @@ export async function parseFiles(
|
|
|
129
131
|
} catch {
|
|
130
132
|
// Parser error — skip this file, don't abort the whole run
|
|
131
133
|
}
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
await Promise.all(parsePromises)
|
|
134
|
+
}
|
|
135
135
|
|
|
136
136
|
// Resolve imports batch-wise per parser (each has its own resolver)
|
|
137
137
|
let resolvedTreeFiles: ParsedFile[] = treeFiles
|
package/src/parser/oxc-parser.ts
CHANGED
|
@@ -12,7 +12,8 @@ import type {
|
|
|
12
12
|
ParsedExport,
|
|
13
13
|
ParsedParam,
|
|
14
14
|
CallExpression,
|
|
15
|
-
ParsedGeneric
|
|
15
|
+
ParsedGeneric,
|
|
16
|
+
ParsedRoute
|
|
16
17
|
} from './types.js';
|
|
17
18
|
|
|
18
19
|
// ---------------------------------------------------------------------------
|
|
@@ -303,6 +304,7 @@ export class OxcParser extends BaseParser {
|
|
|
303
304
|
const imports: ParsedImport[] = [];
|
|
304
305
|
const exports: ParsedExport[] = [];
|
|
305
306
|
const moduleCalls: CallExpression[] = [];
|
|
307
|
+
const routes: ParsedRoute[] = [];
|
|
306
308
|
|
|
307
309
|
const visit = (node: any, parent: any = null): void => {
|
|
308
310
|
if (!node || typeof node !== 'object') return;
|
|
@@ -608,8 +610,39 @@ export class OxcParser extends BaseParser {
|
|
|
608
610
|
// ── Module-level call expressions ─────────────────────────
|
|
609
611
|
case 'ExpressionStatement': {
|
|
610
612
|
if (node.expression?.type === 'CallExpression') {
|
|
611
|
-
const
|
|
613
|
+
const callExpr = node.expression;
|
|
614
|
+
const calls = extractCalls(callExpr, lineIndex);
|
|
612
615
|
moduleCalls.push(...calls);
|
|
616
|
+
|
|
617
|
+
// Route detection
|
|
618
|
+
const callee = callExpr.callee;
|
|
619
|
+
if (callee && (callee.type === 'StaticMemberExpression' || callee.type === 'MemberExpression')) {
|
|
620
|
+
const objName = resolveObjectName(callee.object);
|
|
621
|
+
const propName = resolvePropertyName(callee.property);
|
|
622
|
+
if (objName && propName && /^(router|app|express|.*[Rr]outer.*)$/i.test(objName) && /^(get|post|put|delete|patch|all)$/i.test(propName)) {
|
|
623
|
+
const args = callExpr.arguments || [];
|
|
624
|
+
const pathArg = args[0];
|
|
625
|
+
if (pathArg && (pathArg.type === 'StringLiteral' || pathArg.type === 'Literal' || pathArg.type === 'TemplateLiteral')) {
|
|
626
|
+
const pathVal = pathArg.value || (pathArg.quasis && pathArg.quasis[0]?.value?.raw) || '';
|
|
627
|
+
|
|
628
|
+
const handlerArg = args[args.length - 1];
|
|
629
|
+
const handlerStr = handlerArg ? content.slice(getSpan(handlerArg).start, getSpan(handlerArg).end).replace(/\s+/g, ' ').trim() : 'unknown';
|
|
630
|
+
|
|
631
|
+
const middlewares = args.slice(1, -1).map((a: any) =>
|
|
632
|
+
content.slice(getSpan(a).start, getSpan(a).end).replace(/\s+/g, ' ').trim()
|
|
633
|
+
);
|
|
634
|
+
|
|
635
|
+
routes.push({
|
|
636
|
+
method: propName.toUpperCase() as any,
|
|
637
|
+
path: String(pathVal),
|
|
638
|
+
handler: handlerStr.length > 80 ? handlerStr.slice(0, 80) + '...' : handlerStr,
|
|
639
|
+
middlewares,
|
|
640
|
+
file: normalizedFilePath,
|
|
641
|
+
line: lineIndex.getLine(getSpan(callExpr).start),
|
|
642
|
+
});
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
}
|
|
613
646
|
}
|
|
614
647
|
break;
|
|
615
648
|
}
|
|
@@ -640,7 +673,7 @@ export class OxcParser extends BaseParser {
|
|
|
640
673
|
generics,
|
|
641
674
|
imports,
|
|
642
675
|
exports,
|
|
643
|
-
routes
|
|
676
|
+
routes,
|
|
644
677
|
calls: moduleCalls,
|
|
645
678
|
hash: hashContent(content),
|
|
646
679
|
parsedAt: Date.now(),
|
package/src/search/bm25.ts
CHANGED
|
@@ -95,7 +95,14 @@ export class BM25Index {
|
|
|
95
95
|
|
|
96
96
|
// BM25 score component
|
|
97
97
|
const tfNorm = (tf * (K1 + 1)) / (tf + K1 * (1 - B + B * (doc.length / this.avgDocLength)))
|
|
98
|
-
|
|
98
|
+
let termScore = idf * tfNorm
|
|
99
|
+
|
|
100
|
+
// Bonus for direct name match in the ID
|
|
101
|
+
if (doc.id.toLowerCase().includes(term.toLowerCase())) {
|
|
102
|
+
termScore += 0.5
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
score += termScore
|
|
99
106
|
}
|
|
100
107
|
|
|
101
108
|
if (score > 0) {
|
|
@@ -181,7 +188,9 @@ export function buildFunctionTokens(fn: {
|
|
|
181
188
|
|
|
182
189
|
// Function name tokens (highest signal)
|
|
183
190
|
parts.push(...tokenize(fn.name))
|
|
184
|
-
parts.push(...tokenize(fn.name))
|
|
191
|
+
parts.push(...tokenize(fn.name))
|
|
192
|
+
parts.push(...tokenize(fn.name)) // Triple-weight the name
|
|
193
|
+
parts.push(`name_exact:${fn.name.toLowerCase()}`)
|
|
185
194
|
|
|
186
195
|
// File path tokens
|
|
187
196
|
const filename = fn.file.split('/').pop() ?? fn.file
|