@getmikk/core 2.0.13 → 2.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/package.json +2 -1
- package/src/analysis/index.ts +9 -0
- package/src/analysis/taint-analysis.ts +419 -0
- package/src/analysis/type-flow.ts +247 -0
- package/src/cache/incremental-cache.ts +278 -0
- package/src/cache/index.ts +1 -0
- package/src/contract/contract-generator.ts +31 -3
- package/src/contract/contract-reader.ts +1 -0
- package/src/contract/lock-compiler.ts +125 -12
- package/src/contract/schema.ts +4 -0
- package/src/error-handler.ts +2 -1
- package/src/graph/cluster-detector.ts +2 -4
- package/src/graph/dead-code-detector.ts +303 -117
- package/src/graph/graph-builder.ts +21 -161
- package/src/graph/impact-analyzer.ts +1 -0
- package/src/graph/index.ts +2 -0
- package/src/graph/rich-function-index.ts +1080 -0
- package/src/graph/symbol-table.ts +252 -0
- package/src/hash/hash-store.ts +1 -0
- package/src/index.ts +4 -0
- package/src/parser/base-extractor.ts +19 -0
- package/src/parser/boundary-checker.ts +31 -12
- package/src/parser/error-recovery.ts +647 -0
- package/src/parser/function-body-extractor.ts +248 -0
- package/src/parser/go/go-extractor.ts +249 -676
- package/src/parser/index.ts +138 -295
- package/src/parser/language-registry.ts +57 -0
- package/src/parser/oxc-parser.ts +166 -28
- package/src/parser/oxc-resolver.ts +179 -11
- package/src/parser/parser-constants.ts +1 -0
- package/src/parser/rust/rust-extractor.ts +109 -0
- package/src/parser/tree-sitter/parser.ts +400 -66
- package/src/parser/tree-sitter/queries.ts +106 -10
- package/src/parser/types.ts +20 -1
- package/src/search/bm25.ts +21 -8
- package/src/search/direct-search.ts +472 -0
- package/src/search/embedding-provider.ts +249 -0
- package/src/search/index.ts +12 -0
- package/src/search/semantic-search.ts +435 -0
- package/src/security/index.ts +1 -0
- package/src/security/scanner.ts +342 -0
- package/src/utils/artifact-transaction.ts +1 -0
- package/src/utils/atomic-write.ts +1 -0
- package/src/utils/errors.ts +89 -4
- package/src/utils/fs.ts +150 -65
- package/src/utils/json.ts +1 -0
- package/src/utils/language-registry.ts +96 -5
- package/src/utils/minimatch.ts +49 -6
- package/src/utils/path.ts +26 -0
- package/tests/dead-code.test.ts +3 -2
- package/tests/direct-search.test.ts +435 -0
- package/tests/error-recovery.test.ts +143 -0
- package/tests/fixtures/simple-api/src/index.ts +1 -1
- package/tests/go-parser.test.ts +19 -335
- package/tests/js-parser.test.ts +18 -1089
- package/tests/language-registry-all.test.ts +276 -0
- package/tests/language-registry.test.ts +6 -4
- package/tests/parse-diagnostics.test.ts +9 -96
- package/tests/parser.test.ts +42 -771
- package/tests/polyglot-parser.test.ts +117 -0
- package/tests/rich-function-index.test.ts +703 -0
- package/tests/tree-sitter-parser.test.ts +108 -80
- package/tests/ts-parser.test.ts +8 -8
- package/tests/verification.test.ts +175 -0
- package/src/parser/base-parser.ts +0 -16
- package/src/parser/go/go-parser.ts +0 -43
- package/src/parser/javascript/js-extractor.ts +0 -278
- package/src/parser/javascript/js-parser.ts +0 -101
- package/src/parser/typescript/ts-extractor.ts +0 -447
- package/src/parser/typescript/ts-parser.ts +0 -36
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
import * as fs from 'node:fs/promises'
|
|
2
|
+
import * as path from 'node:path'
|
|
3
|
+
import type { ParsedFile } from '../parser/types.js'
|
|
4
|
+
|
|
5
|
+
/**
 * Bookkeeping record kept in the cache metadata for one source file.
 */
interface CacheEntry {
    /** Content hash the cached result was produced from; also names the cache file (`<hash>.json`). */
    hash: string
    /** ISO-8601 timestamp of when the entry was stored; compared against the TTL. */
    parsedAt: string
    /** Length of the JSON-serialised parse result, in UTF-16 code units. */
    size: number
    /** Epoch milliseconds of the most recent store or successful read. */
    lastAccessed: number
}
|
|
11
|
+
|
|
12
|
+
/**
 * In-memory form of the cache index that is persisted as `metadata.json`.
 */
interface CacheMetadata {
    /** Format version written alongside the entries. */
    version: number
    /** Source file path -> bookkeeping record for its cached parse result. */
    entries: Map<string, CacheEntry>
    /** Epoch milliseconds of the last prune (initialised to construction time). */
    lastPruned: number
}
|
|
17
|
+
|
|
18
|
+
/** Version number stamped into the persisted cache metadata. */
const CACHE_VERSION = 1
/** Entry count at which storing a result triggers an eviction pass. */
const MAX_CACHE_SIZE = 5000
/** Maximum age of a cached result: 24 hours, in milliseconds. */
const CACHE_TTL_MS = 1000 * 60 * 60 * 24
|
|
21
|
+
|
|
22
|
+
/**
 * Disk-backed cache of parse results.
 *
 * The index (source path -> {@link CacheEntry}) is held in memory and persisted
 * to `<projectRoot>/.mikk/cache/metadata.json` by `flush()`, `clear()` and
 * `prune()`. Each parse result is stored next to it as `<contentHash>.json`.
 *
 * Because result files are named by content hash, several paths with identical
 * content share one file; a file is only deleted once no entry references it.
 *
 * All public async operations run one at a time through an internal FIFO queue.
 * Disk write failures are swallowed: the cache is treated as non-critical.
 */
export class IncrementalCache {
    private cacheDir: string
    private metadata: CacheMetadata
    private hits = 0
    private misses = 0
    private queue: Array<() => Promise<void>> = []
    private running = false
    private initialized = false
    private pendingInit: Promise<void> | null = null

    /**
     * @param projectRoot Directory under which `.mikk/cache` is created.
     */
    constructor(projectRoot: string) {
        this.cacheDir = path.join(projectRoot, '.mikk', 'cache')
        this.metadata = {
            version: CACHE_VERSION,
            entries: new Map(),
            lastPruned: Date.now(),
        }
        // Start loading persisted metadata right away. loadMetadata() handles its
        // own errors, so this promise never rejects; queued operations await it.
        this.pendingInit = this.loadMetadata()
    }

    /** Wait until the persisted metadata has been loaded (or found unusable). */
    private async ensureInitialized(): Promise<void> {
        if (this.initialized) return
        if (this.pendingInit) {
            await this.pendingInit
        }
    }

    /**
     * Run `fn` after every previously queued operation has finished.
     * The returned promise settles with `fn`'s result or error.
     */
    private withMutex<T>(fn: () => Promise<T>): Promise<T> {
        return new Promise<T>((resolve, reject) => {
            this.queue.push(async () => {
                try {
                    await this.ensureInitialized()
                    resolve(await fn())
                } catch (err) {
                    reject(err)
                }
            })
            if (!this.running) {
                void this.processQueue()
            }
        })
    }

    /** Drain the queue sequentially; `running` is reset even if a job throws. */
    private async processQueue(): Promise<void> {
        this.running = true
        try {
            let job = this.queue.shift()
            while (job) {
                await job()
                job = this.queue.shift()
            }
        } finally {
            this.running = false
        }
    }

    /** Path of the file that stores the parse result for `hash`. */
    private getCacheFilePath(hash: string): string {
        return path.join(this.cacheDir, `${hash}.json`)
    }

    /**
     * Load `metadata.json`. A missing, unreadable or malformed file, or one
     * written with a different `version`, leaves the cache empty.
     * Never rejects.
     */
    private async loadMetadata(): Promise<void> {
        const metaPath = path.join(this.cacheDir, 'metadata.json')
        try {
            const raw = await fs.readFile(metaPath, 'utf-8')
            const data = JSON.parse(raw) as {
                version?: number
                lastPruned?: number
                entries?: Record<string, CacheEntry>
            } | null

            // Entries written under another format version cannot be trusted,
            // so only adopt them when the versions agree.
            if (data !== null && typeof data === 'object' && (data.version ?? CACHE_VERSION) === CACHE_VERSION) {
                this.metadata.lastPruned = data.lastPruned ?? Date.now()
                const restored = new Map<string, CacheEntry>()
                for (const [filePath, entry] of Object.entries(data.entries ?? {})) {
                    // Skip records that lack the fields every lookup relies on.
                    if (entry && typeof entry.hash === 'string' && typeof entry.parsedAt === 'string') {
                        restored.set(filePath, entry)
                    }
                }
                this.metadata.entries = restored
            }
        } catch {
            this.metadata.entries = new Map()
        } finally {
            this.initialized = true
        }
    }

    /** Write the index to `metadata.json`; failures are ignored. */
    private async saveMetadata(): Promise<void> {
        try {
            await fs.mkdir(this.cacheDir, { recursive: true })
            const metaPath = path.join(this.cacheDir, 'metadata.json')
            const serializable = {
                version: this.metadata.version,
                lastPruned: this.metadata.lastPruned,
                entries: Object.fromEntries(this.metadata.entries),
            }
            await fs.writeFile(metaPath, JSON.stringify(serializable), 'utf-8')
        } catch {
            // Silently fail — cache is non-critical
        }
    }

    /**
     * Remove the given paths from the index and delete every result file that
     * is no longer referenced by any remaining entry.
     */
    private async dropEntries(filePaths: Iterable<string>): Promise<void> {
        const orphaned = new Set<string>()
        for (const filePath of filePaths) {
            const entry = this.metadata.entries.get(filePath)
            if (!entry) continue
            this.metadata.entries.delete(filePath)
            orphaned.add(entry.hash)
        }
        if (orphaned.size === 0) return

        // A hash still used by a surviving entry must keep its file.
        for (const survivor of this.metadata.entries.values()) {
            orphaned.delete(survivor.hash)
        }
        for (const hash of orphaned) {
            try {
                await fs.unlink(this.getCacheFilePath(hash))
            } catch { /* ignore */ }
        }
    }

    /** True when `parsedAt` is unparseable or older than the TTL relative to `now`. */
    private isExpired(entry: CacheEntry, now: number): boolean {
        const parsedAt = new Date(entry.parsedAt).getTime()
        return Number.isNaN(parsedAt) || now - parsedAt > CACHE_TTL_MS
    }

    /**
     * Get cached parse result if content hash matches.
     * Returns null if cache miss or stale.
     *
     * @param filePath Source file path used as the cache key.
     * @param contentHash Hash of the file's current content.
     */
    async get(filePath: string, contentHash: string): Promise<ParsedFile | null> {
        return this.withMutex(async () => {
            const entry = this.metadata.entries.get(filePath)
            if (!entry || entry.hash !== contentHash) {
                this.misses++
                return null
            }

            if (this.isExpired(entry, Date.now())) {
                await this.dropEntries([filePath])
                this.misses++
                return null
            }

            try {
                const raw = await fs.readFile(this.getCacheFilePath(contentHash), 'utf-8')
                const parsed = JSON.parse(raw) as ParsedFile
                this.hits++
                entry.lastAccessed = Date.now()
                return parsed
            } catch (err) {
                console.warn(`Corrupted cache entry for ${filePath}:`, err)
                await this.dropEntries([filePath])
                this.misses++
                return null
            }
        })
    }

    /**
     * Store parse result in cache.
     *
     * The index entry is only recorded once the result file has been written,
     * so a failed write never leaves an entry pointing at a missing file.
     */
    async set(filePath: string, contentHash: string, parsed: ParsedFile): Promise<void> {
        return this.withMutex(async () => {
            // Serialise once; reused for both the size figure and the file body.
            const serialized = JSON.stringify(parsed)
            const previous = this.metadata.entries.get(filePath)

            // Overwriting an existing key does not grow the cache.
            if (!previous && this.metadata.entries.size >= MAX_CACHE_SIZE) {
                await this.evictLRU()
            }

            try {
                await fs.mkdir(this.cacheDir, { recursive: true })
                await fs.writeFile(this.getCacheFilePath(contentHash), serialized, 'utf-8')
            } catch {
                // Silently fail — cache is non-critical
                return
            }

            if (previous && previous.hash !== contentHash) {
                // The path now maps to new content; release the old result file.
                await this.dropEntries([filePath])
            }

            const now = new Date()
            this.metadata.entries.set(filePath, {
                hash: contentHash,
                parsedAt: now.toISOString(),
                size: serialized.length,
                lastAccessed: now.getTime(),
            })
        })
    }

    /**
     * Invalidate cache for a specific file.
     */
    async invalidate(filePath: string): Promise<void> {
        return this.withMutex(async () => {
            await this.dropEntries([filePath])
        })
    }

    /**
     * Clear entire cache and persist the now-empty index.
     */
    async clear(): Promise<void> {
        return this.withMutex(async () => {
            await this.dropEntries([...this.metadata.entries.keys()])
            this.metadata.entries.clear()
            await this.saveMetadata()
        })
    }

    /**
     * Get cache statistics.
     *
     * @returns Hit/miss counters for this instance, the hit ratio (0 when no
     * lookups happened yet) and the number of indexed entries.
     */
    getStats(): { hits: number; misses: number; hitRate: number; size: number } {
        const total = this.hits + this.misses
        return {
            hits: this.hits,
            misses: this.misses,
            hitRate: total > 0 ? this.hits / total : 0,
            size: this.metadata.entries.size,
        }
    }

    /**
     * Persist cache metadata to disk.
     * Call this after batch operations.
     */
    async flush(): Promise<void> {
        return this.withMutex(async () => {
            await this.saveMetadata()
        })
    }

    /**
     * Evict the least recently used 20% of entries (rounded up).
     * Recency is `lastAccessed`; records without it fall back to `parsedAt`.
     */
    private async evictLRU(): Promise<void> {
        const lastUsed = (entry: CacheEntry): number => {
            if (typeof entry.lastAccessed === 'number') return entry.lastAccessed
            const parsedAt = new Date(entry.parsedAt).getTime()
            return Number.isNaN(parsedAt) ? 0 : parsedAt
        }
        const oldestFirst = [...this.metadata.entries.entries()].sort(
            (a, b) => lastUsed(a[1]) - lastUsed(b[1])
        )
        const toRemove = Math.ceil(oldestFirst.length * 0.2)
        await this.dropEntries(oldestFirst.slice(0, toRemove).map(([filePath]) => filePath))
    }

    /**
     * Prune expired entries from cache and persist the index.
     */
    async prune(): Promise<void> {
        return this.withMutex(async () => {
            const now = Date.now()
            const expired: string[] = []
            for (const [filePath, entry] of this.metadata.entries) {
                if (this.isExpired(entry, now)) {
                    expired.push(filePath)
                }
            }
            await this.dropEntries(expired)
            this.metadata.lastPruned = now
            await this.saveMetadata()
        })
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { IncrementalCache } from './incremental-cache.js'
|
|
@@ -1,6 +1,26 @@
|
|
|
1
1
|
import type { MikkContract } from './schema.js'
|
|
2
2
|
import type { ModuleCluster } from '../graph/types.js'
|
|
3
3
|
import type { ParsedFile } from '../parser/types.js'
|
|
4
|
+
import { minimatch } from '../utils/minimatch.js'
|
|
5
|
+
|
|
6
|
+
/**
 * Glob patterns for vendor / build-output directories that are excluded from
 * contract generation. Each directory name is wrapped as `**&#47;<dir>/**`.
 */
const VENDOR_PATTERNS = [
    'node_modules',
    'venv',
    '.venv',
    '__pycache__',
    'vendor',
    'dist',
    'build',
    '.next',
    'target',
].map(dir => `**/${dir}/**`)
|
|
18
|
+
|
|
19
|
+
/**
 * Report whether `filePath` matches any of the vendor directory patterns.
 * Backslashes are converted to forward slashes before matching.
 */
function isVendorPath(filePath: string): boolean {
    const posixPath = filePath.replace(/\\/g, '/')
    for (const pattern of VENDOR_PATTERNS) {
        if (minimatch(posixPath, pattern)) {
            return true
        }
    }
    return false
}
|
|
4
24
|
|
|
5
25
|
/** Common entry point filenames across ecosystems (without extensions) */
|
|
6
26
|
const ENTRY_BASENAMES = ['index', 'main', 'app', 'server', 'mod', 'lib', '__init__', 'manage', 'program', 'startup']
|
|
@@ -39,7 +59,13 @@ export class ContractGenerator {
|
|
|
39
59
|
projectName: string,
|
|
40
60
|
packageJsonDescription?: string
|
|
41
61
|
): MikkContract {
|
|
42
|
-
|
|
62
|
+
// Filter out vendor files from clusters
|
|
63
|
+
const filteredClusters = clusters.map(cluster => ({
|
|
64
|
+
...cluster,
|
|
65
|
+
files: cluster.files.filter(f => !isVendorPath(f)),
|
|
66
|
+
})).filter(cluster => cluster.files.length > 0)
|
|
67
|
+
|
|
68
|
+
const modules = filteredClusters.map(cluster => ({
|
|
43
69
|
id: cluster.id,
|
|
44
70
|
name: cluster.suggestedName,
|
|
45
71
|
description: this.inferModuleDescription(cluster, parsedFiles),
|
|
@@ -50,14 +76,16 @@ export class ContractGenerator {
|
|
|
50
76
|
|
|
51
77
|
// Detect entry points — language-agnostic basename matching
|
|
52
78
|
const entryPoints = parsedFiles
|
|
79
|
+
.filter(f => !isVendorPath(f.path))
|
|
53
80
|
.filter(f => {
|
|
54
81
|
const basename = (f.path.split('/').pop() || '').replace(/\.[^.]+$/, '')
|
|
55
82
|
return ENTRY_BASENAMES.includes(basename)
|
|
56
83
|
})
|
|
57
84
|
.map(f => f.path)
|
|
58
85
|
|
|
59
|
-
const
|
|
60
|
-
const
|
|
86
|
+
const filteredParsedFiles = parsedFiles.filter(f => !isVendorPath(f.path))
|
|
87
|
+
const detectedLanguage = inferLanguageFromFiles(filteredParsedFiles)
|
|
88
|
+
const fallbackEntry = filteredParsedFiles[0]?.path ?? 'src/index'
|
|
61
89
|
|
|
62
90
|
return {
|
|
63
91
|
version: '1.0.0',
|
|
@@ -123,6 +123,7 @@ function capitalise(s: string): string {
|
|
|
123
123
|
*/
|
|
124
124
|
export class LockCompiler {
|
|
125
125
|
private projectRootPath: string | null = null
|
|
126
|
+
|
|
126
127
|
/** Main entry -- compile full lock from graph + contract + parsed files */
|
|
127
128
|
compile(
|
|
128
129
|
graph: DependencyGraph,
|
|
@@ -132,6 +133,7 @@ export class LockCompiler {
|
|
|
132
133
|
projectRoot?: string
|
|
133
134
|
): MikkLock {
|
|
134
135
|
this.projectRootPath = projectRoot ? nodePath.resolve(projectRoot) : null
|
|
136
|
+
|
|
135
137
|
const functions = this.compileFunctions(graph, contract)
|
|
136
138
|
const classes = this.compileClasses(graph, contract)
|
|
137
139
|
const generics = this.compileGenerics(graph, contract)
|
|
@@ -195,12 +197,20 @@ export class LockCompiler {
|
|
|
195
197
|
for (const [id, node] of graph.nodes) {
|
|
196
198
|
if (node.type !== 'function') continue
|
|
197
199
|
|
|
200
|
+
// Skip vendor files
|
|
201
|
+
if (this.isVendorPath(node.file)) continue
|
|
202
|
+
|
|
198
203
|
const moduleId = this.findModule(node.file, contract.declared.modules)
|
|
199
204
|
const displayName = node.name ?? ''
|
|
200
205
|
const metadata = node.metadata ?? {}
|
|
201
206
|
const inEdges = graph.inEdges.get(id) || []
|
|
202
207
|
const outEdges = graph.outEdges.get(id) || []
|
|
203
208
|
|
|
209
|
+
const params = metadata.params || []
|
|
210
|
+
const returnType = metadata.returnType || 'void'
|
|
211
|
+
const signatureHash = hashContent(`${displayName}(${params.map(p => p.type).join(',')}):${returnType}`)
|
|
212
|
+
const tokenVector = this.generateTokenVector(displayName, params, returnType, metadata.purpose)
|
|
213
|
+
|
|
204
214
|
result[id] = {
|
|
205
215
|
id,
|
|
206
216
|
name: displayName,
|
|
@@ -211,26 +221,94 @@ export class LockCompiler {
|
|
|
211
221
|
calls: outEdges.filter(e => e.type === 'calls').map(e => e.to),
|
|
212
222
|
calledBy: inEdges.filter(e => e.type === 'calls').map(e => e.from),
|
|
213
223
|
moduleId: moduleId || 'unknown',
|
|
214
|
-
...(
|
|
215
|
-
? { params: metadata.params }
|
|
216
|
-
: {}),
|
|
224
|
+
...(params.length > 0 ? { params } : {}),
|
|
217
225
|
...(metadata.returnType ? { returnType: metadata.returnType } : {}),
|
|
218
226
|
...(metadata.isAsync ? { isAsync: true } : {}),
|
|
219
227
|
...(metadata.isExported ? { isExported: true } : {}),
|
|
220
228
|
purpose: metadata.purpose || inferPurpose(
|
|
221
229
|
displayName,
|
|
222
|
-
|
|
223
|
-
|
|
230
|
+
params,
|
|
231
|
+
returnType,
|
|
224
232
|
metadata.isAsync,
|
|
225
233
|
),
|
|
226
234
|
edgeCasesHandled: metadata.edgeCasesHandled,
|
|
227
235
|
errorHandling: metadata.errorHandling,
|
|
236
|
+
signatureHash,
|
|
237
|
+
tokenVector,
|
|
228
238
|
}
|
|
229
239
|
}
|
|
230
240
|
|
|
231
241
|
return result
|
|
232
242
|
}
|
|
233
243
|
|
|
244
|
+
/**
 * Build a 64-slot, L2-normalised token vector for a function signature.
 *
 * Tokens are taken from the function name, each parameter name and the return
 * type (split on camelCase boundaries), plus every run of three or more letters
 * in `purpose`. Only tokens present in the vocabulary contribute; each adds
 * `simpleHash(token)` to slot `vocabularyIndex % 64`.
 *
 * NOTE(review): `simpleHash` can return 0, in which case a vocabulary token
 * adds nothing; and vocabulary indices >= 64 wrap onto earlier slots.
 *
 * @param name Function name.
 * @param params Parameters; only `name` is read here.
 * @param returnType Return type text.
 * @param purpose Optional free-text description.
 * @returns Array of length 64; all zeros when no token is in the vocabulary.
 */
private generateTokenVector(
    name: string,
    params: Array<{ name: string; type: string; optional?: boolean }>,
    returnType: string,
    purpose?: string
): number[] {
    // "getUserName" -> ["get", "user", "name"]
    const splitIdentifier = (text: string): string[] =>
        (text.match(/[A-Z][a-z]+|[a-z]+/g) ?? []).map(part => part.toLowerCase())

    const tokens: string[] = [...splitIdentifier(name)]
    for (const param of params) {
        tokens.push(...splitIdentifier(param.name))
    }
    tokens.push(...splitIdentifier(returnType))

    if (purpose) {
        // Lower-case BEFORE matching: matching `[a-z]` on the raw text would
        // truncate capitalised words ("Parse" -> "arse") and drop all-caps ones.
        tokens.push(...(purpose.toLowerCase().match(/[a-z]{3,}/g) ?? []))
    }

    const vocabulary = this.buildVocabulary()
    const vector: number[] = new Array(64).fill(0)

    for (const token of tokens) {
        const idx = vocabulary.get(token)
        if (idx === undefined) continue
        vector[idx % 64] += this.simpleHash(token)
    }

    // Scale to unit length; an all-zero vector is returned unchanged.
    const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0))
    if (magnitude > 0) {
        for (let i = 0; i < vector.length; i++) {
            vector[i] /= magnitude
        }
    }

    return vector
}
|
|
284
|
+
|
|
285
|
+
/**
 * Build the fixed word -> index vocabulary used for token vectors.
 *
 * The index is the word's position in the list below. A word that appears more
 * than once ('config', 'update', 'delete') ends up with its last position.
 */
private buildVocabulary(): Map<string, number> {
    const words: string[] = [
        'get', 'set', 'add', 'remove', 'create', 'delete', 'update', 'find',
        'load', 'save', 'parse', 'format', 'validate', 'check', 'handle',
        'process', 'render', 'display', 'build', 'make', 'init', 'setup',
        'config', 'user', 'auth', 'login', 'logout', 'token', 'data', 'file',
        'path', 'config', 'options', 'params', 'args', 'error', 'result',
        'async', 'promise', 'callback', 'event', 'handler', 'middleware',
        'database', 'query', 'insert', 'update', 'delete', 'select', 'transaction',
        'string', 'number', 'boolean', 'array', 'object', 'function', 'class',
        'interface', 'type', 'enum', 'const', 'var', 'let', 'return', 'void',
    ]

    const indexByWord = new Map<string, number>()
    for (let position = 0; position < words.length; position++) {
        indexByWord.set(words[position], position)
    }
    return indexByWord
}
|
|
302
|
+
|
|
303
|
+
/**
 * Reduce a string to a small integer in the range 0..9.
 *
 * Folds each UTF-16 code unit into a 32-bit rolling hash (`hash * 31 + code`,
 * written as a shift and subtract), then returns the absolute value of the
 * hash modulo 10.
 */
private simpleHash(str: string): number {
    let acc = 0
    let pos = 0
    while (pos < str.length) {
        // `| 0` truncates to a signed 32-bit integer after every step.
        acc = (((acc << 5) - acc) + str.charCodeAt(pos)) | 0
        pos += 1
    }
    const bucket = acc % 10
    return Math.abs(bucket)
}
|
|
311
|
+
|
|
234
312
|
private compileClasses(
|
|
235
313
|
graph: DependencyGraph,
|
|
236
314
|
contract: MikkContract
|
|
@@ -238,6 +316,8 @@ export class LockCompiler {
|
|
|
238
316
|
const result: Record<string, any> = {}
|
|
239
317
|
for (const [id, node] of graph.nodes) {
|
|
240
318
|
if (node.type !== 'class') continue
|
|
319
|
+
if (this.isVendorPath(node.file)) continue
|
|
320
|
+
|
|
241
321
|
const moduleId = this.findModule(node.file, contract.declared.modules)
|
|
242
322
|
const className = node.name ?? ''
|
|
243
323
|
const metadata = node.metadata ?? {}
|
|
@@ -267,6 +347,8 @@ export class LockCompiler {
|
|
|
267
347
|
// Only include exported generics — non-exported types/interfaces are
|
|
268
348
|
// internal implementation details that add noise without value.
|
|
269
349
|
if (!(node.metadata?.isExported)) continue
|
|
350
|
+
if (this.isVendorPath(node.file)) continue
|
|
351
|
+
|
|
270
352
|
const moduleId = this.findModule(node.file, contract.declared.modules)
|
|
271
353
|
const genericName = node.name ?? ''
|
|
272
354
|
const metadata = node.metadata ?? {}
|
|
@@ -314,21 +396,29 @@ export class LockCompiler {
|
|
|
314
396
|
): Record<string, MikkLock['modules'][string]> {
|
|
315
397
|
const result: Record<string, MikkLock['modules'][string]> = {}
|
|
316
398
|
|
|
399
|
+
// Build a map for fast file lookups - O(1) instead of O(n) per module
|
|
400
|
+
const fileHashMap = new Map<string, string>()
|
|
401
|
+
for (const file of parsedFiles) {
|
|
402
|
+
fileHashMap.set(file.path, file.hash)
|
|
403
|
+
}
|
|
404
|
+
|
|
317
405
|
for (const module of contract.declared.modules) {
|
|
318
|
-
const moduleFiles =
|
|
319
|
-
|
|
320
|
-
|
|
406
|
+
const moduleFiles: string[] = []
|
|
407
|
+
|
|
408
|
+
for (const file of parsedFiles) {
|
|
409
|
+
if (this.fileMatchesModule(file.path, module.paths)) {
|
|
410
|
+
moduleFiles.push(file.path)
|
|
411
|
+
}
|
|
412
|
+
}
|
|
321
413
|
|
|
322
|
-
const fileHashes = moduleFiles.map(f =>
|
|
323
|
-
const parsed = parsedFiles.find(pf => pf.path === f)
|
|
324
|
-
return parsed?.hash ?? ''
|
|
325
|
-
})
|
|
414
|
+
const fileHashes = moduleFiles.map(f => fileHashMap.get(f) ?? '')
|
|
326
415
|
|
|
327
416
|
result[module.id] = {
|
|
328
417
|
id: module.id,
|
|
329
418
|
files: moduleFiles,
|
|
330
419
|
hash: computeModuleHash(fileHashes),
|
|
331
420
|
fragmentPath: `.mikk/fragments/${module.id}.lock`,
|
|
421
|
+
...(module.parentId ? { parentId: module.parentId } : {}),
|
|
332
422
|
}
|
|
333
423
|
}
|
|
334
424
|
|
|
@@ -344,6 +434,9 @@ export class LockCompiler {
|
|
|
344
434
|
const result: Record<string, MikkLock['files'][string]> = {}
|
|
345
435
|
|
|
346
436
|
for (const file of parsedFiles) {
|
|
437
|
+
// Skip vendor files entirely
|
|
438
|
+
if (this.isVendorPath(file.path)) continue
|
|
439
|
+
|
|
347
440
|
const moduleId = this.findModule(file.path, contract.declared.modules)
|
|
348
441
|
|
|
349
442
|
// Collect file-level imports from the parsed file info directly
|
|
@@ -400,6 +493,9 @@ export class LockCompiler {
|
|
|
400
493
|
|
|
401
494
|
/** Check if a file path matches any of the module's path patterns */
|
|
402
495
|
private fileMatchesModule(filePath: string, patterns: string[]): boolean {
|
|
496
|
+
// Skip vendor paths - never match them to any module
|
|
497
|
+
if (this.isVendorPath(filePath)) return false
|
|
498
|
+
|
|
403
499
|
const relativePath = getModuleMatchPath(filePath, this.projectRootPath)
|
|
404
500
|
const normalizedRelative = relativePath.replace(/\\/g, '/').toLowerCase()
|
|
405
501
|
const normalizedAbsolute = filePath.replace(/\\/g, '/').toLowerCase()
|
|
@@ -419,4 +515,21 @@ export class LockCompiler {
|
|
|
419
515
|
}
|
|
420
516
|
return false
|
|
421
517
|
}
|
|
518
|
+
|
|
519
|
+
/**
 * Report whether `filePath` lies under a vendor / build-output directory.
 * Backslashes are converted to forward slashes before the glob patterns are tried.
 */
private isVendorPath(filePath: string): boolean {
    const vendorPatterns = [
        '**/node_modules/**',
        '**/venv/**',
        '**/.venv/**',
        '**/__pycache__/**',
        '**/vendor/**',
        '**/dist/**',
        '**/build/**',
        '**/.next/**',
        '**/target/**',
    ]
    const posixPath = filePath.replace(/\\/g, '/')
    for (const pattern of vendorPatterns) {
        if (minimatch(posixPath, pattern)) {
            return true
        }
    }
    return false
}
|
|
422
535
|
}
|
package/src/contract/schema.ts
CHANGED
|
@@ -10,6 +10,7 @@ export const MikkModuleSchema = z.object({
|
|
|
10
10
|
owners: z.array(z.string()).optional(),
|
|
11
11
|
paths: z.array(z.string()),
|
|
12
12
|
entryFunctions: z.array(z.string()).optional(),
|
|
13
|
+
parentId: z.string().optional(),
|
|
13
14
|
})
|
|
14
15
|
|
|
15
16
|
export const MikkDecisionSchema = z.object({
|
|
@@ -92,6 +93,8 @@ export const MikkLockFunctionSchema = z.object({
|
|
|
92
93
|
})).optional(),
|
|
93
94
|
confidence: z.number().optional(),
|
|
94
95
|
riskScore: z.number().optional(),
|
|
96
|
+
signatureHash: z.string().optional(),
|
|
97
|
+
tokenVector: z.array(z.number()).optional(),
|
|
95
98
|
})
|
|
96
99
|
|
|
97
100
|
export const MikkLockModuleSchema = z.object({
|
|
@@ -99,6 +102,7 @@ export const MikkLockModuleSchema = z.object({
|
|
|
99
102
|
files: z.array(z.string()),
|
|
100
103
|
hash: z.string(),
|
|
101
104
|
fragmentPath: z.string(),
|
|
105
|
+
parentId: z.string().optional(),
|
|
102
106
|
})
|
|
103
107
|
|
|
104
108
|
export const MikkLockImportSchema = z.object({
|
package/src/error-handler.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
1
2
|
/**
|
|
2
3
|
* Standardized Error Handling System
|
|
3
4
|
*
|
|
@@ -269,7 +270,7 @@ export function createFileNotFoundError(filePath: string): FileSystemError {
|
|
|
269
270
|
/**
|
|
270
271
|
* Create a file too large error
|
|
271
272
|
*/
|
|
272
|
-
export function createFileTooLargeError(filePath: string,
|
|
273
|
+
export function createFileTooLargeError(filePath: string, _size: number, _limit: number): FileSystemError {
    // `_size` and `_limit` are accepted but not used: the error carries only
    // the 'FILE_TOO_LARGE' code and the offending path.
    const error = new FileSystemError('FILE_TOO_LARGE', filePath)
    return error
}
|
|
275
276
|
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import
|
|
2
|
-
import type { DependencyGraph, ModuleCluster, GraphNode } from './types.js'
|
|
1
|
+
import type { DependencyGraph, ModuleCluster } from './types.js'
|
|
3
2
|
|
|
4
3
|
// ─── Domain keyword maps for semantic naming ────────────────────────
|
|
5
4
|
// Each entry maps a human-readable domain label to keywords found in
|
|
@@ -227,7 +226,6 @@ export class ClusterDetector {
|
|
|
227
226
|
private computeCouplingMatrix(files: string[]): Map<string, Map<string, number>> {
|
|
228
227
|
const matrix = new Map<string, Map<string, number>>()
|
|
229
228
|
const fileEdgeCounts = new Map<string, number>()
|
|
230
|
-
const pairCounts = new Map<string, number>()
|
|
231
229
|
|
|
232
230
|
// Count total edges per file
|
|
233
231
|
for (const fileId of files) {
|
|
@@ -450,7 +448,7 @@ export class ClusterDetector {
|
|
|
450
448
|
}
|
|
451
449
|
|
|
452
450
|
const result: ModuleCluster[] = []
|
|
453
|
-
for (const
|
|
451
|
+
for (const siblings of byBaseDir.values()) {
|
|
454
452
|
if (siblings.length <= 1) {
|
|
455
453
|
result.push(...siblings)
|
|
456
454
|
continue
|