@getmikk/core 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +4 -4
  2. package/package.json +2 -1
  3. package/src/analysis/index.ts +9 -0
  4. package/src/analysis/taint-analysis.ts +419 -0
  5. package/src/analysis/type-flow.ts +247 -0
  6. package/src/cache/incremental-cache.ts +278 -0
  7. package/src/cache/index.ts +1 -0
  8. package/src/contract/contract-generator.ts +31 -3
  9. package/src/contract/contract-reader.ts +1 -0
  10. package/src/contract/lock-compiler.ts +125 -12
  11. package/src/contract/schema.ts +4 -0
  12. package/src/error-handler.ts +2 -1
  13. package/src/graph/cluster-detector.ts +2 -4
  14. package/src/graph/dead-code-detector.ts +303 -117
  15. package/src/graph/graph-builder.ts +21 -161
  16. package/src/graph/impact-analyzer.ts +1 -0
  17. package/src/graph/index.ts +2 -0
  18. package/src/graph/rich-function-index.ts +1080 -0
  19. package/src/graph/symbol-table.ts +252 -0
  20. package/src/hash/hash-store.ts +1 -0
  21. package/src/index.ts +4 -0
  22. package/src/parser/base-extractor.ts +19 -0
  23. package/src/parser/boundary-checker.ts +31 -12
  24. package/src/parser/error-recovery.ts +647 -0
  25. package/src/parser/function-body-extractor.ts +248 -0
  26. package/src/parser/go/go-extractor.ts +249 -676
  27. package/src/parser/index.ts +138 -295
  28. package/src/parser/language-registry.ts +57 -0
  29. package/src/parser/oxc-parser.ts +166 -28
  30. package/src/parser/oxc-resolver.ts +179 -11
  31. package/src/parser/parser-constants.ts +1 -0
  32. package/src/parser/rust/rust-extractor.ts +109 -0
  33. package/src/parser/tree-sitter/parser.ts +400 -66
  34. package/src/parser/tree-sitter/queries.ts +106 -10
  35. package/src/parser/types.ts +20 -1
  36. package/src/search/bm25.ts +21 -8
  37. package/src/search/direct-search.ts +472 -0
  38. package/src/search/embedding-provider.ts +249 -0
  39. package/src/search/index.ts +12 -0
  40. package/src/search/semantic-search.ts +435 -0
  41. package/src/security/index.ts +1 -0
  42. package/src/security/scanner.ts +342 -0
  43. package/src/utils/artifact-transaction.ts +1 -0
  44. package/src/utils/atomic-write.ts +1 -0
  45. package/src/utils/errors.ts +89 -4
  46. package/src/utils/fs.ts +150 -65
  47. package/src/utils/json.ts +1 -0
  48. package/src/utils/language-registry.ts +96 -5
  49. package/src/utils/minimatch.ts +49 -6
  50. package/src/utils/path.ts +26 -0
  51. package/tests/dead-code.test.ts +3 -2
  52. package/tests/direct-search.test.ts +435 -0
  53. package/tests/error-recovery.test.ts +143 -0
  54. package/tests/fixtures/simple-api/src/index.ts +1 -1
  55. package/tests/go-parser.test.ts +19 -335
  56. package/tests/js-parser.test.ts +18 -1089
  57. package/tests/language-registry-all.test.ts +276 -0
  58. package/tests/language-registry.test.ts +6 -4
  59. package/tests/parse-diagnostics.test.ts +9 -96
  60. package/tests/parser.test.ts +42 -771
  61. package/tests/polyglot-parser.test.ts +117 -0
  62. package/tests/rich-function-index.test.ts +703 -0
  63. package/tests/tree-sitter-parser.test.ts +108 -80
  64. package/tests/ts-parser.test.ts +8 -8
  65. package/tests/verification.test.ts +175 -0
  66. package/src/parser/base-parser.ts +0 -16
  67. package/src/parser/go/go-parser.ts +0 -43
  68. package/src/parser/javascript/js-extractor.ts +0 -278
  69. package/src/parser/javascript/js-parser.ts +0 -101
  70. package/src/parser/typescript/ts-extractor.ts +0 -447
  71. package/src/parser/typescript/ts-parser.ts +0 -36
@@ -0,0 +1,278 @@
1
+ import * as fs from 'node:fs/promises'
2
+ import * as path from 'node:path'
3
+ import type { ParsedFile } from '../parser/types.js'
4
+
5
/** Index record describing one cached parse result. */
type CacheEntry = {
    /** Content hash of the source file; also names the blob file on disk. */
    hash: string
    /** ISO-8601 timestamp of when the parse result was stored. */
    parsedAt: string
    /** Length of the serialized parse result (in UTF-16 code units). */
    size: number
    /** Epoch milliseconds of the most recent store or successful read. */
    lastAccessed: number
}
11
+
12
/** In-memory form of the cache index that is persisted as `metadata.json`. */
type CacheMetadata = {
    /** Layout version of the index. */
    version: number
    /** Index records keyed by source file path. */
    entries: Map<string, CacheEntry>
    /** Epoch milliseconds of the last prune run. */
    lastPruned: number
}
17
+
18
/** Layout version written into the cache index. */
const CACHE_VERSION = 1
/** Maximum number of index entries before the oldest ones are evicted. */
const MAX_CACHE_SIZE = 5_000
/** Lifetime of a cache entry: 24 hours, in milliseconds. */
const CACHE_TTL_MS = 86_400_000
21
+
22
/**
 * Disk-backed cache of parse results, indexed by file path and validated by
 * content hash.
 *
 * Layout under `<projectRoot>/.mikk/cache`:
 *  - `metadata.json`      the path -> entry index (written by `flush`, `clear`, `prune`)
 *  - `<contentHash>.json` one serialized `ParsedFile` per distinct content hash
 *
 * Every public async operation is funnelled through an in-process FIFO queue,
 * so operations on a single instance never interleave. File-system failures
 * are swallowed on purpose: the cache is an optimisation, not a dependency.
 *
 * NOTE(review): blobs are keyed by content hash alone, so two paths with
 * byte-identical content share one blob and `get` returns whatever was stored
 * last for that hash. Confirm this is acceptable for `ParsedFile` consumers.
 */
export class IncrementalCache {
    private cacheDir: string
    private metadata: CacheMetadata
    private hits = 0
    private misses = 0
    private queue: Array<() => Promise<void>> = []
    private running = false
    private initialized = false
    private pendingInit: Promise<void> | null = null

    /**
     * @param projectRoot Directory that contains (or will contain) `.mikk/cache`.
     */
    constructor(projectRoot: string) {
        this.cacheDir = path.join(projectRoot, '.mikk', 'cache')
        this.metadata = {
            version: CACHE_VERSION,
            entries: new Map(),
            lastPruned: Date.now(),
        }
        // loadMetadata never rejects, so leaving this promise un-awaited is safe;
        // every queued operation awaits it before touching the index.
        this.pendingInit = this.loadMetadata()
    }

    /** Wait until the on-disk index has been loaded (or found missing). */
    private async ensureInitialized(): Promise<void> {
        if (this.initialized) return
        if (this.pendingInit) {
            await this.pendingInit
        }
    }

    /**
     * Run `fn` after all previously queued operations have finished.
     * The returned promise settles with `fn`'s own result or error.
     */
    private async withMutex<T>(fn: () => Promise<T>): Promise<T> {
        return new Promise<T>((resolve, reject) => {
            this.queue.push(async () => {
                try {
                    await this.ensureInitialized()
                    resolve(await fn())
                } catch (err) {
                    reject(err)
                }
            })
            if (!this.running) {
                void this.processQueue()
            }
        })
    }

    /** Drain the queue one job at a time. Jobs never throw (see withMutex). */
    private async processQueue(): Promise<void> {
        this.running = true
        try {
            let job = this.queue.shift()
            while (job) {
                await job()
                job = this.queue.shift()
            }
        } finally {
            // Always release the flag so a later withMutex call can restart draining.
            this.running = false
        }
    }

    /** Path of the blob file that stores the parse result for `hash`. */
    private getCacheFilePath(hash: string): string {
        return path.join(this.cacheDir, `${hash}.json`)
    }

    /**
     * Load the index from `metadata.json`. A missing, unreadable, or
     * differently-versioned index results in an empty cache.
     */
    private async loadMetadata(): Promise<void> {
        const metaPath = path.join(this.cacheDir, 'metadata.json')
        try {
            const raw = await fs.readFile(metaPath, 'utf-8')
            const data = JSON.parse(raw) as Partial<{
                version: number
                lastPruned: number
                entries: Record<string, CacheEntry>
            }> | null
            const storedVersion = data?.version ?? CACHE_VERSION
            if (storedVersion === CACHE_VERSION) {
                this.metadata.lastPruned = data?.lastPruned ?? Date.now()
                this.metadata.entries = new Map(Object.entries(data?.entries ?? {}))
            } else {
                // An index written with another layout version cannot be trusted.
                this.metadata.entries = new Map()
            }
        } catch {
            this.metadata.entries = new Map()
        } finally {
            this.initialized = true
        }
    }

    /** Write the index to `metadata.json`; failures are ignored. */
    private async saveMetadata(): Promise<void> {
        try {
            await fs.mkdir(this.cacheDir, { recursive: true })
            const metaPath = path.join(this.cacheDir, 'metadata.json')
            const serializable = {
                version: this.metadata.version,
                lastPruned: this.metadata.lastPruned,
                entries: Object.fromEntries(this.metadata.entries),
            }
            await fs.writeFile(metaPath, JSON.stringify(serializable), 'utf-8')
        } catch {
            // Silently fail — cache is non-critical
        }
    }

    /** True when the entry is older than the TTL or its timestamp is unparsable. */
    private isExpired(entry: CacheEntry, now: number): boolean {
        const parsedAt = new Date(entry.parsedAt).getTime()
        return Number.isNaN(parsedAt) || now - parsedAt > CACHE_TTL_MS
    }

    /** Delete the blob for `hash`, ignoring a blob that is already gone. */
    private async unlinkBlob(hash: string): Promise<void> {
        try {
            await fs.unlink(this.getCacheFilePath(hash))
        } catch { /* ignore */ }
    }

    /** Delete the blob for `hash` unless some index entry still points at it. */
    private async removeBlobIfUnreferenced(hash: string): Promise<void> {
        for (const other of this.metadata.entries.values()) {
            if (other.hash === hash) return
        }
        await this.unlinkBlob(hash)
    }

    /** Remove `filePath` from the index and clean up its blob when unshared. */
    private async dropEntry(filePath: string): Promise<void> {
        const entry = this.metadata.entries.get(filePath)
        if (!entry) return
        this.metadata.entries.delete(filePath)
        await this.removeBlobIfUnreferenced(entry.hash)
    }

    /**
     * Get cached parse result if content hash matches.
     * Returns null if cache miss or stale.
     */
    async get(filePath: string, contentHash: string): Promise<ParsedFile | null> {
        return this.withMutex(async () => {
            const entry = this.metadata.entries.get(filePath)
            if (!entry || entry.hash !== contentHash) {
                this.misses++
                return null
            }

            if (this.isExpired(entry, Date.now())) {
                // Drop the blob too; once the index entry is gone prune() can no longer find it.
                await this.dropEntry(filePath)
                this.misses++
                return null
            }

            try {
                const raw = await fs.readFile(this.getCacheFilePath(contentHash), 'utf-8')
                const parsed = JSON.parse(raw) as ParsedFile
                this.hits++
                entry.lastAccessed = Date.now()
                return parsed
            } catch (err) {
                console.warn(`Corrupted cache entry for ${filePath}:`, err)
                await this.dropEntry(filePath)
            }

            this.misses++
            return null
        })
    }

    /**
     * Store parse result in cache.
     * The index entry is only recorded once the blob has been written.
     */
    async set(filePath: string, contentHash: string, parsed: ParsedFile): Promise<void> {
        return this.withMutex(async () => {
            const previous = this.metadata.entries.get(filePath)
            // Overwriting an existing path does not grow the index, so only evict for new paths.
            if (!previous && this.metadata.entries.size >= MAX_CACHE_SIZE) {
                await this.evictLRU()
            }

            const serialized = JSON.stringify(parsed)
            try {
                await fs.mkdir(this.cacheDir, { recursive: true })
                await fs.writeFile(this.getCacheFilePath(contentHash), serialized, 'utf-8')
            } catch {
                // Cache is non-critical; do not index a blob that is not on disk.
                return
            }

            this.metadata.entries.set(filePath, {
                hash: contentHash,
                parsedAt: new Date().toISOString(),
                size: serialized.length,
                lastAccessed: Date.now(),
            })

            // The path's previous content hash may now be unreferenced.
            if (previous && previous.hash !== contentHash) {
                await this.removeBlobIfUnreferenced(previous.hash)
            }
        })
    }

    /**
     * Invalidate cache for a specific file.
     */
    async invalidate(filePath: string): Promise<void> {
        return this.withMutex(async () => {
            await this.dropEntry(filePath)
        })
    }

    /**
     * Clear entire cache.
     */
    async clear(): Promise<void> {
        return this.withMutex(async () => {
            const hashes = new Set<string>()
            for (const entry of this.metadata.entries.values()) {
                hashes.add(entry.hash)
            }
            this.metadata.entries.clear()
            for (const hash of hashes) {
                await this.unlinkBlob(hash)
            }
            await this.saveMetadata()
        })
    }

    /**
     * Get cache statistics.
     */
    getStats(): { hits: number; misses: number; hitRate: number; size: number } {
        const total = this.hits + this.misses
        return {
            hits: this.hits,
            misses: this.misses,
            hitRate: total > 0 ? this.hits / total : 0,
            size: this.metadata.entries.size,
        }
    }

    /**
     * Persist cache metadata to disk.
     * Call this after batch operations.
     */
    async flush(): Promise<void> {
        return this.withMutex(async () => {
            await this.saveMetadata()
        })
    }

    /**
     * Evict the least recently used 20% of entries when the cache is full.
     * Recency is `lastAccessed`; entries without a usable value fall back to
     * their `parsedAt` timestamp.
     */
    private async evictLRU(): Promise<void> {
        const recency = (entry: CacheEntry): number => {
            if (typeof entry.lastAccessed === 'number' && Number.isFinite(entry.lastAccessed)) {
                return entry.lastAccessed
            }
            const parsedAt = new Date(entry.parsedAt).getTime()
            return Number.isNaN(parsedAt) ? 0 : parsedAt
        }
        const oldestFirst = [...this.metadata.entries.entries()].sort(
            (a, b) => recency(a[1]) - recency(b[1])
        )
        const toRemove = Math.ceil(oldestFirst.length * 0.2)
        for (const [filePath] of oldestFirst.slice(0, toRemove)) {
            await this.dropEntry(filePath)
        }
    }

    /**
     * Prune expired entries from cache.
     */
    async prune(): Promise<void> {
        return this.withMutex(async () => {
            const now = Date.now()
            const expired: string[] = []
            for (const [filePath, entry] of this.metadata.entries) {
                if (this.isExpired(entry, now)) {
                    expired.push(filePath)
                }
            }
            for (const filePath of expired) {
                await this.dropEntry(filePath)
            }
            this.metadata.lastPruned = now
            await this.saveMetadata()
        })
    }
}
@@ -0,0 +1 @@
1
+ export { IncrementalCache } from './incremental-cache.js'
@@ -1,6 +1,26 @@
1
1
  import type { MikkContract } from './schema.js'
2
2
  import type { ModuleCluster } from '../graph/types.js'
3
3
  import type { ParsedFile } from '../parser/types.js'
4
+ import { minimatch } from '../utils/minimatch.js'
5
+
6
/**
 * Glob patterns for vendor / build-output directories that are excluded from
 * contract generation. Each directory name is matched at any depth.
 */
const VENDOR_PATTERNS = [
    'node_modules',
    'venv',
    '.venv',
    '__pycache__',
    'vendor',
    'dist',
    'build',
    '.next',
    'target',
].map(dir => `**/${dir}/**`)
18
+
19
/**
 * Report whether `filePath` matches any of the vendor directory patterns.
 * Backslashes are converted to forward slashes before matching.
 */
function isVendorPath(filePath: string): boolean {
    const posixPath = filePath.split('\\').join('/')
    for (const glob of VENDOR_PATTERNS) {
        if (minimatch(posixPath, glob)) {
            return true
        }
    }
    return false
}
4
24
 
5
25
  /** Common entry point filenames across ecosystems (without extensions) */
6
26
  const ENTRY_BASENAMES = ['index', 'main', 'app', 'server', 'mod', 'lib', '__init__', 'manage', 'program', 'startup']
@@ -39,7 +59,13 @@ export class ContractGenerator {
39
59
  projectName: string,
40
60
  packageJsonDescription?: string
41
61
  ): MikkContract {
42
- const modules = clusters.map(cluster => ({
62
+ // Filter out vendor files from clusters
63
+ const filteredClusters = clusters.map(cluster => ({
64
+ ...cluster,
65
+ files: cluster.files.filter(f => !isVendorPath(f)),
66
+ })).filter(cluster => cluster.files.length > 0)
67
+
68
+ const modules = filteredClusters.map(cluster => ({
43
69
  id: cluster.id,
44
70
  name: cluster.suggestedName,
45
71
  description: this.inferModuleDescription(cluster, parsedFiles),
@@ -50,14 +76,16 @@ export class ContractGenerator {
50
76
 
51
77
  // Detect entry points — language-agnostic basename matching
52
78
  const entryPoints = parsedFiles
79
+ .filter(f => !isVendorPath(f.path))
53
80
  .filter(f => {
54
81
  const basename = (f.path.split('/').pop() || '').replace(/\.[^.]+$/, '')
55
82
  return ENTRY_BASENAMES.includes(basename)
56
83
  })
57
84
  .map(f => f.path)
58
85
 
59
- const detectedLanguage = inferLanguageFromFiles(parsedFiles)
60
- const fallbackEntry = parsedFiles[0]?.path ?? 'src/index'
86
+ const filteredParsedFiles = parsedFiles.filter(f => !isVendorPath(f.path))
87
+ const detectedLanguage = inferLanguageFromFiles(filteredParsedFiles)
88
+ const fallbackEntry = filteredParsedFiles[0]?.path ?? 'src/index'
61
89
 
62
90
  return {
63
91
  version: '1.0.0',
@@ -1,3 +1,4 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
1
2
  import { MikkContractSchema, type MikkContract } from './schema.js'
2
3
  import { ContractNotFoundError } from '../utils/errors.js'
3
4
  import { readJsonSafe } from '../utils/json.js'
@@ -123,6 +123,7 @@ function capitalise(s: string): string {
123
123
  */
124
124
  export class LockCompiler {
125
125
  private projectRootPath: string | null = null
126
+
126
127
  /** Main entry -- compile full lock from graph + contract + parsed files */
127
128
  compile(
128
129
  graph: DependencyGraph,
@@ -132,6 +133,7 @@ export class LockCompiler {
132
133
  projectRoot?: string
133
134
  ): MikkLock {
134
135
  this.projectRootPath = projectRoot ? nodePath.resolve(projectRoot) : null
136
+
135
137
  const functions = this.compileFunctions(graph, contract)
136
138
  const classes = this.compileClasses(graph, contract)
137
139
  const generics = this.compileGenerics(graph, contract)
@@ -195,12 +197,20 @@ export class LockCompiler {
195
197
  for (const [id, node] of graph.nodes) {
196
198
  if (node.type !== 'function') continue
197
199
 
200
+ // Skip vendor files
201
+ if (this.isVendorPath(node.file)) continue
202
+
198
203
  const moduleId = this.findModule(node.file, contract.declared.modules)
199
204
  const displayName = node.name ?? ''
200
205
  const metadata = node.metadata ?? {}
201
206
  const inEdges = graph.inEdges.get(id) || []
202
207
  const outEdges = graph.outEdges.get(id) || []
203
208
 
209
+ const params = metadata.params || []
210
+ const returnType = metadata.returnType || 'void'
211
+ const signatureHash = hashContent(`${displayName}(${params.map(p => p.type).join(',')}):${returnType}`)
212
+ const tokenVector = this.generateTokenVector(displayName, params, returnType, metadata.purpose)
213
+
204
214
  result[id] = {
205
215
  id,
206
216
  name: displayName,
@@ -211,26 +221,94 @@ export class LockCompiler {
211
221
  calls: outEdges.filter(e => e.type === 'calls').map(e => e.to),
212
222
  calledBy: inEdges.filter(e => e.type === 'calls').map(e => e.from),
213
223
  moduleId: moduleId || 'unknown',
214
- ...(metadata.params && metadata.params.length > 0
215
- ? { params: metadata.params }
216
- : {}),
224
+ ...(params.length > 0 ? { params } : {}),
217
225
  ...(metadata.returnType ? { returnType: metadata.returnType } : {}),
218
226
  ...(metadata.isAsync ? { isAsync: true } : {}),
219
227
  ...(metadata.isExported ? { isExported: true } : {}),
220
228
  purpose: metadata.purpose || inferPurpose(
221
229
  displayName,
222
- metadata.params,
223
- metadata.returnType,
230
+ params,
231
+ returnType,
224
232
  metadata.isAsync,
225
233
  ),
226
234
  edgeCasesHandled: metadata.edgeCasesHandled,
227
235
  errorHandling: metadata.errorHandling,
236
+ signatureHash,
237
+ tokenVector,
228
238
  }
229
239
  }
230
240
 
231
241
  return result
232
242
  }
233
243
 
244
/**
 * Build a 64-slot, L2-normalised token vector for a function signature.
 *
 * Tokens come from the camelCase/PascalCase words of the function name,
 * parameter names and return type, plus every run of three or more letters
 * in `purpose`. Only tokens present in the vocabulary contribute: each adds
 * its `simpleHash` weight to slot `vocabularyIndex % 64`.
 *
 * @param name       Function name.
 * @param params     Function parameters; only `name` is tokenised.
 * @param returnType Return type text.
 * @param purpose    Optional free-text description.
 * @returns A 64-element vector of unit length, or all zeros when no token
 *          carried a non-zero weight.
 */
private generateTokenVector(
    name: string,
    params: Array<{ name: string; type: string; optional?: boolean }>,
    returnType: string,
    purpose?: string
): number[] {
    // Split an identifier into its lower-cased word parts.
    const splitIdentifier = (text: string): string[] =>
        (text.match(/[A-Z][a-z]+|[a-z]+/g) ?? []).map(part => part.toLowerCase())

    const tokens: string[] = [
        ...splitIdentifier(name),
        ...params.flatMap(param => splitIdentifier(param.name)),
        ...splitIdentifier(returnType),
    ]

    if (purpose) {
        // Lower-case BEFORE matching: the pattern only accepts a-z, so matching
        // the raw text would strip the first letter of capitalised words
        // ("Validate" -> "alidate") and miss the vocabulary.
        tokens.push(...(purpose.toLowerCase().match(/[a-z]{3,}/g) ?? []))
    }

    const vocabulary = this.buildVocabulary()
    const vector: number[] = new Array(64).fill(0)

    for (const token of tokens) {
        const idx = vocabulary.get(token)
        if (idx === undefined) continue
        vector[idx % 64] += this.simpleHash(token)
    }

    const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0))
    if (magnitude > 0) {
        for (let i = 0; i < vector.length; i++) {
            vector[i] /= magnitude
        }
    }

    return vector
}
284
+
285
/** Vocabulary built on first use; its contents are constant, so it is reused. */
private vocabularyCache: Map<string, number> | null = null

/**
 * Return the fixed word -> index vocabulary used for token vectors.
 *
 * The index is the word's position in the list below. The list has 65
 * entries and repeats 'config', 'update' and 'delete'; for a repeated word
 * the later position wins. The map is built once per instance and must be
 * treated as read-only by callers.
 */
private buildVocabulary(): Map<string, number> {
    if (this.vocabularyCache) {
        return this.vocabularyCache
    }

    const common = [
        'get', 'set', 'add', 'remove', 'create', 'delete', 'update', 'find',
        'load', 'save', 'parse', 'format', 'validate', 'check', 'handle',
        'process', 'render', 'display', 'build', 'make', 'init', 'setup',
        'config', 'user', 'auth', 'login', 'logout', 'token', 'data', 'file',
        'path', 'config', 'options', 'params', 'args', 'error', 'result',
        'async', 'promise', 'callback', 'event', 'handler', 'middleware',
        'database', 'query', 'insert', 'update', 'delete', 'select', 'transaction',
        'string', 'number', 'boolean', 'array', 'object', 'function', 'class',
        'interface', 'type', 'enum', 'const', 'var', 'let', 'return', 'void',
    ]

    const vocab = new Map<string, number>()
    common.forEach((word, idx) => vocab.set(word, idx))
    this.vocabularyCache = vocab
    return vocab
}
302
+
303
/**
 * Reduce a string to a small integer weight in the range 0..9.
 *
 * Folds the UTF-16 code units with a 32-bit `acc * 31 + code` rolling hash,
 * then returns the absolute value of the remainder modulo 10.
 */
private simpleHash(str: string): number {
    let acc = 0
    for (const index of Array.from({ length: str.length }, (_, i) => i)) {
        // Math.imul plus `| 0` keeps the accumulator a wrapped 32-bit integer.
        acc = (Math.imul(acc, 31) + str.charCodeAt(index)) | 0
    }
    const remainder = acc % 10
    return Math.abs(remainder)
}
311
+
234
312
  private compileClasses(
235
313
  graph: DependencyGraph,
236
314
  contract: MikkContract
@@ -238,6 +316,8 @@ export class LockCompiler {
238
316
  const result: Record<string, any> = {}
239
317
  for (const [id, node] of graph.nodes) {
240
318
  if (node.type !== 'class') continue
319
+ if (this.isVendorPath(node.file)) continue
320
+
241
321
  const moduleId = this.findModule(node.file, contract.declared.modules)
242
322
  const className = node.name ?? ''
243
323
  const metadata = node.metadata ?? {}
@@ -267,6 +347,8 @@ export class LockCompiler {
267
347
  // Only include exported generics — non-exported types/interfaces are
268
348
  // internal implementation details that add noise without value.
269
349
  if (!(node.metadata?.isExported)) continue
350
+ if (this.isVendorPath(node.file)) continue
351
+
270
352
  const moduleId = this.findModule(node.file, contract.declared.modules)
271
353
  const genericName = node.name ?? ''
272
354
  const metadata = node.metadata ?? {}
@@ -314,21 +396,29 @@ export class LockCompiler {
314
396
  ): Record<string, MikkLock['modules'][string]> {
315
397
  const result: Record<string, MikkLock['modules'][string]> = {}
316
398
 
399
+ // Build a map for fast file lookups - O(1) instead of O(n) per module
400
+ const fileHashMap = new Map<string, string>()
401
+ for (const file of parsedFiles) {
402
+ fileHashMap.set(file.path, file.hash)
403
+ }
404
+
317
405
  for (const module of contract.declared.modules) {
318
- const moduleFiles = parsedFiles
319
- .filter(f => this.fileMatchesModule(f.path, module.paths))
320
- .map(f => f.path)
406
+ const moduleFiles: string[] = []
407
+
408
+ for (const file of parsedFiles) {
409
+ if (this.fileMatchesModule(file.path, module.paths)) {
410
+ moduleFiles.push(file.path)
411
+ }
412
+ }
321
413
 
322
- const fileHashes = moduleFiles.map(f => {
323
- const parsed = parsedFiles.find(pf => pf.path === f)
324
- return parsed?.hash ?? ''
325
- })
414
+ const fileHashes = moduleFiles.map(f => fileHashMap.get(f) ?? '')
326
415
 
327
416
  result[module.id] = {
328
417
  id: module.id,
329
418
  files: moduleFiles,
330
419
  hash: computeModuleHash(fileHashes),
331
420
  fragmentPath: `.mikk/fragments/${module.id}.lock`,
421
+ ...(module.parentId ? { parentId: module.parentId } : {}),
332
422
  }
333
423
  }
334
424
 
@@ -344,6 +434,9 @@ export class LockCompiler {
344
434
  const result: Record<string, MikkLock['files'][string]> = {}
345
435
 
346
436
  for (const file of parsedFiles) {
437
+ // Skip vendor files entirely
438
+ if (this.isVendorPath(file.path)) continue
439
+
347
440
  const moduleId = this.findModule(file.path, contract.declared.modules)
348
441
 
349
442
  // Collect file-level imports from the parsed file info directly
@@ -400,6 +493,9 @@ export class LockCompiler {
400
493
 
401
494
  /** Check if a file path matches any of the module's path patterns */
402
495
  private fileMatchesModule(filePath: string, patterns: string[]): boolean {
496
+ // Skip vendor paths - never match them to any module
497
+ if (this.isVendorPath(filePath)) return false
498
+
403
499
  const relativePath = getModuleMatchPath(filePath, this.projectRootPath)
404
500
  const normalizedRelative = relativePath.replace(/\\/g, '/').toLowerCase()
405
501
  const normalizedAbsolute = filePath.replace(/\\/g, '/').toLowerCase()
@@ -419,4 +515,21 @@ export class LockCompiler {
419
515
  }
420
516
  return false
421
517
  }
518
+
519
/**
 * Glob patterns for vendor / build-output directories. Kept as a static so
 * the list is allocated once instead of on every isVendorPath call.
 */
private static readonly VENDOR_GLOBS: readonly string[] = [
    '**/node_modules/**',
    '**/venv/**',
    '**/.venv/**',
    '**/__pycache__/**',
    '**/vendor/**',
    '**/dist/**',
    '**/build/**',
    '**/.next/**',
    '**/target/**',
]

/**
 * Check if a path is from a vendor directory.
 * Backslashes are converted to forward slashes before matching.
 */
private isVendorPath(filePath: string): boolean {
    const normalized = filePath.replace(/\\/g, '/')
    return LockCompiler.VENDOR_GLOBS.some(pattern => minimatch(normalized, pattern))
}
422
535
  }
@@ -10,6 +10,7 @@ export const MikkModuleSchema = z.object({
10
10
  owners: z.array(z.string()).optional(),
11
11
  paths: z.array(z.string()),
12
12
  entryFunctions: z.array(z.string()).optional(),
13
+ parentId: z.string().optional(),
13
14
  })
14
15
 
15
16
  export const MikkDecisionSchema = z.object({
@@ -92,6 +93,8 @@ export const MikkLockFunctionSchema = z.object({
92
93
  })).optional(),
93
94
  confidence: z.number().optional(),
94
95
  riskScore: z.number().optional(),
96
+ signatureHash: z.string().optional(),
97
+ tokenVector: z.array(z.number()).optional(),
95
98
  })
96
99
 
97
100
  export const MikkLockModuleSchema = z.object({
@@ -99,6 +102,7 @@ export const MikkLockModuleSchema = z.object({
99
102
  files: z.array(z.string()),
100
103
  hash: z.string(),
101
104
  fragmentPath: z.string(),
105
+ parentId: z.string().optional(),
102
106
  })
103
107
 
104
108
  export const MikkLockImportSchema = z.object({
@@ -1,3 +1,4 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
1
2
  /**
2
3
  * Standardized Error Handling System
3
4
  *
@@ -269,7 +270,7 @@ export function createFileNotFoundError(filePath: string): FileSystemError {
269
270
  /**
270
271
  * Create a file too large error
271
272
  */
272
- export function createFileTooLargeError(filePath: string, size: number, limit: number): FileSystemError {
273
+ export function createFileTooLargeError(filePath: string, _size: number, _limit: number): FileSystemError {
273
274
  return new FileSystemError('FILE_TOO_LARGE', filePath)
274
275
  }
275
276
 
@@ -1,5 +1,4 @@
1
- import * as path from 'node:path'
2
- import type { DependencyGraph, ModuleCluster, GraphNode } from './types.js'
1
+ import type { DependencyGraph, ModuleCluster } from './types.js'
3
2
 
4
3
  // ─── Domain keyword maps for semantic naming ────────────────────────
5
4
  // Each entry maps a human-readable domain label to keywords found in
@@ -227,7 +226,6 @@ export class ClusterDetector {
227
226
  private computeCouplingMatrix(files: string[]): Map<string, Map<string, number>> {
228
227
  const matrix = new Map<string, Map<string, number>>()
229
228
  const fileEdgeCounts = new Map<string, number>()
230
- const pairCounts = new Map<string, number>()
231
229
 
232
230
  // Count total edges per file
233
231
  for (const fileId of files) {
@@ -450,7 +448,7 @@ export class ClusterDetector {
450
448
  }
451
449
 
452
450
  const result: ModuleCluster[] = []
453
- for (const [baseDir, siblings] of byBaseDir) {
451
+ for (const siblings of byBaseDir.values()) {
454
452
  if (siblings.length <= 1) {
455
453
  result.push(...siblings)
456
454
  continue