@getmikk/core 2.0.13 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,272 @@
1
+ import * as fs from 'node:fs'
2
+ import * as path from 'node:path'
3
+ import type { ParsedFile } from '../parser/types.js'
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // Incremental Analysis Cache — avoids re-parsing unchanged files
7
+ // ---------------------------------------------------------------------------
8
+
9
+ interface CacheEntry {
10
+ hash: string
11
+ parsedAt: string
12
+ // Store lightweight metadata instead of full ParsedFile to prevent metadata bloat
13
+ size: number
14
+ lastAccessed: number
15
+ }
16
+
17
+ interface CacheMetadata {
18
+ version: number
19
+ entries: Map<string, CacheEntry>
20
+ lastPruned: number
21
+ }
22
+
23
+ const CACHE_VERSION = 1
24
+ const MAX_CACHE_SIZE = 5000 // Max entries before LRU eviction
25
+ const CACHE_TTL_MS = 24 * 60 * 60 * 1000 // 24 hours
26
+
27
+ export class IncrementalCache {
28
+ private cacheDir: string
29
+ private metadata: CacheMetadata
30
+ private hits = 0
31
+ private misses = 0
32
+ // Mutex to prevent race conditions during concurrent access
33
+ private mutex = Promise.resolve()
34
+
35
+ constructor(projectRoot: string) {
36
+ this.cacheDir = path.join(projectRoot, '.mikk', 'cache')
37
+ this.metadata = {
38
+ version: CACHE_VERSION,
39
+ entries: new Map(),
40
+ lastPruned: Date.now(),
41
+ }
42
+ this.loadMetadata()
43
+ }
44
+
45
+ /**
46
+ * Simple mutex-like protection using a flag for basic race condition prevention
47
+ * Note: For production, a proper mutex library would be better
48
+ */
49
+ private isLocked = false
50
+ private async withMutex<T>(fn: () => Promise<T>): Promise<T> {
51
+ while (this.isLocked) {
52
+ await new Promise(resolve => setTimeout(resolve, 1))
53
+ }
54
+ this.isLocked = true
55
+ try {
56
+ return await fn()
57
+ } finally {
58
+ this.isLocked = false
59
+ }
60
+ }
61
+
62
+ private getCacheFilePath(hash: string): string {
63
+ return path.join(this.cacheDir, `${hash}.json`)
64
+ }
65
+
66
+ private loadMetadata(): void {
67
+ const metaPath = path.join(this.cacheDir, 'metadata.json')
68
+ try {
69
+ if (fs.existsSync(metaPath)) {
70
+ const raw = fs.readFileSync(metaPath, 'utf-8')
71
+ const data = JSON.parse(raw)
72
+ this.metadata.version = data.version ?? CACHE_VERSION
73
+ this.metadata.lastPruned = data.lastPruned ?? Date.now()
74
+ // Rebuild entries map
75
+ this.metadata.entries = new Map(Object.entries(data.entries ?? {}))
76
+ }
77
+ } catch {
78
+ // Corrupted metadata — start fresh
79
+ this.metadata.entries = new Map()
80
+ }
81
+ }
82
+
83
+ private saveMetadata(): void {
84
+ try {
85
+ fs.mkdirSync(this.cacheDir, { recursive: true })
86
+ const metaPath = path.join(this.cacheDir, 'metadata.json')
87
+ const serializable = {
88
+ version: this.metadata.version,
89
+ lastPruned: this.metadata.lastPruned,
90
+ entries: Object.fromEntries(this.metadata.entries),
91
+ }
92
+ fs.writeFileSync(metaPath, JSON.stringify(serializable), 'utf-8')
93
+ } catch {
94
+ // Silently fail — cache is non-critical
95
+ }
96
+ }
97
+
98
+ /**
99
+ * Get cached parse result if content hash matches.
100
+ * Returns null if cache miss or stale.
101
+ */
102
+ async get(filePath: string, contentHash: string): Promise<ParsedFile | null> {
103
+ return this.withMutex(async () => {
104
+ const entry = this.metadata.entries.get(filePath)
105
+ if (!entry) {
106
+ this.misses++
107
+ return null
108
+ }
109
+
110
+ if (entry.hash !== contentHash) {
111
+ this.misses++
112
+ return null
113
+ }
114
+
115
+ // Check TTL
116
+ const parsedAt = new Date(entry.parsedAt).getTime()
117
+ if (Date.now() - parsedAt > CACHE_TTL_MS) {
118
+ this.metadata.entries.delete(filePath)
119
+ this.misses++
120
+ return null
121
+ }
122
+
123
+ // Load from disk and reconstruct ParsedFile
124
+ const cacheFile = this.getCacheFilePath(contentHash)
125
+ try {
126
+ if (fs.existsSync(cacheFile)) {
127
+ const raw = fs.readFileSync(cacheFile, 'utf-8')
128
+ const parsed = JSON.parse(raw) as ParsedFile
129
+ this.hits++
130
+ // Update last accessed time
131
+ entry.lastAccessed = Date.now()
132
+ return parsed
133
+ }
134
+ } catch (err) {
135
+ // Corrupted cache entry
136
+ console.warn(`Corrupted cache entry for ${filePath}:`, err)
137
+ this.metadata.entries.delete(filePath)
138
+ }
139
+
140
+ this.misses++
141
+ return null
142
+ })
143
+ }
144
+
145
+ /**
146
+ * Store parse result in cache.
147
+ */
148
+ async set(filePath: string, contentHash: string, parsed: ParsedFile): Promise<void> {
149
+ return this.withMutex(async () => {
150
+ // Evict if cache is full
151
+ if (this.metadata.entries.size >= MAX_CACHE_SIZE) {
152
+ this.evictLRU()
153
+ }
154
+
155
+ const entry: CacheEntry = {
156
+ hash: contentHash,
157
+ parsedAt: new Date().toISOString(),
158
+ // Store file size for lightweight tracking instead of full ParsedFile
159
+ size: JSON.stringify(parsed).length,
160
+ lastAccessed: Date.now()
161
+ }
162
+
163
+ this.metadata.entries.set(filePath, entry)
164
+
165
+ // Write to disk
166
+ try {
167
+ fs.mkdirSync(this.cacheDir, { recursive: true })
168
+ const cacheFile = this.getCacheFilePath(contentHash)
169
+ fs.writeFileSync(cacheFile, JSON.stringify(parsed), 'utf-8')
170
+ } catch {
171
+ // Silently fail — cache is non-critical
172
+ }
173
+ })
174
+ }
175
+
176
+ /**
177
+ * Invalidate cache for a specific file.
178
+ */
179
+ async invalidate(filePath: string): Promise<void> {
180
+ return this.withMutex(async () => {
181
+ const entry = this.metadata.entries.get(filePath)
182
+ if (entry) {
183
+ const cacheFile = this.getCacheFilePath(entry.hash)
184
+ try {
185
+ if (fs.existsSync(cacheFile)) {
186
+ fs.unlinkSync(cacheFile)
187
+ }
188
+ } catch { /* ignore */ }
189
+ this.metadata.entries.delete(filePath)
190
+ }
191
+ })
192
+ }
193
+
194
+ /**
195
+ * Clear entire cache.
196
+ */
197
+ async clear(): Promise<void> {
198
+ return this.withMutex(async () => {
199
+ for (const [, entry] of this.metadata.entries) {
200
+ const cacheFile = this.getCacheFilePath(entry.hash)
201
+ try {
202
+ if (fs.existsSync(cacheFile)) {
203
+ fs.unlinkSync(cacheFile)
204
+ }
205
+ } catch { /* ignore */ }
206
+ }
207
+ this.metadata.entries.clear()
208
+ this.saveMetadata()
209
+ })
210
+ }
211
+
212
+ /**
213
+ * Get cache statistics.
214
+ */
215
+ getStats(): { hits: number; misses: number; hitRate: number; size: number } {
216
+ const total = this.hits + this.misses
217
+ return {
218
+ hits: this.hits,
219
+ misses: this.misses,
220
+ hitRate: total > 0 ? this.hits / total : 0,
221
+ size: this.metadata.entries.size,
222
+ }
223
+ }
224
+
225
+ /**
226
+ * Persist cache metadata to disk.
227
+ * Call this after batch operations.
228
+ */
229
+ async flush(): Promise<void> {
230
+ return this.withMutex(async () => {
231
+ this.saveMetadata()
232
+ })
233
+ }
234
+
235
+ /**
236
+ * Evict least recently used entries when cache is full.
237
+ */
238
+ private evictLRU(): void {
239
+ // Sort by parsedAt (oldest first) and remove oldest 20%
240
+ const sorted = [...this.metadata.entries.entries()].sort(
241
+ (a, b) => new Date(a[1].parsedAt).getTime() - new Date(b[1].parsedAt).getTime()
242
+ )
243
+ const toRemove = Math.ceil(sorted.length * 0.2)
244
+ for (let i = 0; i < toRemove; i++) {
245
+ const [filePath, entry] = sorted[i]
246
+ const cacheFile = this.getCacheFilePath(entry.hash)
247
+ try {
248
+ if (fs.existsSync(cacheFile)) {
249
+ fs.unlinkSync(cacheFile)
250
+ }
251
+ } catch { /* ignore */ }
252
+ this.metadata.entries.delete(filePath)
253
+ }
254
+ }
255
+
256
+ /**
257
+ * Prune expired entries from cache.
258
+ */
259
+ async prune(): Promise<void> {
260
+ return this.withMutex(async () => {
261
+ const now = Date.now()
262
+ for (const [filePath, entry] of this.metadata.entries) {
263
+ const parsedAt = new Date(entry.parsedAt).getTime()
264
+ if (now - parsedAt > CACHE_TTL_MS) {
265
+ this.invalidate(filePath)
266
+ }
267
+ }
268
+ this.metadata.lastPruned = now
269
+ this.saveMetadata()
270
+ })
271
+ }
272
+ }
@@ -0,0 +1 @@
1
+ export { IncrementalCache } from './incremental-cache.js'
@@ -1,6 +1,26 @@
1
1
  import type { MikkContract } from './schema.js'
2
2
  import type { ModuleCluster } from '../graph/types.js'
3
3
  import type { ParsedFile } from '../parser/types.js'
4
+ import { minimatch } from '../utils/minimatch.js'
5
+
6
+ /** Common vendor directories to exclude from contract generation */
7
+ const VENDOR_PATTERNS = [
8
+ '**/node_modules/**',
9
+ '**/venv/**',
10
+ '**/.venv/**',
11
+ '**/__pycache__/**',
12
+ '**/vendor/**',
13
+ '**/dist/**',
14
+ '**/build/**',
15
+ '**/.next/**',
16
+ '**/target/**',
17
+ ]
18
+
19
+ /** Check if a path is from a vendor directory */
20
+ function isVendorPath(filePath: string): boolean {
21
+ const normalized = filePath.replace(/\\/g, '/')
22
+ return VENDOR_PATTERNS.some(pattern => minimatch(normalized, pattern))
23
+ }
4
24
 
5
25
  /** Common entry point filenames across ecosystems (without extensions) */
6
26
  const ENTRY_BASENAMES = ['index', 'main', 'app', 'server', 'mod', 'lib', '__init__', 'manage', 'program', 'startup']
@@ -39,7 +59,13 @@ export class ContractGenerator {
39
59
  projectName: string,
40
60
  packageJsonDescription?: string
41
61
  ): MikkContract {
42
- const modules = clusters.map(cluster => ({
62
+ // Filter out vendor files from clusters
63
+ const filteredClusters = clusters.map(cluster => ({
64
+ ...cluster,
65
+ files: cluster.files.filter(f => !isVendorPath(f)),
66
+ })).filter(cluster => cluster.files.length > 0)
67
+
68
+ const modules = filteredClusters.map(cluster => ({
43
69
  id: cluster.id,
44
70
  name: cluster.suggestedName,
45
71
  description: this.inferModuleDescription(cluster, parsedFiles),
@@ -50,14 +76,16 @@ export class ContractGenerator {
50
76
 
51
77
  // Detect entry points — language-agnostic basename matching
52
78
  const entryPoints = parsedFiles
79
+ .filter(f => !isVendorPath(f.path))
53
80
  .filter(f => {
54
81
  const basename = (f.path.split('/').pop() || '').replace(/\.[^.]+$/, '')
55
82
  return ENTRY_BASENAMES.includes(basename)
56
83
  })
57
84
  .map(f => f.path)
58
85
 
59
- const detectedLanguage = inferLanguageFromFiles(parsedFiles)
60
- const fallbackEntry = parsedFiles[0]?.path ?? 'src/index'
86
+ const filteredParsedFiles = parsedFiles.filter(f => !isVendorPath(f.path))
87
+ const detectedLanguage = inferLanguageFromFiles(filteredParsedFiles)
88
+ const fallbackEntry = filteredParsedFiles[0]?.path ?? 'src/index'
61
89
 
62
90
  return {
63
91
  version: '1.0.0',
@@ -195,6 +195,9 @@ export class LockCompiler {
195
195
  for (const [id, node] of graph.nodes) {
196
196
  if (node.type !== 'function') continue
197
197
 
198
+ // Skip vendor files
199
+ if (this.isVendorPath(node.file)) continue
200
+
198
201
  const moduleId = this.findModule(node.file, contract.declared.modules)
199
202
  const displayName = node.name ?? ''
200
203
  const metadata = node.metadata ?? {}
@@ -238,6 +241,8 @@ export class LockCompiler {
238
241
  const result: Record<string, any> = {}
239
242
  for (const [id, node] of graph.nodes) {
240
243
  if (node.type !== 'class') continue
244
+ if (this.isVendorPath(node.file)) continue
245
+
241
246
  const moduleId = this.findModule(node.file, contract.declared.modules)
242
247
  const className = node.name ?? ''
243
248
  const metadata = node.metadata ?? {}
@@ -267,6 +272,8 @@ export class LockCompiler {
267
272
  // Only include exported generics; non-exported types/interfaces are
268
273
  // internal implementation details that add noise without value.
269
274
  if (!(node.metadata?.isExported)) continue
275
+ if (this.isVendorPath(node.file)) continue
276
+
270
277
  const moduleId = this.findModule(node.file, contract.declared.modules)
271
278
  const genericName = node.name ?? ''
272
279
  const metadata = node.metadata ?? {}
@@ -329,6 +336,7 @@ export class LockCompiler {
329
336
  files: moduleFiles,
330
337
  hash: computeModuleHash(fileHashes),
331
338
  fragmentPath: `.mikk/fragments/${module.id}.lock`,
339
+ ...(module.parentId ? { parentId: module.parentId } : {}),
332
340
  }
333
341
  }
334
342
 
@@ -344,6 +352,9 @@ export class LockCompiler {
344
352
  const result: Record<string, MikkLock['files'][string]> = {}
345
353
 
346
354
  for (const file of parsedFiles) {
355
+ // Skip vendor files entirely
356
+ if (this.isVendorPath(file.path)) continue
357
+
347
358
  const moduleId = this.findModule(file.path, contract.declared.modules)
348
359
 
349
360
  // Collect file-level imports from the parsed file info directly
@@ -400,6 +411,9 @@ export class LockCompiler {
400
411
 
401
412
  /** Check if a file path matches any of the module's path patterns */
402
413
  private fileMatchesModule(filePath: string, patterns: string[]): boolean {
414
+ // Skip vendor paths - never match them to any module
415
+ if (this.isVendorPath(filePath)) return false
416
+
403
417
  const relativePath = getModuleMatchPath(filePath, this.projectRootPath)
404
418
  const normalizedRelative = relativePath.replace(/\\/g, '/').toLowerCase()
405
419
  const normalizedAbsolute = filePath.replace(/\\/g, '/').toLowerCase()
@@ -419,4 +433,21 @@ export class LockCompiler {
419
433
  }
420
434
  return false
421
435
  }
436
+
437
+ /** Check if a path is from a vendor directory */
438
+ private isVendorPath(filePath: string): boolean {
439
+ const normalized = filePath.replace(/\\/g, '/')
440
+ const vendorPatterns = [
441
+ '**/node_modules/**',
442
+ '**/venv/**',
443
+ '**/.venv/**',
444
+ '**/__pycache__/**',
445
+ '**/vendor/**',
446
+ '**/dist/**',
447
+ '**/build/**',
448
+ '**/.next/**',
449
+ '**/target/**',
450
+ ]
451
+ return vendorPatterns.some(pattern => minimatch(normalized, pattern))
452
+ }
422
453
  }
@@ -10,6 +10,7 @@ export const MikkModuleSchema = z.object({
10
10
  owners: z.array(z.string()).optional(),
11
11
  paths: z.array(z.string()),
12
12
  entryFunctions: z.array(z.string()).optional(),
13
+ parentId: z.string().optional(),
13
14
  })
14
15
 
15
16
  export const MikkDecisionSchema = z.object({
@@ -99,6 +100,7 @@ export const MikkLockModuleSchema = z.object({
99
100
  files: z.array(z.string()),
100
101
  hash: z.string(),
101
102
  fragmentPath: z.string(),
103
+ parentId: z.string().optional(),
102
104
  })
103
105
 
104
106
  export const MikkLockImportSchema = z.object({
package/src/index.ts CHANGED
@@ -6,6 +6,8 @@ export * from './graph/index.js'
6
6
  export * from './contract/index.js'
7
7
  export * from './hash/index.js'
8
8
  export * from './search/index.js'
9
+ export * from './cache/index.js'
10
+ export * from './security/index.js'
9
11
  export * from './utils/errors.js'
10
12
  export * from './utils/logger.js'
11
13
  export { MikkError, ErrorHandler, ErrorBuilder, ErrorCategory, FileSystemError, ModuleLoadError, GraphError, TokenBudgetError, ValidationError, createDefaultErrorListener, createFileNotFoundError, createFileTooLargeError, createPermissionDeniedError, createModuleNotFoundError, createModuleLoadFailedError, createGraphBuildFailedError, createNodeNotFoundError, createTokenBudgetExceededError, createValidationError, isMikkError, getRootCause, toMikkError } from './error-handler.js'