@getmikk/core 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +4 -4
  2. package/package.json +2 -1
  3. package/src/analysis/index.ts +9 -0
  4. package/src/analysis/taint-analysis.ts +419 -0
  5. package/src/analysis/type-flow.ts +247 -0
  6. package/src/cache/incremental-cache.ts +278 -0
  7. package/src/cache/index.ts +1 -0
  8. package/src/contract/contract-generator.ts +31 -3
  9. package/src/contract/contract-reader.ts +1 -0
  10. package/src/contract/lock-compiler.ts +125 -12
  11. package/src/contract/schema.ts +4 -0
  12. package/src/error-handler.ts +2 -1
  13. package/src/graph/cluster-detector.ts +2 -4
  14. package/src/graph/dead-code-detector.ts +303 -117
  15. package/src/graph/graph-builder.ts +21 -161
  16. package/src/graph/impact-analyzer.ts +1 -0
  17. package/src/graph/index.ts +2 -0
  18. package/src/graph/rich-function-index.ts +1080 -0
  19. package/src/graph/symbol-table.ts +252 -0
  20. package/src/hash/hash-store.ts +1 -0
  21. package/src/index.ts +4 -0
  22. package/src/parser/base-extractor.ts +19 -0
  23. package/src/parser/boundary-checker.ts +31 -12
  24. package/src/parser/error-recovery.ts +647 -0
  25. package/src/parser/function-body-extractor.ts +248 -0
  26. package/src/parser/go/go-extractor.ts +249 -676
  27. package/src/parser/index.ts +138 -295
  28. package/src/parser/language-registry.ts +57 -0
  29. package/src/parser/oxc-parser.ts +166 -28
  30. package/src/parser/oxc-resolver.ts +179 -11
  31. package/src/parser/parser-constants.ts +1 -0
  32. package/src/parser/rust/rust-extractor.ts +109 -0
  33. package/src/parser/tree-sitter/parser.ts +400 -66
  34. package/src/parser/tree-sitter/queries.ts +106 -10
  35. package/src/parser/types.ts +20 -1
  36. package/src/search/bm25.ts +21 -8
  37. package/src/search/direct-search.ts +472 -0
  38. package/src/search/embedding-provider.ts +249 -0
  39. package/src/search/index.ts +12 -0
  40. package/src/search/semantic-search.ts +435 -0
  41. package/src/security/index.ts +1 -0
  42. package/src/security/scanner.ts +342 -0
  43. package/src/utils/artifact-transaction.ts +1 -0
  44. package/src/utils/atomic-write.ts +1 -0
  45. package/src/utils/errors.ts +89 -4
  46. package/src/utils/fs.ts +150 -65
  47. package/src/utils/json.ts +1 -0
  48. package/src/utils/language-registry.ts +96 -5
  49. package/src/utils/minimatch.ts +49 -6
  50. package/src/utils/path.ts +26 -0
  51. package/tests/dead-code.test.ts +3 -2
  52. package/tests/direct-search.test.ts +435 -0
  53. package/tests/error-recovery.test.ts +143 -0
  54. package/tests/fixtures/simple-api/src/index.ts +1 -1
  55. package/tests/go-parser.test.ts +19 -335
  56. package/tests/js-parser.test.ts +18 -1089
  57. package/tests/language-registry-all.test.ts +276 -0
  58. package/tests/language-registry.test.ts +6 -4
  59. package/tests/parse-diagnostics.test.ts +9 -96
  60. package/tests/parser.test.ts +42 -771
  61. package/tests/polyglot-parser.test.ts +117 -0
  62. package/tests/rich-function-index.test.ts +703 -0
  63. package/tests/tree-sitter-parser.test.ts +108 -80
  64. package/tests/ts-parser.test.ts +8 -8
  65. package/tests/verification.test.ts +175 -0
  66. package/src/parser/base-parser.ts +0 -16
  67. package/src/parser/go/go-parser.ts +0 -43
  68. package/src/parser/javascript/js-extractor.ts +0 -278
  69. package/src/parser/javascript/js-parser.ts +0 -101
  70. package/src/parser/typescript/ts-extractor.ts +0 -447
  71. package/src/parser/typescript/ts-parser.ts +0 -36
@@ -0,0 +1,342 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Security Vulnerability Scanning — foundation for detecting common patterns
3
+ // ---------------------------------------------------------------------------
4
+
5
/**
 * One issue reported by the scanner for a single line of a single file.
 */
export interface SecurityFinding {
    /** Identifier built by the scanner as `<pattern id>-<file path>:<line number>`. */
    id: string
    /** Severity copied from the pattern that matched. */
    severity: 'critical' | 'high' | 'medium' | 'low' | 'info'
    /** Category copied from the pattern that matched (e.g. 'injection', 'secrets'). */
    category: string
    /** Short title copied from the pattern. */
    title: string
    /** Longer explanation copied from the pattern. */
    description: string
    /** File path exactly as it was passed to the scanner. */
    file: string
    /** 1-based line number of the matching line. */
    line: number
    /** Index of the match start within the line, as reported by the regex match (0-based). */
    column?: number
    /** The matching source line with surrounding whitespace trimmed. */
    code: string
    /** Remediation hint copied from the pattern, when the pattern has one. */
    suggestion?: string
    /** CWE identifier copied from the pattern, when the pattern has one. */
    cwe?: string
    /** CVE identifier; not populated by the scanner code in this file. */
    cve?: string
}
19
+
20
/**
 * Aggregated result of scanning a batch of files.
 */
export interface SecurityReport {
    /** Every finding from every scanned file; the scanner returns them ordered from critical to info. */
    findings: SecurityFinding[]
    /** Number of findings overall and per severity level. */
    summary: {
        total: number
        critical: number
        high: number
        medium: number
        low: number
        info: number
    }
    /** Number of file entries that were passed to the scan. */
    scannedFiles: number
    /** Elapsed scan time, computed as a difference of `Date.now()` values (milliseconds). */
    scanDuration: number
}
33
+
34
+ // ---------------------------------------------------------------------------
35
+ // Pattern definitions for common vulnerability categories
36
+ // ---------------------------------------------------------------------------
37
+
38
/**
 * Declarative description of one regex-based check.
 * The metadata fields are copied verbatim onto each finding the pattern produces.
 */
interface VulnerabilityPattern {
    /** Stable pattern identifier; becomes the prefix of each finding id. */
    id: string
    /** Severity assigned to every finding produced by this pattern. */
    severity: SecurityFinding['severity']
    /** Grouping label such as 'injection', 'secrets', 'xss' or 'crypto'. */
    category: string
    /** Short human-readable title. */
    title: string
    /** Longer explanation of the problem. */
    description: string
    /** Expression the scanner tests against each line of the file individually. */
    regex: RegExp
    /** Optional remediation hint. */
    suggestion?: string
    /** Optional CWE identifier (e.g. 'CWE-89'). */
    cwe?: string
    /**
     * Optional language filter. The scanner skips the pattern only when both this
     * list and the file's language are provided and the language is not in the list.
     */
    languages?: string[]
}
49
+
50
/**
 * Built-in checks used by the scanner when no custom pattern list is supplied.
 * Each regex is evaluated against one source line at a time, so none of them
 * can match constructs that span multiple lines.
 */
const VULNERABILITY_PATTERNS: VulnerabilityPattern[] = [
    // SQL Injection
    {
        id: 'sql-injection',
        severity: 'critical',
        category: 'injection',
        title: 'Potential SQL Injection',
        description: 'String concatenation in SQL query detected. Use parameterized queries instead.',
        regex: /(?:execute|query|cursor\.execute)\s*\(\s*["'].*(?:\+|\$\{)/,
        suggestion: 'Use parameterized queries: cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))',
        cwe: 'CWE-89',
        languages: ['python', 'javascript', 'typescript'],
    },
    {
        id: 'sql-injection-fstring',
        severity: 'critical',
        category: 'injection',
        title: 'SQL Injection via f-string',
        description: 'f-string used in SQL query. Use parameterized queries.',
        regex: /(?:execute|query)\s*\(\s*f["']/,
        suggestion: 'Use parameterized queries instead of f-strings in SQL.',
        cwe: 'CWE-89',
        languages: ['python'],
    },

    // Command Injection
    {
        id: 'command-injection',
        severity: 'critical',
        category: 'injection',
        title: 'Potential Command Injection',
        description: 'User input may be passed to shell command. Use subprocess with list args instead.',
        regex: /(?:os\.system|subprocess\.call|subprocess\.Popen|exec|eval)\s*\(\s*(?:.*\+|.*\$\{)/,
        suggestion: 'Use subprocess.run() with a list of arguments instead of shell=True.',
        cwe: 'CWE-78',
        languages: ['python'],
    },
    {
        id: 'eval-usage',
        severity: 'high',
        category: 'injection',
        title: 'Use of eval()',
        description: 'eval() can execute arbitrary code. Use ast.literal_eval() for safe parsing.',
        regex: /\beval\s*\(/,
        suggestion: 'Use ast.literal_eval() for parsing Python literals, or json.loads() for JSON.',
        cwe: 'CWE-95',
        languages: ['python', 'javascript', 'typescript'],
    },

    // Hardcoded Secrets
    {
        id: 'hardcoded-password',
        severity: 'high',
        category: 'secrets',
        title: 'Hardcoded Password',
        description: 'Password appears to be hardcoded in source code.',
        regex: /(?:password|passwd|pwd)\s*[:=]\s*["'][^"']{3,}["']/i,
        suggestion: 'Use environment variables or a secrets manager.',
        cwe: 'CWE-798',
    },
    {
        id: 'hardcoded-api-key',
        severity: 'high',
        category: 'secrets',
        title: 'Hardcoded API Key',
        description: 'API key or token appears to be hardcoded.',
        regex: /(?:api[_-]?key|api[_-]?secret|access[_-]?token|auth[_-]?token)\s*[:=]\s*["'][A-Za-z0-9_-]{8,}["']/i,
        suggestion: 'Use environment variables or a secrets manager.',
        cwe: 'CWE-798',
    },
    {
        id: 'aws-key',
        severity: 'critical',
        category: 'secrets',
        title: 'AWS Access Key',
        description: 'AWS access key pattern detected.',
        regex: /AKIA[0-9A-Z]{16}/,
        suggestion: 'Remove AWS credentials from source code. Use IAM roles or environment variables.',
        cwe: 'CWE-798',
    },
    {
        id: 'private-key',
        severity: 'critical',
        category: 'secrets',
        title: 'Private Key',
        description: 'Private key content detected in source code.',
        regex: /-----BEGIN\s+(?:RSA\s+)?PRIVATE\s+KEY-----/,
        suggestion: 'Never embed private keys in source code. Use a secrets manager.',
        cwe: 'CWE-798',
    },

    // XSS
    {
        id: 'xss-innerhtml',
        severity: 'high',
        category: 'xss',
        title: 'Potential XSS via innerHTML',
        description: 'Setting innerHTML with dynamic content can lead to XSS.',
        // `(?!=)` keeps `==` / `===` comparisons from being reported as assignments.
        // The second lookahead skips assignments of an empty string literal that end
        // the line; the whitespace lives inside the lookahead so that backtracking
        // cannot step around the exclusion.
        regex: /\.innerHTML\s*=(?!=)(?!\s*(?:""|''|``)\s*;?\s*$)/,
        suggestion: 'Use textContent or sanitize HTML with DOMPurify.',
        cwe: 'CWE-79',
        languages: ['javascript', 'typescript'],
    },
    {
        id: 'xss-dangerouslySetInnerHTML',
        severity: 'high',
        category: 'xss',
        title: 'Potential XSS via dangerouslySetInnerHTML',
        description: 'dangerouslySetInnerHTML with dynamic content can lead to XSS.',
        regex: /dangerouslySetInnerHTML\s*=\s*\{\{?\s*__html\s*:/,
        suggestion: 'Sanitize HTML content with DOMPurify before using dangerouslySetInnerHTML.',
        cwe: 'CWE-79',
        languages: ['javascript', 'typescript'],
    },

    // Insecure Random
    {
        id: 'insecure-random',
        severity: 'medium',
        category: 'crypto',
        title: 'Insecure Random Number Generator',
        description: 'Math.random() is not cryptographically secure.',
        regex: /Math\.random\s*\(\)/,
        suggestion: 'Use crypto.getRandomValues() for security-sensitive operations.',
        cwe: 'CWE-330',
        languages: ['javascript', 'typescript'],
    },

    // Path Traversal
    {
        id: 'path-traversal',
        severity: 'high',
        category: 'path-traversal',
        title: 'Potential Path Traversal',
        description: 'User input used in file path without sanitization.',
        regex: /(?:readFile|readFileSync|open|writeFile|writeFileSync)\s*\(\s*(?:.*\+|.*\$\{)/,
        suggestion: 'Validate and sanitize file paths. Use path.resolve() with a whitelist.',
        cwe: 'CWE-22',
        languages: ['javascript', 'typescript', 'python'],
    },

    // Weak Cryptography
    {
        id: 'weak-hash-md5',
        severity: 'medium',
        category: 'crypto',
        title: 'Weak Hashing Algorithm (MD5)',
        description: 'MD5 is cryptographically broken. Use SHA-256 or better.',
        regex: /(?:md5|MD5|hashlib\.md5)/,
        suggestion: 'Use SHA-256 or SHA-3 for cryptographic hashing.',
        cwe: 'CWE-328',
    },
    {
        id: 'weak-hash-sha1',
        severity: 'medium',
        category: 'crypto',
        title: 'Weak Hashing Algorithm (SHA-1)',
        description: 'SHA-1 is deprecated for cryptographic use. Use SHA-256 or better.',
        regex: /(?:sha1|SHA1|hashlib\.sha1)/,
        suggestion: 'Use SHA-256 or SHA-3 for cryptographic hashing.',
        cwe: 'CWE-328',
    },

    // Debug/Console in Production
    {
        id: 'console-log',
        severity: 'info',
        category: 'best-practice',
        title: 'Console Log Statement',
        description: 'Console.log statements should be removed before production.',
        regex: /console\.(log|debug|info|warn)\s*\(/,
        suggestion: 'Use a proper logging framework and remove debug statements.',
        languages: ['javascript', 'typescript'],
    },
    {
        id: 'print-debug',
        severity: 'info',
        category: 'best-practice',
        title: 'Print Debug Statement',
        description: 'Print statements should be removed before production.',
        regex: /print\s*\(\s*["'][^"']*["']\s*\)/,
        suggestion: 'Use the logging module instead of print statements.',
        languages: ['python'],
    },

    // TODO/FIXME/HACK
    {
        id: 'todo-comment',
        severity: 'info',
        category: 'best-practice',
        title: 'TODO Comment',
        description: 'TODO comment found. Consider addressing this.',
        regex: /\/\/\s*TODO|\/\*\s*TODO|#\s*TODO/i,
        languages: ['javascript', 'typescript', 'python', 'go', 'java', 'rust'],
    },
]
246
+
247
+ // ---------------------------------------------------------------------------
248
+ // Scanner
249
+ // ---------------------------------------------------------------------------
250
+
251
/**
 * Regex-based scanner that tests every line of a file against a list of
 * vulnerability patterns and reports one finding per (pattern, line) match.
 */
export class SecurityScanner {
    private patterns: VulnerabilityPattern[]

    /**
     * @param customPatterns Patterns to use instead of the built-in list.
     *   The array is copied, so later `addPattern` calls affect only this instance.
     */
    constructor(customPatterns?: VulnerabilityPattern[]) {
        // Copy the list: aliasing it would let addPattern() mutate the module-level
        // defaults (shared by every scanner) or the caller's own array.
        this.patterns = [...(customPatterns ?? VULNERABILITY_PATTERNS)]
    }

    /**
     * Scan a single file's content for security issues.
     *
     * @param filePath Path recorded on each finding; the file is not read from disk.
     * @param content Full text of the file; both `\n` and `\r\n` line endings are accepted.
     * @param language Optional language name. A pattern that declares `languages` is
     *   skipped only when this is provided and not in that list.
     * @returns Findings grouped by pattern (in pattern order), then by ascending line.
     */
    scanFile(filePath: string, content: string, language?: string): SecurityFinding[] {
        const findings: SecurityFinding[] = []
        const lines = content.split(/\r?\n/)

        for (const pattern of this.patterns) {
            // Skip if language filter doesn't match
            if (pattern.languages && language && !pattern.languages.includes(language)) {
                continue
            }

            for (const [index, line] of lines.entries()) {
                const match = this.firstMatch(pattern.regex, line)
                if (!match) {
                    continue
                }
                const lineNumber = index + 1
                findings.push({
                    id: `${pattern.id}-${filePath}:${lineNumber}`,
                    severity: pattern.severity,
                    category: pattern.category,
                    title: pattern.title,
                    description: pattern.description,
                    file: filePath,
                    line: lineNumber,
                    column: match.index,
                    code: line.trim(),
                    suggestion: pattern.suggestion,
                    cwe: pattern.cwe,
                })
            }
        }

        return findings
    }

    /**
     * Scan multiple files.
     *
     * @param files Entries to scan; each is passed to `scanFile` unchanged.
     * @returns A report whose findings are ordered from critical to info; findings of
     *   equal severity keep their scan order.
     */
    scanFiles(
        files: Array<{ path: string; content: string; language?: string }>
    ): SecurityReport {
        const startTime = Date.now()
        const allFindings: SecurityFinding[] = []
        const summary = { total: 0, critical: 0, high: 0, medium: 0, low: 0, info: 0 }

        for (const file of files) {
            // Push one by one: spreading a very large array into push() can exceed
            // the engine's argument limit and throw a RangeError.
            for (const finding of this.scanFile(file.path, file.content, file.language)) {
                allFindings.push(finding)
                summary[finding.severity]++
            }
        }
        summary.total = allFindings.length

        const severityOrder: Record<SecurityFinding['severity'], number> = {
            critical: 0,
            high: 1,
            medium: 2,
            low: 3,
            info: 4,
        }
        allFindings.sort((a, b) => severityOrder[a.severity] - severityOrder[b.severity])

        return {
            findings: allFindings,
            summary,
            scannedFiles: files.length,
            scanDuration: Date.now() - startTime,
        }
    }

    /**
     * Add custom vulnerability patterns.
     */
    addPattern(pattern: VulnerabilityPattern): void {
        this.patterns.push(pattern)
    }

    /**
     * Get all available patterns.
     *
     * @returns A shallow copy of this scanner's pattern list.
     */
    getPatterns(): VulnerabilityPattern[] {
        return [...this.patterns]
    }

    /**
     * Return the first match of `regex` in `line`, always with a usable `index`.
     * `String.prototype.match` drops `index` for global regexes, and global or
     * sticky regexes carry `lastIndex` between calls, so the position is reset
     * before and after each search.
     */
    private firstMatch(regex: RegExp, line: string): RegExpExecArray | null {
        regex.lastIndex = 0
        const match = regex.exec(line)
        regex.lastIndex = 0
        return match
    }
}
@@ -1,3 +1,4 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
1
2
  import * as fs from 'node:fs/promises'
2
3
  import * as path from 'node:path'
3
4
  import { randomUUID } from 'node:crypto'
@@ -1,3 +1,4 @@
1
+ /* eslint-disable @typescript-eslint/no-explicit-any */
1
2
  import * as fs from 'node:fs/promises'
2
3
  import * as path from 'node:path'
3
4
  import { randomUUID } from 'node:crypto'
@@ -2,12 +2,25 @@ export class MikkError extends Error {
2
2
  constructor(message: string, public code: string) {
3
3
  super(message)
4
4
  this.name = 'MikkError'
5
+ Error.captureStackTrace?.(this, this.constructor)
6
+ }
7
+
8
+ toJSON() {
9
+ return {
10
+ name: this.name,
11
+ message: this.message,
12
+ code: this.code,
13
+ stack: this.stack,
14
+ }
5
15
  }
6
16
  }
7
17
 
8
18
/**
 * MikkError with code 'PARSE_ERROR' describing a file that failed to parse.
 * `cause` may be a plain description or an Error, whose `message` is used.
 */
export class ParseError extends MikkError {
    constructor(file: string, cause: string | Error) {
        let message: string
        if (cause instanceof Error) {
            message = `Failed to parse ${file}: ${cause.message}`
        } else {
            message = `Failed to parse ${file}: ${cause}`
        }
        super(message, 'PARSE_ERROR')
    }
}
13
26
 
@@ -18,8 +31,11 @@ export class ContractNotFoundError extends MikkError {
18
31
  }
19
32
 
20
33
/**
 * MikkError with code 'LOCK_NOT_FOUND'.
 * When a non-empty `path` is given it is included in the message.
 */
export class LockNotFoundError extends MikkError {
    constructor(path?: string) {
        let msg = `No mikk.lock.json found. Run 'mikk analyze' first.`
        if (path) {
            msg = `No mikk.lock.json found at ${path}. Run 'mikk analyze' first.`
        }
        super(msg, 'LOCK_NOT_FOUND')
    }
}
25
41
 
@@ -40,3 +56,72 @@ export class SyncStateError extends MikkError {
40
56
  super(`Mikk is in ${status} state. Run 'mikk analyze' to sync.`, 'SYNC_STATE_ERROR')
41
57
  }
42
58
  }
59
+
60
/**
 * MikkError with code 'EMBEDDING_ERROR'.
 * When `cause` is supplied, its message is appended after a colon.
 */
export class EmbeddingError extends MikkError {
    constructor(message: string, cause?: Error) {
        let fullMessage = message
        if (cause) {
            fullMessage = `${message}: ${cause.message}`
        }
        super(fullMessage, 'EMBEDDING_ERROR')
    }
}
68
+
69
/**
 * MikkError with code 'SEARCH_ERROR'.
 * When `cause` is supplied, its message is appended after a colon.
 */
export class SearchError extends MikkError {
    constructor(message: string, cause?: Error) {
        let fullMessage = message
        if (cause) {
            fullMessage = `${message}: ${cause.message}`
        }
        super(fullMessage, 'SEARCH_ERROR')
    }
}
77
+
78
/**
 * MikkError that carries the given message with code 'VALIDATION_ERROR'.
 */
export class ValidationError extends MikkError {
    constructor(message: string) {
        const code = 'VALIDATION_ERROR'
        super(message, code)
    }
}
83
+
84
/**
 * MikkError that carries the given message with code 'CONFIGURATION_ERROR'.
 */
export class ConfigurationError extends MikkError {
    constructor(message: string) {
        const code = 'CONFIGURATION_ERROR'
        super(message, code)
    }
}
89
+
90
/**
 * MikkError with code 'TIMEOUT' whose message names the operation and the
 * timeout value in milliseconds.
 */
export class TimeoutError extends MikkError {
    constructor(operation: string, timeoutMs: number) {
        const description = `Operation '${operation}' timed out after ${timeoutMs}ms`
        super(description, 'TIMEOUT')
    }
}
95
+
96
/**
 * MikkError with code 'CACHE_ERROR'. The message is prefixed with "Cache error: "
 * and, when `cause` is supplied, followed by the cause's message.
 */
export class CacheError extends MikkError {
    constructor(message: string, cause?: Error) {
        let fullMessage = `Cache error: ${message}`
        if (cause) {
            fullMessage = `Cache error: ${message}: ${cause.message}`
        }
        super(fullMessage, 'CACHE_ERROR')
    }
}
104
+
105
/**
 * Type guard: true when `error` is an instance of MikkError (or a subclass).
 */
export function isMikkError(error: unknown): error is MikkError {
    const matches = error instanceof MikkError
    return matches
}
108
+
109
/**
 * Derive a code string for any thrown value.
 *
 * @returns The `code` of a MikkError; for any other Error, its `name` upper-cased
 *   with whitespace runs replaced by underscores; 'UNKNOWN' for non-Error values.
 */
export function getErrorCode(error: unknown): string {
    // Anything that is not an Error (MikkError included, since it extends Error)
    // has no name or code to derive from.
    if (!(error instanceof Error)) {
        return 'UNKNOWN'
    }
    if (error instanceof MikkError) {
        return error.code
    }
    const upperName = error.name.toUpperCase()
    return upperName.replace(/\s+/g, '_')
}
118
+
119
/**
 * Render any thrown value as a single display string.
 *
 * @returns `[CODE] message` for a MikkError, `Name: message` for any other Error,
 *   and `String(error)` for everything else.
 */
export function formatError(error: unknown): string {
    if (error instanceof Error) {
        return isMikkError(error)
            ? `[${error.code}] ${error.message}`
            : `${error.name}: ${error.message}`
    }
    return String(error)
}