npm - @getmikk/core - Versions diffs - 2.0.12 → 2.0.14 - Mend

@getmikk/core 2.0.12 → 2.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

package/README.md +12 -3
package/package.json +1 -1
package/src/analysis/index.ts +9 -0
package/src/analysis/taint-analysis.ts +419 -0
package/src/analysis/type-flow.ts +247 -0
package/src/cache/incremental-cache.ts +272 -0
package/src/cache/index.ts +1 -0
package/src/contract/adr-manager.ts +5 -4
package/src/contract/contract-generator.ts +31 -3
package/src/contract/contract-writer.ts +3 -2
package/src/contract/lock-compiler.ts +34 -0
package/src/contract/lock-reader.ts +62 -5
package/src/contract/schema.ts +10 -0
package/src/index.ts +14 -1
package/src/parser/error-recovery.ts +646 -0
package/src/parser/index.ts +330 -74
package/src/parser/oxc-parser.ts +3 -2
package/src/parser/tree-sitter/parser.ts +59 -9
package/src/parser/tree-sitter/queries.ts +27 -0
package/src/parser/types.ts +1 -1
package/src/security/index.ts +1 -0
package/src/security/scanner.ts +342 -0
package/src/utils/artifact-transaction.ts +176 -0
package/src/utils/atomic-write.ts +131 -0
package/src/utils/fs.ts +76 -25
package/src/utils/language-registry.ts +95 -0
package/src/utils/minimatch.ts +49 -6
package/tests/adr-manager.test.ts +6 -0
package/tests/artifact-transaction.test.ts +73 -0
package/tests/contract.test.ts +12 -0
package/tests/dead-code.test.ts +12 -0
package/tests/esm-resolver.test.ts +6 -0
package/tests/fs.test.ts +22 -1
package/tests/fuzzy-match.test.ts +6 -0
package/tests/go-parser.test.ts +7 -0
package/tests/graph.test.ts +10 -0
package/tests/hash.test.ts +6 -0
package/tests/impact-classified.test.ts +13 -0
package/tests/js-parser.test.ts +10 -0
package/tests/language-registry.test.ts +64 -0
package/tests/parse-diagnostics.test.ts +115 -0
package/tests/parser.test.ts +36 -0
package/tests/tree-sitter-parser.test.ts +201 -0
package/tests/ts-parser.test.ts +6 -0

package/README.md CHANGED Viewed

@@ -15,13 +15,16 @@ Foundation package for the Mikk ecosystem. All other packages depend on core —
 ### Parsers
-Three language parsers, each following the same interface: `parse(filePath, content)` → `ParsedFile`.
+Three parser families follow the same interface: `parse(filePath, content)` → `ParsedFile`.
 **TypeScript / TSX**
-Uses the TypeScript Compiler API. Extracts: functions (name, params with types, return type, start/end line, async flag, decorators, generics), classes (methods, properties, inheritance), imports (named, default, namespace, type-only) with full resolution (tsconfig `paths` alias resolution, recursive `extends` chain, index file inference, extension inference). Every extracted function has its exact byte-accurate body location.
+Uses OXC (Rust parser). Extracts: functions (name, params with types, return type, start/end line, async flag, decorators, generics), classes (methods, properties, inheritance), imports (named, default, namespace, type-only) with full resolution (tsconfig `paths` alias resolution, recursive `extends` chain, index file inference, extension inference). Every extracted function has its exact byte-accurate body location.
 **JavaScript / JSX**
-Uses the TypeScript Compiler API with `ScriptKind` inference (detects JS/JSX/CJS/MJS). Handles: JSX expression containers, default exports, CommonJS `module.exports`, re-exports via barrel files.
+Uses OXC with `ScriptKind` inference (detects JS/JSX/CJS/MJS). Handles: JSX expression containers, default exports, CommonJS `module.exports`, re-exports via barrel files.
+**Polyglot (Tree-sitter)**
+Python, Java, Kotlin (`.kt`, `.kts`), Swift, C/C++ (`.cpp`, `.cc`, `.cxx`, `.hpp`, `.hxx`, `.hh`), C#, Rust, PHP, and Ruby via tree-sitter grammars.
 **Go**
 Regex + stateful scanning. No Go toolchain dependency. Extracts: functions, methods (with receiver types), structs, interfaces, package imports. `go.mod` used for project boundary detection.
@@ -81,6 +84,12 @@ Lock format v1.7.0:
 Read and write `mikk.json` and `mikk.lock.json`. `LockReader.write()` uses atomic temp-file + rename to prevent corruption.
+`AdrManager` writes `mikk.json` atomically as well (temp file + rename + file lock), reducing corruption risk in concurrent agent workflows.
+### Parse Diagnostics
+`parseFilesWithDiagnostics` returns both parsed files and parser/read/import-resolution diagnostics. This enables strict parse enforcement in CLI commands (`mikk init --strict-parsing`, `mikk analyze --strict-parsing`) for high-assurance pipelines.
 ### AdrManager
 CRUD for Architectural Decision Records in `mikk.json`. Add, update, remove, list, and get individual decisions. ADRs surface in all AI context queries via the MCP server.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@getmikk/core",
-  "version": "2.0.12",
+  "version": "2.0.14",
   "publishConfig": {
     "access": "public",
     "registry": "https://registry.npmjs.org/"

package/src/analysis/index.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Analysis modules - Type Flow and Taint Analysis for semantic code understanding
+ */
+export { TypeFlowAnalyzer } from './type-flow.js'
+export type { TypeFlowInfo, TypeParam, TypeEdge, TypeFlowResult } from './type-flow.js'
+export { TaintAnalyzer } from './taint-analysis.js'
+export type { TaintSource, TaintSink, TaintFlow, DataFlowResult } from './taint-analysis.js'

package/src/analysis/taint-analysis.ts ADDED Viewed

@@ -0,0 +1,419 @@
+/**
+ * Data Flow & Taint Analysis — tracks data propagation through code
+ * for security vulnerability detection.
+ */
+import type { MikkLock, MikkLockFunction } from '../contract/schema.js'
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/**
+ * A category of places where untrusted data can enter the program,
+ * recognised by regular expressions.
+ */
+export interface TaintSource {
+  /** Short identifier of the category (for example `user-input`). */
+  name: string
+  /** Human-readable explanation of the category. */
+  description: string
+  /** Risk level assigned to the category. */
+  severity: 'critical' | 'high' | 'medium' | 'low'
+  /** A function counts as a source of this category as soon as any one of these matches. */
+  patterns: RegExp[]
+}
+/**
+ * A category of dangerous operations that tainted data should not reach,
+ * recognised by regular expressions.
+ */
+export interface TaintSink {
+  /** Short identifier of the category (for example `sql-query`). */
+  name: string
+  /** Human-readable explanation of the category. */
+  description: string
+  /** Risk level; `TaintAnalyzer` copies it onto every flow that ends in this sink. */
+  severity: 'critical' | 'high' | 'medium' | 'low'
+  /** A function counts as a sink of this category as soon as any one of these matches. */
+  patterns: RegExp[]
+  /** Substrings that `TaintAnalyzer.hasSanitizer` searches for, case-insensitively, in a function's name and purpose. */
+  sanitizers: string[]
+}
+/**
+ * One reported route from a source function to a sink function.
+ */
+export interface TaintFlow {
+  /** Name of the function classified as the taint source. */
+  source: string
+  /** Name of the function classified as the taint sink. */
+  sink: string
+  /** Function names from source to sink; an ID is used where no name could be resolved. */
+  path: string[]
+  /** Short textual summary of the route. */
+  vulnerability: string
+  /** Severity taken from the matched sink definition. */
+  severity: 'critical' | 'high' | 'medium' | 'low'
+  /** 0.9 when the source calls the sink directly, 0.7 when the route goes through other functions. */
+  confidence: number
+}
+/**
+ * Result of `TaintAnalyzer.analyze()`: the individual flows plus per-severity counts.
+ */
+export interface DataFlowResult {
+  /** Every flow that was found. */
+  flows: TaintFlow[]
+  /** Counts derived from `flows`. */
+  summary: {
+    /** Length of `flows`. */
+    totalFlows: number
+    /** Number of flows whose severity is `critical`. */
+    critical: number
+    /** Number of flows whose severity is `high`. */
+    high: number
+    /** Number of flows whose severity is `medium`. */
+    medium: number
+    /** Number of flows whose severity is `low`. */
+    low: number
+  }
+}
+// ---------------------------------------------------------------------------
+// Default Taint Sources and Sinks
+// ---------------------------------------------------------------------------
+/**
+ * Source definitions used when `TaintAnalyzer` is constructed without custom sources.
+ *
+ * `TaintAnalyzer.findTaintSources` tests these patterns against the lowercased
+ * `name` and `purpose` of each function, not against function bodies.
+ * Some patterns appear in more than one category (for example `readFile(`,
+ * `fetch(` and `axios(`), so a single function can match several categories.
+ * NOTE(review): the `severity` of a source is not read by the analyzer code in
+ * this file; reported flows take their severity from the sink.
+ */
+const DEFAULT_TAINT_SOURCES: TaintSource[] = [
+  {
+    name: 'user-input',
+    description: 'User-controlled input',
+    severity: 'high',
+    patterns: [
+      /req\.(body|query|params|headers)/i,
+      /request\.(body|query|params|headers)/i,
+      /input\(/i,
+      /process\.env/i,
+      /process\.argv/i,
+      /\bstdin\b/i,
+      /readline\(/i,
+      /readFile\(/i,
+      /fetch\(/i,
+      /axios\(/i,
+      /http\.request\(/i,
+    ],
+  },
+  {
+    name: 'filesystem',
+    description: 'File system input',
+    severity: 'medium',
+    patterns: [
+      /readFile\(/i,
+      /readFileSync\(/i,
+      /readdir\(/i,
+      /createReadStream\(/i,
+      /fs\.readFile\(/i,
+    ],
+  },
+  {
+    name: 'database',
+    description: 'Database query results',
+    severity: 'medium',
+    patterns: [
+      /query\(/i,
+      /\.find\(/i,
+      /\.select\(/i,
+      /execute\(/i,
+      /\.fetch\(/i,
+    ],
+  },
+  {
+    name: 'network',
+    description: 'Network/API responses',
+    severity: 'medium',
+    patterns: [
+      /fetch\(/i,
+      /axios\(/i,
+      /http\.get\(/i,
+      /https\.get\(/i,
+      /request\(/i,
+      /\.json\(\)/i,
+    ],
+  },
+]
+/**
+ * Sink definitions used when `TaintAnalyzer` is constructed without custom sinks.
+ *
+ * `TaintAnalyzer.findTaintSinks` tests these patterns against the lowercased
+ * `name` and `purpose` of each function, not against function bodies. The
+ * `severity` of the matched sink becomes the severity of the reported flow.
+ * Each `sanitizers` entry is a plain substring consumed by
+ * `TaintAnalyzer.hasSanitizer`.
+ */
+const DEFAULT_TAINT_SINKS: TaintSink[] = [
+  {
+    name: 'sql-query',
+    description: 'SQL query execution',
+    severity: 'critical',
+    patterns: [
+      /execute\s*\(/i,
+      /query\s*\(/i,
+      /\.exec\(/i,
+      /cursor\.execute\(/i,
+      /db\.query\(/i,
+    ],
+    sanitizers: ['escape', 'sanitize', 'param', 'bind', 'prepare'],
+  },
+  {
+    name: 'command-injection',
+    description: 'OS command execution',
+    severity: 'critical',
+    patterns: [
+      /exec\s*\(/i,
+      /spawn\s*\(/i,
+      /execSync\s*\(/i,
+      /system\s*\(/i,
+      /popen\s*\(/i,
+      /child_process\./i,
+    ],
+    sanitizers: ['execFile', 'spawnSync', 'execFileSync'],
+  },
+  {
+    name: 'code-execution',
+    description: 'Dynamic code execution',
+    severity: 'critical',
+    patterns: [
+      /\beval\s*\(/i,
+      /\bFunction\s*\(/i,
+      /setTimeout\s*\(\s*\w+\s*,/i,
+      /setInterval\s*\(\s*\w+\s*,/i,
+    ],
+    sanitizers: [],
+  },
+  {
+    name: 'path-traversal',
+    description: 'File system operations',
+    severity: 'high',
+    patterns: [
+      /readFile\(/i,
+      /writeFile\(/i,
+      /open\(/i,
+      /createReadStream\(/i,
+      /stat\(/i,
+      /lstat\(/i,
+      /access\(/i,
+      /exists\(/i,
+    ],
+    sanitizers: ['normalize', 'resolve', 'basename', 'dirname', 'join'],
+  },
+  {
+    name: 'xss',
+    description: 'HTML/JS injection',
+    severity: 'high',
+    patterns: [
+      /\.innerHTML\s*=/i,
+      /\.outerHTML\s*=/i,
+      /dangerouslySetInnerHTML/i,
+      /document\.write\(/i,
+      /\.html\s*\(/i,
+    ],
+    sanitizers: ['escape', 'sanitize', 'text', 'encode', 'DOMPurify'],
+  },
+  {
+    name: 'prototype-pollution',
+    description: 'Object prototype manipulation',
+    severity: 'high',
+    patterns: [
+      /\[\s*['"]__proto__['"]\s*\]/i,
+      /\[\s*['"]constructor['"]\s*\]/i,
+      /Object\.assign\s*\(\s*\w+\s*,\s*\w+\s*\)/i,
+    ],
+    sanitizers: [],
+  },
+]
+// ---------------------------------------------------------------------------
+// Taint Analyzer
+// ---------------------------------------------------------------------------
+/**
+ * Heuristic taint analyzer that works on the function metadata and call graph
+ * stored in a lock file.
+ *
+ * A function is classified as a source or a sink by matching the configured
+ * patterns against its lowercased `name` and `purpose`. A flow is reported when
+ * the sink function is reachable from the source function through `calls` edges.
+ */
+export class TaintAnalyzer {
+  private lock: MikkLock
+  private sources: TaintSource[]
+  private sinks: TaintSink[]
+  /**
+   * @param lock - Lock data whose `functions` map is analyzed.
+   * @param sources - Source definitions; the built-in defaults are used when omitted.
+   * @param sinks - Sink definitions; the built-in defaults are used when omitted.
+   */
+  constructor(
+    lock: MikkLock,
+    sources?: TaintSource[],
+    sinks?: TaintSink[]
+  ) {
+    this.lock = lock
+    this.sources = sources ?? DEFAULT_TAINT_SOURCES
+    this.sinks = sinks ?? DEFAULT_TAINT_SINKS
+  }
+  /**
+   * Analyze the codebase for taint flows.
+   *
+   * @returns Every flow found plus a count of flows per severity.
+   */
+  analyze(): DataFlowResult {
+    const flows = this.collectFlows().map(entry => entry.flow)
+    const countBySeverity = (severity: TaintFlow['severity']): number =>
+      flows.filter(f => f.severity === severity).length
+    return {
+      flows,
+      summary: {
+        totalFlows: flows.length,
+        critical: countBySeverity('critical'),
+        high: countBySeverity('high'),
+        medium: countBySeverity('medium'),
+        low: countBySeverity('low'),
+      },
+    }
+  }
+  /**
+   * Trace every source/sink pair and keep the source function next to each flow,
+   * so that callers never have to re-resolve it by (possibly ambiguous) name.
+   */
+  private collectFlows(): Array<{ flow: TaintFlow; sourceFn: MikkLockFunction }> {
+    const traced: Array<{ flow: TaintFlow; sourceFn: MikkLockFunction }> = []
+    const allFunctions = Object.values(this.lock.functions)
+    const sourceFunctions = this.findTaintSources(allFunctions)
+    const sinkFunctions = this.findTaintSinks(allFunctions)
+    for (const sourceFn of sourceFunctions) {
+      for (const { fn: sinkFn, sink } of sinkFunctions) {
+        // A function is never paired with itself.
+        if (sourceFn.id === sinkFn.id) continue
+        const flow = this.traceTaintFlow(sourceFn, sinkFn, sink, allFunctions)
+        if (flow) {
+          traced.push({ flow, sourceFn })
+        }
+      }
+    }
+    return traced
+  }
+  /**
+   * Text the patterns are matched against: lowercased name and purpose.
+   */
+  private describe(fn: MikkLockFunction): string {
+    return `${fn.name} ${fn.purpose || ''}`.toLowerCase()
+  }
+  /**
+   * True when at least one pattern matches the text.
+   */
+  private matchesAny(patterns: RegExp[], text: string): boolean {
+    return patterns.some(pattern => {
+      // Caller-supplied patterns may carry the `g` or `y` flag, which makes
+      // `test` resume from `lastIndex`; reset it so every check starts at 0.
+      pattern.lastIndex = 0
+      return pattern.test(text)
+    })
+  }
+  /**
+   * Find functions that contain taint sources.
+   *
+   * Each function is returned at most once, even when it matches several
+   * source categories; otherwise the same flow would be reported repeatedly.
+   */
+  private findTaintSources(functions: MikkLockFunction[]): MikkLockFunction[] {
+    return functions.filter(fn => {
+      const fnText = this.describe(fn)
+      return this.sources.some(source => this.matchesAny(source.patterns, fnText))
+    })
+  }
+  /**
+   * Find functions that contain taint sinks.
+   *
+   * A function yields one entry per sink category it matches.
+   */
+  private findTaintSinks(functions: MikkLockFunction[]): Array<{ fn: MikkLockFunction; sink: TaintSink }> {
+    const sinks: Array<{ fn: MikkLockFunction; sink: TaintSink }> = []
+    for (const fn of functions) {
+      const fnText = this.describe(fn)
+      for (const sink of this.sinks) {
+        if (this.matchesAny(sink.patterns, fnText)) {
+          sinks.push({ fn, sink })
+        }
+      }
+    }
+    return sinks
+  }
+  /**
+   * Trace taint flow from source to sink through call graph.
+   *
+   * @returns A flow with confidence 0.9 for a direct call, 0.7 for a route
+   *          through other functions, or `null` when the sink is unreachable.
+   */
+  private traceTaintFlow(
+    source: MikkLockFunction,
+    sinkFn: MikkLockFunction,
+    sink: TaintSink,
+    allFunctions: MikkLockFunction[]
+  ): TaintFlow | null {
+    // Direct call: source calls sink directly
+    if (source.calls?.includes(sinkFn.id)) {
+      return {
+        source: source.name,
+        sink: sinkFn.name,
+        path: [source.name, sinkFn.name],
+        vulnerability: `${source.name} -> ${sinkFn.name}`,
+        severity: sink.severity,
+        confidence: 0.9,
+      }
+    }
+    // Check if there's a path through the call graph
+    const path = this.findPath(source.id, sinkFn.id, allFunctions)
+    if (path) {
+      return {
+        source: source.name,
+        sink: sinkFn.name,
+        path: path.map(id => this.lock.functions[id]?.name || id),
+        // The path holds both endpoints, so only length - 2 entries are intermediates.
+        vulnerability: `${source.name} -> ${path.length - 2} intermediate(s) -> ${sinkFn.name}`,
+        severity: sink.severity,
+        confidence: 0.7,
+      }
+    }
+    return null
+  }
+  /**
+   * Find path from source to sink through call graph.
+   *
+   * Depth-first search over `calls`; returns the first route found (not
+   * necessarily the shortest) as a list of function IDs, or `null`.
+   */
+  private findPath(
+    sourceId: string,
+    sinkId: string,
+    allFunctions: MikkLockFunction[]
+  ): string[] | null {
+    // Index once instead of scanning the array for every visited node. The
+    // first entry per ID wins, which mirrors Array.prototype.find.
+    const byId = new Map<string, MikkLockFunction>()
+    for (const fn of allFunctions) {
+      if (!byId.has(fn.id)) byId.set(fn.id, fn)
+    }
+    const visited = new Set<string>()
+    const path: string[] = []
+    const dfs = (currentId: string): boolean => {
+      if (currentId === sinkId) {
+        path.push(currentId)
+        return true
+      }
+      if (visited.has(currentId)) return false
+      visited.add(currentId)
+      path.push(currentId)
+      for (const calleeId of byId.get(currentId)?.calls ?? []) {
+        if (dfs(calleeId)) return true
+      }
+      // Dead end: drop this node from the candidate route.
+      path.pop()
+      return false
+    }
+    return dfs(sourceId) ? path : null
+  }
+  /**
+   * Check if a function has sanitizers that mitigate taint.
+   *
+   * NOTE(review): nothing in this class calls this method, so sanitizers do
+   * not currently influence the reported flows — confirm whether that is intended.
+   */
+  private hasSanitizer(fn: MikkLockFunction, sink: TaintSink): boolean {
+    const fnText = `${fn.name} ${fn.purpose || ''}`.toLowerCase()
+    for (const sanitizer of sink.sanitizers) {
+      if (fnText.includes(sanitizer.toLowerCase())) {
+        return true
+      }
+    }
+    return false
+  }
+  /**
+   * Get security findings from taint analysis.
+   *
+   * Each finding points at the file and start line of the flow's source function.
+   */
+  getFindings(): Array<{
+    severity: string
+    title: string
+    file: string
+    line: number
+    description: string
+  }> {
+    // Use the function object captured while tracing. Looking it up again by
+    // name would pick the wrong file when two functions share a name.
+    return this.collectFlows().map(({ flow, sourceFn }) => ({
+      severity: flow.severity,
+      title: `Potential ${flow.sink} vulnerability`,
+      file: sourceFn.file,
+      line: sourceFn.startLine,
+      description: `Tainted data from ${flow.source} flows to ${flow.sink} via: ${flow.path.join(' -> ')}`,
+    }))
+  }
+}

package/src/analysis/type-flow.ts ADDED Viewed

@@ -0,0 +1,247 @@
+/**
+ * Type Flow Analysis — tracks type propagation through function calls
+ * and provides type-aware code understanding beyond syntactic parsing.
+ */
+import type { MikkLock, MikkLockFunction } from '../contract/schema.js'
+import type { DependencyGraph, GraphEdge } from '../graph/types.js'
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/**
+ * Type information collected for a single function.
+ */
+export interface TypeFlowInfo {
+  /** Function ID */
+  functionId: string
+  /** Parameter types; 'unknown' where the parameter has no declared type */
+  paramTypes: TypeParam[]
+  /** Declared return type, or 'unknown' when none is recorded */
+  returnType: string
+  /** Return types that flow back into this function from the functions it calls */
+  incomingTypes: TypeEdge[]
+  /** Parameter types of the functions this function calls */
+  outgoingTypes: TypeEdge[]
+  /** Type confidence score (0-1) */
+  confidence: number
+}
+/**
+ * Type information for one parameter of a function.
+ */
+export interface TypeParam {
+  /** Parameter name. */
+  name: string
+  /** Declared type, or 'unknown' when the parameter has none. */
+  type: string
+  /** How the type was obtained; the analyzer in this file emits 'annotation' when a type is declared and 'inference' otherwise. */
+  source: 'annotation' | 'inference' | 'usage'
+  /** 1.0 for a declared type, 0.3 otherwise. */
+  confidence: number
+}
+/**
+ * A type travelling along one call-graph edge.
+ */
+export interface TypeEdge {
+  /** ID of the function the type comes from. */
+  from: string
+  /** ID of the function the type goes to. */
+  to: string
+  /** The type being passed. */
+  type: string
+  /** Confidence of the originating parameter or function, multiplied by 0.8. */
+  confidence: number
+}
+/**
+ * Result of `TypeFlowAnalyzer.analyze()`.
+ */
+export interface TypeFlowResult {
+  /** Per-function type information, keyed by function ID. */
+  flows: Map<string, TypeFlowInfo>
+  /** Aggregate statistics over `flows`. */
+  summary: {
+    /** Number of entries in `flows`. */
+    totalFunctions: number
+    /** Functions whose return type is not 'unknown'. */
+    typedFunctions: number
+    /** Functions with at least one parameter whose source is 'inference' or 'usage'. */
+    inferredFunctions: number
+    /** Mean of the per-function confidence; 0 when there are no functions. */
+    averageConfidence: number
+  }
+}
+// ---------------------------------------------------------------------------
+// Type Flow Analyzer
+// ---------------------------------------------------------------------------
+/**
+ * Builds per-function type information from lock metadata and connects it
+ * along the `calls` edges recorded for each function.
+ */
+export class TypeFlowAnalyzer {
+  private lock: MikkLock
+  private graph: DependencyGraph
+  /**
+   * @param lock - Lock data whose `functions` map is analyzed.
+   * @param graph - Dependency graph; stored on the instance.
+   */
+  constructor(lock: MikkLock, graph: DependencyGraph) {
+    this.lock = lock
+    this.graph = graph
+  }
+  /**
+   * Run the whole pipeline: seed every function from its declared types,
+   * add call-graph edges, then summarise.
+   */
+  analyze(): TypeFlowResult {
+    const flows = new Map<string, TypeFlowInfo>()
+    // Step 1: one entry per function, built from declared types only.
+    for (const fn of Object.values(this.lock.functions)) {
+      flows.set(fn.id, this.extractExplicitTypes(fn))
+    }
+    // Step 2: attach incoming/outgoing edges.
+    this.propagateTypes(flows)
+    // Step 3: aggregate.
+    return { flows, summary: this.computeSummary(flows) }
+  }
+  /**
+   * Look up the type information of one function.
+   *
+   * @returns The entry for `functionId`, or `null` when there is none.
+   */
+  getFunctionFlow(functionId: string): TypeFlowInfo | null {
+    const found = this.analyze().flows.get(functionId)
+    return found ?? null
+  }
+  /**
+   * List functions whose return type contains `typeName` (case-insensitive).
+   */
+  findFunctionsByReturnType(typeName: string): MikkLockFunction[] {
+    const matches: MikkLockFunction[] = []
+    this.analyze().flows.forEach((flow, fnId) => {
+      if (!flow.returnType.toLowerCase().includes(typeName.toLowerCase())) return
+      const fn = this.lock.functions[fnId]
+      if (fn) matches.push(fn)
+    })
+    return matches
+  }
+  /**
+   * List functions with at least one parameter whose type contains
+   * `typeName` (case-insensitive). Each function appears at most once.
+   */
+  findFunctionsByParamType(typeName: string): MikkLockFunction[] {
+    const matches: MikkLockFunction[] = []
+    this.analyze().flows.forEach((flow, fnId) => {
+      const accepts = flow.paramTypes.some(
+        param => param.type.toLowerCase().includes(typeName.toLowerCase())
+      )
+      if (!accepts) return
+      const fn = this.lock.functions[fnId]
+      if (fn) matches.push(fn)
+    })
+    return matches
+  }
+  /**
+   * Build the initial entry for a function from its recorded parameter and
+   * return types. Edges are left empty here.
+   */
+  private extractExplicitTypes(fn: MikkLockFunction): TypeFlowInfo {
+    const paramTypes: TypeParam[] = []
+    for (const param of fn.params || []) {
+      if (param.type) {
+        // A declared type is taken at face value.
+        paramTypes.push({ name: param.name, type: param.type, source: 'annotation', confidence: 1.0 })
+      } else {
+        // No declared type: placeholder with low confidence.
+        paramTypes.push({ name: param.name, type: 'unknown', source: 'inference', confidence: 0.3 })
+      }
+    }
+    const confidence = this.computeConfidence(paramTypes, fn.returnType)
+    return {
+      functionId: fn.id,
+      paramTypes,
+      returnType: fn.returnType || 'unknown',
+      incomingTypes: [],
+      outgoingTypes: [],
+      confidence,
+    }
+  }
+  /**
+   * Add edges to each caller: one outgoing edge per known parameter type of a
+   * callee, and one incoming edge per known callee return type.
+   */
+  private propagateTypes(flows: Map<string, TypeFlowInfo>): void {
+    flows.forEach((callerFlow, callerId) => {
+      const caller = this.lock.functions[callerId]
+      if (!caller) return
+      for (const calleeId of caller.calls || []) {
+        const calleeFlow = flows.get(calleeId)
+        // Calls to functions without an entry are ignored.
+        if (!calleeFlow) continue
+        for (const param of calleeFlow.paramTypes) {
+          if (param.type === 'unknown') continue
+          callerFlow.outgoingTypes.push({
+            from: callerId,
+            to: calleeId,
+            type: param.type,
+            confidence: param.confidence * 0.8,
+          })
+        }
+        if (calleeFlow.returnType === 'unknown') continue
+        callerFlow.incomingTypes.push({
+          from: calleeId,
+          to: callerId,
+          type: calleeFlow.returnType,
+          confidence: calleeFlow.confidence * 0.8,
+        })
+      }
+    })
+  }
+  /**
+   * Average the parameter confidences, counting a known return type as an
+   * extra sample worth 0.9. Returns 0.1 when there is nothing to average.
+   */
+  private computeConfidence(paramTypes: TypeParam[], returnType?: string): number {
+    let total = paramTypes.reduce((sum, param) => sum + param.confidence, 0)
+    let samples = paramTypes.length
+    if (returnType && returnType !== 'unknown') {
+      total += 0.9
+      samples += 1
+    }
+    if (samples === 0) return 0.1
+    return total / samples
+  }
+  /**
+   * Aggregate the per-function entries into summary statistics.
+   */
+  private computeSummary(flows: Map<string, TypeFlowInfo>): TypeFlowResult['summary'] {
+    const entries = Array.from(flows.values())
+    const typedFunctions = entries.filter(flow => flow.returnType !== 'unknown').length
+    const inferredFunctions = entries.filter(flow =>
+      flow.paramTypes.some(p => p.source === 'inference' || p.source === 'usage')
+    ).length
+    const totalConfidence = entries.reduce((sum, flow) => sum + flow.confidence, 0)
+    const totalFunctions = flows.size
+    return {
+      totalFunctions,
+      typedFunctions,
+      inferredFunctions,
+      averageConfidence: totalFunctions > 0 ? totalConfidence / totalFunctions : 0,
+    }
+  }
+}