npm - @getmikk/core - Versions diffs - 2.0.13 → 2.0.15 - Mend

@getmikk/core 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/README.md +4 -4
package/package.json +2 -1
package/src/analysis/index.ts +9 -0
package/src/analysis/taint-analysis.ts +419 -0
package/src/analysis/type-flow.ts +247 -0
package/src/cache/incremental-cache.ts +278 -0
package/src/cache/index.ts +1 -0
package/src/contract/contract-generator.ts +31 -3
package/src/contract/contract-reader.ts +1 -0
package/src/contract/lock-compiler.ts +125 -12
package/src/contract/schema.ts +4 -0
package/src/error-handler.ts +2 -1
package/src/graph/cluster-detector.ts +2 -4
package/src/graph/dead-code-detector.ts +303 -117
package/src/graph/graph-builder.ts +21 -161
package/src/graph/impact-analyzer.ts +1 -0
package/src/graph/index.ts +2 -0
package/src/graph/rich-function-index.ts +1080 -0
package/src/graph/symbol-table.ts +252 -0
package/src/hash/hash-store.ts +1 -0
package/src/index.ts +4 -0
package/src/parser/base-extractor.ts +19 -0
package/src/parser/boundary-checker.ts +31 -12
package/src/parser/error-recovery.ts +647 -0
package/src/parser/function-body-extractor.ts +248 -0
package/src/parser/go/go-extractor.ts +249 -676
package/src/parser/index.ts +138 -295
package/src/parser/language-registry.ts +57 -0
package/src/parser/oxc-parser.ts +166 -28
package/src/parser/oxc-resolver.ts +179 -11
package/src/parser/parser-constants.ts +1 -0
package/src/parser/rust/rust-extractor.ts +109 -0
package/src/parser/tree-sitter/parser.ts +400 -66
package/src/parser/tree-sitter/queries.ts +106 -10
package/src/parser/types.ts +20 -1
package/src/search/bm25.ts +21 -8
package/src/search/direct-search.ts +472 -0
package/src/search/embedding-provider.ts +249 -0
package/src/search/index.ts +12 -0
package/src/search/semantic-search.ts +435 -0
package/src/security/index.ts +1 -0
package/src/security/scanner.ts +342 -0
package/src/utils/artifact-transaction.ts +1 -0
package/src/utils/atomic-write.ts +1 -0
package/src/utils/errors.ts +89 -4
package/src/utils/fs.ts +150 -65
package/src/utils/json.ts +1 -0
package/src/utils/language-registry.ts +96 -5
package/src/utils/minimatch.ts +49 -6
package/src/utils/path.ts +26 -0
package/tests/dead-code.test.ts +3 -2
package/tests/direct-search.test.ts +435 -0
package/tests/error-recovery.test.ts +143 -0
package/tests/fixtures/simple-api/src/index.ts +1 -1
package/tests/go-parser.test.ts +19 -335
package/tests/js-parser.test.ts +18 -1089
package/tests/language-registry-all.test.ts +276 -0
package/tests/language-registry.test.ts +6 -4
package/tests/parse-diagnostics.test.ts +9 -96
package/tests/parser.test.ts +42 -771
package/tests/polyglot-parser.test.ts +117 -0
package/tests/rich-function-index.test.ts +703 -0
package/tests/tree-sitter-parser.test.ts +108 -80
package/tests/ts-parser.test.ts +8 -8
package/tests/verification.test.ts +175 -0
package/src/parser/base-parser.ts +0 -16
package/src/parser/go/go-parser.ts +0 -43
package/src/parser/javascript/js-extractor.ts +0 -278
package/src/parser/javascript/js-parser.ts +0 -101
package/src/parser/typescript/ts-extractor.ts +0 -447
package/src/parser/typescript/ts-parser.ts +0 -36

package/README.md CHANGED Viewed

@@ -75,10 +75,10 @@ One root hash comparison = instant full drift detection. Persisted in SQLite wit
 Compiles a `DependencyGraph` + `MikkContract` + parsed files into a `MikkLock`. The lock file is the single source of truth for all MCP tools and CLI commands.
-Lock format v1.7.0:
-- Integer-based function index (`fnIndex`) — call graph edges stored as integer references, not repeated strings
-- Compact JSON output — no pretty-printing
-- Backward-compatible hydration for older formats
+Lock format:
+- Integer-based function index (`fnIndex`) — call graph edges stored as integer references
+- Compact JSON output
+- Backward-compatible hydration
 ### ContractReader / ContractWriter / LockReader

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@getmikk/core",
-  "version": "2.0.13",
+  "version": "2.0.15",
   "publishConfig": {
     "access": "public",
     "registry": "https://registry.npmjs.org/"
@@ -33,6 +33,7 @@
     "eslint": "^9.39.2"
   },
   "dependencies": {
+    "@google/generative-ai": "^0.21.0",
     "better-sqlite3": "^12.6.2",
     "fast-glob": "^3.3.0",
     "tree-sitter-wasms": "^0.1.13",

package/src/analysis/index.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Analysis modules - Type Flow and Taint Analysis for semantic code understanding
+ */
+export { TypeFlowAnalyzer } from './type-flow.js'
+export type { TypeFlowInfo, TypeParam, TypeEdge, TypeFlowResult } from './type-flow.js'
+export { TaintAnalyzer } from './taint-analysis.js'
+export type { TaintSource, TaintSink, TaintFlow, DataFlowResult } from './taint-analysis.js'

package/src/analysis/taint-analysis.ts ADDED Viewed

@@ -0,0 +1,419 @@
+/**
+ * Data Flow & Taint Analysis — tracks data propagation through code
+ * for security vulnerability detection.
+ */
+import type { MikkLock, MikkLockFunction } from '../contract/schema.js'
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/**
+ * A category of untrusted data origin, identified by regular expressions.
+ * TaintAnalyzer tests each pattern against a function's lowercased
+ * "name + purpose" text; a function is a source if any pattern matches.
+ */
+export interface TaintSource {
+  name: string // short identifier for the category, e.g. 'user-input'
+  description: string // human-readable summary of the category
+  severity: 'critical' | 'high' | 'medium' | 'low' // NOTE(review): not read by TaintAnalyzer in this file; reported severity comes from the sink
+  patterns: RegExp[] // any single match marks the function as a source
+}
+/**
+ * A category of dangerous operation, identified by regular expressions.
+ * TaintAnalyzer tests each pattern against a function's lowercased
+ * "name + purpose" text; a function is a sink if any pattern matches.
+ */
+export interface TaintSink {
+  name: string // short identifier for the category, e.g. 'sql-query'
+  description: string // human-readable summary of the category
+  severity: 'critical' | 'high' | 'medium' | 'low' // copied onto every TaintFlow that ends at this sink
+  patterns: RegExp[] // any single match marks the function as a sink
+  sanitizers: string[] // substrings compared case-insensitively by TaintAnalyzer.hasSanitizer
+}
+/**
+ * One detected call-graph route from a source function to a sink function.
+ */
+export interface TaintFlow {
+  source: string // name of the source function (not the source category)
+  sink: string // name of the sink function (not the sink category)
+  path: string[] // function names along the route, source first and sink last; an id is used where no name resolves
+  vulnerability: string // short textual summary of the route
+  severity: 'critical' | 'high' | 'medium' | 'low' // taken from the matched TaintSink
+  confidence: number // 0.9 for a direct call, 0.7 for a route through intermediates
+}
+/**
+ * Result of TaintAnalyzer.analyze(): every detected flow plus per-severity counts.
+ */
+export interface DataFlowResult {
+  flows: TaintFlow[]
+  summary: {
+    totalFlows: number // equals flows.length
+    critical: number // number of flows whose severity is 'critical'
+    high: number // number of flows whose severity is 'high'
+    medium: number // number of flows whose severity is 'medium'
+    low: number // number of flows whose severity is 'low'
+  }
+}
+// ---------------------------------------------------------------------------
+// Default Taint Sources and Sinks
+// ---------------------------------------------------------------------------
+/**
+ * Source categories used when TaintAnalyzer is constructed without explicit sources.
+ * All patterns are case-insensitive. Some patterns (readFile, fetch, axios) appear
+ * in more than one category, so a single function can match several categories.
+ * NOTE(review): the analyzer matches these against a function's name and purpose
+ * text, not its body, so patterns that require a literal "(" only match when that
+ * text contains one — confirm this is intended.
+ */
+const DEFAULT_TAINT_SOURCES: TaintSource[] = [
+  {
+    name: 'user-input',
+    description: 'User-controlled input',
+    severity: 'high',
+    patterns: [
+      /req\.(body|query|params|headers)/i,
+      /request\.(body|query|params|headers)/i,
+      /input\(/i,
+      /process\.env/i,
+      /process\.argv/i,
+      /\bstdin\b/i,
+      /readline\(/i,
+      /readFile\(/i,
+      /fetch\(/i,
+      /axios\(/i,
+      /http\.request\(/i,
+    ],
+  },
+  {
+    name: 'filesystem',
+    description: 'File system input',
+    severity: 'medium',
+    patterns: [
+      /readFile\(/i,
+      /readFileSync\(/i,
+      /readdir\(/i,
+      /createReadStream\(/i,
+      /fs\.readFile\(/i,
+    ],
+  },
+  {
+    name: 'database',
+    description: 'Database query results',
+    severity: 'medium',
+    patterns: [
+      /query\(/i,
+      /\.find\(/i,
+      /\.select\(/i,
+      /execute\(/i,
+      /\.fetch\(/i,
+    ],
+  },
+  {
+    name: 'network',
+    description: 'Network/API responses',
+    severity: 'medium',
+    patterns: [
+      /fetch\(/i,
+      /axios\(/i,
+      /http\.get\(/i,
+      /https\.get\(/i,
+      /request\(/i,
+      /\.json\(\)/i,
+    ],
+  },
+]
+/**
+ * Sink categories used when TaintAnalyzer is constructed without explicit sinks.
+ * All patterns are case-insensitive. Each entry's severity becomes the severity
+ * of any flow that ends at a function matching that entry.
+ * NOTE(review): the analyzer matches these against a function's name and purpose
+ * text, not its body, so patterns that require "(" or "=" only match when that
+ * text contains them — confirm this is intended.
+ */
+const DEFAULT_TAINT_SINKS: TaintSink[] = [
+  {
+    name: 'sql-query',
+    description: 'SQL query execution',
+    severity: 'critical',
+    patterns: [
+      /execute\s*\(/i,
+      /query\s*\(/i,
+      /\.exec\(/i,
+      /cursor\.execute\(/i,
+      /db\.query\(/i,
+    ],
+    sanitizers: ['escape', 'sanitize', 'param', 'bind', 'prepare'],
+  },
+  {
+    name: 'command-injection',
+    description: 'OS command execution',
+    severity: 'critical',
+    patterns: [
+      /exec\s*\(/i,
+      /spawn\s*\(/i,
+      /execSync\s*\(/i,
+      /system\s*\(/i,
+      /popen\s*\(/i,
+      /child_process\./i,
+    ],
+    sanitizers: ['execFile', 'spawnSync', 'execFileSync'],
+  },
+  {
+    name: 'code-execution',
+    description: 'Dynamic code execution',
+    severity: 'critical',
+    patterns: [
+      /\beval\s*\(/i,
+      /\bFunction\s*\(/i,
+      /setTimeout\s*\(\s*\w+\s*,/i,
+      /setInterval\s*\(\s*\w+\s*,/i,
+    ],
+    sanitizers: [],
+  },
+  {
+    name: 'path-traversal',
+    description: 'File system operations',
+    severity: 'high',
+    patterns: [
+      /readFile\(/i,
+      /writeFile\(/i,
+      /open\(/i,
+      /createReadStream\(/i,
+      /stat\(/i,
+      /lstat\(/i,
+      /access\(/i,
+      /exists\(/i,
+    ],
+    sanitizers: ['normalize', 'resolve', 'basename', 'dirname', 'join'],
+  },
+  {
+    name: 'xss',
+    description: 'HTML/JS injection',
+    severity: 'high',
+    patterns: [
+      /\.innerHTML\s*=/i,
+      /\.outerHTML\s*=/i,
+      /dangerouslySetInnerHTML/i,
+      /document\.write\(/i,
+      /\.html\s*\(/i,
+    ],
+    sanitizers: ['escape', 'sanitize', 'text', 'encode', 'DOMPurify'],
+  },
+  {
+    name: 'prototype-pollution',
+    description: 'Object prototype manipulation',
+    severity: 'high',
+    patterns: [
+      /\[\s*['"]__proto__['"]\s*\]/i,
+      /\[\s*['"]constructor['"]\s*\]/i,
+      /Object\.assign\s*\(\s*\w+\s*,\s*\w+\s*\)/i,
+    ],
+    sanitizers: [],
+  },
+]
+// ---------------------------------------------------------------------------
+// Taint Analyzer
+// ---------------------------------------------------------------------------
+/**
+ * Heuristic taint analyzer over the functions recorded in a lock.
+ *
+ * A function is classified as a source or a sink when any configured pattern
+ * matches its lowercased "name + purpose" text. A flow is reported for every
+ * (source function, sink match) pair that is connected through the recorded
+ * `calls` edges. Because the matched text is lowercased, custom patterns must
+ * be case-insensitive or written in lowercase to match.
+ */
+export class TaintAnalyzer {
+  private lock: MikkLock
+  private sources: TaintSource[]
+  private sinks: TaintSink[]
+  /**
+   * @param lock    Lock whose `functions` are analyzed.
+   * @param sources Source categories; defaults to DEFAULT_TAINT_SOURCES.
+   * @param sinks   Sink categories; defaults to DEFAULT_TAINT_SINKS.
+   */
+  constructor(
+    lock: MikkLock,
+    sources?: TaintSource[],
+    sinks?: TaintSink[]
+  ) {
+    this.lock = lock
+    this.sources = sources ?? DEFAULT_TAINT_SOURCES
+    this.sinks = sinks ?? DEFAULT_TAINT_SINKS
+  }
+  /**
+   * Analyze the codebase for taint flows.
+   *
+   * @returns Every detected flow plus a per-severity tally.
+   */
+  analyze(): DataFlowResult {
+    const flows = this.collectFlows().map(entry => entry.flow)
+    const summary = { totalFlows: flows.length, critical: 0, high: 0, medium: 0, low: 0 }
+    for (const flow of flows) {
+      summary[flow.severity]++
+    }
+    return { flows, summary }
+  }
+  /**
+   * Compute all flows, keeping the originating function next to each flow so
+   * that callers never have to re-resolve it by (possibly ambiguous) name.
+   */
+  private collectFlows(): Array<{ flow: TaintFlow; sourceFn: MikkLockFunction }> {
+    const allFunctions = Object.values(this.lock.functions)
+    // Index once by id; the first occurrence wins, like Array.prototype.find.
+    const byId = new Map<string, MikkLockFunction>()
+    for (const fn of allFunctions) {
+      if (!byId.has(fn.id)) byId.set(fn.id, fn)
+    }
+    const sourceFunctions = this.findTaintSources(allFunctions)
+    const sinkFunctions = this.findTaintSinks(allFunctions)
+    const traced: Array<{ flow: TaintFlow; sourceFn: MikkLockFunction }> = []
+    for (const sourceFn of sourceFunctions) {
+      for (const { fn: sinkFn, sink } of sinkFunctions) {
+        // A function is never reported as flowing into itself.
+        if (sourceFn.id === sinkFn.id) continue
+        const flow = this.traceTaintFlow(sourceFn, sinkFn, sink, byId)
+        if (flow) {
+          traced.push({ flow, sourceFn })
+        }
+      }
+    }
+    return traced
+  }
+  /**
+   * Text that source/sink patterns and sanitizer names are matched against.
+   */
+  private describe(fn: MikkLockFunction): string {
+    return `${fn.name} ${fn.purpose || ''}`.toLowerCase()
+  }
+  /**
+   * True when any pattern matches `text`.
+   */
+  private matchesAny(patterns: RegExp[], text: string): boolean {
+    return patterns.some(pattern => {
+      // Global/sticky regexes carry state between test() calls; reset it so
+      // caller-supplied patterns behave the same on every function.
+      pattern.lastIndex = 0
+      return pattern.test(text)
+    })
+  }
+  /**
+   * Find functions that contain taint sources.
+   * Each function appears at most once, even if it matches several categories.
+   */
+  private findTaintSources(functions: MikkLockFunction[]): MikkLockFunction[] {
+    return functions.filter(fn => {
+      const fnText = this.describe(fn)
+      return this.sources.some(source => this.matchesAny(source.patterns, fnText))
+    })
+  }
+  /**
+   * Find functions that contain taint sinks.
+   * A function yields one entry per sink category it matches.
+   */
+  private findTaintSinks(functions: MikkLockFunction[]): Array<{ fn: MikkLockFunction; sink: TaintSink }> {
+    const sinks: Array<{ fn: MikkLockFunction; sink: TaintSink }> = []
+    for (const fn of functions) {
+      const fnText = this.describe(fn)
+      for (const sink of this.sinks) {
+        if (this.matchesAny(sink.patterns, fnText)) {
+          sinks.push({ fn, sink })
+        }
+      }
+    }
+    return sinks
+  }
+  /**
+   * Trace taint flow from source to sink through call graph.
+   *
+   * @returns A flow with confidence 0.9 for a direct call, 0.7 for a route
+   *          through intermediates, or null when the sink is unreachable.
+   */
+  private traceTaintFlow(
+    source: MikkLockFunction,
+    sinkFn: MikkLockFunction,
+    sink: TaintSink,
+    byId: Map<string, MikkLockFunction>
+  ): TaintFlow | null {
+    // Direct call: source calls sink directly
+    if (source.calls?.includes(sinkFn.id)) {
+      return {
+        source: source.name,
+        sink: sinkFn.name,
+        path: [source.name, sinkFn.name],
+        vulnerability: `${source.name} -> ${sinkFn.name}`,
+        severity: sink.severity,
+        confidence: 0.9,
+      }
+    }
+    // Check if there's a path through the call graph
+    const path = this.findPath(source.id, sinkFn.id, byId)
+    if (path) {
+      return {
+        source: source.name,
+        sink: sinkFn.name,
+        path: path.map(id => byId.get(id)?.name || id),
+        // The path includes both endpoints, so intermediates = length - 2.
+        vulnerability: `${source.name} -> ${path.length - 2} intermediate(s) -> ${sinkFn.name}`,
+        severity: sink.severity,
+        confidence: 0.7,
+      }
+    }
+    return null
+  }
+  /**
+   * Find path from source to sink through call graph (depth-first).
+   *
+   * @returns Function ids from source to sink inclusive, or null if unreachable.
+   */
+  private findPath(
+    sourceId: string,
+    sinkId: string,
+    byId: Map<string, MikkLockFunction>
+  ): string[] | null {
+    const visited = new Set<string>()
+    const path: string[] = []
+    const dfs = (currentId: string): boolean => {
+      if (currentId === sinkId) {
+        path.push(currentId)
+        return true
+      }
+      // The visited set keeps cycles in the call graph from recursing forever.
+      if (visited.has(currentId)) return false
+      visited.add(currentId)
+      path.push(currentId)
+      for (const calleeId of byId.get(currentId)?.calls ?? []) {
+        if (dfs(calleeId)) return true
+      }
+      // Dead end: drop this node from the candidate path.
+      path.pop()
+      return false
+    }
+    return dfs(sourceId) ? path : null
+  }
+  /**
+   * Check if a function has sanitizers that mitigate taint.
+   * NOTE(review): nothing in this class calls this method, so sink
+   * `sanitizers` currently have no effect on reported flows — confirm
+   * whether flows through sanitizing functions should be dropped or downgraded.
+   */
+  private hasSanitizer(fn: MikkLockFunction, sink: TaintSink): boolean {
+    const fnText = this.describe(fn)
+    for (const sanitizer of sink.sanitizers) {
+      if (fnText.includes(sanitizer.toLowerCase())) {
+        return true
+      }
+    }
+    return false
+  }
+  /**
+   * Get security findings from taint analysis.
+   *
+   * @returns One finding per flow, located at the flow's source function.
+   */
+  getFindings(): Array<{
+    severity: string
+    title: string
+    file: string
+    line: number
+    description: string
+  }> {
+    // Use the source function captured during tracing; looking it up again by
+    // name would pick the wrong file/line when two functions share a name.
+    return this.collectFlows().map(({ flow, sourceFn }) => ({
+      severity: flow.severity,
+      title: `Potential ${flow.sink} vulnerability`,
+      file: sourceFn.file,
+      line: sourceFn.startLine,
+      description: `Tainted data from ${flow.source} flows to ${flow.sink} via: ${flow.path.join(' -> ')}`,
+    }))
+  }
+}

package/src/analysis/type-flow.ts ADDED Viewed

@@ -0,0 +1,247 @@
+/**
+ * Type Flow Analysis — tracks type propagation through function calls
+ * and provides type-aware code understanding beyond syntactic parsing.
+ */
+import type { MikkLock, MikkLockFunction } from '../contract/schema.js'
+import type { DependencyGraph } from '../graph/types.js'
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/**
+ * Type information gathered for one function, plus the type edges that
+ * connect it to the functions it calls.
+ */
+export interface TypeFlowInfo {
+  /** Function ID */
+  functionId: string
+  /** Inferred parameter types */
+  paramTypes: TypeParam[]
+  /** Inferred return type */
+  returnType: string
+  /** Types that flow into this function from callers */
+  incomingTypes: TypeEdge[]
+  /** Types that flow out to callees */
+  outgoingTypes: TypeEdge[]
+  /** Type confidence score (0-1) */
+  confidence: number
+}
+/**
+ * Type information for a single function parameter.
+ */
+export interface TypeParam {
+  name: string // parameter name as recorded in the lock
+  type: string // recorded type, or 'unknown' when the lock has none
+  source: 'annotation' | 'inference' | 'usage' // TypeFlowAnalyzer emits 'annotation' when a type is recorded, otherwise 'inference'
+  confidence: number // TypeFlowAnalyzer emits 1.0 when a type is recorded, otherwise 0.3
+}
+/**
+ * A type carried along one call relationship between two functions.
+ */
+export interface TypeEdge {
+  from: string // id of the function the type originates from
+  to: string // id of the function the type flows to
+  type: string // the type name being carried
+  confidence: number // TypeFlowAnalyzer uses 0.8 x the originating parameter/function confidence
+}
+/**
+ * Result of TypeFlowAnalyzer.analyze(): per-function flows and aggregate counts.
+ */
+export interface TypeFlowResult {
+  flows: Map<string, TypeFlowInfo> // keyed by function id
+  summary: {
+    totalFunctions: number // number of entries in flows
+    typedFunctions: number // functions whose return type is not 'unknown'
+    inferredFunctions: number // functions with at least one parameter whose source is 'inference' or 'usage'
+    averageConfidence: number // mean of the per-function confidence; 0 when there are no functions
+  }
+}
+// ---------------------------------------------------------------------------
+// Type Flow Analyzer
+// ---------------------------------------------------------------------------
+/**
+ * Builds per-function type records from a lock and links them along the
+ * call edges recorded on each function.
+ */
+export class TypeFlowAnalyzer {
+  private lock: MikkLock
+  private graph: DependencyGraph
+  constructor(lock: MikkLock, graph: DependencyGraph) {
+    this.graph = graph
+    this.lock = lock
+  }
+  /**
+   * Run the full analysis: seed every function with its recorded types,
+   * connect callers to callees, then aggregate statistics.
+   */
+  analyze(): TypeFlowResult {
+    const flows = new Map<string, TypeFlowInfo>()
+    // Step 1: one record per function, built from its recorded types
+    for (const entry of Object.values(this.lock.functions)) {
+      flows.set(entry.id, this.extractExplicitTypes(entry))
+    }
+    // Step 2: attach type edges along call relationships
+    this.propagateTypes(flows)
+    // Step 3: aggregate
+    return { flows, summary: this.computeSummary(flows) }
+  }
+  /**
+   * Look up the type record of one function; null when the id is unknown.
+   */
+  getFunctionFlow(functionId: string): TypeFlowInfo | null {
+    return this.analyze().flows.get(functionId) ?? null
+  }
+  /**
+   * List functions whose return type contains `typeName` (case-insensitive).
+   */
+  findFunctionsByReturnType(typeName: string): MikkLockFunction[] {
+    const found: MikkLockFunction[] = []
+    for (const [id, info] of this.analyze().flows) {
+      if (!info.returnType.toLowerCase().includes(typeName.toLowerCase())) continue
+      const owner = this.lock.functions[id]
+      if (owner) found.push(owner)
+    }
+    return found
+  }
+  /**
+   * List functions with at least one parameter whose type contains
+   * `typeName` (case-insensitive). Each function is listed once.
+   */
+  findFunctionsByParamType(typeName: string): MikkLockFunction[] {
+    const found: MikkLockFunction[] = []
+    for (const [id, info] of this.analyze().flows) {
+      let accepts = false
+      for (const candidate of info.paramTypes) {
+        if (candidate.type.toLowerCase().includes(typeName.toLowerCase())) {
+          accepts = true
+          break
+        }
+      }
+      const owner = accepts ? this.lock.functions[id] : undefined
+      if (owner) found.push(owner)
+    }
+    return found
+  }
+  /**
+   * Build the initial record for one function from the types stored on it.
+   * Parameters without a recorded type become 'unknown' with low confidence.
+   */
+  private extractExplicitTypes(fn: MikkLockFunction): TypeFlowInfo {
+    const paramTypes: TypeParam[] = []
+    for (const declared of fn.params || []) {
+      const annotated = Boolean(declared.type)
+      paramTypes.push({
+        name: declared.name,
+        type: declared.type || 'unknown',
+        source: annotated ? 'annotation' : 'inference',
+        confidence: annotated ? 1.0 : 0.3,
+      })
+    }
+    const confidence = this.computeConfidence(paramTypes, fn.returnType)
+    return {
+      functionId: fn.id,
+      paramTypes,
+      returnType: fn.returnType || 'unknown',
+      incomingTypes: [],
+      outgoingTypes: [],
+      confidence,
+    }
+  }
+  /**
+   * For every caller -> callee edge, record the callee's known parameter types
+   * as outgoing edges of the caller and the callee's known return type as an
+   * incoming edge of the caller. Edge confidence is scaled by 0.8.
+   */
+  private propagateTypes(flows: Map<string, TypeFlowInfo>): void {
+    for (const [callerId, callerFlow] of flows) {
+      const caller = this.lock.functions[callerId]
+      if (!caller) continue
+      for (const calleeId of caller.calls || []) {
+        const calleeFlow = flows.get(calleeId)
+        if (!calleeFlow) continue
+        // Known parameter types travel from caller to callee
+        for (const accepted of calleeFlow.paramTypes) {
+          if (accepted.type === 'unknown') continue
+          callerFlow.outgoingTypes.push({
+            from: callerId,
+            to: calleeId,
+            type: accepted.type,
+            confidence: accepted.confidence * 0.8,
+          })
+        }
+        // A known return type travels from callee back to caller
+        if (calleeFlow.returnType === 'unknown') continue
+        callerFlow.incomingTypes.push({
+          from: calleeId,
+          to: callerId,
+          type: calleeFlow.returnType,
+          confidence: calleeFlow.confidence * 0.8,
+        })
+      }
+    }
+  }
+  /**
+   * Average the parameter confidences, counting a known return type as an
+   * extra 0.9 sample. Returns 0.1 when there is nothing to average.
+   */
+  private computeConfidence(paramTypes: TypeParam[], returnType?: string): number {
+    let sum = 0
+    for (const { confidence } of paramTypes) {
+      sum += confidence
+    }
+    let samples = paramTypes.length
+    if (returnType && returnType !== 'unknown') {
+      sum += 0.9
+      samples += 1
+    }
+    if (samples > 0) {
+      return sum / samples
+    }
+    return 0.1
+  }
+  /**
+   * Aggregate counts and the mean confidence over all function records.
+   */
+  private computeSummary(flows: Map<string, TypeFlowInfo>): TypeFlowResult['summary'] {
+    let typed = 0
+    let inferred = 0
+    let confidenceSum = 0
+    for (const info of flows.values()) {
+      if (info.returnType !== 'unknown') typed += 1
+      if (info.paramTypes.some(p => p.source === 'inference' || p.source === 'usage')) inferred += 1
+      confidenceSum += info.confidence
+    }
+    const total = flows.size
+    return {
+      totalFunctions: total,
+      typedFunctions: typed,
+      inferredFunctions: inferred,
+      averageConfidence: total > 0 ? confidenceSum / total : 0,
+    }
+  }
+}