npm - dravix-agent - Versions diffs - 0.1.0 - Mend

dravix-agent 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (208) hide show

package/.claude/settings.example.json +30 -0
package/ARCHITECTURE.md +410 -0
package/LICENSE +21 -0
package/README.md +153 -0
package/ROADMAP.md +117 -0
package/data/vulnkb.json +666 -0
package/dist/bin/aegis.d.ts +3 -0
package/dist/bin/aegis.d.ts.map +1 -0
package/dist/bin/aegis.js +489 -0
package/dist/bin/aegis.js.map +1 -0
package/dist/cache.d.ts +9 -0
package/dist/cache.d.ts.map +1 -0
package/dist/cache.js +146 -0
package/dist/cache.js.map +1 -0
package/dist/engines/ai-sinks.d.ts +52 -0
package/dist/engines/ai-sinks.d.ts.map +1 -0
package/dist/engines/ai-sinks.js +204 -0
package/dist/engines/ai-sinks.js.map +1 -0
package/dist/engines/eslint.d.ts +9 -0
package/dist/engines/eslint.d.ts.map +1 -0
package/dist/engines/eslint.js +245 -0
package/dist/engines/eslint.js.map +1 -0
package/dist/engines/joern.d.ts +3 -0
package/dist/engines/joern.d.ts.map +1 -0
package/dist/engines/joern.js +98 -0
package/dist/engines/joern.js.map +1 -0
package/dist/engines/js-sinks.d.ts +70 -0
package/dist/engines/js-sinks.d.ts.map +1 -0
package/dist/engines/js-sinks.js +370 -0
package/dist/engines/js-sinks.js.map +1 -0
package/dist/engines/llm-critic.d.ts +130 -0
package/dist/engines/llm-critic.d.ts.map +1 -0
package/dist/engines/llm-critic.js +551 -0
package/dist/engines/llm-critic.js.map +1 -0
package/dist/engines/pragma.d.ts +20 -0
package/dist/engines/pragma.d.ts.map +1 -0
package/dist/engines/pragma.js +83 -0
package/dist/engines/pragma.js.map +1 -0
package/dist/engines/property-test.d.ts +3 -0
package/dist/engines/property-test.d.ts.map +1 -0
package/dist/engines/property-test.js +134 -0
package/dist/engines/property-test.js.map +1 -0
package/dist/engines/pyright.d.ts +10 -0
package/dist/engines/pyright.d.ts.map +1 -0
package/dist/engines/pyright.js +143 -0
package/dist/engines/pyright.js.map +1 -0
package/dist/engines/pysa.d.ts +3 -0
package/dist/engines/pysa.d.ts.map +1 -0
package/dist/engines/pysa.js +83 -0
package/dist/engines/pysa.js.map +1 -0
package/dist/engines/python-sinks.d.ts +82 -0
package/dist/engines/python-sinks.d.ts.map +1 -0
package/dist/engines/python-sinks.js +459 -0
package/dist/engines/python-sinks.js.map +1 -0
package/dist/engines/registry.d.ts +26 -0
package/dist/engines/registry.d.ts.map +1 -0
package/dist/engines/registry.js +70 -0
package/dist/engines/registry.js.map +1 -0
package/dist/engines/secret-scan.d.ts +22 -0
package/dist/engines/secret-scan.d.ts.map +1 -0
package/dist/engines/secret-scan.js +179 -0
package/dist/engines/secret-scan.js.map +1 -0
package/dist/engines/semgrep.d.ts +10 -0
package/dist/engines/semgrep.d.ts.map +1 -0
package/dist/engines/semgrep.js +200 -0
package/dist/engines/semgrep.js.map +1 -0
package/dist/engines/treesitter.d.ts +18 -0
package/dist/engines/treesitter.d.ts.map +1 -0
package/dist/engines/treesitter.js +135 -0
package/dist/engines/treesitter.js.map +1 -0
package/dist/engines/tsc.d.ts +10 -0
package/dist/engines/tsc.d.ts.map +1 -0
package/dist/engines/tsc.js +142 -0
package/dist/engines/tsc.js.map +1 -0
package/dist/engines/types.d.ts +47 -0
package/dist/engines/types.d.ts.map +1 -0
package/dist/engines/types.js +27 -0
package/dist/engines/types.js.map +1 -0
package/dist/findings.d.ts +121 -0
package/dist/findings.d.ts.map +1 -0
package/dist/findings.js +98 -0
package/dist/findings.js.map +1 -0
package/dist/hooks/claude-code.d.ts +3 -0
package/dist/hooks/claude-code.d.ts.map +1 -0
package/dist/hooks/claude-code.js +187 -0
package/dist/hooks/claude-code.js.map +1 -0
package/dist/index/context.d.ts +127 -0
package/dist/index/context.d.ts.map +1 -0
package/dist/index/context.js +267 -0
package/dist/index/context.js.map +1 -0
package/dist/index/embeddings.d.ts +68 -0
package/dist/index/embeddings.d.ts.map +1 -0
package/dist/index/embeddings.js +570 -0
package/dist/index/embeddings.js.map +1 -0
package/dist/index/graph_routing.d.ts +36 -0
package/dist/index/graph_routing.d.ts.map +1 -0
package/dist/index/graph_routing.js +170 -0
package/dist/index/graph_routing.js.map +1 -0
package/dist/index/joern.d.ts +76 -0
package/dist/index/joern.d.ts.map +1 -0
package/dist/index/joern.js +782 -0
package/dist/index/joern.js.map +1 -0
package/dist/index/property-test.d.ts +88 -0
package/dist/index/property-test.d.ts.map +1 -0
package/dist/index/property-test.js +466 -0
package/dist/index/property-test.js.map +1 -0
package/dist/index/proto/scip.proto +897 -0
package/dist/index/pysa.d.ts +91 -0
package/dist/index/pysa.d.ts.map +1 -0
package/dist/index/pysa.js +617 -0
package/dist/index/pysa.js.map +1 -0
package/dist/index/scip.d.ts +76 -0
package/dist/index/scip.d.ts.map +1 -0
package/dist/index/scip.js +541 -0
package/dist/index/scip.js.map +1 -0
package/dist/index/vulrag.d.ts +86 -0
package/dist/index/vulrag.d.ts.map +1 -0
package/dist/index/vulrag.js +242 -0
package/dist/index/vulrag.js.map +1 -0
package/dist/index.d.ts +9 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +8 -0
package/dist/index.js.map +1 -0
package/dist/install/claude-code.d.ts +31 -0
package/dist/install/claude-code.d.ts.map +1 -0
package/dist/install/claude-code.js +447 -0
package/dist/install/claude-code.js.map +1 -0
package/dist/lang.d.ts +5 -0
package/dist/lang.d.ts.map +1 -0
package/dist/lang.js +52 -0
package/dist/lang.js.map +1 -0
package/dist/learning/suppressions.d.ts +70 -0
package/dist/learning/suppressions.d.ts.map +1 -0
package/dist/learning/suppressions.js +179 -0
package/dist/learning/suppressions.js.map +1 -0
package/dist/mcp/server.d.ts +2 -0
package/dist/mcp/server.d.ts.map +1 -0
package/dist/mcp/server.js +187 -0
package/dist/mcp/server.js.map +1 -0
package/dist/mcp/tools/explain.d.ts +58 -0
package/dist/mcp/tools/explain.d.ts.map +1 -0
package/dist/mcp/tools/explain.js +60 -0
package/dist/mcp/tools/explain.js.map +1 -0
package/dist/mcp/tools/precheck.d.ts +29 -0
package/dist/mcp/tools/precheck.d.ts.map +1 -0
package/dist/mcp/tools/precheck.js +42 -0
package/dist/mcp/tools/precheck.js.map +1 -0
package/dist/mcp/tools/validate.d.ts +73 -0
package/dist/mcp/tools/validate.d.ts.map +1 -0
package/dist/mcp/tools/validate.js +66 -0
package/dist/mcp/tools/validate.js.map +1 -0
package/dist/mcp/warm.d.ts +88 -0
package/dist/mcp/warm.d.ts.map +1 -0
package/dist/mcp/warm.js +331 -0
package/dist/mcp/warm.js.map +1 -0
package/dist/orchestrator.d.ts +46 -0
package/dist/orchestrator.d.ts.map +1 -0
package/dist/orchestrator.js +596 -0
package/dist/orchestrator.js.map +1 -0
package/dist/policy.d.ts +51 -0
package/dist/policy.d.ts.map +1 -0
package/dist/policy.js +201 -0
package/dist/policy.js.map +1 -0
package/dist/risk.d.ts +31 -0
package/dist/risk.d.ts.map +1 -0
package/dist/risk.js +92 -0
package/dist/risk.js.map +1 -0
package/dist/stats.d.ts +72 -0
package/dist/stats.d.ts.map +1 -0
package/dist/stats.js +217 -0
package/dist/stats.js.map +1 -0
package/dist/telemetry/collector.d.ts +10 -0
package/dist/telemetry/collector.d.ts.map +1 -0
package/dist/telemetry/collector.js +75 -0
package/dist/telemetry/collector.js.map +1 -0
package/dist/telemetry/consent.d.ts +9 -0
package/dist/telemetry/consent.d.ts.map +1 -0
package/dist/telemetry/consent.js +42 -0
package/dist/telemetry/consent.js.map +1 -0
package/dist/telemetry/installation.d.ts +2 -0
package/dist/telemetry/installation.d.ts.map +1 -0
package/dist/telemetry/installation.js +32 -0
package/dist/telemetry/installation.js.map +1 -0
package/dist/telemetry/sanitizer.d.ts +5 -0
package/dist/telemetry/sanitizer.d.ts.map +1 -0
package/dist/telemetry/sanitizer.js +60 -0
package/dist/telemetry/sanitizer.js.map +1 -0
package/dist/telemetry/types.d.ts +39 -0
package/dist/telemetry/types.d.ts.map +1 -0
package/dist/telemetry/types.js +4 -0
package/dist/telemetry/types.js.map +1 -0
package/dist/telemetry/uploader.d.ts +12 -0
package/dist/telemetry/uploader.d.ts.map +1 -0
package/dist/telemetry/uploader.js +92 -0
package/dist/telemetry/uploader.js.map +1 -0
package/dist/util/logger.d.ts +19 -0
package/dist/util/logger.d.ts.map +1 -0
package/dist/util/logger.js +58 -0
package/dist/util/logger.js.map +1 -0
package/dist/util/safe-paths.d.ts +8 -0
package/dist/util/safe-paths.d.ts.map +1 -0
package/dist/util/safe-paths.js +102 -0
package/dist/util/safe-paths.js.map +1 -0
package/dist/util/subprocess.d.ts +32 -0
package/dist/util/subprocess.d.ts.map +1 -0
package/dist/util/subprocess.js +137 -0
package/dist/util/subprocess.js.map +1 -0
package/package.json +93 -0

package/.claude/settings.example.json ADDED Viewed

@@ -0,0 +1,30 @@
+{
+  "//": "Aegis-v2 hook configuration for Claude Code. Copy to your project's .claude/settings.json (or merge into the existing file).",
+  "//install": "Requires Aegis-v2 installed globally (npm i -g @aegis/aegis-v2) or local path to dist/bin/aegis.js.",
+  "hooks": {
+    "PostToolUse": [
+      {
+        "matcher": "Write|Edit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "aegis hook",
+            "timeout": 10
+          }
+        ]
+      }
+    ],
+    "PreToolUse": [
+      {
+        "matcher": "Write|Edit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "aegis hook",
+            "timeout": 3
+          }
+        ]
+      }
+    ]
+  }
+}

package/ARCHITECTURE.md ADDED Viewed

@@ -0,0 +1,410 @@
+# Aegis-v2 — Architecture
+> Real-time MCP code-write gate for AI agents. Built **OSS-only**, **TypeScript-native**,
+> **fully owned** — no commercial-product lock-in.
+---
+## 0. North Star
+A `PreToolUse` / `PostToolUse` hook for Claude Code / Cursor / Codex / opencode / Hermes
+that intercepts every file write the agent is about to make, runs a multi-layer hybrid
+analysis in **p50 ≤ 400 ms**, and either ALLOWS, WARNS, or BLOCKS with a remediation
+prompt fed back to the agent.
+**Two product properties that nothing else combines:**
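+1. **The gate is in the WRITE LOOP** — not a post-merge scan dashboard. Bad code
+   is caught at write time (the blocking `PostToolUse` check runs immediately
+   after the file is written), before the agent builds on it.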
+2. **The agent receives a structured remediation prompt** when blocked — it can
+   regenerate without human intervention.
+---
+## 1. Honest baselines we have to beat
+Latest published numbers (2024-2026) on real bug-detection benchmarks. **Every
+tool listed is under 50% on hard datasets.**
+| Tool / approach | Benchmark | Result (recall unless noted) |
+|---|---|---|
+| Single 7B LLM (state-of-the-art) | PrimeVul | **3.09% F1** [Ding 2024](https://arxiv.org/abs/2403.18624) |
+| GPT-3.5 / GPT-4 (zero-shot) | PrimeVul stringent | "akin to random guessing" |
+| Best commercial SAST (CodeQL) | EASE-2024 Java curated | 18.4% |
+| Semgrep Pro | EASE-2024 | 14.3% |
+| Snyk DeepCode | EASE-2024 | 11.2% |
+| **Macroscope** (2026 SOTA) | 118 OSS runtime bugs | **48%** |
+| CodeRabbit | same | 46% |
+| Cursor BugBot | same | 42% |
+| **Vul-RAG** (LLM + knowledge retrieval) | LinuxVul | +16-24% over pure LLM ([Du 2024](https://arxiv.org/abs/2406.11147)) |
+**Implication:** Aegis-v2 must combine static engines + retrieved vuln knowledge +
+project graph + LLM critic with structured evidence trails. **No single layer is
+enough.** A 150M-parameter model alone classifies at near-chance on logic bugs (PrimeVul).
+The 150M model's role is **router**, not detector.
+---
+## 2. Architecture overview (one picture)
+```
+┌─────────────────────────────────────────────────────────────────────┐
+│ AI agent (Claude Code / Cursor / Codex CLI / opencode / Hermes)     │
+│   │                                                                 │
+│   ├── PreToolUse hook  ───►  aegis precheck_change()                │
+│   │       p50 ≤ 50 ms;  symbol resolution + tree-sitter sanity       │
+│   │                                                                 │
+│   ├── Write / Edit tool ───►  (file written to disk)                │
+│   │                                                                 │
+│   └── PostToolUse hook ───►  aegis validate_edit()                  │
+│           p50 ≤ 400 ms  /  p95 ≤ 4 s on deep                         │
+│           exit 0 = allow ─ exit 2 = BLOCK + remediation prompt       │
+│                                                                     │
+│   ┌─────────────────────────────────────────────────────────────┐   │
+│   │ LAYER 1 — Fast Deterministic   ≤ 200 ms (parallel)          │   │
+│   │  • tree-sitter incremental parse                            │   │
+│   │  • Semgrep OSS  (community rules: p/default, p/security)    │   │
+│   │  • Pyright strict  (Python) / tsc strict (TS) / mypy        │   │
+│   │  • ESLint  + @typescript-eslint/no-floating-promises,       │   │
+│   │     no-misused-promises, require-await                      │   │
+│   │  • Secret scan (gitleaks rules embedded)                    │   │
+│   └─────────────────┬───────────────────────────────────────────┘   │
+│                     │ findings + AST + symbols                      │
+│   ┌─────────────────▼───────────────────────────────────────────┐   │
+│   │ LAYER 2 — Project Context   ≤ 300 ms (cached)               │   │
+│   │  • SCIP index lookup  (scip-typescript / scip-python)       │   │
+│   │  • Joern CPG diff query  (data-flow paths, callers)         │   │
+│   │  • Pysa pre-computed taint summaries  (Python)              │   │
+│   │  • Local code embedding (CodeT5+ 110M)  → FAISS top-K       │   │
+│   └─────────────────┬───────────────────────────────────────────┘   │
+│                     │ enriched evidence trail                       │
+│   ┌─────────────────▼───────────────────────────────────────────┐   │
+│   │ LAYER 3 — 150M Router  ≤ 100 ms (on-device)                 │   │
+│   │  • Classifies: clean / style / logic / security / race      │   │
+│   │  • Predicts CWE bucket (89, 79, 362, 476, 285, 918, ...)    │   │
+│   │  • Action: pass / warn / deep_review                        │   │
+│   │  • If deep_review → escalate to Layer 4                     │   │
+│   └─────────────────┬───────────────────────────────────────────┘   │
+│                     │ score + category                              │
+│   ┌─────────────────▼───────────────────────────────────────────┐   │
+│   │ LAYER 4 — LLM Critic   1-3 s  (~10% of edits, async)        │   │
+│   │  • Vul-RAG retrieve  k=3 CVE knowledge by predicted CWE     │   │
+│   │  • Claude / GPT critic prompt:                              │   │
+│   │      diff + data-flow path + callers + Vul-RAG knowledge    │   │
+│   │  • Structured JSON: {verdict, cwe, evidence, fix, conf}     │   │
+│   │  • Ensemble vote (second-opinion: different model vendor)   │   │
+│   └─────────────────┬───────────────────────────────────────────┘   │
+└─────────────────────│───────────────────────────────────────────────┘
+                      ▼
+       exit 2 + remediation JSON → agent regenerates
+       exit 0 → write proceeds
+```
+### Layer responsibilities
+| Layer | Owns | Latency budget | Fail-open? |
+|---|---|---|---|
+| 1 Fast Deterministic | Pattern bugs, type errors, secrets, async footguns | ≤ 200 ms | **No** — if an engine crashes, log + skip just that engine; the remaining engines still gate the edit |
+| 2 Project Context | Cross-file symbols, data flow, callers, embeddings | ≤ 300 ms | Yes — empty context is acceptable |
+| 3 150M Router | Classify edit + suspected CWE; decide if Layer 4 needed | ≤ 100 ms | Yes — fall back to threshold-based router |
+| 4 LLM Critic | Final verdict on hard cases, with full evidence | 1-3 s (async) | Yes — Layer 1+2 decision stands |
+---
+## 3. MCP tool surface
+Three tools exposed via the MCP protocol (stdio transport):
+### `precheck_change`
+**When:** `PreToolUse` hook, before the agent calls `Write` / `Edit`.
+**Input:** `{ file_path, proposed_content }`.
+**Latency:** p50 ≤ 50 ms; 100 ms hard cap (see §6).
+**Output:** quick advisory only (`{ allow|warn, reasons[] }`) — never blocks.
+**Layers used:** 1 (tree-sitter sanity only — no full engine run).
+### `validate_edit`
+**When:** `PostToolUse` hook, after the agent writes the file.
+**Input:** `{ file_path, content, diff?, project_root? }`.
+**Latency:** p50 ≤ 400 ms, p95 ≤ 4 s.
+**Output:** `{ verdict: allow|warn|block, findings: [...], remediation_prompt? }`.
+**Exit code mapping:** allow → 0, warn → 0 (with stderr message), block → 2.
+**Layers used:** 1, 2, and 3 always; 4 only when 3 says `deep_review`.
+### `explain_risk`
+**When:** agent or human asks "why was this blocked?".
+**Input:** `{ finding_id }` or `{ file_path, line }`.
+**Output:** detailed evidence trail (engine path, data-flow nodes, Vul-RAG citations).
+**Layers used:** read-only retrieval from the finding store.
+---
+## 4. Unified Finding schema (zod, single source of truth)
+```ts
+const Finding = z.object({
+  id: z.string(),               // sha256(engine + file + line + rule)[:16]
+  engine: z.enum([
+    "semgrep", "pyright", "eslint", "treesitter", "secret",
+    "joern", "pysa", "codeql", "infer-racerd",
+    "router-150m", "llm-critic",
+  ]),
+  file: z.string(),
+  line: z.number().int().positive().optional(),
+  col: z.number().int().positive().optional(),
+  rule_id: z.string(),
+  cwe: z.string().regex(/^CWE-\d+$/).optional(),
+  severity: z.enum(["info", "low", "medium", "high", "critical"]),
+  message: z.string().max(500),
+  evidence: z.object({
+    snippet: z.string().max(2000).optional(),
+    dataflow: z.array(z.object({
+      file: z.string(), line: z.number(), label: z.string(),
+    })).max(20).optional(),
+    callers: z.array(z.string()).max(10).optional(),
+    related_cves: z.array(z.string()).max(5).optional(),
+  }).optional(),
+  confidence: z.number().min(0).max(1),  // 0-1, source-stamped
+  source: z.enum(["pattern", "dataflow", "taint", "router", "critic", "ensemble"]),
+  remediation: z.string().max(2000).optional(),
+});
+```
+Every layer produces `Finding[]` in this exact shape. **No engine-specific output
+escapes the orchestrator.** That single contract is why we can swap engines without
+touching downstream logic.
+---
+## 5. Risk scoring → action mapping
+```
+confidence × severity_weight  →  action
+  ≥ 0.85 × {high, critical}   →  BLOCK   (exit 2, remediation prompt)
+  0.6-0.85 × {medium+}        →  WARN    (exit 0, stderr message)
+  any × {low, info}           →  ALLOW   (silent)
+  any single critical secret  →  BLOCK   (regardless of confidence)
+  any taint sink reached      →  BLOCK   (Pysa/Joern with confidence > 0.7)
+```
+The thresholds live in `src/risk.ts` as a single function (`scoreToAction`) so we
+can A/B-tune them without touching engines.
+---
+## 6. Latency budgets (production targets)
+| Stage | p50 | p95 | Worst |
+|---|---|---|---|
+| PreToolUse precheck | 30 ms | 80 ms | 100 ms |
+| Layer 1 (parallel, all engines) | 150 ms | 400 ms | 500 ms |
+| Layer 2 cached query | 50 ms | 200 ms | 300 ms |
+| Layer 3 router inference | 80 ms | 120 ms | 150 ms |
+| Layer 4 LLM critic (10% of edits) | 1.5 s | 3 s | 5 s |
+| **Total p50 (no critic)** | **~310 ms** | | |
+| **Total p95 (with critic)** | | **~4 s** | |
+**Hard limit:** `validate_edit` returns within 5 s or the gate FAILS-OPEN with a
+warning. Better to miss a bug than to break the agent's flow.
+---
+## 7. Caching strategy
+| Cache | Key | TTL | Backend |
+|---|---|---|---|
+| Tree-sitter AST | sha256(content) | session | in-memory LRU |
+| Semgrep findings per file | sha256(content + rules_version) | session | in-memory |
+| SCIP index | per-file mtime + content hash | manual invalidation | LMDB / RocksDB |
+| Joern CPG | per-file mtime + content hash | manual invalidation | OverflowDB |
+| Vul-RAG retrieval | sha256(query_embedding) | 24 h | LMDB |
+| LLM critic verdict | sha256(diff + context + cwe) | 7 d | LMDB |
+`.aegis/` per project, gitignored.
+---
+## 8. Threat model (MCP attack surface)
+**Why this matters:** 2025-2026 supply-chain attacks (Shai-Hulud,
+SANDWORM_MODE, Mini Shai-Hulud) specifically targeted MCP servers via
+prompt injection and registry poisoning. Live CVEs we must defend against:
+- **CVE-2025-49596** — MCP Inspector RCE
+- **CVE-2025-6514** — mcp-remote command injection (437K+ downloads)
+- **CVE-2025-53967** — Figma/Framelink
+- **CVE-2025-54136** — Cursor zero-click prompt injection
+- **CVE-2025-54994** — typosquat package
+**Aegis-v2 invariants** (must hold in every release):
+1. **No registry-based auto-update.** Version pinned in `package.json` + checksum.
+2. **No shell-out to user-controlled paths.** All subprocess args go through
+   `escapeArg()`; file paths validated against `path.resolve()` + allowlist.
+3. **Findings are treated as DATA in any LLM prompt**, never as instructions —
+   wrapped in nonce-marked fences (same pattern as our argus cascade).
+4. **No outbound network in Layer 1-3.** Layer 4 LLM critic is the ONLY outbound
+   call, and it goes only to the user-configured endpoint (Anthropic / OpenAI).
+5. **Minimal-permission install.** The hook reads files in the project root; never
+   `~/.ssh`, `~/.aws`, `~/Documents`, etc. Explicit denylist in `src/util/safe-paths.ts`.
+6. **No tool result is auto-executed.** Remediation prompts are TEXT for the agent,
+   never code we run ourselves.
+7. **Audit log** of every gate decision, locally at `.aegis/audit.jsonl`, append-only.
+---
+## 9. Engines (Phase-by-phase rollout)
+Following the research's effort/impact ranking:
+| Phase | Engines | Why |
+|---|---|---|
+| **0 (Week 1)** | Semgrep OSS + Pyright + tsc + ESLint + tree-sitter + secret-scan | 60% of catches "free"; covers SQLi/XSS/cmd-inj/secret patterns + type errors + async footguns |
+| **1 (Weeks 2-4)** | + SCIP indexer + Joern CPG + CodeT5+ embeddings + FAISS | Cross-file context — answers "who calls this?", "what's the type of x?", "are there similar functions?" |
+| **2 (Weeks 5-8)** | + Pysa taint (Python) + RacerD-on-Joern queries (TS/JS/Python) + Vul-RAG KB + LLM critic + 150M router | Deep semantic — taint reachability, races, CVE knowledge, project-aware verdict |
+| **3 (Weeks 9-12)** | + suppression learning + telemetry + property-based testing gate + policy-as-code | Production hardening |
+### Phase 0 engine choices (what ships first)
+| Engine | What it catches | Latency | Setup |
+|---|---|---|---|
+| **tree-sitter** | Syntax errors, basic structure | 5-20 ms | `tree-sitter` + `tree-sitter-python` etc. |
+| **Semgrep OSS** | Pattern bugs (`p/default` + `p/security-audit` + custom) | 100-500 ms | `pysemgrep` subprocess |
+| **Pyright** | Python type errors, unresolved imports | 100-300 ms | `pyright` CLI subprocess |
+| **tsc** | TS/JS type errors (with `--noEmit`) | 200-500 ms | `tsc` subprocess |
+| **ESLint** | JS/TS lint rules — `no-floating-promises`, `no-misused-promises`, `require-await` (catches ~60% of async bugs) | 100-300 ms | `eslint --rulesdir` |
+| **Secret scan** | Hardcoded keys, tokens | 20-50 ms | Embedded regex set (gitleaks rules) |
+---
+## 10. Comparison to existing tools
+| Tool | Type | Real-time gate? | Cross-file? | LLM critic? | OSS? |
+|---|---|---|---|---|---|
+| **Semgrep MCP** (`semgrep/mcp`) | MCP wrapper | Yes (post-write) | Limited | No | Yes |
+| **Codacy MCP** | MCP wrapper | Yes | Yes (paid) | No | Yes (free tier) |
+| **Snyk MCP** | Commercial | Yes (CLI tier) | Yes | Yes (DeepCode) | No |
+| **Mobb Vibe Shield** | Commercial | Yes | Limited | Yes (fix author) | OpenGrep core |
+| **CodeRabbit MCP** | Commercial | No (PR-time) | Yes | Yes | No |
+| **Aegis-v2** | OSS, ours | **Yes (PRE+POST hook)** | **Yes (SCIP+Joern)** | **Yes (Vul-RAG)** | **Yes** |
+**Differentiators:**
+1. Phase 0 alone matches Semgrep MCP on free tier.
+2. Phase 1-2 adds what Codacy charges for + adds LLM critic with Vul-RAG (no
+   competitor does this combo open-source).
+3. Phase 3 adds property-based testing gate (no competitor at all).
+---
+## 11. Project layout (committed today)
+```
+aegis-v2/
+├── README.md
+├── ARCHITECTURE.md            ← this file
+├── ROADMAP.md                 ← per-phase tasks, deliverables, exit criteria
+├── LICENSE                    ← MIT
+├── package.json
+├── tsconfig.json
+├── vitest.config.ts
+├── .gitignore
+├── .claude/
+│   └── settings.example.json  ← copy-paste hook config for users
+│
+├── src/
+│   ├── index.ts               ← package entry; re-exports
+│   ├── mcp/
+│   │   ├── server.ts          ← MCP stdio server bootstrap
+│   │   ├── transport.ts
+│   │   └── tools/
+│   │       ├── precheck.ts    ← precheck_change
+│   │       ├── validate.ts    ← validate_edit
+│   │       └── explain.ts     ← explain_risk
+│   ├── engines/
+│   │   ├── types.ts           ← Engine interface
+│   │   ├── registry.ts        ← engine registration
+│   │   ├── treesitter.ts      ← Phase 0
+│   │   ├── semgrep.ts         ← Phase 0
+│   │   ├── pyright.ts         ← Phase 0
+│   │   ├── tsc.ts             ← Phase 0
+│   │   ├── eslint.ts          ← Phase 0
+│   │   ├── secret-scan.ts     ← Phase 0
+│   │   ├── scip.ts            ← Phase 1 (stub)
+│   │   ├── joern.ts           ← Phase 1 (stub)
+│   │   ├── embeddings.ts      ← Phase 1 (stub)
+│   │   ├── pysa.ts            ← Phase 2 (stub)
+│   │   ├── racerd.ts          ← Phase 2 (stub)
+│   │   ├── vulrag.ts          ← Phase 2 (stub)
+│   │   ├── router-150m.ts     ← Phase 2 (stub)
+│   │   └── llm-critic.ts      ← Phase 2 (stub)
+│   ├── orchestrator.ts        ← run engines in parallel, merge findings
+│   ├── findings.ts            ← unified Finding zod schema
+│   ├── risk.ts                ← scoring + action mapping
+│   ├── lang.ts                ← language detection from file ext
+│   ├── cache.ts               ← LMDB wrapper
+│   ├── hooks/
+│   │   └── claude-code.ts     ← stdin/stdout protocol for Claude Code hooks
+│   └── util/
+│       ├── subprocess.ts      ← safe spawn with timeouts
+│       ├── safe-paths.ts      ← path validation + denylist
+│       └── logger.ts
+│
+├── tests/
+│   ├── fixtures/              ← good + bad sample files per language
+│   ├── engines/               ← per-engine unit tests
+│   ├── orchestrator.test.ts
+│   ├── risk.test.ts
+│   └── mcp.test.ts            ← MCP protocol roundtrip
+│
+└── docs/
+    ├── hook-setup.md          ← step-by-step user install
+    ├── phase-1-project-context.md   ← extended design for SCIP+Joern+embeddings
+    ├── phase-2-deep-logic.md        ← Pysa, RacerD, Vul-RAG, 150M router, LLM critic
+    ├── phase-3-production.md        ← telemetry, suppression learning, property tests
+    └── benchmarks.md          ← Macroscope + internal eval methodology
+```
+---
+## 12. Open decisions (logged here for posterity)
+| Decision | Choice | Why | Reversible? |
+|---|---|---|---|
+| Language | TypeScript / Node 18+ | MCP SDK is first-class TS; CodeGraph / SCIP-typescript / scip-python all bundle TS clients; aligns with research recommendation | Hard — would require full rewrite |
+| Stack philosophy | OSS-only (free engines + free Claude/GPT subscription) | No vendor lock; full control; can be self-hosted | Easy — can add commercial adapters later |
+| MCP transport | stdio | Lowest latency; no auth surface; matches every agent's expected wiring | Easy — Streamable HTTP can be added later |
+| Findings schema | Unified `Finding` (zod) | Single contract = engine swap without downstream changes | Hard — would need migration |
+| Cache backend | LMDB | Embedded, zero-deps, mmap-fast | Easy — can swap for RocksDB |
+| Phase 0 engines | Semgrep OSS + Pyright + tsc + ESLint + tree-sitter + secret-scan | Cheapest highest-coverage starting set per research | Easy — engines are pluggable |
+| Hook strategy | Both PreToolUse (advisory) + PostToolUse (blocking) | Pre is cheap; post is authoritative. Together = user sees warnings during editing AND can never write blocked code. | Easy |
+| Local LLM | Defer 150M router to Phase 2 | Phase 0 doesn't need it; ship value sooner | Easy |
+| Critic model | Claude (default) + OpenAI GPT (ensemble) | Different vendors avoid common-mode bias | Easy |
+| Audit log | `.aegis/audit.jsonl` append-only | Local, simple, gitignored | Easy |
+---
+## 13. Citations
+Research that shaped this design:
+- Ding et al., **PrimeVul** (arxiv 2403.18624, 2024) — vulnerability detection benchmark; LLMs at ~3% F1 in stringent settings.
+- Du et al., **Vul-RAG** (arxiv 2406.11147, 2024) — knowledge-augmented retrieval; +16-24% over pure LLM.
+- Blackshear et al., **RacerD** (OOPSLA 2018) — compositional inter-procedural race detection; 2500+ races fixed in production at Meta.
+- Macroscope **Code-Review Benchmark** (Feb 2026) — 118 bugs across 45 OSS repos; 48% top, traditional SAST < 20%.
+- aiXcoder **COLA** (arxiv 2503.15301, 2025) — LLMs ignore unfocused context; targeted retrieval wins.
+- **RepoGraph** (ICLR 2025, arxiv 2410.14684) — AST-based def/ref/invoke graph; +2-3 pp on SWE-bench.
+- **IRIS** (ICLR 2025, arxiv 2405.17238) — LLM-augmented CodeQL; doubles recall on Java curated set.
+OSS engines:
+- [Semgrep OSS](https://github.com/semgrep/semgrep) — LGPL 2.1
+- [Pyright](https://github.com/microsoft/pyright) — MIT
+- [ESLint](https://github.com/eslint/eslint) — MIT
+- [tree-sitter](https://github.com/tree-sitter/tree-sitter) — MIT
+- [Joern](https://github.com/joernio/joern) — Apache 2.0
+- [Infer / RacerD](https://github.com/facebook/infer) — MIT
+- [Pysa / Pyre](https://github.com/facebook/pyre-check) — MIT
+- [SCIP](https://github.com/sourcegraph/scip) + [scip-typescript](https://github.com/sourcegraph/scip-typescript) + [scip-python](https://github.com/sourcegraph/scip-python) — Apache 2.0
+- [LMDB-js](https://github.com/kriszyp/lmdb-js) — MIT
+- [@modelcontextprotocol/sdk](https://github.com/modelcontextprotocol/typescript-sdk) — MIT

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Aegis-v2 contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,153 @@
+# Aegis-v2
+**Real-time MCP code-write gate for AI agents.**
+When Claude Code / Cursor / Codex / opencode / Hermes is about to write a file,
+Aegis runs an in-process multi-engine analysis and decides ALLOW / WARN / BLOCK
+— with a median latency of ≤ 400 ms. Blocked writes return a structured remediation prompt
+the agent can act on automatically.
+OSS-only. TypeScript-native. Fully self-hosted.
+## Status
+**Phase 0** — Shipped. See `ARCHITECTURE.md` for the full 4-phase plan.
+Phase 0 capability surface:
+| Engine | Catches | Languages |
+|---|---|---|
+| `secret-scan` (built-in) | Hardcoded secrets (AWS / GitHub / OpenAI / Slack / JWT / PEM keys) | all |
+| `treesitter` | Parse errors / "agent wrote half a file" | py, js, jsx, ts, tsx |
+| `eslint` | `no-floating-promises`, `no-misused-promises`, `no-eval`, ~25 lint rules | js, ts, jsx, tsx |
+| `pyright` (if installed) | Python type errors | py |
+| `tsc` (if installed) | TS type errors | ts, tsx, js, jsx (`checkJs`) |
+| `semgrep` (if installed) | Pattern-based security rules from `p/default` | all major langs |
+## Install
+```bash
+cd aegis-v2
+npm install
+npm run build
+# global install (optional)
+npm link
+```
+External engines that the gate auto-detects (install where you want them):
+```bash
+# Python type checker
+pip install --user pyright
+# Multi-language pattern engine
+pip install --user semgrep
+# TypeScript compiler — likely already in your projects
+npm i -g typescript
+```
+Aegis runs without these — they're additive. Each one not found = degraded
+coverage, no crash.
+Run `aegis doctor` to see which engines are live on your machine.
+## Wire to Claude Code (the hook)
+Copy `.claude/settings.example.json` to your project's `.claude/settings.json`
+(or merge into the existing file):
+```json
+{
+  "hooks": {
+    "PostToolUse": [
+      { "matcher": "Write|Edit", "hooks": [{ "type": "command", "command": "aegis hook", "timeout": 10 }] }
+    ]
+  }
+}
+```
+Restart Claude Code. From the next session, every `Write` / `Edit` is gated.
+Blocked writes show the remediation prompt and Claude regenerates.
+## Use as an MCP server
+Add to `~/.claude.json` (Claude Code) — but **prefer the hook** for blocking;
+the MCP tools are for the agent to opt-in to checking BEFORE writing:
+```json
+{
+  "mcpServers": {
+    "aegis": {
+      "type": "stdio",
+      "command": "aegis",
+      "args": ["mcp"]
+    }
+  }
+}
+```
+The MCP server exposes three tools:
+- `precheck_change(file_path, content)` — fast advisory, ≤ 200 ms, never blocks.
+- `validate_edit(file_path, content)` — authoritative, returns verdict + remediation.
+- `explain_risk(finding_id|file)` — look up the prior gate decision from `.aegis/audit.jsonl`.
+## CLI
+```bash
+aegis doctor                          # list engines + availability
+aegis scan path/to/file.py            # print verdict + findings as JSON; exit 0/2
+aegis mcp                             # start stdio MCP server
+aegis hook                            # consume Claude-Code hook payload on stdin
+```
+## Environment
+| Variable | Default | Purpose |
+|---|---|---|
+| `AEGIS_LOG_LEVEL` | `info` | `debug` \| `info` \| `warn` \| `error` |
+| `AEGIS_TOTAL_TIMEOUT_MS` | `5000` | Overall gate budget |
+| `AEGIS_SEMGREP_CONFIG` | `p/default` | Semgrep ruleset |
+| `AEGIS_SEMGREP_BIN` | — | Override semgrep binary path |
+| `AEGIS_PYRIGHT_BIN` | — | Override pyright binary path |
+| `AEGIS_TSC_BIN` | — | Override tsc binary path |
+## Tests
+```bash
+npm test
+```
+Unit-tested today: findings schema, lang detection, risk scoring, secret-scan
+engine, tree-sitter engine, ESLint engine, orchestrator end-to-end with
+path-safety + timeout.
+## Audit log
+Every gate decision (allow / warn / block) appends a line to
+`.aegis/audit.jsonl` under your project root. Append-only, no PII —
+only the file path, action, engine, rule_id, severity, and confidence.
+## Security posture
+See `ARCHITECTURE.md §8` (Threat model). Key invariants:
+1. No registry-based auto-update.
+2. No shell expansion on user paths.
+3. Findings + file content are treated as **data** in any LLM prompt
+   (Phase 2+), never as instructions.
+4. Phases 0-1 make no outbound network calls.
+5. Path safety denylist covers `~/.ssh`, `~/.aws`, `~/.gnupg`, `~/.kube`,
+   `~/.docker`, `~/.npmrc`, `~/.pgpass`, `/etc`, `/root`, `/proc`, `/sys`,
+   `C:\Windows`, `C:\Program Files`.
+6. Audit log local-only, append-only, gitignored.
+## License
+MIT — see `LICENSE`.
+## Roadmap
+See `ROADMAP.md` for the full Phase 0 → 3 plan and per-phase exit criteria.
+Phase 1 (project-aware context via SCIP + Joern + embeddings) is next.