npm - codetrap - Versions diffs - 0.1.7 → 0.1.8 - Mend

codetrap 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

package/README.md +151 -52
package/docs/installation.md +113 -29
package/package.json +4 -3
package/plugins/codetrap-agent/.codex-plugin/plugin.json +1 -2
package/plugins/codetrap-agent/hooks/post-flight-capture.example.md +19 -17
package/plugins/codetrap-agent/hooks.json +2 -2
package/{skills → plugins/codetrap-agent/skills}/codetrap-add/SKILL.md +10 -4
package/plugins/codetrap-agent/skills/codetrap-capture/SKILL.md +14 -3
package/plugins/codetrap-agent/skills/codetrap-capture-external/SKILL.md +52 -9
package/plugins/codetrap-agent/skills/codetrap-check/SKILL.md +74 -6
package/{skills → plugins/codetrap-agent/skills}/codetrap-search/SKILL.md +6 -5
package/plugins/codetrap-agent/templates/AGENTS.codetrap.md +31 -5
package/scripts/search-policy-sweep.ts +131 -0
package/src/commands/workflow.ts +144 -68
package/src/db/embedding-queries.ts +230 -48
package/src/db/queries.ts +0 -25
package/src/db/repository.ts +32 -21
package/src/db/schema.ts +80 -0
package/src/index.ts +28 -3
package/src/lib/command-requests.ts +112 -1
package/src/lib/config.ts +57 -7
package/src/lib/constants.ts +1 -1
package/src/lib/doctor.ts +42 -12
package/src/lib/embedder.ts +118 -3
package/src/lib/embedding-health.ts +3 -1
package/src/lib/embedding-job.ts +3 -0
package/src/lib/embedding-management.ts +65 -0
package/src/lib/embedding-runtime.ts +177 -0
package/src/lib/output-json.ts +0 -2
package/src/lib/scope-context.ts +12 -6
package/src/lib/scope-migration.ts +2 -1
package/src/lib/scope.ts +0 -2
package/src/lib/search-eval.ts +38 -18
package/src/lib/search-policy-sweep.ts +563 -0
package/src/lib/search-policy.ts +0 -4
package/src/lib/search-service.ts +14 -15
package/src/lib/session-candidate-document.ts +175 -0
package/src/lib/session-candidate-scope.ts +6 -0
package/src/lib/session-capture.ts +298 -32
package/src/lib/session-codec.ts +1 -8
package/src/lib/session-operations.ts +83 -60
package/src/lib/session-review.ts +327 -0
package/src/lib/session-store.ts +87 -73
package/src/lib/store.ts +74 -10
package/src/lib/string-list.ts +3 -0
package/src/lib/text-lines.ts +7 -0
package/src/lib/trap-search-document.ts +2 -1
package/src/lib/value-types.ts +3 -0
package/src/web/client-review.ts +171 -0
package/src/web/client-script.ts +426 -51
package/src/web/client-shell.ts +414 -0
package/src/web/client-text.ts +112 -0
package/src/web/project-registry.ts +3 -5
package/src/web/server.ts +117 -103
package/src/web/static.ts +364 -19
package/skills/codetrap-capture-external/SKILL.md +0 -62
package/skills/codetrap-check/SKILL.md +0 -69
package/src/lib/embedding-index.ts +0 -53

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codetrap",
-  "version": "0.1.7",
+  "version": "0.1.8",
   "description": "Capture and retrieve coding pitfalls so AI doesn't repeat mistakes",
   "type": "module",
   "license": "MIT",
@@ -33,7 +33,6 @@
     "src/web",
     "src/index.ts",
     "src/mcp-server.ts",
-    "skills",
     "plugins",
     ".agents/plugins/marketplace.json",
     "scripts",
@@ -52,6 +51,7 @@
     "release:preflight": "bun run scripts/release-preflight.ts",
     "check:release-version": "bun run scripts/check-release-version.ts",
     "eval:dogfood": "bun run scripts/dogfood-eval.ts",
+    "eval:search-policy": "bun run scripts/search-policy-sweep.ts",
     "build": "bun build ./src/index.ts --compile --outfile dist/codetrap && bun build ./src/mcp-server.ts --compile --outfile dist/codetrap-serve",
     "build:cli": "bun build ./src/index.ts --compile --outfile dist/codetrap",
     "build:serve": "bun build ./src/mcp-server.ts --compile --outfile dist/codetrap-serve"
@@ -63,6 +63,7 @@
     "@modelcontextprotocol/sdk": "^1.0.0"
   },
   "devDependencies": {
-    "@types/bun": "latest"
+    "@types/bun": "latest",
+    "playwright-core": "^1.56.1"
   }
 }

package/plugins/codetrap-agent/.codex-plugin/plugin.json CHANGED Viewed

@@ -12,12 +12,11 @@
   "license": "MIT",
   "keywords": ["agent", "memory", "cli", "mcp", "pitfalls"],
   "skills": "./skills/",
-  "hooks": "./hooks.json",
   "mcpServers": "./.mcp.json",
   "interface": {
     "displayName": "codetrap Agent",
     "shortDescription": "Check local pitfall memory before code changes.",
-    "longDescription": "Installs CLI-first guidance, optional MCP config, and example hooks so coding agents can search codetrap before risky edits, propose new trap captures after failures, and save useful lessons from external references.",
+    "longDescription": "Installs CLI-first guidance and optional MCP config so coding agents can search codetrap before risky edits, propose new trap captures after failures, and save useful lessons from external references. Hook files are packaged as examples, not auto-installed.",
     "developerName": "codetrap maintainers",
     "category": "Productivity",
     "capabilities": ["Tools", "Memory", "Code"],

package/plugins/codetrap-agent/hooks/post-flight-capture.example.md CHANGED Viewed

@@ -1,25 +1,27 @@
 # Post-flight Codetrap Capture
-Use this template after a task reveals a reusable pitfall. Do not write the trap automatically; ask the user to confirm first.
+Use this template after a task reveals a reusable pitfall. Do not write the trap automatically; put it in the session candidate inbox first.
-```json
-{
-  "title": "Short pitfall title",
-  "category": "bug",
-  "scope": "project",
-  "context": "When this situation appears...",
-  "mistake": "The agent tends to...",
-  "fix": "Do this instead...",
-  "tags": ["area", "tool"],
-  "severity": "warning",
-  "path_globs": ["src/example/**"],
-  "module": "example",
-  "owner": "platform"
-}
+```markdown
+Title: Short pitfall title
+Category: bug
+Scope: project
+Context: When this situation appears...
+Mistake: The agent tends to...
+Fix: Do this instead...
+Tags: area, tool
+Severity: warning
+Path globs: src/example/**
+Module: example
+Owner: platform
 ```
-After confirmation:
+Capture the candidate:
 ```bash
-codetrap add --json '<json above>' --output-json
+codetrap session capture --trap-markdown-file candidate.md --kind review --json
 ```
+Hook-based clients can set `CODETRAP_CANDIDATE_FILE` to a Markdown file with the same fields and run the packaged `post_task` command. The hook must still leave the candidate in the inbox; it must not accept or write a confirmed trap automatically.
+Then review it with `codetrap session candidate <candidate-id> --session <session-id> --json` and accept, edit, reject, or supersede it explicitly.

package/plugins/codetrap-agent/hooks.json CHANGED Viewed

@@ -5,7 +5,7 @@
     "command": "codetrap search \"$CODETRAP_QUERY\" --mode hybrid --json"
   },
   "post_task": {
-    "description": "After repeated failures or user corrections, propose a structured trap; write only after user confirmation.",
-    "command": "codetrap add --json '{...}' --output-json"
+    "description": "After repeated failures or user corrections, capture a structured trap draft in the session candidate inbox; do not write a confirmed trap automatically.",
+    "command": "codetrap session capture --trap-markdown-file \"$CODETRAP_CANDIDATE_FILE\" --kind review --json"
   }
 }

package/{skills → plugins/codetrap-agent/skills}/codetrap-add/SKILL.md RENAMED Viewed

@@ -1,10 +1,16 @@
 ---
 name: codetrap-add
-description: Record a coding pitfall as a structured codetrap entry. Use when the user wants to save a lesson learned, recurring AI mistake, project convention, or runs /codetrap-add.
+description: Record a confirmed coding pitfall as a structured codetrap entry after explicit user approval. For agent-discovered post-flight lessons, prefer codetrap-capture and the session candidate inbox.
 ---
 You are helping the user record a "coding pitfall" (a mistake pattern that AI coding assistants tend to make, and the correct approach). These pitfalls are stored in a local database and will be used to warn AI in future sessions.
+This skill writes confirmed memory. Do not use it for autonomous post-flight agent discoveries, repeated failures, or review feedback unless the user explicitly asks to save the trap as confirmed memory. For agent-drafted lessons, prefer:
+```bash
+codetrap session capture --trap-markdown - --kind review --json
+```
 ## Step 1: Gather information
 Ask the user to describe what went wrong. Guide them to provide:
@@ -40,9 +46,9 @@ Pick the best-fitting category:
 - `bug` — Common logic errors, edge cases
 - `other` — Everything else
-## Step 4: Structure and save
+## Step 4: Structure and confirm
-Convert the user's description into this JSON structure and call the CLI:
+Convert the user's description into this JSON structure, show the draft to the user, and ask for explicit confirmation before writing it as confirmed memory:
 ```bash
 codetrap add --json '{
@@ -62,7 +68,7 @@ codetrap add --json '{
 }' --output-json
 ```
-If the CLI is not available, use the MCP tool `add_trap` instead.
+Only after the user confirms the draft should you call the CLI. If the CLI is not available and the user explicitly confirmed the save, use the MCP tool `add_trap` instead.
 ## Step 5: Confirm

package/plugins/codetrap-agent/skills/codetrap-capture/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: codetrap-capture
-description: Propose a new codetrap after repeated failures, user corrections, or review feedback.
+description: Propose a new codetrap candidate after repeated failures, user corrections, or review feedback without writing confirmed memory automatically.
 ---
 Use this after a task exposes a recurring mistake pattern. Draft a candidate trap with:
@@ -12,8 +12,19 @@ Use this after a task exposes a recurring mistake pattern. Draft a candidate tra
 - tags
 - optional `path_globs`, `module`, and `owner`
-Ask the user to confirm before writing. After confirmation, run:
+Do not write the confirmed trap directly. Put the draft into the session candidate inbox:
 ```bash
-codetrap add --json '{...}' --output-json
+cat <<'EOF' | codetrap session capture --trap-markdown - --kind review --json
+Title: <durable pitfall>
+Context: <when it triggers>
+Mistake: <what the agent did wrong>
+Fix: <what to do instead>
+Severity: warning
+Tags: <area>,<tool>
+EOF
 ```
+Use `--trap-json` only when you already have a structured object. Prefer Markdown for agent-drafted lessons because it avoids shell-escaping long text.
+If no session is active, `session capture` creates and closes a post-flight session automatically. Tell the user the returned candidate id and session id, then ask whether they want to accept, edit, reject, or supersede the candidate. Pending candidates are also visible through `codetrap session status`, `codetrap session list`, `codetrap doctor`, and `codetrap web`.

package/plugins/codetrap-agent/skills/codetrap-capture-external/SKILL.md CHANGED Viewed

@@ -5,15 +5,58 @@ description: Extract durable coding pitfalls from an external article, blog post
 Use this when the user shares an external source and wants to save useful lessons for future AI coding work.
-The agent should read the source. The codetrap CLI should not fetch URLs or crawl the web; it only stores confirmed lessons and evidence.
+The external source is read by the agent. Do not ask codetrap CLI to fetch URLs or crawl the web. codetrap stays a local memory store.
-Workflow:
+## Step 1: Read The Source
-1. Read the URL, article text, issue, paper, or reference.
-2. Extract every candidate trap that has a clear trigger, mistake, and fix. Do not force a fixed count.
-3. Filter out broad summaries, one-off facts, vague advice, and source details that will not change future coding behavior.
-4. Rank the recommended candidates and ask the user which ones to save.
-5. After confirmation, run `codetrap add --json '<trap-json>' --output-json`.
-6. Attach the source with `codetrap add_trap_evidence <id> --scope <project|global> --source_type article --source_ref "<url-or-source-id>" --note "External lesson captured from <short source title>." --output-json`.
+Open or read the provided URL, article text, issue, paper, or reference. Identify lessons that could change future implementation behavior.
-Default to `global` for generally reusable engineering lessons. Use `project` only when the source lesson is specific to the current repository or stack.
+Do not summarize the whole source into codetrap. Extract only durable pitfalls with a clear trigger, mistake, and fix.
+## Step 2: Extract Candidate Traps
+Create as many candidate traps as pass the quality bar. Do not force a fixed count.
+Each candidate must include:
+- `context`: when this lesson applies
+- `mistake`: what an AI coding agent might do wrong
+- `fix`: what it should do instead
+- `severity`: `warning`, `error`, or `critical`
+- `tags`: useful retrieval terms
+- optional `path_globs`, `module`, and `owner` when the lesson is project-specific
+Reject or omit candidates that are broad summaries, one-off facts, vague advice, marketing claims, or source details that would not change future coding behavior.
+## Step 3: Rank And Ask
+Present the recommended candidates in priority order. Include a short reason for each recommendation.
+Ask the user which candidates to save. Do not write any trap until the user confirms.
+If a candidate is useful but needs a narrower scope, ask for or propose edits before saving.
+## Step 4: Save Confirmed Lessons
+For each confirmed candidate, call:
+```bash
+codetrap add --json '<trap-json>' --output-json
+```
+Then attach the external source as evidence:
+```bash
+codetrap add_trap_evidence <id> \
+  --scope <project|global> \
+  --source_type article \
+  --source_ref "<url-or-source-id>" \
+  --note "External lesson captured from <short source title>." \
+  --output-json
+```
+Use `global` for generally reusable lessons across projects. Use `project` only when the lesson is specific to the current repository or technology stack.
+## Step 5: Confirm
+Tell the user which trap IDs were saved, their scopes, and the source reference attached as evidence.

package/plugins/codetrap-agent/skills/codetrap-check/SKILL.md CHANGED Viewed

@@ -1,16 +1,84 @@
 ---
 name: codetrap-check
-description: Check codetrap from the current project cwd before non-trivial code edits.
+description: Check the codetrap pitfall database before code changes and apply relevant lessons. Use before non-trivial coding work, when touching risky areas, or when the user runs /codetrap-check.
 ---
-Before risky code changes, run:
+Before generating any non-trivial code, pause and check the codetrap database for relevant pitfalls. This is a "pre-flight check" that prevents you from repeating known mistakes.
+## When to trigger
+Run this check when:
+1. The user asks you to write or modify code
+2. The task touches an area with recorded pitfalls (API, auth, database, security, etc.)
+3. The user explicitly runs `/codetrap-check`
+Do NOT run for: trivial text changes, questions about code, documentation-only changes.
+## Step 1: Extract key terms
+From the user's request, extract search keywords. Focus on:
+- Technology names: "axios", "prisma", "jwt", "react"
+- Patterns: "middleware", "endpoint", "migration", "hook"
+- Domains: "authentication", "database", "routing", "state"
+## Step 2: Search the database
+Default to the CLI from the current project cwd:
+```bash
+codetrap search "<keywords>" --mode hybrid --json
+```
+When the task targets a known file or subsystem, include applicability hints:
+```bash
+codetrap search "<keywords>" --path src/db/repository.ts --module db --json
+```
+If the query comes from another tool, stdin is also supported:
+```bash
+echo "<keywords>" | codetrap search --mode hybrid --json
+```
+MCP `search_traps` is optional. Use it only when it is already available and project-scoped correctly; pass `cwd` when the client supports it.
+Review the top 3 returned action cards before deciding that no trap applies. Do not stop after only the first result; relevant traps may rank second or third. If fewer than 3 cards are returned, review all returned cards.
+Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. Severity alone is not enough to apply a trap. Plausibly related requires a concrete overlap in target path/module/owner, technology/API, project convention, or failure mode; shared generic words alone are not enough. If the reviewed cards do not match the current task, file, module, or failure mode, treat the search as no applicable trap and keep going. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
+## Step 3: Apply the lessons
+For each relevant trap found in the reviewed top cards:
+1. Confirm the trap context matches the current task, file, module, or failure mode
+2. For matching cards, run `next_action.command` from CLI JSON before editing when the card is highly relevant or has `critical`/`error` severity; with MCP, call `get_trap` with `next_action.details_args.id` and `next_action.details_args.scope`
+3. Adjust your code generation to follow the correct approach
+4. If a trap matches exactly what you were about to do, explicitly tell the user: "I was about to [avoid], but the codetrap database says [do_instead]. I'll do it the right way."
+## Step 4: Report
+Briefly tell the user which traps you found and how you adjusted:
+```
+Checked codetrap: found 2 relevant pitfalls. Avoiding [X] and using [Y] instead.
+```
+If this is an explicit `/codetrap-check` run or first-run setup and no traps match, say: "Checked codetrap: no applicable traps found; continuing." For routine automatic checks, keep the report short.
+## Step 5: Record new pitfalls
+If while writing code you discover a NEW pitfall that isn't in the database, draft a post-flight trap candidate and put it in the session inbox:
 ```bash
-codetrap search "<task keywords>" --mode hybrid --json
+cat <<'EOF' | codetrap session capture --trap-markdown - --kind review --json
+Title: <durable pitfall>
+Context: <when it triggers>
+Mistake: <what the agent did wrong>
+Fix: <what to do instead>
+EOF
 ```
-Review the top 3 action cards. If a card is highly relevant, or has `critical` or `error` severity and is plausibly related, run its `next_action.command` before editing.
+Use `--trap-json` only when you already have a structured object.
-Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. If a trap seems irrelevant, ignore it. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
+Do not accept it automatically. Tell the user the returned candidate id and session id, then ask whether they want to accept, edit, reject, or supersede it.
-Use MCP only as an optional adapter. When calling MCP tools, pass `cwd` when the client supports it.
+If there may be older unreviewed candidates, use `codetrap session status`, `codetrap session list`, `codetrap doctor`, or `codetrap web` to surface the pending review queue.

package/{skills → plugins/codetrap-agent/skills}/codetrap-search/SKILL.md RENAMED Viewed

@@ -48,18 +48,19 @@ search_traps(query="<keywords>", scope=<optional>, category=<optional>, path=<op
 Review the top 3 action cards before deciding that no trap applies. Do not rely only on the first result; a relevant trap can rank second or third. If fewer than 3 cards are returned, review all returned cards.
-Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. If a trap seems irrelevant, ignore it. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
+Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. Severity alone is not enough to apply a trap. Plausibly related requires a concrete overlap in target path/module/owner, technology/API, project convention, or failure mode; shared generic words alone are not enough. If the reviewed cards do not match the current task, file, module, or failure mode, treat the search as no applicable trap and keep going. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
 ## How to present results
 1. Show the most relevant reviewed traps first (project scope traps before global)
 2. Summarize each reviewed card's title, severity, `avoid`, and `do_instead`
-3. If any reviewed card is highly relevant, has matching context, or has `critical`/`error` severity and is plausibly related, and you are about to edit code, run the CLI `next_action.command`; with MCP, call `get_trap` with the card's `id` and `scope` before proceeding
+3. For matching cards, run the CLI `next_action.command` before editing when the card is highly relevant or has `critical`/`error` severity; with MCP, call `get_trap` with the card's `id` and `scope` before proceeding
 4. If no results, tell the user (this is a new area with no recorded pitfalls yet)
 ## Example
-User: "I need to add a new API endpoint"
-→ Search: `codetrap search "API endpoint" --mode hybrid --json`
+User: "I need to add a new API endpoint that calls an external service"
+→ Search: `codetrap search "API endpoint external service" --mode hybrid --json`
 → Results show: "Don't use axios, use fetchWrapper" (project, error)
-→ Tell user: "I see a project convention: always use fetchWrapper instead of axios. I'll follow that."
+→ Because the task includes outbound HTTP, tell user: "I see a matching project convention: always use fetchWrapper instead of axios. I'll follow that."
+→ If the endpoint does not make outbound HTTP calls, ignore this card even if severity is error.

package/plugins/codetrap-agent/templates/AGENTS.codetrap.md CHANGED Viewed

@@ -6,24 +6,50 @@ Before non-trivial code edits, check local pitfall memory from the current proje
 codetrap search "<keywords>" --mode hybrid --json
 ```
-Review the top 3 action cards before deciding no trap applies. If a card is highly relevant, or has `critical` or `error` severity and is plausibly related, inspect it before editing:
+Review the top 3 action cards, or all returned cards if fewer than 3, before deciding no trap applies. Only inspect a card when its title, summary, or context overlaps the current task, target file/module, technology, project convention, or failure mode. For matching cards, inspect before editing when the card is highly relevant or has `critical` or `error` severity:
 ```bash
 codetrap show <id> --scope <project|global> --json
 ```
-Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. If a trap seems irrelevant, ignore it.
+Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. Severity alone is not enough to apply a trap. Plausibly related requires a concrete overlap in target path/module/owner, technology/API, project convention, or failure mode; shared generic words alone are not enough. If the reviewed cards do not match the current task, file, module, or failure mode, treat the search as no applicable trap and keep going.
 When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
 When editing a specific area, pass applicability hints:
 ```bash
-codetrap search "<keywords>" --path src/db/repository.ts --module db --json
+codetrap search "<keywords>" --path path/to/file --module module-name --json
 ```
-After user corrections, repeated test failures, or review feedback, propose a new trap. Only write it after user confirmation:
+After user corrections, repeated test failures, or review feedback, have the agent draft a structured candidate and put it in the session inbox. Do not write directly to the confirmed trap database:
 ```bash
-codetrap add --json '{...}' --output-json
+cat <<'EOF' | codetrap session capture --trap-markdown - --kind review --json
+Title: <durable pitfall>
+Context: <when it triggers>
+Mistake: <what the agent did wrong>
+Fix: <what to do instead>
+EOF
 ```
+Use `--trap-json` only when the caller already has a structured object.
+Review the candidate with `codetrap session candidate <candidate-id> --session <session-id> --json`, then accept, edit, reject, or supersede it explicitly.
+Use `codetrap session status`, `codetrap session list`, `codetrap doctor`, or `codetrap web` to find pending candidates that still need review.
+## Codetrap Dogfood Eval
+After each pre-edit codetrap search, record whether the search was:
+- useful_hit
+- miss
+- noisy_hit
+- no_relevant_trap
+Record the observation in `dogfood-log.md` with the task, query, mode, top results, judgment, whether the result changed the next action, and the promotion lane.
+When a real query should reliably find an existing trap, save it as a live eval case with query, mode, scope, and gold target.
+Do not promote every observation. Only promote representative cases that protect search quality.

package/scripts/search-policy-sweep.ts ADDED Viewed

@@ -0,0 +1,131 @@
+#!/usr/bin/env bun
+import {
+  formatPolicySweepReport,
+  readLiveEvalCases,
+  runFixturePolicySweep,
+  runLivePolicySweep,
+  type GoldTarget,
+  type LiveEvalCase,
+  type PolicySweepReport,
+  type SweepCandidateReport,
+} from "../src/lib/search-policy-sweep";
+import { SEARCH_MODES, SCOPES, type Scope, type SearchMode } from "../src/lib/constants";
+async function main(): Promise<void> {
+  const args = parseArgs(process.argv.slice(2));
+  const command = args.positionals[0] ?? "fixture";
+  try {
+    if (command === "fixture") {
+      const report = await runFixturePolicySweep({ fixturePath: args.opts.fixture });
+      print(report, args.opts.json === "true", args.opts["include-cases"] === "true");
+      return;
+    }
+    if (command === "live") {
+      const cases = liveCasesFromArgs(args);
+      const report = await runLivePolicySweep({
+        cwd: args.opts.cwd ?? process.cwd(),
+        cases,
+        defaultScope: scopeField(args.opts.scope, "scope") ?? "project",
+      });
+      print(report, args.opts.json === "true", args.opts["include-cases"] === "true");
+      return;
+    }
+    throw new Error(usage());
+  } catch (error) {
+    console.error(error instanceof Error ? error.message : String(error));
+    process.exit(1);
+  }
+}
+function print(report: PolicySweepReport, json: boolean, includeCases: boolean): void {
+  console.log(json ? JSON.stringify(includeCases ? report : compactReport(report), null, 2) : formatPolicySweepReport(report));
+}
+function compactReport(report: PolicySweepReport): Omit<PolicySweepReport, "baseline" | "best" | "candidates"> & {
+  baseline: Omit<SweepCandidateReport, "cases">;
+  best: Omit<SweepCandidateReport, "cases">;
+  candidates: Omit<SweepCandidateReport, "cases">[];
+} {
+  return {
+    ...report,
+    baseline: compactCandidate(report.baseline),
+    best: compactCandidate(report.best),
+    candidates: report.candidates.map(compactCandidate),
+  };
+}
+function compactCandidate(candidate: SweepCandidateReport): Omit<SweepCandidateReport, "cases"> {
+  const { cases: _cases, ...rest } = candidate;
+  return rest;
+}
+function liveCasesFromArgs(args: { opts: Record<string, string>; positionals: string[] }): LiveEvalCase[] {
+  if (args.opts.queries) return readLiveEvalCases(args.opts.queries);
+  if (!args.opts.query) throw new Error(usage());
+  const gold = goldFromArgs(args.opts);
+  return [{
+    query: args.opts.query,
+    mode: modeField(args.opts.mode, "mode") ?? "hybrid",
+    scope: scopeField(args.opts.scope, "scope") ?? "project",
+    gold: gold.length > 0 ? gold : undefined,
+  }];
+}
+function goldFromArgs(opts: Record<string, string>): GoldTarget[] {
+  if (!opts["gold-id"] && !opts["gold-title"]) return [];
+  const id = opts["gold-id"] ? Number(opts["gold-id"]) : undefined;
+  if (id !== undefined && (!Number.isInteger(id) || id <= 0)) throw new Error("--gold-id must be a positive integer.");
+  const title = opts["gold-title"]?.trim() || undefined;
+  return [{
+    id,
+    title,
+    scope: scopeField(opts.scope, "scope"),
+  }];
+}
+function modeField(value: string | undefined, key: string): SearchMode | undefined {
+  if (value === undefined) return undefined;
+  if (!(SEARCH_MODES as readonly string[]).includes(value)) {
+    throw new Error(`--${key} must be one of: ${SEARCH_MODES.join(", ")}`);
+  }
+  return value as SearchMode;
+}
+function scopeField(value: string | undefined, key: string): Scope | undefined {
+  if (value === undefined) return undefined;
+  if (!(SCOPES as readonly string[]).includes(value)) {
+    throw new Error(`--${key} must be one of: ${SCOPES.join(", ")}`);
+  }
+  return value as Scope;
+}
+function parseArgs(args: string[]): { opts: Record<string, string>; positionals: string[] } {
+  const opts: Record<string, string> = {};
+  const positionals: string[] = [];
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+    if (arg.startsWith("--")) {
+      const key = arg.slice(2);
+      opts[key] = args[i + 1] && !args[i + 1].startsWith("--") ? args[++i] : "true";
+    } else {
+      positionals.push(arg);
+    }
+  }
+  return { opts, positionals };
+}
+function usage(): string {
+  return [
+    "Usage:",
+    "  bun run eval:search-policy -- fixture [--fixture path] [--json]",
+    "  bun run eval:search-policy -- live --cwd /path/to/project --queries live-queries.json [--scope project|global] [--json]",
+    "  bun run eval:search-policy -- live --cwd /path/to/project --query '<query>' [--gold-id n] [--gold-title '<title>'] [--scope project|global] [--mode fts|semantic|hybrid] [--json]",
+    "  Add --include-cases with --json to include full per-case output.",
+  ].join("\n");
+}
+await main();