codetrap 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +132 -98
  2. package/docs/installation.md +61 -63
  3. package/package.json +4 -3
  4. package/plugins/codetrap-agent/.codex-plugin/plugin.json +2 -3
  5. package/plugins/codetrap-agent/hooks/post-flight-capture.example.md +19 -17
  6. package/plugins/codetrap-agent/hooks.json +2 -2
  7. package/{skills → plugins/codetrap-agent/skills}/codetrap-add/SKILL.md +10 -4
  8. package/plugins/codetrap-agent/skills/codetrap-capture/SKILL.md +14 -3
  9. package/plugins/codetrap-agent/skills/codetrap-capture-external/SKILL.md +52 -9
  10. package/plugins/codetrap-agent/skills/codetrap-check/SKILL.md +74 -6
  11. package/{skills → plugins/codetrap-agent/skills}/codetrap-search/SKILL.md +6 -5
  12. package/plugins/codetrap-agent/templates/AGENTS.codetrap-maintainer.md +15 -0
  13. package/plugins/codetrap-agent/templates/AGENTS.codetrap.md +16 -5
  14. package/scripts/release-preflight.ts +15 -0
  15. package/scripts/search-policy-sweep.ts +131 -0
  16. package/src/commands/workflow.ts +172 -68
  17. package/src/db/embedding-queries.ts +230 -48
  18. package/src/db/queries.ts +0 -25
  19. package/src/db/repository.ts +32 -21
  20. package/src/db/schema.ts +80 -0
  21. package/src/index.ts +34 -4
  22. package/src/lib/codex-setup.ts +247 -0
  23. package/src/lib/command-requests.ts +112 -1
  24. package/src/lib/config.ts +57 -7
  25. package/src/lib/constants.ts +1 -1
  26. package/src/lib/doctor.ts +42 -12
  27. package/src/lib/embedder.ts +118 -3
  28. package/src/lib/embedding-health.ts +3 -1
  29. package/src/lib/embedding-job.ts +3 -0
  30. package/src/lib/embedding-management.ts +65 -0
  31. package/src/lib/embedding-runtime.ts +177 -0
  32. package/src/lib/output-json.ts +0 -2
  33. package/src/lib/scope-context.ts +12 -6
  34. package/src/lib/scope-migration.ts +2 -1
  35. package/src/lib/scope.ts +0 -2
  36. package/src/lib/search-eval.ts +38 -18
  37. package/src/lib/search-policy-sweep.ts +563 -0
  38. package/src/lib/search-policy.ts +0 -4
  39. package/src/lib/search-service.ts +14 -15
  40. package/src/lib/session-candidate-document.ts +175 -0
  41. package/src/lib/session-candidate-scope.ts +6 -0
  42. package/src/lib/session-capture.ts +298 -32
  43. package/src/lib/session-codec.ts +1 -8
  44. package/src/lib/session-operations.ts +83 -60
  45. package/src/lib/session-review.ts +327 -0
  46. package/src/lib/session-store.ts +87 -73
  47. package/src/lib/store.ts +74 -10
  48. package/src/lib/string-list.ts +3 -0
  49. package/src/lib/text-lines.ts +7 -0
  50. package/src/lib/trap-search-document.ts +2 -1
  51. package/src/lib/value-types.ts +3 -0
  52. package/src/web/client-review.ts +171 -0
  53. package/src/web/client-script.ts +426 -51
  54. package/src/web/client-shell.ts +414 -0
  55. package/src/web/client-text.ts +112 -0
  56. package/src/web/project-registry.ts +3 -5
  57. package/src/web/server.ts +117 -103
  58. package/src/web/static.ts +364 -19
  59. package/skills/codetrap-capture-external/SKILL.md +0 -62
  60. package/skills/codetrap-check/SKILL.md +0 -69
  61. package/src/lib/embedding-index.ts +0 -53
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codetrap",
3
- "version": "0.1.7",
3
+ "version": "0.1.9",
4
4
  "description": "Capture and retrieve coding pitfalls so AI doesn't repeat mistakes",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -33,7 +33,6 @@
33
33
  "src/web",
34
34
  "src/index.ts",
35
35
  "src/mcp-server.ts",
36
- "skills",
37
36
  "plugins",
38
37
  ".agents/plugins/marketplace.json",
39
38
  "scripts",
@@ -52,6 +51,7 @@
52
51
  "release:preflight": "bun run scripts/release-preflight.ts",
53
52
  "check:release-version": "bun run scripts/check-release-version.ts",
54
53
  "eval:dogfood": "bun run scripts/dogfood-eval.ts",
54
+ "eval:search-policy": "bun run scripts/search-policy-sweep.ts",
55
55
  "build": "bun build ./src/index.ts --compile --outfile dist/codetrap && bun build ./src/mcp-server.ts --compile --outfile dist/codetrap-serve",
56
56
  "build:cli": "bun build ./src/index.ts --compile --outfile dist/codetrap",
57
57
  "build:serve": "bun build ./src/mcp-server.ts --compile --outfile dist/codetrap-serve"
@@ -63,6 +63,7 @@
63
63
  "@modelcontextprotocol/sdk": "^1.0.0"
64
64
  },
65
65
  "devDependencies": {
66
- "@types/bun": "latest"
66
+ "@types/bun": "latest",
67
+ "playwright-core": "^1.56.1"
67
68
  }
68
69
  }
package/plugins/codetrap-agent/.codex-plugin/plugin.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codetrap-agent",
3
- "version": "0.1.2",
3
+ "version": "0.1.9",
4
4
  "description": "CLI-first codetrap integration bundle for coding agents.",
5
5
  "author": {
6
6
  "name": "codetrap maintainers",
@@ -12,12 +12,11 @@
12
12
  "license": "MIT",
13
13
  "keywords": ["agent", "memory", "cli", "mcp", "pitfalls"],
14
14
  "skills": "./skills/",
15
- "hooks": "./hooks.json",
16
15
  "mcpServers": "./.mcp.json",
17
16
  "interface": {
18
17
  "displayName": "codetrap Agent",
19
18
  "shortDescription": "Check local pitfall memory before code changes.",
20
- "longDescription": "Installs CLI-first guidance, optional MCP config, and example hooks so coding agents can search codetrap before risky edits, propose new trap captures after failures, and save useful lessons from external references.",
19
+ "longDescription": "Installs CLI-first guidance and optional MCP config so coding agents can search codetrap before risky edits, propose new trap captures after failures, and save useful lessons from external references. Hook files are packaged as examples, not auto-installed.",
21
20
  "developerName": "codetrap maintainers",
22
21
  "category": "Productivity",
23
22
  "capabilities": ["Tools", "Memory", "Code"],
package/plugins/codetrap-agent/hooks/post-flight-capture.example.md CHANGED
@@ -1,25 +1,27 @@
1
1
  # Post-flight Codetrap Capture
2
2
 
3
- Use this template after a task reveals a reusable pitfall. Do not write the trap automatically; ask the user to confirm first.
3
+ Use this template after a task reveals a reusable pitfall. Do not write the trap automatically; put it in the session candidate inbox first.
4
4
 
5
- ```json
6
- {
7
- "title": "Short pitfall title",
8
- "category": "bug",
9
- "scope": "project",
10
- "context": "When this situation appears...",
11
- "mistake": "The agent tends to...",
12
- "fix": "Do this instead...",
13
- "tags": ["area", "tool"],
14
- "severity": "warning",
15
- "path_globs": ["src/example/**"],
16
- "module": "example",
17
- "owner": "platform"
18
- }
5
+ ```markdown
6
+ Title: Short pitfall title
7
+ Category: bug
8
+ Scope: project
9
+ Context: When this situation appears...
10
+ Mistake: The agent tends to...
11
+ Fix: Do this instead...
12
+ Tags: area, tool
13
+ Severity: warning
14
+ Path globs: src/example/**
15
+ Module: example
16
+ Owner: platform
19
17
  ```
20
18
 
21
- After confirmation:
19
+ Capture the candidate:
22
20
 
23
21
  ```bash
24
- codetrap add --json '<json above>' --output-json
22
+ codetrap session capture --trap-markdown-file candidate.md --kind review --json
25
23
  ```
24
+
25
+ Hook-based clients can set `CODETRAP_CANDIDATE_FILE` to a Markdown file with the same fields and run the packaged `post_task` command. The hook must still leave the candidate in the inbox; it must not accept or write a confirmed trap automatically.
26
+
27
+ Then review it with `codetrap session candidate <candidate-id> --session <session-id> --json` and accept, edit, reject, or supersede it explicitly.
package/plugins/codetrap-agent/hooks.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "command": "codetrap search \"$CODETRAP_QUERY\" --mode hybrid --json"
6
6
  },
7
7
  "post_task": {
8
- "description": "After repeated failures or user corrections, propose a structured trap; write only after user confirmation.",
9
- "command": "codetrap add --json '{...}' --output-json"
8
+ "description": "After repeated failures or user corrections, capture a structured trap draft in the session candidate inbox; do not write a confirmed trap automatically.",
9
+ "command": "codetrap session capture --trap-markdown-file \"$CODETRAP_CANDIDATE_FILE\" --kind review --json"
10
10
  }
11
11
  }
package/{skills → plugins/codetrap-agent/skills}/codetrap-add/SKILL.md CHANGED
@@ -1,10 +1,16 @@
1
1
  ---
2
2
  name: codetrap-add
3
- description: Record a coding pitfall as a structured codetrap entry. Use when the user wants to save a lesson learned, recurring AI mistake, project convention, or runs /codetrap-add.
3
+ description: Record a confirmed coding pitfall as a structured codetrap entry after explicit user approval. For agent-discovered post-flight lessons, prefer codetrap-capture and the session candidate inbox.
4
4
  ---
5
5
 
6
6
  You are helping the user record a "coding pitfall" (a mistake pattern that AI coding assistants tend to make, and the correct approach). These pitfalls are stored in a local database and will be used to warn AI in future sessions.
7
7
 
8
+ This skill writes confirmed memory. Do not use it for autonomous post-flight agent discoveries, repeated failures, or review feedback unless the user explicitly asks to save the trap as confirmed memory. For agent-drafted lessons, prefer:
9
+
10
+ ```bash
11
+ codetrap session capture --trap-markdown - --kind review --json
12
+ ```
13
+
8
14
  ## Step 1: Gather information
9
15
 
10
16
  Ask the user to describe what went wrong. Guide them to provide:
@@ -40,9 +46,9 @@ Pick the best-fitting category:
40
46
  - `bug` — Common logic errors, edge cases
41
47
  - `other` — Everything else
42
48
 
43
- ## Step 4: Structure and save
49
+ ## Step 4: Structure and confirm
44
50
 
45
- Convert the user's description into this JSON structure and call the CLI:
51
+ Convert the user's description into this JSON structure, show the draft to the user, and ask for explicit confirmation before writing it as confirmed memory:
46
52
 
47
53
  ```bash
48
54
  codetrap add --json '{
@@ -62,7 +68,7 @@ codetrap add --json '{
62
68
  }' --output-json
63
69
  ```
64
70
 
65
- If the CLI is not available, use the MCP tool `add_trap` instead.
71
+ Only after the user confirms the draft should you call the CLI. If the CLI is not available and the user explicitly confirmed the save, use the MCP tool `add_trap` instead.
66
72
 
67
73
  ## Step 5: Confirm
68
74
 
package/plugins/codetrap-agent/skills/codetrap-capture/SKILL.md CHANGED
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: codetrap-capture
3
- description: Propose a new codetrap after repeated failures, user corrections, or review feedback.
3
+ description: Propose a new codetrap candidate after repeated failures, user corrections, or review feedback without writing confirmed memory automatically.
4
4
  ---
5
5
 
6
6
  Use this after a task exposes a recurring mistake pattern. Draft a candidate trap with:
@@ -12,8 +12,19 @@ Use this after a task exposes a recurring mistake pattern. Draft a candidate tra
12
12
  - tags
13
13
  - optional `path_globs`, `module`, and `owner`
14
14
 
15
- Ask the user to confirm before writing. After confirmation, run:
15
+ Do not write the confirmed trap directly. Put the draft into the session candidate inbox:
16
16
 
17
17
  ```bash
18
- codetrap add --json '{...}' --output-json
18
+ cat <<'EOF' | codetrap session capture --trap-markdown - --kind review --json
19
+ Title: <durable pitfall>
20
+ Context: <when it triggers>
21
+ Mistake: <what the agent did wrong>
22
+ Fix: <what to do instead>
23
+ Severity: warning
24
+ Tags: <area>,<tool>
25
+ EOF
19
26
  ```
27
+
28
+ Use `--trap-json` only when you already have a structured object. Prefer Markdown for agent-drafted lessons because it avoids shell-escaping long text.
29
+
30
+ If no session is active, `session capture` creates and closes a post-flight session automatically. Tell the user the returned candidate id and session id, then ask whether they want to accept, edit, reject, or supersede the candidate. Pending candidates are also visible through `codetrap session status`, `codetrap session list`, `codetrap doctor`, and `codetrap web`.
package/plugins/codetrap-agent/skills/codetrap-capture-external/SKILL.md CHANGED
@@ -5,15 +5,58 @@ description: Extract durable coding pitfalls from an external article, blog post
5
5
 
6
6
  Use this when the user shares an external source and wants to save useful lessons for future AI coding work.
7
7
 
8
- The agent should read the source. The codetrap CLI should not fetch URLs or crawl the web; it only stores confirmed lessons and evidence.
8
+ The external source is read by the agent. Do not ask codetrap CLI to fetch URLs or crawl the web. codetrap stays a local memory store.
9
9
 
10
- Workflow:
10
+ ## Step 1: Read The Source
11
11
 
12
- 1. Read the URL, article text, issue, paper, or reference.
13
- 2. Extract every candidate trap that has a clear trigger, mistake, and fix. Do not force a fixed count.
14
- 3. Filter out broad summaries, one-off facts, vague advice, and source details that will not change future coding behavior.
15
- 4. Rank the recommended candidates and ask the user which ones to save.
16
- 5. After confirmation, run `codetrap add --json '<trap-json>' --output-json`.
17
- 6. Attach the source with `codetrap add_trap_evidence <id> --scope <project|global> --source_type article --source_ref "<url-or-source-id>" --note "External lesson captured from <short source title>." --output-json`.
12
+ Open or read the provided URL, article text, issue, paper, or reference. Identify lessons that could change future implementation behavior.
18
13
 
19
- Default to `global` for generally reusable engineering lessons. Use `project` only when the source lesson is specific to the current repository or stack.
14
+ Do not summarize the whole source into codetrap. Extract only durable pitfalls with a clear trigger, mistake, and fix.
15
+
16
+ ## Step 2: Extract Candidate Traps
17
+
18
+ Create as many candidate traps as pass the quality bar. Do not force a fixed count.
19
+
20
+ Each candidate must include:
21
+
22
+ - `context`: when this lesson applies
23
+ - `mistake`: what an AI coding agent might do wrong
24
+ - `fix`: what it should do instead
25
+ - `severity`: `warning`, `error`, or `critical`
26
+ - `tags`: useful retrieval terms
27
+ - optional `path_globs`, `module`, and `owner` when the lesson is project-specific
28
+
29
+ Reject or omit candidates that are broad summaries, one-off facts, vague advice, marketing claims, or source details that would not change future coding behavior.
30
+
31
+ ## Step 3: Rank And Ask
32
+
33
+ Present the recommended candidates in priority order. Include a short reason for each recommendation.
34
+
35
+ Ask the user which candidates to save. Do not write any trap until the user confirms.
36
+
37
+ If a candidate is useful but needs a narrower scope, ask for or propose edits before saving.
38
+
39
+ ## Step 4: Save Confirmed Lessons
40
+
41
+ For each confirmed candidate, call:
42
+
43
+ ```bash
44
+ codetrap add --json '<trap-json>' --output-json
45
+ ```
46
+
47
+ Then attach the external source as evidence:
48
+
49
+ ```bash
50
+ codetrap add_trap_evidence <id> \
51
+ --scope <project|global> \
52
+ --source_type article \
53
+ --source_ref "<url-or-source-id>" \
54
+ --note "External lesson captured from <short source title>." \
55
+ --output-json
56
+ ```
57
+
58
+ Use `global` for generally reusable lessons across projects. Use `project` only when the lesson is specific to the current repository or technology stack.
59
+
60
+ ## Step 5: Confirm
61
+
62
+ Tell the user which trap IDs were saved, their scopes, and the source reference attached as evidence.
package/plugins/codetrap-agent/skills/codetrap-check/SKILL.md CHANGED
@@ -1,16 +1,84 @@
1
1
  ---
2
2
  name: codetrap-check
3
- description: Check codetrap from the current project cwd before non-trivial code edits.
3
+ description: Check the codetrap pitfall database before code changes and apply relevant lessons. Use before non-trivial coding work, when touching risky areas, or when the user runs /codetrap-check.
4
4
  ---
5
5
 
6
- Before risky code changes, run:
6
+ Before generating any non-trivial code, pause and check the codetrap database for relevant pitfalls. This is a "pre-flight check" that prevents you from repeating known mistakes.
7
+
8
+ ## When to trigger
9
+
10
+ Run this check when:
11
+ 1. The user asks you to write or modify code
12
+ 2. The task touches an area with recorded pitfalls (API, auth, database, security, etc.)
13
+ 3. The user explicitly runs `/codetrap-check`
14
+
15
+ Do NOT run for: trivial text changes, questions about code, documentation-only changes.
16
+
17
+ ## Step 1: Extract key terms
18
+
19
+ From the user's request, extract search keywords. Focus on:
20
+ - Technology names: "axios", "prisma", "jwt", "react"
21
+ - Patterns: "middleware", "endpoint", "migration", "hook"
22
+ - Domains: "authentication", "database", "routing", "state"
23
+
24
+ ## Step 2: Search the database
25
+
26
+ Default to the CLI from the current project cwd:
27
+
28
+ ```bash
29
+ codetrap search "<keywords>" --mode hybrid --json
30
+ ```
31
+
32
+ When the task targets a known file or subsystem, include applicability hints:
33
+
34
+ ```bash
35
+ codetrap search "<keywords>" --path src/db/repository.ts --module db --json
36
+ ```
37
+
38
+ If the query comes from another tool, stdin is also supported:
39
+
40
+ ```bash
41
+ echo "<keywords>" | codetrap search --mode hybrid --json
42
+ ```
43
+
44
+ MCP `search_traps` is optional. Use it only when it is already available and project-scoped correctly; pass `cwd` when the client supports it.
45
+
46
+ Review the top 3 returned action cards before deciding that no trap applies. Do not stop after only the first result; relevant traps may rank second or third. If fewer than 3 cards are returned, review all returned cards.
47
+
48
+ Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. Severity alone is not enough to apply a trap. Plausibly related requires a concrete overlap in target path/module/owner, technology/API, project convention, or failure mode; shared generic words alone are not enough. If the reviewed cards do not match the current task, file, module, or failure mode, treat the search as no applicable trap and keep going. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
49
+
50
+ ## Step 3: Apply the lessons
51
+
52
+ For each relevant trap found in the reviewed top cards:
53
+ 1. Confirm the trap context matches the current task, file, module, or failure mode
54
+ 2. For matching cards, run `next_action.command` from CLI JSON before editing when the card is highly relevant or has `critical`/`error` severity; with MCP, call `get_trap` with `next_action.details_args.id` and `next_action.details_args.scope`
55
+ 3. Adjust your code generation to follow the correct approach
56
+ 4. If a trap matches exactly what you were about to do, explicitly tell the user: "I was about to [avoid], but the codetrap database says [do_instead]. I'll do it the right way."
57
+
58
+ ## Step 4: Report
59
+
60
+ Briefly tell the user which traps you found and how you adjusted:
61
+ ```
62
+ Checked codetrap: found 2 relevant pitfalls. Avoiding [X] and using [Y] instead.
63
+ ```
64
+
65
+ If this is an explicit `/codetrap-check` run or first-run setup and no traps match, say: "Checked codetrap: no applicable traps found; continuing." For routine automatic checks, keep the report short.
66
+
67
+ ## Step 5: Record new pitfalls
68
+
69
+ If while writing code you discover a NEW pitfall that isn't in the database, draft a post-flight trap candidate and put it in the session inbox:
7
70
 
8
71
  ```bash
9
- codetrap search "<task keywords>" --mode hybrid --json
72
+ cat <<'EOF' | codetrap session capture --trap-markdown - --kind review --json
73
+ Title: <durable pitfall>
74
+ Context: <when it triggers>
75
+ Mistake: <what the agent did wrong>
76
+ Fix: <what to do instead>
77
+ EOF
10
78
  ```
11
79
 
12
- Review the top 3 action cards. If a card is highly relevant, or has `critical` or `error` severity and is plausibly related, run its `next_action.command` before editing.
80
+ Use `--trap-json` only when you already have a structured object.
13
81
 
14
- Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. If a trap seems irrelevant, ignore it. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
82
+ Do not accept it automatically. Tell the user the returned candidate id and session id, then ask whether they want to accept, edit, reject, or supersede it.
15
83
 
16
- Use MCP only as an optional adapter. When calling MCP tools, pass `cwd` when the client supports it.
84
+ If there may be older unreviewed candidates, use `codetrap session status`, `codetrap session list`, `codetrap doctor`, or `codetrap web` to surface the pending review queue.
package/{skills → plugins/codetrap-agent/skills}/codetrap-search/SKILL.md CHANGED
@@ -48,18 +48,19 @@ search_traps(query="<keywords>", scope=<optional>, category=<optional>, path=<op
48
48
 
49
49
  Review the top 3 action cards before deciding that no trap applies. Do not rely only on the first result; a relevant trap can rank second or third. If fewer than 3 cards are returned, review all returned cards.
50
50
 
51
- Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. If a trap seems irrelevant, ignore it. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
51
+ Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. Severity alone is not enough to apply a trap. Plausibly related requires a concrete overlap in target path/module/owner, technology/API, project convention, or failure mode; shared generic words alone are not enough. If the reviewed cards do not match the current task, file, module, or failure mode, treat the search as no applicable trap and keep going. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
52
52
 
53
53
  ## How to present results
54
54
 
55
55
  1. Show the most relevant reviewed traps first (project scope traps before global)
56
56
  2. Summarize each reviewed card's title, severity, `avoid`, and `do_instead`
57
- 3. If any reviewed card is highly relevant, has matching context, or has `critical`/`error` severity and is plausibly related, and you are about to edit code, run the CLI `next_action.command`; with MCP, call `get_trap` with the card's `id` and `scope` before proceeding
57
+ 3. For matching cards, run the CLI `next_action.command` before editing when the card is highly relevant or has `critical`/`error` severity; with MCP, call `get_trap` with the card's `id` and `scope` before proceeding
58
58
  4. If no results, tell the user (this is a new area with no recorded pitfalls yet)
59
59
 
60
60
  ## Example
61
61
 
62
- User: "I need to add a new API endpoint"
63
- → Search: `codetrap search "API endpoint" --mode hybrid --json`
62
+ User: "I need to add a new API endpoint that calls an external service"
63
+ → Search: `codetrap search "API endpoint external service" --mode hybrid --json`
64
64
  → Results show: "Don't use axios, use fetchWrapper" (project, error)
65
- Tell user: "I see a project convention: always use fetchWrapper instead of axios. I'll follow that."
65
+ Because the task includes outbound HTTP, tell user: "I see a matching project convention: always use fetchWrapper instead of axios. I'll follow that."
66
+ → If the endpoint does not make outbound HTTP calls, ignore this card even if severity is error.
package/plugins/codetrap-agent/templates/AGENTS.codetrap-maintainer.md ADDED
@@ -0,0 +1,15 @@
1
+ ## Codetrap Dogfood Eval
2
+
3
+ Use this add-on only for codetrap maintainers working on codetrap itself. For ordinary external projects, use `AGENTS.codetrap.md` without this dogfood section.
4
+
5
+ After each pre-edit codetrap search, record whether the search was:
6
+ - useful_hit
7
+ - miss
8
+ - noisy_hit
9
+ - no_relevant_trap
10
+
11
+ Record the observation in `dogfood-log.md` with the task, query, mode, top results, judgment, whether the result changed the next action, and the promotion lane.
12
+
13
+ When a real query should reliably find an existing trap, save it as a live eval case with query, mode, scope, and gold target.
14
+
15
+ Do not promote every observation. Only promote representative cases that protect search quality.
package/plugins/codetrap-agent/templates/AGENTS.codetrap.md CHANGED
@@ -6,24 +6,35 @@ Before non-trivial code edits, check local pitfall memory from the current proje
6
6
  codetrap search "<keywords>" --mode hybrid --json
7
7
  ```
8
8
 
9
- Review the top 3 action cards before deciding no trap applies. If a card is highly relevant, or has `critical` or `error` severity and is plausibly related, inspect it before editing:
9
+ Review the top 3 action cards, or all returned cards if fewer than 3, before deciding no trap applies. Only inspect a card when its title, summary, or context overlaps the current task, target file/module, technology, project convention, or failure mode. For matching cards, inspect before editing when the card is highly relevant or has `critical` or `error` severity:
10
10
 
11
11
  ```bash
12
12
  codetrap show <id> --scope <project|global> --json
13
13
  ```
14
14
 
15
- Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. If a trap seems irrelevant, ignore it.
15
+ Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. Severity alone is not enough to apply a trap. Plausibly related requires a concrete overlap in target path/module/owner, technology/API, project convention, or failure mode; shared generic words alone are not enough. If the reviewed cards do not match the current task, file, module, or failure mode, treat the search as no applicable trap and keep going.
16
16
 
17
17
  When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
18
18
 
19
19
  When editing a specific area, pass applicability hints:
20
20
 
21
21
  ```bash
22
- codetrap search "<keywords>" --path src/db/repository.ts --module db --json
22
+ codetrap search "<keywords>" --path path/to/file --module module-name --json
23
23
  ```
24
24
 
25
- After user corrections, repeated test failures, or review feedback, propose a new trap. Only write it after user confirmation:
25
+ After user corrections, repeated test failures, or review feedback, have the agent draft a structured candidate and put it in the session inbox. Do not write directly to the confirmed trap database:
26
26
 
27
27
  ```bash
28
- codetrap add --json '{...}' --output-json
28
+ cat <<'EOF' | codetrap session capture --trap-markdown - --kind review --json
29
+ Title: <durable pitfall>
30
+ Context: <when it triggers>
31
+ Mistake: <what the agent did wrong>
32
+ Fix: <what to do instead>
33
+ EOF
29
34
  ```
35
+
36
+ Use `--trap-json` only when the caller already has a structured object.
37
+
38
+ Review the candidate with `codetrap session candidate <candidate-id> --session <session-id> --json`, then accept, edit, reject, or supersede it explicitly.
39
+
40
+ Use `codetrap session status`, `codetrap session list`, `codetrap doctor`, or `codetrap web` to find pending candidates that still need review.
package/scripts/release-preflight.ts CHANGED
@@ -1,5 +1,8 @@
1
1
  #!/usr/bin/env bun
2
2
 
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+
3
6
  const tag = process.argv[2] ?? process.env.RELEASE_TAG;
4
7
  const packageJson = await Bun.file("package.json").json() as { name?: string; version?: string };
5
8
  const published = await packageVersionExists(packageJson.name, packageJson.version);
@@ -10,6 +13,18 @@ const commands: { name: string; cmd: string[]; optional?: boolean }[] = [
10
13
  { name: "build", cmd: ["bun", "run", "build"] },
11
14
  { name: "build release assets", cmd: ["bun", "run", "build:release"] },
12
15
  { name: "smoke test release binary", cmd: [hostBinaryPath(), "--help"] },
16
+ {
17
+ name: "smoke test release binary Codex setup",
18
+ cmd: [
19
+ hostBinaryPath(),
20
+ "setup",
21
+ "codex",
22
+ "--dry-run",
23
+ "--json",
24
+ "--codex-home",
25
+ join(tmpdir(), "codetrap-release-preflight-codex-home"),
26
+ ],
27
+ },
13
28
  { name: "npm pack dry-run", cmd: ["npm", "pack", "--dry-run"] },
14
29
  ...(!published ? [{ name: "npm publish dry-run", cmd: ["npm", "publish", "--dry-run", "--access", "public"] }] : []),
15
30
  ];
package/scripts/search-policy-sweep.ts ADDED
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env bun
2
+
3
+ import {
4
+ formatPolicySweepReport,
5
+ readLiveEvalCases,
6
+ runFixturePolicySweep,
7
+ runLivePolicySweep,
8
+ type GoldTarget,
9
+ type LiveEvalCase,
10
+ type PolicySweepReport,
11
+ type SweepCandidateReport,
12
+ } from "../src/lib/search-policy-sweep";
13
+ import { SEARCH_MODES, SCOPES, type Scope, type SearchMode } from "../src/lib/constants";
14
+
15
+ async function main(): Promise<void> {
16
+ const args = parseArgs(process.argv.slice(2));
17
+ const command = args.positionals[0] ?? "fixture";
18
+
19
+ try {
20
+ if (command === "fixture") {
21
+ const report = await runFixturePolicySweep({ fixturePath: args.opts.fixture });
22
+ print(report, args.opts.json === "true", args.opts["include-cases"] === "true");
23
+ return;
24
+ }
25
+
26
+ if (command === "live") {
27
+ const cases = liveCasesFromArgs(args);
28
+ const report = await runLivePolicySweep({
29
+ cwd: args.opts.cwd ?? process.cwd(),
30
+ cases,
31
+ defaultScope: scopeField(args.opts.scope, "scope") ?? "project",
32
+ });
33
+ print(report, args.opts.json === "true", args.opts["include-cases"] === "true");
34
+ return;
35
+ }
36
+
37
+ throw new Error(usage());
38
+ } catch (error) {
39
+ console.error(error instanceof Error ? error.message : String(error));
40
+ process.exit(1);
41
+ }
42
+ }
43
+
44
+ function print(report: PolicySweepReport, json: boolean, includeCases: boolean): void {
45
+ console.log(json ? JSON.stringify(includeCases ? report : compactReport(report), null, 2) : formatPolicySweepReport(report));
46
+ }
47
+
48
+ function compactReport(report: PolicySweepReport): Omit<PolicySweepReport, "baseline" | "best" | "candidates"> & {
49
+ baseline: Omit<SweepCandidateReport, "cases">;
50
+ best: Omit<SweepCandidateReport, "cases">;
51
+ candidates: Omit<SweepCandidateReport, "cases">[];
52
+ } {
53
+ return {
54
+ ...report,
55
+ baseline: compactCandidate(report.baseline),
56
+ best: compactCandidate(report.best),
57
+ candidates: report.candidates.map(compactCandidate),
58
+ };
59
+ }
60
+
61
+ function compactCandidate(candidate: SweepCandidateReport): Omit<SweepCandidateReport, "cases"> {
62
+ const { cases: _cases, ...rest } = candidate;
63
+ return rest;
64
+ }
65
+
66
+ function liveCasesFromArgs(args: { opts: Record<string, string>; positionals: string[] }): LiveEvalCase[] {
67
+ if (args.opts.queries) return readLiveEvalCases(args.opts.queries);
68
+ if (!args.opts.query) throw new Error(usage());
69
+ const gold = goldFromArgs(args.opts);
70
+ return [{
71
+ query: args.opts.query,
72
+ mode: modeField(args.opts.mode, "mode") ?? "hybrid",
73
+ scope: scopeField(args.opts.scope, "scope") ?? "project",
74
+ gold: gold.length > 0 ? gold : undefined,
75
+ }];
76
+ }
77
+
78
+ function goldFromArgs(opts: Record<string, string>): GoldTarget[] {
79
+ if (!opts["gold-id"] && !opts["gold-title"]) return [];
80
+ const id = opts["gold-id"] ? Number(opts["gold-id"]) : undefined;
81
+ if (id !== undefined && (!Number.isInteger(id) || id <= 0)) throw new Error("--gold-id must be a positive integer.");
82
+ const title = opts["gold-title"]?.trim() || undefined;
83
+ return [{
84
+ id,
85
+ title,
86
+ scope: scopeField(opts.scope, "scope"),
87
+ }];
88
+ }
89
+
90
+ function modeField(value: string | undefined, key: string): SearchMode | undefined {
91
+ if (value === undefined) return undefined;
92
+ if (!(SEARCH_MODES as readonly string[]).includes(value)) {
93
+ throw new Error(`--${key} must be one of: ${SEARCH_MODES.join(", ")}`);
94
+ }
95
+ return value as SearchMode;
96
+ }
97
+
98
+ function scopeField(value: string | undefined, key: string): Scope | undefined {
99
+ if (value === undefined) return undefined;
100
+ if (!(SCOPES as readonly string[]).includes(value)) {
101
+ throw new Error(`--${key} must be one of: ${SCOPES.join(", ")}`);
102
+ }
103
+ return value as Scope;
104
+ }
105
+
106
+ function parseArgs(args: string[]): { opts: Record<string, string>; positionals: string[] } {
107
+ const opts: Record<string, string> = {};
108
+ const positionals: string[] = [];
109
+ for (let i = 0; i < args.length; i++) {
110
+ const arg = args[i];
111
+ if (arg.startsWith("--")) {
112
+ const key = arg.slice(2);
113
+ opts[key] = args[i + 1] && !args[i + 1].startsWith("--") ? args[++i] : "true";
114
+ } else {
115
+ positionals.push(arg);
116
+ }
117
+ }
118
+ return { opts, positionals };
119
+ }
120
+
121
+ function usage(): string {
122
+ return [
123
+ "Usage:",
124
+ " bun run eval:search-policy -- fixture [--fixture path] [--json]",
125
+ " bun run eval:search-policy -- live --cwd /path/to/project --queries live-queries.json [--scope project|global] [--json]",
126
+ " bun run eval:search-policy -- live --cwd /path/to/project --query '<query>' [--gold-id n] [--gold-title '<title>'] [--scope project|global] [--mode fts|semantic|hybrid] [--json]",
127
+ " Add --include-cases with --json to include full per-case output.",
128
+ ].join("\n");
129
+ }
130
+
131
+ await main();