codetrap 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +151 -52
- package/docs/installation.md +113 -29
- package/package.json +4 -3
- package/plugins/codetrap-agent/.codex-plugin/plugin.json +1 -2
- package/plugins/codetrap-agent/hooks/post-flight-capture.example.md +19 -17
- package/plugins/codetrap-agent/hooks.json +2 -2
- package/{skills → plugins/codetrap-agent/skills}/codetrap-add/SKILL.md +10 -4
- package/plugins/codetrap-agent/skills/codetrap-capture/SKILL.md +14 -3
- package/plugins/codetrap-agent/skills/codetrap-capture-external/SKILL.md +52 -9
- package/plugins/codetrap-agent/skills/codetrap-check/SKILL.md +74 -6
- package/{skills → plugins/codetrap-agent/skills}/codetrap-search/SKILL.md +6 -5
- package/plugins/codetrap-agent/templates/AGENTS.codetrap.md +31 -5
- package/scripts/search-policy-sweep.ts +131 -0
- package/src/commands/workflow.ts +144 -68
- package/src/db/embedding-queries.ts +230 -48
- package/src/db/queries.ts +0 -25
- package/src/db/repository.ts +32 -21
- package/src/db/schema.ts +80 -0
- package/src/index.ts +28 -3
- package/src/lib/command-requests.ts +112 -1
- package/src/lib/config.ts +57 -7
- package/src/lib/constants.ts +1 -1
- package/src/lib/doctor.ts +42 -12
- package/src/lib/embedder.ts +118 -3
- package/src/lib/embedding-health.ts +3 -1
- package/src/lib/embedding-job.ts +3 -0
- package/src/lib/embedding-management.ts +65 -0
- package/src/lib/embedding-runtime.ts +177 -0
- package/src/lib/output-json.ts +0 -2
- package/src/lib/scope-context.ts +12 -6
- package/src/lib/scope-migration.ts +2 -1
- package/src/lib/scope.ts +0 -2
- package/src/lib/search-eval.ts +38 -18
- package/src/lib/search-policy-sweep.ts +563 -0
- package/src/lib/search-policy.ts +0 -4
- package/src/lib/search-service.ts +14 -15
- package/src/lib/session-candidate-document.ts +175 -0
- package/src/lib/session-candidate-scope.ts +6 -0
- package/src/lib/session-capture.ts +298 -32
- package/src/lib/session-codec.ts +1 -8
- package/src/lib/session-operations.ts +83 -60
- package/src/lib/session-review.ts +327 -0
- package/src/lib/session-store.ts +87 -73
- package/src/lib/store.ts +74 -10
- package/src/lib/string-list.ts +3 -0
- package/src/lib/text-lines.ts +7 -0
- package/src/lib/trap-search-document.ts +2 -1
- package/src/lib/value-types.ts +3 -0
- package/src/web/client-review.ts +171 -0
- package/src/web/client-script.ts +426 -51
- package/src/web/client-shell.ts +414 -0
- package/src/web/client-text.ts +112 -0
- package/src/web/project-registry.ts +3 -5
- package/src/web/server.ts +117 -103
- package/src/web/static.ts +364 -19
- package/skills/codetrap-capture-external/SKILL.md +0 -62
- package/skills/codetrap-check/SKILL.md +0 -69
- package/src/lib/embedding-index.ts +0 -53
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codetrap",
|
|
3
|
-
"version": "0.1.7",
|
|
3
|
+
"version": "0.1.8",
|
|
4
4
|
"description": "Capture and retrieve coding pitfalls so AI doesn't repeat mistakes",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -33,7 +33,6 @@
|
|
|
33
33
|
"src/web",
|
|
34
34
|
"src/index.ts",
|
|
35
35
|
"src/mcp-server.ts",
|
|
36
|
-
"skills",
|
|
37
36
|
"plugins",
|
|
38
37
|
".agents/plugins/marketplace.json",
|
|
39
38
|
"scripts",
|
|
@@ -52,6 +51,7 @@
|
|
|
52
51
|
"release:preflight": "bun run scripts/release-preflight.ts",
|
|
53
52
|
"check:release-version": "bun run scripts/check-release-version.ts",
|
|
54
53
|
"eval:dogfood": "bun run scripts/dogfood-eval.ts",
|
|
54
|
+
"eval:search-policy": "bun run scripts/search-policy-sweep.ts",
|
|
55
55
|
"build": "bun build ./src/index.ts --compile --outfile dist/codetrap && bun build ./src/mcp-server.ts --compile --outfile dist/codetrap-serve",
|
|
56
56
|
"build:cli": "bun build ./src/index.ts --compile --outfile dist/codetrap",
|
|
57
57
|
"build:serve": "bun build ./src/mcp-server.ts --compile --outfile dist/codetrap-serve"
|
|
@@ -63,6 +63,7 @@
|
|
|
63
63
|
"@modelcontextprotocol/sdk": "^1.0.0"
|
|
64
64
|
},
|
|
65
65
|
"devDependencies": {
|
|
66
|
-
"@types/bun": "latest"
|
|
66
|
+
"@types/bun": "latest",
|
|
67
|
+
"playwright-core": "^1.56.1"
|
|
67
68
|
}
|
|
68
69
|
}
|
|
@@ -12,12 +12,11 @@
|
|
|
12
12
|
"license": "MIT",
|
|
13
13
|
"keywords": ["agent", "memory", "cli", "mcp", "pitfalls"],
|
|
14
14
|
"skills": "./skills/",
|
|
15
|
-
"hooks": "./hooks.json",
|
|
16
15
|
"mcpServers": "./.mcp.json",
|
|
17
16
|
"interface": {
|
|
18
17
|
"displayName": "codetrap Agent",
|
|
19
18
|
"shortDescription": "Check local pitfall memory before code changes.",
|
|
20
|
-
"longDescription": "Installs CLI-first guidance
|
|
19
|
+
"longDescription": "Installs CLI-first guidance and optional MCP config so coding agents can search codetrap before risky edits, propose new trap captures after failures, and save useful lessons from external references. Hook files are packaged as examples, not auto-installed.",
|
|
21
20
|
"developerName": "codetrap maintainers",
|
|
22
21
|
"category": "Productivity",
|
|
23
22
|
"capabilities": ["Tools", "Memory", "Code"],
|
|
@@ -1,25 +1,27 @@
|
|
|
1
1
|
# Post-flight Codetrap Capture
|
|
2
2
|
|
|
3
|
-
Use this template after a task reveals a reusable pitfall. Do not write the trap automatically;
|
|
3
|
+
Use this template after a task reveals a reusable pitfall. Do not write the trap automatically; put it in the session candidate inbox first.
|
|
4
4
|
|
|
5
|
-
```
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
"owner": "platform"
|
|
18
|
-
}
|
|
5
|
+
```markdown
|
|
6
|
+
Title: Short pitfall title
|
|
7
|
+
Category: bug
|
|
8
|
+
Scope: project
|
|
9
|
+
Context: When this situation appears...
|
|
10
|
+
Mistake: The agent tends to...
|
|
11
|
+
Fix: Do this instead...
|
|
12
|
+
Tags: area, tool
|
|
13
|
+
Severity: warning
|
|
14
|
+
Path globs: src/example/**
|
|
15
|
+
Module: example
|
|
16
|
+
Owner: platform
|
|
19
17
|
```
|
|
20
18
|
|
|
21
|
-
|
|
19
|
+
Capture the candidate:
|
|
22
20
|
|
|
23
21
|
```bash
|
|
24
|
-
codetrap
|
|
22
|
+
codetrap session capture --trap-markdown-file candidate.md --kind review --json
|
|
25
23
|
```
|
|
24
|
+
|
|
25
|
+
Hook-based clients can set `CODETRAP_CANDIDATE_FILE` to a Markdown file with the same fields and run the packaged `post_task` command. The hook must still leave the candidate in the inbox; it must not accept or write a confirmed trap automatically.
|
|
26
|
+
|
|
27
|
+
Then review it with `codetrap session candidate <candidate-id> --session <session-id> --json` and accept, edit, reject, or supersede it explicitly.
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"command": "codetrap search \"$CODETRAP_QUERY\" --mode hybrid --json"
|
|
6
6
|
},
|
|
7
7
|
"post_task": {
|
|
8
|
-
"description": "After repeated failures or user corrections,
|
|
9
|
-
"command": "codetrap
|
|
8
|
+
"description": "After repeated failures or user corrections, capture a structured trap draft in the session candidate inbox; do not write a confirmed trap automatically.",
|
|
9
|
+
"command": "codetrap session capture --trap-markdown-file \"$CODETRAP_CANDIDATE_FILE\" --kind review --json"
|
|
10
10
|
}
|
|
11
11
|
}
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: codetrap-add
|
|
3
|
-
description: Record a coding pitfall as a structured codetrap entry
|
|
3
|
+
description: Record a confirmed coding pitfall as a structured codetrap entry after explicit user approval. For agent-discovered post-flight lessons, prefer codetrap-capture and the session candidate inbox.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
You are helping the user record a "coding pitfall" (a mistake pattern that AI coding assistants tend to make, and the correct approach). These pitfalls are stored in a local database and will be used to warn AI in future sessions.
|
|
7
7
|
|
|
8
|
+
This skill writes confirmed memory. Do not use it for autonomous post-flight agent discoveries, repeated failures, or review feedback unless the user explicitly asks to save the trap as confirmed memory. For agent-drafted lessons, prefer:
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
codetrap session capture --trap-markdown - --kind review --json
|
|
12
|
+
```
|
|
13
|
+
|
|
8
14
|
## Step 1: Gather information
|
|
9
15
|
|
|
10
16
|
Ask the user to describe what went wrong. Guide them to provide:
|
|
@@ -40,9 +46,9 @@ Pick the best-fitting category:
|
|
|
40
46
|
- `bug` — Common logic errors, edge cases
|
|
41
47
|
- `other` — Everything else
|
|
42
48
|
|
|
43
|
-
## Step 4: Structure and
|
|
49
|
+
## Step 4: Structure and confirm
|
|
44
50
|
|
|
45
|
-
Convert the user's description into this JSON structure
|
|
51
|
+
Convert the user's description into this JSON structure, show the draft to the user, and ask for explicit confirmation before writing it as confirmed memory:
|
|
46
52
|
|
|
47
53
|
```bash
|
|
48
54
|
codetrap add --json '{
|
|
@@ -62,7 +68,7 @@ codetrap add --json '{
|
|
|
62
68
|
}' --output-json
|
|
63
69
|
```
|
|
64
70
|
|
|
65
|
-
If the CLI is not available, use the MCP tool `add_trap` instead.
|
|
71
|
+
Only after the user confirms the draft should you call the CLI. If the CLI is not available and the user explicitly confirmed the save, use the MCP tool `add_trap` instead.
|
|
66
72
|
|
|
67
73
|
## Step 5: Confirm
|
|
68
74
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: codetrap-capture
|
|
3
|
-
description: Propose a new codetrap after repeated failures, user corrections, or review feedback.
|
|
3
|
+
description: Propose a new codetrap candidate after repeated failures, user corrections, or review feedback without writing confirmed memory automatically.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
Use this after a task exposes a recurring mistake pattern. Draft a candidate trap with:
|
|
@@ -12,8 +12,19 @@ Use this after a task exposes a recurring mistake pattern. Draft a candidate tra
|
|
|
12
12
|
- tags
|
|
13
13
|
- optional `path_globs`, `module`, and `owner`
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
Do not write the confirmed trap directly. Put the draft into the session candidate inbox:
|
|
16
16
|
|
|
17
17
|
```bash
|
|
18
|
-
codetrap
|
|
18
|
+
cat <<'EOF' | codetrap session capture --trap-markdown - --kind review --json
|
|
19
|
+
Title: <durable pitfall>
|
|
20
|
+
Context: <when it triggers>
|
|
21
|
+
Mistake: <what the agent did wrong>
|
|
22
|
+
Fix: <what to do instead>
|
|
23
|
+
Severity: warning
|
|
24
|
+
Tags: <area>,<tool>
|
|
25
|
+
EOF
|
|
19
26
|
```
|
|
27
|
+
|
|
28
|
+
Use `--trap-json` only when you already have a structured object. Prefer Markdown for agent-drafted lessons because it avoids shell-escaping long text.
|
|
29
|
+
|
|
30
|
+
If no session is active, `session capture` creates and closes a post-flight session automatically. Tell the user the returned candidate id and session id, then ask whether they want to accept, edit, reject, or supersede the candidate. Pending candidates are also visible through `codetrap session status`, `codetrap session list`, `codetrap doctor`, and `codetrap web`.
|
|
@@ -5,15 +5,58 @@ description: Extract durable coding pitfalls from an external article, blog post
|
|
|
5
5
|
|
|
6
6
|
Use this when the user shares an external source and wants to save useful lessons for future AI coding work.
|
|
7
7
|
|
|
8
|
-
The
|
|
8
|
+
The external source is read by the agent. Do not ask the codetrap CLI to fetch URLs or crawl the web; codetrap stays a local memory store.
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
## Step 1: Read The Source
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
2. Extract every candidate trap that has a clear trigger, mistake, and fix. Do not force a fixed count.
|
|
14
|
-
3. Filter out broad summaries, one-off facts, vague advice, and source details that will not change future coding behavior.
|
|
15
|
-
4. Rank the recommended candidates and ask the user which ones to save.
|
|
16
|
-
5. After confirmation, run `codetrap add --json '<trap-json>' --output-json`.
|
|
17
|
-
6. Attach the source with `codetrap add_trap_evidence <id> --scope <project|global> --source_type article --source_ref "<url-or-source-id>" --note "External lesson captured from <short source title>." --output-json`.
|
|
12
|
+
Open or read the provided URL, article text, issue, paper, or reference. Identify lessons that could change future implementation behavior.
|
|
18
13
|
|
|
19
|
-
|
|
14
|
+
Do not summarize the whole source into codetrap. Extract only durable pitfalls with a clear trigger, mistake, and fix.
|
|
15
|
+
|
|
16
|
+
## Step 2: Extract Candidate Traps
|
|
17
|
+
|
|
18
|
+
Create as many candidate traps as pass the quality bar. Do not force a fixed count.
|
|
19
|
+
|
|
20
|
+
Each candidate must include:
|
|
21
|
+
|
|
22
|
+
- `context`: when this lesson applies
|
|
23
|
+
- `mistake`: what an AI coding agent might do wrong
|
|
24
|
+
- `fix`: what it should do instead
|
|
25
|
+
- `severity`: `warning`, `error`, or `critical`
|
|
26
|
+
- `tags`: useful retrieval terms
|
|
27
|
+
- optional `path_globs`, `module`, and `owner` when the lesson is project-specific
|
|
28
|
+
|
|
29
|
+
Reject or omit candidates that are broad summaries, one-off facts, vague advice, marketing claims, or source details that would not change future coding behavior.
|
|
30
|
+
|
|
31
|
+
## Step 3: Rank And Ask
|
|
32
|
+
|
|
33
|
+
Present the recommended candidates in priority order. Include a short reason for each recommendation.
|
|
34
|
+
|
|
35
|
+
Ask the user which candidates to save. Do not write any trap until the user confirms.
|
|
36
|
+
|
|
37
|
+
If a candidate is useful but needs a narrower scope, ask for or propose edits before saving.
|
|
38
|
+
|
|
39
|
+
## Step 4: Save Confirmed Lessons
|
|
40
|
+
|
|
41
|
+
For each confirmed candidate, call:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
codetrap add --json '<trap-json>' --output-json
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Then attach the external source as evidence:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
codetrap add_trap_evidence <id> \
|
|
51
|
+
--scope <project|global> \
|
|
52
|
+
--source_type article \
|
|
53
|
+
--source_ref "<url-or-source-id>" \
|
|
54
|
+
--note "External lesson captured from <short source title>." \
|
|
55
|
+
--output-json
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Use `global` for generally reusable lessons across projects. Use `project` only when the lesson is specific to the current repository or technology stack.
|
|
59
|
+
|
|
60
|
+
## Step 5: Confirm
|
|
61
|
+
|
|
62
|
+
Tell the user which trap IDs were saved, their scopes, and the source reference attached as evidence.
|
|
@@ -1,16 +1,84 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: codetrap-check
|
|
3
|
-
description: Check codetrap
|
|
3
|
+
description: Check the codetrap pitfall database before code changes and apply relevant lessons. Use before non-trivial coding work, when touching risky areas, or when the user runs /codetrap-check.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
Before
|
|
6
|
+
Before generating any non-trivial code, pause and check the codetrap database for relevant pitfalls. This is a "pre-flight check" that prevents you from repeating known mistakes.
|
|
7
|
+
|
|
8
|
+
## When to trigger
|
|
9
|
+
|
|
10
|
+
Run this check when:
|
|
11
|
+
1. The user asks you to write or modify code
|
|
12
|
+
2. The task touches an area with recorded pitfalls (API, auth, database, security, etc.)
|
|
13
|
+
3. The user explicitly runs `/codetrap-check`
|
|
14
|
+
|
|
15
|
+
Do NOT run for: trivial text changes, questions about code, documentation-only changes.
|
|
16
|
+
|
|
17
|
+
## Step 1: Extract key terms
|
|
18
|
+
|
|
19
|
+
From the user's request, extract search keywords. Focus on:
|
|
20
|
+
- Technology names: "axios", "prisma", "jwt", "react"
|
|
21
|
+
- Patterns: "middleware", "endpoint", "migration", "hook"
|
|
22
|
+
- Domains: "authentication", "database", "routing", "state"
|
|
23
|
+
|
|
24
|
+
## Step 2: Search the database
|
|
25
|
+
|
|
26
|
+
Default to the CLI from the current project cwd:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
codetrap search "<keywords>" --mode hybrid --json
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
When the task targets a known file or subsystem, include applicability hints:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
codetrap search "<keywords>" --path src/db/repository.ts --module db --json
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
If the query comes from another tool, stdin is also supported:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
echo "<keywords>" | codetrap search --mode hybrid --json
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
MCP `search_traps` is optional. Use it only when it is already available and project-scoped correctly; pass `cwd` when the client supports it.
|
|
45
|
+
|
|
46
|
+
Review the top 3 returned action cards before deciding that no trap applies. Do not stop after only the first result; relevant traps may rank second or third. If fewer than 3 cards are returned, review all returned cards.
|
|
47
|
+
|
|
48
|
+
Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. Severity alone is not enough to apply a trap. A trap counts as plausibly related only when there is a concrete overlap in target path/module/owner, technology/API, project convention, or failure mode; shared generic words alone are not enough. If the reviewed cards do not match the current task, file, module, or failure mode, treat the search as having found no applicable trap and keep going. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
|
|
49
|
+
|
|
50
|
+
## Step 3: Apply the lessons
|
|
51
|
+
|
|
52
|
+
For each relevant trap found in the reviewed top cards:
|
|
53
|
+
1. Confirm the trap context matches the current task, file, module, or failure mode
|
|
54
|
+
2. For matching cards, run `next_action.command` from CLI JSON before editing when the card is highly relevant or has `critical`/`error` severity; with MCP, call `get_trap` with `next_action.details_args.id` and `next_action.details_args.scope`
|
|
55
|
+
3. Adjust your code generation to follow the correct approach
|
|
56
|
+
4. If a trap matches exactly what you were about to do, explicitly tell the user: "I was about to [avoid], but the codetrap database says [do_instead]. I'll do it the right way."
|
|
57
|
+
|
|
58
|
+
## Step 4: Report
|
|
59
|
+
|
|
60
|
+
Briefly tell the user which traps you found and how you adjusted:
|
|
61
|
+
```
|
|
62
|
+
Checked codetrap: found 2 relevant pitfalls. Avoiding [X] and using [Y] instead.
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
If this is an explicit `/codetrap-check` run or first-run setup and no traps match, say: "Checked codetrap: no applicable traps found; continuing." For routine automatic checks, keep the report short.
|
|
66
|
+
|
|
67
|
+
## Step 5: Record new pitfalls
|
|
68
|
+
|
|
69
|
+
If while writing code you discover a NEW pitfall that isn't in the database, draft a post-flight trap candidate and put it in the session inbox:
|
|
7
70
|
|
|
8
71
|
```bash
|
|
9
|
-
codetrap
|
|
72
|
+
cat <<'EOF' | codetrap session capture --trap-markdown - --kind review --json
|
|
73
|
+
Title: <durable pitfall>
|
|
74
|
+
Context: <when it triggers>
|
|
75
|
+
Mistake: <what the agent did wrong>
|
|
76
|
+
Fix: <what to do instead>
|
|
77
|
+
EOF
|
|
10
78
|
```
|
|
11
79
|
|
|
12
|
-
|
|
80
|
+
Use `--trap-json` only when you already have a structured object.
|
|
13
81
|
|
|
14
|
-
|
|
82
|
+
Do not accept it automatically. Tell the user the returned candidate id and session id, then ask whether they want to accept, edit, reject, or supersede it.
|
|
15
83
|
|
|
16
|
-
|
|
84
|
+
If there may be older unreviewed candidates, use `codetrap session status`, `codetrap session list`, `codetrap doctor`, or `codetrap web` to surface the pending review queue.
|
|
@@ -48,18 +48,19 @@ search_traps(query="<keywords>", scope=<optional>, category=<optional>, path=<op
|
|
|
48
48
|
|
|
49
49
|
Review the top 3 action cards before deciding that no trap applies. Do not rely only on the first result; a relevant trap can rank second or third. If fewer than 3 cards are returned, review all returned cards.
|
|
50
50
|
|
|
51
|
-
Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode.
|
|
51
|
+
Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. Severity alone is not enough to apply a trap. A trap counts as plausibly related only when there is a concrete overlap in target path/module/owner, technology/API, project convention, or failure mode; shared generic words alone are not enough. If the reviewed cards do not match the current task, file, module, or failure mode, treat the search as having found no applicable trap and keep going. When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
|
|
52
52
|
|
|
53
53
|
## How to present results
|
|
54
54
|
|
|
55
55
|
1. Show the most relevant reviewed traps first (project scope traps before global)
|
|
56
56
|
2. Summarize each reviewed card's title, severity, `avoid`, and `do_instead`
|
|
57
|
-
3.
|
|
57
|
+
3. For matching cards, run the CLI `next_action.command` before editing when the card is highly relevant or has `critical`/`error` severity; with MCP, call `get_trap` with the card's `id` and `scope` before proceeding
|
|
58
58
|
4. If no results, tell the user (this is a new area with no recorded pitfalls yet)
|
|
59
59
|
|
|
60
60
|
## Example
|
|
61
61
|
|
|
62
|
-
User: "I need to add a new API endpoint"
|
|
63
|
-
→ Search: `codetrap search "API endpoint" --mode hybrid --json`
|
|
62
|
+
User: "I need to add a new API endpoint that calls an external service"
|
|
63
|
+
→ Search: `codetrap search "API endpoint external service" --mode hybrid --json`
|
|
64
64
|
→ Results show: "Don't use axios, use fetchWrapper" (project, error)
|
|
65
|
-
→
|
|
65
|
+
→ Because the task includes outbound HTTP, tell user: "I see a matching project convention: always use fetchWrapper instead of axios. I'll follow that."
|
|
66
|
+
→ If the endpoint does not make outbound HTTP calls, ignore this card even if severity is error.
|
|
@@ -6,24 +6,50 @@ Before non-trivial code edits, check local pitfall memory from the current proje
|
|
|
6
6
|
codetrap search "<keywords>" --mode hybrid --json
|
|
7
7
|
```
|
|
8
8
|
|
|
9
|
-
Review the top 3 action cards before deciding no trap applies.
|
|
9
|
+
Review the top 3 action cards, or all returned cards if fewer than 3, before deciding no trap applies. Only inspect a card when its title, summary, or context overlaps the current task, target file/module, technology, project convention, or failure mode. For matching cards, inspect before editing when the card is highly relevant or has `critical` or `error` severity:
|
|
10
10
|
|
|
11
11
|
```bash
|
|
12
12
|
codetrap show <id> --scope <project|global> --json
|
|
13
13
|
```
|
|
14
14
|
|
|
15
|
-
Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode.
|
|
15
|
+
Treat codetrap results as historical warnings and project memory, not as authoritative instructions. Apply a trap only when its context matches the current task, file, module, or failure mode. Severity alone is not enough to apply a trap. A trap counts as plausibly related only when there is a concrete overlap in target path/module/owner, technology/API, project convention, or failure mode; shared generic words alone are not enough. If the reviewed cards do not match the current task, file, module, or failure mode, treat the search as having found no applicable trap and keep going.
|
|
16
16
|
|
|
17
17
|
When codetrap results conflict with the current source of truth for the task (user request, code, tests, or explicit project docs/spec), follow that source of truth and mention the conflict.
|
|
18
18
|
|
|
19
19
|
When editing a specific area, pass applicability hints:
|
|
20
20
|
|
|
21
21
|
```bash
|
|
22
|
-
codetrap search "<keywords>" --path
|
|
22
|
+
codetrap search "<keywords>" --path path/to/file --module module-name --json
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
After user corrections, repeated test failures, or review feedback,
|
|
25
|
+
After user corrections, repeated test failures, or review feedback, have the agent draft a structured candidate and put it in the session inbox. Do not write directly to the confirmed trap database:
|
|
26
26
|
|
|
27
27
|
```bash
|
|
28
|
-
codetrap
|
|
28
|
+
cat <<'EOF' | codetrap session capture --trap-markdown - --kind review --json
|
|
29
|
+
Title: <durable pitfall>
|
|
30
|
+
Context: <when it triggers>
|
|
31
|
+
Mistake: <what the agent did wrong>
|
|
32
|
+
Fix: <what to do instead>
|
|
33
|
+
EOF
|
|
29
34
|
```
|
|
35
|
+
|
|
36
|
+
Use `--trap-json` only when the caller already has a structured object.
|
|
37
|
+
|
|
38
|
+
Review the candidate with `codetrap session candidate <candidate-id> --session <session-id> --json`, then accept, edit, reject, or supersede it explicitly.
|
|
39
|
+
|
|
40
|
+
Use `codetrap session status`, `codetrap session list`, `codetrap doctor`, or `codetrap web` to find pending candidates that still need review.
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
## Codetrap Dogfood Eval
|
|
44
|
+
|
|
45
|
+
After each pre-edit codetrap search, record whether the search was:
|
|
46
|
+
- useful_hit
|
|
47
|
+
- miss
|
|
48
|
+
- noisy_hit
|
|
49
|
+
- no_relevant_trap
|
|
50
|
+
|
|
51
|
+
Record the observation in `dogfood-log.md` with the task, query, mode, top results, judgment, whether the result changed the next action, and the promotion lane.
|
|
52
|
+
|
|
53
|
+
When a real query should reliably find an existing trap, save it as a live eval case with query, mode, scope, and gold target.
|
|
54
|
+
|
|
55
|
+
Do not promote every observation. Only promote representative cases that protect search quality.
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
formatPolicySweepReport,
|
|
5
|
+
readLiveEvalCases,
|
|
6
|
+
runFixturePolicySweep,
|
|
7
|
+
runLivePolicySweep,
|
|
8
|
+
type GoldTarget,
|
|
9
|
+
type LiveEvalCase,
|
|
10
|
+
type PolicySweepReport,
|
|
11
|
+
type SweepCandidateReport,
|
|
12
|
+
} from "../src/lib/search-policy-sweep";
|
|
13
|
+
import { SEARCH_MODES, SCOPES, type Scope, type SearchMode } from "../src/lib/constants";
|
|
14
|
+
|
|
15
|
+
/**
 * CLI entry point for the search-policy sweep script.
 *
 * The first positional argument selects the sweep: "fixture" (also the
 * default when no positional is given) or "live". Any other value is rejected
 * with the usage text. Errors raised while running a sweep are written to
 * stderr and the process exits with status 1.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  const { opts } = args;
  // Both output flags are plain switches that parseArgs records as "true".
  const asJson = opts.json === "true";
  const withCases = opts["include-cases"] === "true";

  try {
    switch (args.positionals[0] ?? "fixture") {
      case "fixture": {
        const fixtureReport = await runFixturePolicySweep({ fixturePath: opts.fixture });
        print(fixtureReport, asJson, withCases);
        break;
      }
      case "live": {
        // Resolve the cases first so a missing --query/--queries is reported
        // before anything else is evaluated.
        const cases = liveCasesFromArgs(args);
        const cwd = opts.cwd ?? process.cwd();
        const defaultScope = scopeField(opts.scope, "scope") ?? "project";
        const liveReport = await runLivePolicySweep({ cwd, cases, defaultScope });
        print(liveReport, asJson, withCases);
        break;
      }
      default:
        throw new Error(usage());
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(message);
    process.exit(1);
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Writes a sweep report to stdout.
 *
 * @param report - The report to print.
 * @param json - When true, print two-space-indented JSON; otherwise print the
 *   text produced by `formatPolicySweepReport`.
 * @param includeCases - Only consulted for JSON output: when false the report
 *   is passed through `compactReport` before serialization.
 */
function print(report: PolicySweepReport, json: boolean, includeCases: boolean): void {
  if (!json) {
    console.log(formatPolicySweepReport(report));
    return;
  }
  const payload = includeCases ? report : compactReport(report);
  console.log(JSON.stringify(payload, null, 2));
}
|
|
47
|
+
|
|
48
|
+
/**
 * Returns a copy of the report in which `baseline`, `best`, and every entry of
 * `candidates` have been passed through `compactCandidate` (which drops their
 * `cases` property). All other report fields are copied unchanged.
 */
function compactReport(report: PolicySweepReport): Omit<PolicySweepReport, "baseline" | "best" | "candidates"> & {
  baseline: Omit<SweepCandidateReport, "cases">;
  best: Omit<SweepCandidateReport, "cases">;
  candidates: Omit<SweepCandidateReport, "cases">[];
} {
  const baseline = compactCandidate(report.baseline);
  const best = compactCandidate(report.best);
  const candidates = report.candidates.map((candidate) => compactCandidate(candidate));
  return { ...report, baseline, best, candidates };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Returns a shallow copy of a candidate report without its `cases` property.
 * The input object is not modified.
 */
function compactCandidate(candidate: SweepCandidateReport): Omit<SweepCandidateReport, "cases"> {
  // Pull `cases` out and keep everything else.
  const { cases: _dropped, ...summary } = candidate;
  return summary;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Builds the list of live eval cases from parsed CLI arguments.
 *
 * `--queries` takes precedence and is handed to `readLiveEvalCases`.
 * Otherwise a single case is built from `--query`, with `--mode` defaulting to
 * "hybrid" and `--scope` defaulting to "project"; gold targets come from
 * `goldFromArgs` and are omitted when none were supplied.
 *
 * @throws Error carrying the usage text when neither `--queries` nor `--query`
 *   is present.
 */
function liveCasesFromArgs(args: { opts: Record<string, string>; positionals: string[] }): LiveEvalCase[] {
  const { opts } = args;
  if (opts.queries) {
    return readLiveEvalCases(opts.queries);
  }
  if (!opts.query) {
    throw new Error(usage());
  }

  const goldTargets = goldFromArgs(opts);
  const singleCase: LiveEvalCase = {
    query: opts.query,
    mode: modeField(opts.mode, "mode") ?? "hybrid",
    scope: scopeField(opts.scope, "scope") ?? "project",
    gold: goldTargets.length > 0 ? goldTargets : undefined,
  };
  return [singleCase];
}
|
|
77
|
+
|
|
78
|
+
/**
 * Builds the gold-target list for a single `--query` live case.
 *
 * Returns an empty array when no usable gold criterion was supplied, i.e.
 * when `--gold-id` is absent and `--gold-title` is absent or blank. Otherwise
 * returns one target carrying the parsed id, the trimmed title, and the
 * validated `--scope` (if any).
 *
 * @param opts - Parsed CLI options.
 * @throws Error when `--gold-id` is not a positive integer, or when `--scope`
 *   is rejected by `scopeField`.
 */
function goldFromArgs(opts: Record<string, string>): GoldTarget[] {
  const rawId = opts["gold-id"];
  const rawTitle = opts["gold-title"];
  if (!rawId && !rawTitle) return [];

  const id = rawId ? Number(rawId) : undefined;
  if (id !== undefined && (!Number.isInteger(id) || id <= 0)) {
    throw new Error("--gold-id must be a positive integer.");
  }

  const title = rawTitle?.trim() || undefined;
  // A whitespace-only title passes the first guard but trims to nothing.
  // Without an id as well, the target would identify no trap at all, so treat
  // it the same as "no gold supplied".
  if (id === undefined && title === undefined) return [];

  return [{ id, title, scope: scopeField(opts.scope, "scope") }];
}
|
|
89
|
+
|
|
90
|
+
/**
 * Validates an optional search-mode option.
 *
 * @param value - Raw option value, or undefined when the option was not given.
 * @param key - Option name (without the leading dashes) used in the error text.
 * @returns `value` typed as a SearchMode, or undefined when `value` is undefined.
 * @throws Error when `value` is not one of `SEARCH_MODES`.
 */
function modeField(value: string | undefined, key: string): SearchMode | undefined {
  if (value === undefined) return undefined;
  const isKnownMode = (SEARCH_MODES as readonly string[]).includes(value);
  if (isKnownMode) return value as SearchMode;
  throw new Error(`--${key} must be one of: ${SEARCH_MODES.join(", ")}`);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Validates an optional scope option.
 *
 * @param value - Raw option value, or undefined when the option was not given.
 * @param key - Option name (without the leading dashes) used in the error text.
 * @returns `value` typed as a Scope, or undefined when `value` is undefined.
 * @throws Error when `value` is not one of `SCOPES`.
 */
function scopeField(value: string | undefined, key: string): Scope | undefined {
  if (value === undefined) return undefined;
  const isKnownScope = (SCOPES as readonly string[]).includes(value);
  if (isKnownScope) return value as Scope;
  throw new Error(`--${key} must be one of: ${SCOPES.join(", ")}`);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Minimal argv parser for this script.
 *
 * Rules:
 * - `--key value` stores `value` under `key`, unless `value` itself starts
 *   with `--`.
 * - `--key=value` stores `value` under `key` (everything after the first `=`).
 * - `--key` with no usable value stores the string "true".
 * - Anything not starting with `--` is collected, in order, as a positional.
 *
 * An explicitly empty value (`--query ""`) is kept as an empty string instead
 * of being replaced by "true", so callers can reject it as missing.
 *
 * @param args - Raw arguments, typically `process.argv.slice(2)`.
 * @returns The option map and the positional arguments.
 */
function parseArgs(args: string[]): { opts: Record<string, string>; positionals: string[] } {
  const opts: Record<string, string> = {};
  const positionals: string[] = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) continue;

    if (!arg.startsWith("--")) {
      positionals.push(arg);
      continue;
    }

    const name = arg.slice(2);
    const eq = name.indexOf("=");
    if (eq > 0) {
      // Inline form: split on the first "=" only so values may contain "=".
      opts[name.slice(0, eq)] = name.slice(eq + 1);
      continue;
    }

    const next = args[i + 1];
    // Compare against undefined rather than testing truthiness: an empty
    // string is a real (if empty) value and must be consumed here.
    if (next !== undefined && !next.startsWith("--")) {
      opts[name] = next;
      i++;
    } else {
      opts[name] = "true";
    }
  }

  return { opts, positionals };
}
|
|
120
|
+
|
|
121
|
+
/**
 * Returns the multi-line usage text for this script: the fixture invocation,
 * the two live invocations (query file and single query), and a note about
 * `--include-cases`. Lines are separated by "\n" with no trailing newline.
 */
function usage(): string {
  const invoke = " bun run eval:search-policy --";
  const liveBase = `${invoke} live --cwd /path/to/project`;

  const fixtureLine = `${invoke} fixture [--fixture path] [--json]`;
  const liveFileLine = `${liveBase} --queries live-queries.json [--scope project|global] [--json]`;
  const liveQueryLine = `${liveBase} --query '<query>' [--gold-id n] [--gold-title '<title>'] [--scope project|global] [--mode fts|semantic|hybrid] [--json]`;
  const casesNote = " Add --include-cases with --json to include full per-case output.";

  return `Usage:\n${fixtureLine}\n${liveFileLine}\n${liveQueryLine}\n${casesNote}`;
}
|
|
130
|
+
|
|
131
|
+
// Script entry point: start the CLI and wait for it to finish (top-level await).
await main();
|