claude-dev-env 1.2.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,140 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- UserPromptSubmit hook that detects "hook" mentions and injects
4
- context about our current hook structure and how to add new hooks.
5
- """
6
-
7
- import json
8
- import sys
9
-
10
-
11
# Substrings that make a prompt eligible for hook-context injection.
# They are compared against the lower-cased prompt, so keep them lower-case.
TRIGGER_PHRASES = ["hook"]

# Substrings that suppress injection even when a trigger phrase is present.
# They are compared against the lower-cased prompt, so keep them lower-case.
EXCLUDE_PHRASES = [
    "react hook", "usehook", "usestate", "useeffect", "custom hook",
    "git hook", "pre-commit hook", "webhook",
]
25
-
26
- CONTEXT = """
27
- <hook-structure-context>
28
- ## Claude Code Hook System
29
-
30
- ### Architecture
31
- - **Runner pattern**: `run-hook-wrapper.js` (Node.js) -> `run-hook.py` (Python) -> individual hook
32
- - **Hook directory**: `hooks/`
33
-
34
- ### Hook Organization (subfolder structure)
35
- ```
36
- hooks/
37
- |-- rewrite-plugin-paths.py
38
- |-- session/
39
- | |-- compact-context-reinject.py
40
- | |-- plugin-data-dir-cleanup.py
41
- | +-- hook-structure-context.py
42
- |-- notification/
43
- | |-- attention-needed-notify.py
44
- | |-- claude-notification-handler.py
45
- | +-- notification_utils.py
46
- |-- advisory/
47
- | |-- refactor-guard.py
48
- | +-- migration-safety-advisor.py
49
- |-- validation/
50
- | |-- code-style-validator.py
51
- | |-- hook-format-validator.py
52
- | |-- mypy_validator.py
53
- | +-- e2e-test-validator.py
54
- |-- lifecycle/
55
- | |-- config-change-guard.py
56
- | +-- session-end-cleanup.py
57
- |-- blocking/
58
- | |-- pyautogui-scroll-blocker.py
59
- | |-- sensitive-file-protector.py
60
- | |-- write-existing-file-blocker.py
61
- | +-- destructive-command-blocker.py
62
- |-- git-hooks/
63
- | +-- post-commit.py
64
- |-- github-action/
65
- | +-- test_workflow.py
66
- +-- validators/
67
- |-- (validation check modules)
68
- +-- test_files/
69
- ```
70
-
71
- ### Event Types
72
- | Event | When | Input (stdin JSON) | Can Block? |
73
- |-------|------|-------------------|------------|
74
- | SessionStart | Session begins | `{}` | No |
75
- | UserPromptSubmit | User sends message | `{"prompt": "..."}` | No (advisory) |
76
- | PreToolUse | Before tool execution | `{"tool_name": "...", "tool_input": {...}}` | YES (exit 2) |
77
- | PostToolUse | After tool execution | `{"tool_name": "...", "tool_input": {...}, "tool_output": "..."}` | No |
78
- | SubagentStop | Agent completes | `{"agent_type": "...", ...}` | No |
79
- | Stop | Session ends | `{}` | No |
80
-
81
- ### How to Add a New Hook
82
-
83
- 1. **Create the hook file** in the appropriate subfolder:
84
- ```python
85
- #!/usr/bin/env python3
86
- import json
87
- import sys
88
-
89
- hook_input = json.load(sys.stdin)
90
- print("Context to inject")
91
- sys.exit(0)
92
- ```
93
-
94
- 2. **Register in settings.json** under the correct event type:
95
- ```json
96
- {
97
- "type": "command",
98
- "command": "node -e \\"process.argv.splice(1,0,'_');require(require('os').homedir()+'/.claude/hooks/run-hook-wrapper.js')\\" \\"session/your-hook.py\\"",
99
- "timeout": 15000
100
- }
101
- ```
102
-
103
- 3. **Key rules**:
104
- - Always use the `run-hook-wrapper.js` pattern (cross-platform)
105
- - Set explicit timeouts (10000-30000ms)
106
- - PreToolUse hooks use `matcher` to scope which tools they fire on
107
- - UserPromptSubmit hooks match on `prompt` content
108
- - Print output = context injected into Claude's conversation
109
- - Advisory hooks exit 0; blocking hooks exit 2 with `hookSpecificOutput.permissionDecision`
110
- </hook-structure-context>
111
- """
112
-
113
-
114
def main() -> None:
    """Print the hook-structure context when the submitted prompt mentions hooks.

    Reads the hook payload as JSON from stdin and inspects its ``"prompt"``
    field case-insensitively. Nothing is printed when the payload is not
    valid JSON, when the prompt is empty, or when the prompt contains any
    entry of ``EXCLUDE_PHRASES``. Otherwise ``CONTEXT`` is printed if any
    entry of ``TRIGGER_PHRASES`` occurs in the prompt. The process always
    exits with status 0.
    """
    try:
        payload = json.load(sys.stdin)
    except json.JSONDecodeError:
        sys.exit(0)

    prompt = payload.get("prompt", "")
    if not prompt:
        sys.exit(0)

    lowered_prompt = prompt.lower()

    # Exclusions are checked first, so they take precedence over triggers.
    if any(excluded in lowered_prompt for excluded in EXCLUDE_PHRASES):
        sys.exit(0)

    if any(trigger in lowered_prompt for trigger in TRIGGER_PHRASES):
        print(CONTEXT)

    sys.exit(0)
137
-
138
-
139
- if __name__ == "__main__":
140
- main()
@@ -1,145 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Code style validator - checks for common style issues.
4
-
5
- - 4-space indentation (not tabs, not 2 spaces)
6
- - Two blank lines above top-level functions and classes (at most two consecutive blank lines anywhere)
7
- - Single newlines between class methods
8
- """
9
- import json
10
- import re
11
- import sys
12
-
13
-
14
def check_indentation(content: str) -> list[str]:
    """Report lines whose indentation is not a multiple of four spaces.

    Only the leading whitespace of each line is examined, so a tab that
    appears later in the line (for example inside a string literal) is not
    mistaken for tab indentation. Unindented lines and whitespace-only lines
    are skipped because they carry no indentation to judge.

    Args:
        content: Source text to inspect, with newline-separated lines.

    Returns:
        At most five messages, in line order, each starting with
        ``"Line N: "`` where N is the 1-based line number.
    """
    issues: list[str] = []

    for line_num, line in enumerate(content.split('\n'), 1):
        leading = line[:len(line) - len(line.lstrip(' \t'))]

        if not leading or not line.strip():
            continue

        # Any tab in the indentation is reported as tab indentation, even
        # when it is mixed with spaces.
        if '\t' in leading:
            issues.append(f"Line {line_num}: Tab indentation - use 4 spaces")
            continue

        indent = len(leading)
        if indent % 4 != 0:
            issues.append(f"Line {line_num}: {indent}-space indent - use 4 spaces")

    return issues[:5]  # Limit to first 5
37
-
38
-
39
def check_function_spacing(content: str) -> list[str]:
    """Check blank-line spacing around function, method, and class definitions.

    Rules applied while scanning the text line by line:

    * A run of three or more blank lines followed by a non-blank line is
      reported (at most two are allowed).
    * A top-level ``def``/``async def`` or ``class`` that follows a code line
      must have exactly two blank lines above it.
    * An indented ``def``/``async def`` that follows a code line must have
      exactly one blank line above it, unless that code line opens a block
      (ends with ``:``). The first definition inside a class or function body
      needs no blank line.

    Definitions preceded by a comment or decorator line are not checked.
    Block openers are detected with a plain ``endswith(':')`` test, so a
    header with a trailing comment is not recognised as one.

    Args:
        content: Source text to inspect, with newline-separated lines.

    Returns:
        At most five messages, in line order, each starting with
        ``"Line N: "`` where N is the 1-based line number.
    """
    issues: list[str] = []

    func_pattern = re.compile(r'^(\s*)(async\s+)?def\s+\w+')
    class_pattern = re.compile(r'^class\s+\w+')

    consecutive_blank_count = 0
    blank_run_start_line = 0
    prev_was_code = False
    prev_opens_block = False

    for line_num, line in enumerate(content.split('\n'), 1):
        stripped = line.strip()

        if not stripped:
            if consecutive_blank_count == 0:
                blank_run_start_line = line_num
            consecutive_blank_count += 1
            continue

        if consecutive_blank_count >= 3:
            issues.append(f"Line {blank_run_start_line}: {consecutive_blank_count} consecutive blank lines - max 2 allowed")

        func_match = func_pattern.match(line)
        class_match = class_pattern.match(line)

        if func_match and prev_was_code:
            indent = len(func_match.group(1))

            if indent == 0:
                if consecutive_blank_count != 2:
                    issues.append(f"Line {line_num}: Top-level function needs 2 blank lines above (has {consecutive_blank_count})")
            elif not prev_opens_block:
                # A definition directly under its block header is the first
                # statement of that body and needs no separating blank line.
                if consecutive_blank_count != 1:
                    issues.append(f"Line {line_num}: Method needs 1 blank line above (has {consecutive_blank_count})")

        elif class_match and prev_was_code:
            if consecutive_blank_count != 2:
                issues.append(f"Line {line_num}: Class needs 2 blank lines above (has {consecutive_blank_count})")

        consecutive_blank_count = 0
        # Comments and decorators belong to the definition that follows them,
        # so they do not count as preceding code.
        prev_was_code = not stripped.startswith('#') and not stripped.startswith('@')
        prev_opens_block = stripped.endswith(':')

    return issues[:5]
88
-
89
-
90
def main() -> None:
    """Run the style checks on the content a tool is about to write.

    Reads the hook payload as JSON from stdin. The checks are skipped when
    the payload is not valid JSON, when the target path is missing, is not a
    ``.py`` file, or contains ``test``/``conftest``, when there is no content
    to inspect, or when a ``Write`` targets a file that already exists with
    non-empty content. If any issue is found, a ``PreToolUse`` decision of
    ``"ask"`` listing the issues is printed as JSON. The process always exits
    with status 0.
    """
    try:
        payload = json.load(sys.stdin)
    except json.JSONDecodeError:
        sys.exit(0)

    tool_input = payload.get("tool_input", {})
    file_path = tool_input.get("file_path", "")

    # Only Python files with a known path are checked.
    if not file_path or not file_path.endswith('.py'):
        sys.exit(0)

    # Test files are exempt from these checks.
    lowered_path = file_path.lower()
    if 'test' in lowered_path or 'conftest' in lowered_path:
        sys.exit(0)

    content = tool_input.get("content", "") or tool_input.get("new_string", "")
    if not content:
        sys.exit(0)

    if payload.get("tool_name", "") == "Write":
        try:
            with open(file_path, "r", encoding="utf-8") as existing_file:
                target_has_content = bool(existing_file.read())
        except (FileNotFoundError, OSError, UnicodeDecodeError):
            # An unreadable or missing target is treated like a new file.
            target_has_content = False
        if target_has_content:
            sys.exit(0)

    issues = [*check_indentation(content), *check_function_spacing(content)]

    if issues:
        issue_list = "; ".join(issues)
        decision = {
            "hookSpecificOutput": {
                "hookEventName": "PreToolUse",
                "permissionDecision": "ask",
                "permissionDecisionReason": f"[Code Style] {len(issues)} issue(s): {issue_list}. Fix or proceed?"
            }
        }
        print(json.dumps(decision))
        sys.stdout.flush()

    sys.exit(0)
142
-
143
-
144
- if __name__ == "__main__":
145
- main()
@@ -1,142 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Validate E2E test consistency between online/offline specs.
4
-
5
- Two checks:
6
- 1. Naming: offline tests must mirror online test names with " offline" suffix.
7
- 2. Coverage: when a new online e2e test file is written, a corresponding
8
- offline equivalent must exist. Reported as additional context if missing (the hook does not block).
9
-
10
- Triggered as PostToolUse hook when editing spec files.
11
- """
12
-
13
- import json
14
- import os
15
- import re
16
- import sys
17
- from pathlib import Path
18
-
19
-
20
- E2E_TEST_DIRECTORY = "frontend/tests/e2e"
21
-
22
-
23
def extract_test_names(file_path: Path) -> set[str]:
    """Extract the titles passed to ``test(...)`` calls in a spec file.

    The file is read as UTF-8 regardless of the platform default encoding.
    A title may be delimited by single quotes, double quotes, or backticks.
    It ends at the matching unescaped delimiter, so a title such as
    ``"user's cart"`` is captured whole. Method calls such as
    ``pattern.test('x')`` are not treated as test declarations.

    Args:
        file_path: Path of the spec file to scan.

    Returns:
        The set of title strings exactly as written in the source (escape
        sequences are not decoded).

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    content = file_path.read_text(encoding="utf-8")
    # Group 1 is the opening delimiter; group 2 is everything up to the
    # matching unescaped delimiter.
    pattern = r"""(?<![\w.])test\(\s*(['"`])((?:\\.|(?!\1).)+)\1"""
    return {match.group(2) for match in re.finditer(pattern, content)}
28
-
29
-
30
def validate_e2e_naming(project_root: Path) -> list[str]:
    """Return naming violations between the online and offline e2e specs.

    Only offline tests whose title ends with ``" offline"`` are validated:
    the title without that suffix must exist in ``online.spec.ts``. Offline
    tests without the suffix are ignored. Violations are returned in sorted
    title order so the output is stable between runs.

    Args:
        project_root: Directory that contains ``E2E_TEST_DIRECTORY``.

    Returns:
        One message per unpaired offline test, or an empty list when either
        spec file is missing or every suffixed test has an online pair.
    """
    e2e_directory = project_root / E2E_TEST_DIRECTORY
    online = e2e_directory / "online.spec.ts"
    offline = e2e_directory / "offline.spec.ts"

    if not online.exists() or not offline.exists():
        return []

    online_tests = extract_test_names(online)
    suffix = " offline"

    # Sets have no stable iteration order, so sort before reporting.
    return [
        f"No online pair for: '{test}'"
        for test in sorted(extract_test_names(offline))
        if test.endswith(suffix) and test.removesuffix(suffix) not in online_tests
    ]
56
-
57
-
58
def validate_offline_coverage(file_path: str, project_root: Path) -> list[str]:
    """Check that a written spec file has an offline counterpart.

    Files whose name contains ``offline`` or does not end in ``.spec.ts`` are
    skipped. For ``online.spec.ts`` the expected counterpart is
    ``offline.spec.ts``; for any other ``<name>.spec.ts`` it is
    ``<name>.offline.spec.ts``. The counterpart is looked up directly inside
    ``E2E_TEST_DIRECTORY`` under ``project_root``.

    Args:
        file_path: Path of the spec file that was written.
        project_root: Directory that contains ``E2E_TEST_DIRECTORY``.

    Returns:
        A single-message list when the counterpart does not exist, otherwise
        an empty list.
    """
    spec_name = Path(file_path).name

    is_online_spec = spec_name.endswith(".spec.ts") and "offline" not in spec_name
    if not is_online_spec:
        return []

    if spec_name == "online.spec.ts":
        expected_offline = "offline.spec.ts"
    else:
        expected_offline = spec_name.replace(".spec.ts", ".offline.spec.ts")

    if (project_root / E2E_TEST_DIRECTORY / expected_offline).exists():
        return []
    return [f"Missing offline equivalent: {expected_offline} required for {spec_name}"]
82
-
83
-
84
def main() -> None:
    """Hook entry point: validate e2e spec consistency for an edited spec file.

    Reads the hook payload as JSON from stdin. Nothing is reported when the
    payload is not valid JSON, when the edited path is missing or does not
    contain ``.spec.ts``, or when no ancestor directory of the file contains
    ``E2E_TEST_DIRECTORY``. Otherwise coverage violations are reported as
    ``PostToolUse`` additional context. Naming violations are reported only
    when there are no coverage violations. The process always exits with
    status 0.
    """
    try:
        payload = json.load(sys.stdin)
    except json.JSONDecodeError:
        sys.exit(0)

    file_path = payload.get("tool_input", {}).get("file_path", "")
    if not file_path or ".spec.ts" not in file_path:
        sys.exit(0)

    # Walk upward from the file's directory. The final ancestor (filesystem
    # root, or "." for a relative path) is its own parent and is not inspected.
    start = Path(file_path).parent
    candidates = [start, *start.parents][:-1]
    project_root = next(
        (candidate for candidate in candidates if (candidate / E2E_TEST_DIRECTORY).exists()),
        None,
    )
    if project_root is None:
        sys.exit(0)

    naming_violations = validate_e2e_naming(project_root)
    coverage_violations = validate_offline_coverage(file_path, project_root)

    if coverage_violations:
        violation_list = "; ".join(coverage_violations)
        context = f"[E2E COVERAGE] Offline test required: {violation_list}"
    elif naming_violations:
        violation_list = "; ".join(naming_violations)
        context = f"[E2E NAMING] {violation_list}. Offline tests must mirror online names with ' offline' suffix."
    else:
        sys.exit(0)

    report = {
        "hookSpecificOutput": {
            "hookEventName": "PostToolUse",
            "additionalContext": context
        }
    }
    print(json.dumps(report))
    sys.stdout.flush()
    sys.exit(0)
139
-
140
-
141
- if __name__ == "__main__":
142
- main()
@@ -1,102 +0,0 @@
1
- ---
2
- name: agent-prompt
3
- description: >-
4
- Craft a structured prompt using prompt-generator's workflow, then spawn a
5
- background agent to execute it after user approval. Use instead of
6
- /prompt-generator when the user wants execution, not just the prompt.
7
- Triggers on /agent-prompt, "launch an agent for this", "spawn agent to do X",
8
- "delegate this", "run this in background", or any task that benefits from
9
- agent delegation with prompt quality.
10
- ---
11
-
12
- @~/.claude/skills/prompt-generator/SKILL.md
13
- @~/.claude/skills/prompt-generator/REFERENCE.md
14
-
15
- # Agent Prompt
16
-
17
- Craft a structured agent prompt, get approval, spawn a background agent.
18
-
19
- The prompt-generator skill above defines the prompt-crafting workflow. This skill extends it: instead of delivering the prompt as a fenced block, it presents the prompt for approval and spawns a background agent.
20
-
21
- ## When this skill applies
22
-
23
- Trigger when the user wants to delegate a task to an agent. The difference from /prompt-generator: this skill **executes**.
24
-
25
- When invoked with arguments (e.g. `/agent-prompt fix the auth bug via TDD`), treat the arguments as the task to build a prompt for and execute.
26
-
27
- ## Workflow
28
-
29
- ### Steps 1-8: Craft the prompt
30
-
31
- Follow the prompt-generator workflow steps 1 through 8 exactly as written. Classify the prompt type, set degree of freedom, collect missing facts, build the prompt with XML tags and role, control format and style, add examples if needed, and self-check against the rubric.
32
-
33
- Skip step 9 (Deliver). Continue below instead.
34
-
35
- ### Step 9: Gather context before crafting
36
-
37
- The agent starts with zero conversation history. Although this step is numbered after the crafting steps, do it before building the prompt (step 4): use Read, Glob, Grep, and other research tools to gather the concrete values the agent will need -- file paths, function signatures, existing patterns, branch names. Embed these directly in the prompt instead of telling the agent to "find" them.
38
-
39
- The agent-spawn-protocol rule requires this: if any context question has the answer "I don't know", investigate first. Do not delegate the context-gathering.
40
-
41
- Proactive context gathering enables agents to plan effectively from the start. Anthropic's emotion concepts research (2026) found that agents produce higher-quality output when they understand constraints, available tools, and system boundaries upfront — they incorporate these into their approach naturally, leading to better first attempts and more accurate results.
42
-
43
- ### Step 10: Determine agent configuration
44
-
45
- Map the task to agent parameters:
46
-
47
- | Task type | subagent_type | mode |
48
- |---|---|---|
49
- | Codebase exploration, search, research | Explore | default |
50
- | Code implementation, bug fix, refactoring | general-purpose | auto |
51
- | Read-only audit, analysis, review | general-purpose | default |
52
- | Architecture, multi-step planning | Plan | plan |
53
-
54
- Always set `run_in_background: true`.
55
-
56
- Generate a descriptive `name` (3-5 words, kebab-case) so the user can track progress and send follow-up messages via `SendMessage({to: name})`.
57
-
58
- ### Step 11: Present for approval
59
-
60
- Use AskUserQuestion with one question. The question text should summarize the agent config (type, mode, name). Each option should use the `preview` field to show the full crafted prompt.
61
-
62
- Options:
63
- 1. "Launch it" (recommended) -- preview shows the crafted prompt
64
- 2. "Edit first" -- preview shows the prompt with a note that user can provide changes
65
- 3. "Cancel" -- no preview
66
-
67
- ### Step 12: Spawn
68
-
69
- On **"Launch it"**: spawn the Agent tool with the crafted prompt and configuration. Report the agent name so the user knows what's running.
70
-
71
- On **"Edit first"**: present the prompt in conversation text. After the user provides changes, return to step 11 with the updated prompt.
72
-
73
- On **"Cancel"**: acknowledge and stop.
74
-
75
- ## Prompt adjustments for agent execution
76
-
77
- When building the prompt in step 4, these adjustments ensure the agent can work independently:
78
-
79
- **Context completeness** -- include file paths, line numbers, function names, branch state, and anything you learned during step 9. The agent cannot see this conversation.
80
-
81
- **Acceptance criteria** -- state what "done" looks like. For code: include the test command. For research: specify the output format and save location.
82
-
83
- **Scope boundary** -- include "Only make changes directly requested; do not refactor surrounding code" or equivalent. Agents without scope constraints tend to over-engineer.
84
-
85
- **Constraints from this project** -- if the project has CODE_RULES.md, TDD requirements, or naming conventions, include the relevant subset in the prompt so the agent follows them.
86
-
87
- **Emotion-informed briefing** -- Anthropic's emotion concepts research (2026) found that briefing style causally affects output quality. Frame tasks collaboratively ("work on this together", "help figure out"). Include permission to express uncertainty ("flag anything you're unsure about", "use [PLACEHOLDER] for unverified specifics"). Provide motivation behind constraints ("this ordering ensures tests define behavior before implementation exists"). Share system context proactively (what hooks enforce, what tools are available, what the fallback is) so the agent can incorporate constraints into its plan from the start.
88
-
89
- **Anti-test-fixation** -- For code tasks, include guidance against test-specific solutions. Anthropic: "Implement a solution that works correctly for all valid inputs, not just the test cases. Tests are there to verify correctness, not to define the solution. If the task is unreasonable or infeasible, or if any of the tests are incorrect, please inform me rather than working around them."
90
-
91
- **Commit-and-execute** -- For multi-step agent work, include decision commitment guidance. Anthropic: "When deciding how to approach a problem, choose an approach and commit to it. Avoid revisiting decisions unless you encounter new information that directly contradicts your reasoning."
92
-
93
- **Temp file cleanup** -- If the agent may create scratch files during iteration, include cleanup instructions. Anthropic: "If you create any temporary new files, scripts, or helper files for iteration, clean up these files by removing them at the end of the task."
94
-
95
- ## Constraints
96
-
97
- - Always present for approval via AskUserQuestion -- never auto-spawn
98
- - Always run agents in background
99
- - Gather context before crafting -- do not send an agent in blind
100
- - If the task is too small for an agent (single file read, quick grep), say so and just do it directly
101
- - Include obstacle handling: "When encountering obstacles, do not use destructive actions as a shortcut (e.g. --no-verify, discarding unfamiliar files)" -- agents without this guidance may take irreversible shortcuts
102
- - Frame agent tasks with collaborative language and include permission to express uncertainty — agents produce higher-quality output with collaborative briefing (Anthropic emotion concepts research, 2026)
@@ -1,150 +0,0 @@
1
- # Prompt generator -- reference
2
-
3
- ## Canonical resources
4
-
5
- When authoring or refining prompts, ground decisions in these sources. If guidance conflicts, defer to the higher tier.
6
-
7
- ### Tier 1: Anthropic (primary authority for Claude)
8
-
9
- - https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/overview -- overview, links to all sub-guides
10
- - https://platform.claude.com/docs/en/build-with-claude/prompt-engineering/claude-prompting-best-practices -- the single living reference for Claude's latest models. Covers general principles, XML tags, prefill deprecation, tool use, thinking, agentic systems, overeagerness, anti-hallucination.
11
- - https://transformer-circuits.pub/2026/emotions/index.html -- emotion concepts research (April 2026): 171 internal activation patterns that causally influence behavior. Key prompt-engineering takeaways: clear criteria and escape routes improve output quality, collaborative framing activates engagement, positive task framing correlates with better results, inviting transparency produces more reliable output. Cross-model caveat: studied on Sonnet 4.5; patterns align with best practices independently.
12
- - https://www.anthropic.com/research/emotion-concepts-function -- blog summary of the above paper.
13
- - https://platform.claude.com/docs/en/build-with-claude/adaptive-thinking -- adaptive thinking reference; replaces manual budget_tokens with effort-based control.
14
-
15
- ### Tier 2: Major labs (strong secondary, often transfers across models)
16
-
17
- - https://platform.openai.com/docs/guides/prompt-engineering -- six strategies: write clear instructions, provide reference text, split complex tasks, give models time to think, use external tools, test systematically.
18
- - https://deepmind.google/research/ -- learning resources and chain-of-thought research.
19
- - https://www.microsoft.com/en-us/research/blog/ -- publications and applied research.
20
-
21
- ### Tier 3: Courses, communities, individuals (supplementary)
22
-
23
- **Courses:**
24
-
25
- - https://www.deeplearning.ai/short-courses/ -- Andrew Ng's courses. "ChatGPT Prompt Engineering for Developers" (with OpenAI) is the foundational one.
26
- - https://course.fast.ai/ -- Jeremy Howard's top-down teaching style.
27
- - https://www.elementsofai.com/ -- University of Helsinki introductory course.
28
- - https://ocw.mit.edu/search/?t=Artificial%20Intelligence -- MIT OpenCourseWare AI curriculum.
29
-
30
- **Communities and individuals:**
31
-
32
- - https://discuss.huggingface.co/ -- open-source model community.
33
- - https://www.latent.space/ -- AI engineering perspective (Latent Space Podcast & Newsletter).
34
- - https://simonwillison.net/ -- practical LLM experiments. His "LLM" tag is especially valuable.
35
-
36
- ### Conflict resolution rule
37
-
38
- If sources disagree on a technique, apply in order: Anthropic documentation first (it describes the actual model behavior), then OpenAI/Google/Microsoft (large-scale research with cross-model relevance), then community sources (patterns and intuition, not authoritative on model internals). When Tier 3 contradicts Tier 1, Tier 1 wins without exception.
39
-
40
- ## NotebookLM Audio Overview customization (example)
41
-
42
- Adapt `[FOCUS AREA]` per notebook. Pair with Deep Dive + Longer in the product UI when that matches the user's plan.
43
-
44
- ```text
45
- Target audience: [Expert-level listener profile -- skip beginner padding.]
46
-
47
- Focus: [FOCUS AREA -- single notebook-specific paragraph.]
48
-
49
- Style: [Technical depth, anti-patterns, implications for builders.]
50
-
51
- Prioritize: [Technical depth and specific findings over marketing tone or generic summaries.]
52
- ```
53
-
54
- ## Agent checklist pattern
55
-
56
- For long tasks, include an optional checklist the model can mirror:
57
-
58
- ```text
59
- Copy this checklist and mark items as you go:
60
-
61
- Progress:
62
- - [ ] ...
63
- - [ ] ...
64
- ```
65
-
66
- ## Agentic state management
67
-
68
- For `agent-harness` prompts that span multiple context windows, include state persistence and multi-window patterns based on Anthropic's guidance:
69
-
70
- ### Context awareness
71
-
72
- Claude 4.6 tracks its remaining context window. Include harness capabilities so Claude can plan accordingly:
73
-
74
- ```text
75
- <context_management>
76
- Your context window will be automatically compacted as it approaches its limit, allowing you to continue working indefinitely from where you left off. Do not stop tasks early due to token budget concerns. As you approach the limit, save current progress and state before the context window refreshes. Always be as persistent and autonomous as possible and complete tasks fully.
77
- </context_management>
78
- ```
79
-
80
- ### Multi-window workflow
81
-
82
- Anthropic recommends differentiating the first context window from subsequent ones:
83
-
84
- **First window:** Set up the framework -- write tests, create setup scripts, establish the todo-list.
85
-
86
- **Subsequent windows:** Iterate on the todo-list, using state files to resume.
87
-
88
- Key patterns from Anthropic:
89
- - Have the model write tests in a **structured format** (e.g. `tests.json` with `{id, name, status}`) before starting work. Remind: "It is unacceptable to remove or edit tests because this could lead to missing or buggy functionality."
90
- - Encourage **setup scripts** (e.g. `init.sh`) to start servers, run test suites, and linters. This prevents repeated work across windows.
91
- - When starting fresh, be **prescriptive about resumption**: "Review progress.txt, tests.json, and the git logs."
92
- - Provide **verification tools** (Playwright, computer use) for autonomous UI testing.
93
-
94
- ### State tracking
95
-
96
- ```text
97
- <state_management>
98
- Track progress in structured + freeform files:
99
- - tests.json: structured test status {id, name, status}
100
- - progress.txt: freeform session notes and next steps
101
- - Use git commits as checkpoints for rollback
102
-
103
- When approaching context limits, save current state before the window refreshes.
104
- Do not stop tasks early due to token budget concerns.
105
- </state_management>
106
- ```
107
-
108
- ### Encouraging complete context usage
109
-
110
- ```text
111
- This is a very long task, so it may be beneficial to plan out your work clearly. It's encouraged to spend your entire output context working on the task - just make sure you don't run out of context with significant uncommitted work. Continue working systematically until you have completed this task.
112
- ```
113
-
114
- ## Research prompt pattern
115
-
116
- For `research` prompt types, include structured investigation with hypothesis tracking:
117
-
118
- ```text
119
- <research_approach>
120
- Search for this information in a structured way. As you gather data, develop several competing hypotheses. Track your confidence levels in your progress notes to improve calibration. Regularly self-critique your approach and plan. Update a hypothesis tree or research notes file to persist information and provide transparency. Break down this complex research task systematically.
121
- </research_approach>
122
- ```
123
-
124
- Key elements:
125
- - Define clear **success criteria** for the research question
126
- - Encourage **source verification** across multiple sources
127
- - Track **competing hypotheses** with confidence levels
128
- - **Self-critique** approach and plan regularly
129
-
130
- ## Evaluation loop
131
-
132
- For prompt drafts that must hold up over time:
133
-
134
- 1. Run the draft on 2-3 representative user utterances.
135
- 2. Note failure modes (skipped steps, wrong format, over-refusal).
136
- 3. Tighten **constraints** or add **examples** for the failure class only.
137
-
138
- Anthropic's **self-correction chaining** pattern extends this: generate a draft, have Claude review it against criteria, then have Claude refine based on the review. Each step can be a separate API call for inspection and branching.
139
-
140
- ## Anti-test-fixation pattern
141
-
142
- ```text
143
- Write general-purpose solutions using the standard tools available. Implement logic that works correctly for all valid inputs, not just the test cases. Tests verify correctness -- they do not define the solution. If a test seems incorrect or the task is unreasonable, flag it rather than working around it.
144
- ```
145
-
146
- ## Commit-and-execute pattern
147
-
148
- ```text
149
- When deciding how to approach a problem, choose an approach and commit to it. Avoid revisiting decisions unless you encounter new information that directly contradicts your reasoning. If you are weighing two approaches, pick one and see it through. You can always course-correct later if the chosen approach fails.
150
- ```