connectry-architect-mcp 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/data/curriculum.json +74 -0
  2. package/dist/data/handouts/1.1-design-and-implement-agentic-loops-for-autonomous-task-execution.md +36 -0
  3. package/dist/data/handouts/1.2-orchestrate-multi-agent-systems-with-coordinator-subagent-patterns.md +125 -0
  4. package/dist/data/handouts/1.3-configure-subagent-invocation-context-passing-and-spawning.md +109 -0
  5. package/dist/data/handouts/1.4-implement-multi-step-workflows-with-enforcement-and-handoff-patterns.md +120 -0
  6. package/dist/data/handouts/1.5-apply-agent-sdk-hooks-for-tool-call-interception-and-data-normalization.md +155 -0
  7. package/dist/data/handouts/1.6-design-task-decomposition-strategies-for-complex-workflows.md +104 -0
  8. package/dist/data/handouts/1.7-manage-session-state-resumption-and-forking.md +154 -0
  9. package/dist/data/handouts/2.1-design-effective-tool-interfaces-with-clear-descriptions-and-boundaries.md +133 -0
  10. package/dist/data/handouts/2.2-implement-structured-error-responses-for-mcp-tools.md +138 -0
  11. package/dist/data/handouts/2.3-distribute-tools-appropriately-across-agents-and-configure-tool-choice.md +123 -0
  12. package/dist/data/handouts/2.4-integrate-mcp-servers-into-claude-code-and-agent-workflows.md +142 -0
  13. package/dist/data/handouts/2.5-select-and-apply-built-in-tools-effectively.md +95 -0
  14. package/dist/data/handouts/3.1-configure-claude-md-files-with-appropriate-hierarchy-and-scoping.md +124 -0
  15. package/dist/data/handouts/3.2-create-and-configure-custom-slash-commands-and-skills.md +126 -0
  16. package/dist/data/handouts/3.3-apply-path-specific-rules-for-conditional-convention-loading.md +121 -0
  17. package/dist/data/handouts/3.4-determine-when-to-use-plan-mode-vs-direct-execution.md +108 -0
  18. package/dist/data/handouts/3.5-apply-iterative-refinement-techniques-for-progressive-improvement.md +130 -0
  19. package/dist/data/handouts/3.6-integrate-claude-code-into-ci-cd-pipelines.md +166 -0
  20. package/dist/data/handouts/4.1-design-prompts-with-explicit-criteria-to-improve-precision.md +182 -0
  21. package/dist/data/handouts/4.2-apply-few-shot-prompting-to-improve-output-consistency.md +109 -0
  22. package/dist/data/handouts/4.3-enforce-structured-output-using-tool-use-and-json-schemas.md +150 -0
  23. package/dist/data/handouts/4.4-implement-validation-retry-and-feedback-loops.md +186 -0
  24. package/dist/data/handouts/4.5-design-efficient-batch-processing-strategies.md +143 -0
  25. package/dist/data/handouts/4.6-design-multi-instance-and-multi-pass-review-architectures.md +249 -0
  26. package/dist/data/handouts/5.1-manage-conversation-context-to-preserve-critical-information.md +156 -0
  27. package/dist/data/handouts/5.2-design-effective-escalation-and-ambiguity-resolution-patterns.md +168 -0
  28. package/dist/data/handouts/5.3-implement-error-propagation-strategies-across-multi-agent-systems.md +176 -0
  29. package/dist/data/handouts/5.4-manage-context-effectively-in-large-codebase-exploration.md +131 -0
  30. package/dist/data/handouts/5.5-design-human-review-workflows-and-confidence-calibration.md +130 -0
  31. package/dist/data/handouts/5.6-preserve-information-provenance-and-handle-uncertainty-in-synthesis.md +197 -0
  32. package/dist/data/questions/domain-1.json +2223 -0
  33. package/dist/data/questions/domain-2.json +1631 -0
  34. package/dist/data/questions/domain-3.json +1926 -0
  35. package/dist/data/questions/domain-4.json +1957 -0
  36. package/dist/data/questions/domain-5.json +1948 -0
  37. package/dist/index.js +191 -49
  38. package/dist/index.js.map +1 -1
  39. package/package.json +3 -3
@@ -0,0 +1,74 @@
+ {
+   "domains": [
+     {
+       "id": 1,
+       "title": "Agentic Architecture & Orchestration",
+       "weight": 27,
+       "mentalModel": "The model drives decisions, code enforces guardrails",
+       "taskStatements": [
+         { "id": "1.1", "domainId": 1, "title": "Design and implement agentic loops for autonomous task execution", "description": "Understanding the agentic loop lifecycle: sending requests, inspecting stop_reason, executing tools, and returning results.", "mentalModel": "The model drives decisions, code enforces guardrails" },
+         { "id": "1.2", "domainId": 1, "title": "Orchestrate multi-agent systems with coordinator-subagent patterns", "description": "Hub-and-spoke architecture, isolated context, task decomposition, and result aggregation.", "mentalModel": "The model drives decisions, code enforces guardrails" },
+         { "id": "1.3", "domainId": 1, "title": "Configure subagent invocation, context passing, and spawning", "description": "Task tool, allowedTools, explicit context passing, parallel subagent execution.", "mentalModel": "The model drives decisions, code enforces guardrails" },
+         { "id": "1.4", "domainId": 1, "title": "Implement multi-step workflows with enforcement and handoff patterns", "description": "Programmatic enforcement vs prompt-based guidance, structured handoff protocols.", "mentalModel": "The model drives decisions, code enforces guardrails" },
+         { "id": "1.5", "domainId": 1, "title": "Apply Agent SDK hooks for tool call interception and data normalization", "description": "PostToolUse hooks, tool call interception, deterministic vs probabilistic compliance.", "mentalModel": "The model drives decisions, code enforces guardrails" },
+         { "id": "1.6", "domainId": 1, "title": "Design task decomposition strategies for complex workflows", "description": "Prompt chaining vs dynamic decomposition, per-file analysis vs cross-file integration.", "mentalModel": "The model drives decisions, code enforces guardrails" },
+         { "id": "1.7", "domainId": 1, "title": "Manage session state, resumption, and forking", "description": "Named sessions, fork_session, structured summaries vs stale context.", "mentalModel": "The model drives decisions, code enforces guardrails" }
+       ]
+     },
+     {
+       "id": 2,
+       "title": "Tool Design & MCP Integration",
+       "weight": 18,
+       "mentalModel": "Tool descriptions are the LLM's only guide — design them like API docs",
+       "taskStatements": [
+         { "id": "2.1", "domainId": 2, "title": "Design effective tool interfaces with clear descriptions and boundaries", "description": "Tool descriptions as selection mechanism, disambiguation, splitting vs consolidating.", "mentalModel": "Tool descriptions are the LLM's only guide — design them like API docs" },
+         { "id": "2.2", "domainId": 2, "title": "Implement structured error responses for MCP tools", "description": "isError flag, error categories, retryable vs non-retryable, structured metadata.", "mentalModel": "Tool descriptions are the LLM's only guide — design them like API docs" },
+         { "id": "2.3", "domainId": 2, "title": "Distribute tools appropriately across agents and configure tool choice", "description": "Scoped tool access, tool_choice options, forced selection patterns.", "mentalModel": "Tool descriptions are the LLM's only guide — design them like API docs" },
+         { "id": "2.4", "domainId": 2, "title": "Integrate MCP servers into Claude Code and agent workflows", "description": "Project vs user scope, .mcp.json, environment variable expansion, MCP resources.", "mentalModel": "Tool descriptions are the LLM's only guide — design them like API docs" },
+         { "id": "2.5", "domainId": 2, "title": "Select and apply built-in tools effectively", "description": "Grep vs Glob vs Read/Write/Edit, incremental codebase understanding.", "mentalModel": "Tool descriptions are the LLM's only guide — design them like API docs" }
+       ]
+     },
+     {
+       "id": 3,
+       "title": "Claude Code Configuration & Workflows",
+       "weight": 20,
+       "mentalModel": "Hierarchy of instructions: user → project → directory, each scoped to its audience",
+       "taskStatements": [
+         { "id": "3.1", "domainId": 3, "title": "Configure CLAUDE.md files with appropriate hierarchy and scoping", "description": "User-level, project-level, directory-level, @import syntax, .claude/rules/.", "mentalModel": "Hierarchy of instructions: user → project → directory, each scoped to its audience" },
+         { "id": "3.2", "domainId": 3, "title": "Create and configure custom slash commands and skills", "description": "Project vs user scope, context: fork, allowed-tools, argument-hint frontmatter.", "mentalModel": "Hierarchy of instructions: user → project → directory, each scoped to its audience" },
+         { "id": "3.3", "domainId": 3, "title": "Apply path-specific rules for conditional convention loading", "description": "YAML frontmatter paths, glob patterns, conditional activation.", "mentalModel": "Hierarchy of instructions: user → project → directory, each scoped to its audience" },
+         { "id": "3.4", "domainId": 3, "title": "Determine when to use plan mode vs direct execution", "description": "Complexity assessment, architectural decisions, Explore subagent.", "mentalModel": "Hierarchy of instructions: user → project → directory, each scoped to its audience" },
+         { "id": "3.5", "domainId": 3, "title": "Apply iterative refinement techniques for progressive improvement", "description": "Input/output examples, test-driven iteration, interview pattern.", "mentalModel": "Hierarchy of instructions: user → project → directory, each scoped to its audience" },
+         { "id": "3.6", "domainId": 3, "title": "Integrate Claude Code into CI/CD pipelines", "description": "-p flag, --output-format json, --json-schema, session context isolation.", "mentalModel": "Hierarchy of instructions: user → project → directory, each scoped to its audience" }
+       ]
+     },
+     {
+       "id": 4,
+       "title": "Prompt Engineering & Structured Output",
+       "weight": 20,
+       "mentalModel": "Specificity beats vagueness. Examples beat instructions. Schemas beat parsing.",
+       "taskStatements": [
+         { "id": "4.1", "domainId": 4, "title": "Design prompts with explicit criteria to improve precision", "description": "Explicit criteria vs vague instructions, false positive management.", "mentalModel": "Specificity beats vagueness. Examples beat instructions. Schemas beat parsing." },
+         { "id": "4.2", "domainId": 4, "title": "Apply few-shot prompting to improve output consistency", "description": "Targeted examples, ambiguous case handling, format demonstration.", "mentalModel": "Specificity beats vagueness. Examples beat instructions. Schemas beat parsing." },
+         { "id": "4.3", "domainId": 4, "title": "Enforce structured output using tool use and JSON schemas", "description": "tool_use with schemas, tool_choice options, nullable fields, enum patterns.", "mentalModel": "Specificity beats vagueness. Examples beat instructions. Schemas beat parsing." },
+         { "id": "4.4", "domainId": 4, "title": "Implement validation, retry, and feedback loops", "description": "Retry-with-error-feedback, limits of retry, detected_pattern tracking.", "mentalModel": "Specificity beats vagueness. Examples beat instructions. Schemas beat parsing." },
+         { "id": "4.5", "domainId": 4, "title": "Design efficient batch processing strategies", "description": "Message Batches API, latency tolerance, custom_id, failure handling.", "mentalModel": "Specificity beats vagueness. Examples beat instructions. Schemas beat parsing." },
+         { "id": "4.6", "domainId": 4, "title": "Design multi-instance and multi-pass review architectures", "description": "Self-review limitations, independent review instances, per-file + cross-file passes.", "mentalModel": "Specificity beats vagueness. Examples beat instructions. Schemas beat parsing." }
+       ]
+     },
+     {
+       "id": 5,
+       "title": "Context Management & Reliability",
+       "weight": 15,
+       "mentalModel": "Context is finite and degrades — extract facts, trim noise, verify provenance",
+       "taskStatements": [
+         { "id": "5.1", "domainId": 5, "title": "Manage conversation context to preserve critical information", "description": "Progressive summarization risks, lost-in-the-middle, tool output trimming.", "mentalModel": "Context is finite and degrades — extract facts, trim noise, verify provenance" },
+         { "id": "5.2", "domainId": 5, "title": "Design effective escalation and ambiguity resolution patterns", "description": "Escalation triggers, customer preferences, sentiment unreliability.", "mentalModel": "Context is finite and degrades — extract facts, trim noise, verify provenance" },
+         { "id": "5.3", "domainId": 5, "title": "Implement error propagation strategies across multi-agent systems", "description": "Structured error context, access failures vs empty results, partial results.", "mentalModel": "Context is finite and degrades — extract facts, trim noise, verify provenance" },
+         { "id": "5.4", "domainId": 5, "title": "Manage context effectively in large codebase exploration", "description": "Context degradation, scratchpad files, subagent delegation, /compact.", "mentalModel": "Context is finite and degrades — extract facts, trim noise, verify provenance" },
+         { "id": "5.5", "domainId": 5, "title": "Design human review workflows and confidence calibration", "description": "Stratified sampling, field-level confidence, accuracy by document type.", "mentalModel": "Context is finite and degrades — extract facts, trim noise, verify provenance" },
+         { "id": "5.6", "domainId": 5, "title": "Preserve information provenance and handle uncertainty in synthesis", "description": "Claim-source mappings, conflict annotation, temporal data handling.", "mentalModel": "Context is finite and degrades — extract facts, trim noise, verify provenance" }
+       ]
+     }
+   ]
+ }
@@ -0,0 +1,36 @@
+ # 1.1 — Agentic Loops for Autonomous Task Execution
+
+ ## Concept
+
+ When Claude uses tools, it follows a cycle: send a message → Claude decides to call a tool → execute the tool → return the result → Claude decides what to do next. This is the **agentic loop**.
+
+ The key mechanism is `stop_reason`:
+ - `"tool_use"` → Claude wants to use another tool. Keep the loop going.
+ - `"end_turn"` → Claude is done. Present the response.
+
+ ## Code Example
+
+ ```typescript
+ while (true) {
+   const response = await client.messages.create({ /* ... */ });
+
+   if (response.stop_reason === 'end_turn') {
+     return response; // Done — present to user
+   }
+
+   // Execute requested tools
+   const toolResults = await executeTools(response.content);
+   messages.push({ role: 'assistant', content: response.content });
+   messages.push({ role: 'user', content: toolResults });
+ }
+ ```
+
+ ## Common Mistakes
+
+ 1. **Parsing text to detect completion** — Don't check if the assistant said "I'm done." Use `stop_reason`.
+ 2. **Arbitrary iteration caps** — Don't set `maxIterations = 5` as the primary stopping mechanism; a cap is acceptable only as a safety backstop (see the sketch after this list).
+ 3. **Checking for text content** — Don't assume the loop is done because the response contains text.
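+
+ A minimal sketch of the backstop variant, using the same shorthand as the example above (`client`, `messages`, and `executeTools` are assumed to exist; `MAX_TURNS` is an illustrative name): `stop_reason` still drives termination, and the cap only catches a runaway loop.
+
+ ```typescript
+ const MAX_TURNS = 50; // safety backstop, not the primary stopping mechanism
+
+ for (let turn = 0; turn < MAX_TURNS; turn++) {
+   const response = await client.messages.create({ /* ... */ });
+
+   if (response.stop_reason === 'end_turn') {
+     return response; // normal termination, driven by stop_reason
+   }
+   if (response.stop_reason === 'max_tokens') {
+     throw new Error('Response truncated: raise max_tokens or trim the request');
+   }
+
+   // stop_reason === 'tool_use': execute the requested tools and continue
+   const toolResults = await executeTools(response.content);
+   messages.push({ role: 'assistant', content: response.content });
+   messages.push({ role: 'user', content: toolResults });
+ }
+
+ throw new Error(`Agent did not finish within ${MAX_TURNS} turns`);
+ ```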
+
+ ## References
+
+ - [Anthropic Agent SDK Documentation](https://docs.anthropic.com/en/docs/agents)
@@ -0,0 +1,125 @@
+ # 1.2 — Orchestrate Multi-Agent Systems with Coordinator-Subagent Patterns
+
+ ## Concept
+
+ Multi-agent systems follow a **hub-and-spoke** (coordinator-subagent) pattern. A single coordinator agent receives a complex task, decomposes it into well-scoped subtasks, delegates each subtask to a specialized subagent, and then aggregates the results into a final response. The coordinator never tries to do everything itself — it drives decisions, while each subagent operates within guardrails enforced in code.
+
+ The critical design principle is **isolated context**: every subagent runs in its own independent conversation. It has no memory of what other subagents said, and it cannot read the coordinator's full conversation history. This isolation prevents context contamination, keeps each subagent focused, and makes the system easier to reason about. The coordinator is the only node that holds the full picture — it receives outputs from each spoke and synthesizes them.
+
+ Task decomposition is where the model earns its role. The coordinator's system prompt instructs Claude to analyze the incoming request and produce a structured breakdown of subtasks before any subagent is spawned. Each subtask has a clear scope, a defined output format, and a minimal `allowedTools` list. Result aggregation happens after all subagents complete: the coordinator receives their outputs as tool results, reasons across them, and produces a coherent final answer. The mental model is: **the model drives decisions, code enforces guardrails** — Claude decides which subagents to invoke and how to interpret their outputs; your application code controls which tools each subagent may access and what data it can see.
+
+ ## Code Example
+
+ ```typescript
+ import Anthropic from "@anthropic-ai/sdk";
+
+ const client = new Anthropic();
+
+ // Coordinator: receives a complex task and delegates to subagents
+ async function coordinator(userTask: string): Promise<string> {
+   const messages: Anthropic.MessageParam[] = [
+     { role: "user", content: userTask },
+   ];
+
+   // Agentic loop — coordinator decides which subagents to invoke
+   while (true) {
+     const response = await client.messages.create({
+       model: "claude-opus-4-5",
+       max_tokens: 4096,
+       system:
+         "You are a coordinator. Break complex tasks into subtasks and delegate each to a specialized subagent using the available tools. Synthesize their outputs into a final answer.",
+       tools: [
+         {
+           name: "run_research_agent",
+           description: "Spawn a subagent that researches a specific topic.",
+           input_schema: {
+             type: "object" as const,
+             properties: {
+               query: {
+                 type: "string",
+                 description: "The focused research question.",
+               },
+             },
+             required: ["query"],
+           },
+         },
+         {
+           name: "run_analysis_agent",
+           description: "Spawn a subagent that analyzes provided data.",
+           input_schema: {
+             type: "object" as const,
+             properties: {
+               data: { type: "string", description: "Raw data to analyze." },
+             },
+             required: ["data"],
+           },
+         },
+       ],
+       messages,
+     });
+
+     if (response.stop_reason === "end_turn") {
+       const textBlock = response.content.find((b) => b.type === "text");
+       return textBlock ? textBlock.text : "";
+     }
+
+     // Execute tool calls — each spawns an isolated subagent conversation
+     const toolResults: Anthropic.ToolResultBlockParam[] = [];
+     for (const block of response.content) {
+       if (block.type !== "tool_use") continue;
+
+       let result: string;
+       if (block.name === "run_research_agent") {
+         const input = block.input as { query: string };
+         result = await researchSubagent(input.query); // isolated context
+       } else {
+         const input = block.input as { data: string };
+         result = await analysisSubagent(input.data); // isolated context
+       }
+
+       toolResults.push({ type: "tool_result", tool_use_id: block.id, content: result });
+     }
+
+     messages.push({ role: "assistant", content: response.content });
+     messages.push({ role: "user", content: toolResults });
+   }
+ }
+
+ // Subagent: isolated conversation, restricted tools
+ async function researchSubagent(query: string): Promise<string> {
+   const response = await client.messages.create({
+     model: "claude-haiku-4-5",
+     max_tokens: 1024,
+     system: "You are a research specialist. Answer only the question asked.",
+     messages: [{ role: "user", content: query }],
+     // No tools — this subagent only reasons, does not act
+   });
+
+   const textBlock = response.content.find((b) => b.type === "text");
+   return textBlock?.text ?? "";
+ }
+
+ async function analysisSubagent(data: string): Promise<string> {
+   const response = await client.messages.create({
+     model: "claude-haiku-4-5",
+     max_tokens: 1024,
+     system: "You are a data analyst. Summarize key insights from the data.",
+     messages: [{ role: "user", content: `Analyze this data:\n\n${data}` }],
+   });
+
+   const textBlock = response.content.find((b) => b.type === "text");
+   return textBlock?.text ?? "";
+ }
+ ```
+
+ ## Common Mistakes
+
+ 1. **Sharing the full conversation history with subagents** — Each subagent should receive only the context it needs for its specific subtask. Passing the coordinator's entire message history bloats the context window, leaks unrelated information, and produces less focused outputs.
+ 2. **Letting subagents call other subagents directly** — In a hub-and-spoke topology, only the coordinator routes work. Allowing subagents to spawn their own subagents creates uncontrolled fan-out, makes aggregation logic unpredictable, and breaks the single-point-of-synthesis guarantee.
+ 3. **Skipping task decomposition and going straight to subagent invocation** — Without a structured decomposition step, the coordinator tends to spawn overlapping or redundant subagents. Always instruct the coordinator to plan subtasks first; the decomposition itself is where the model earns its role.
+
+ ## References
+
+ - [Anthropic: Build multi-agent systems](https://docs.anthropic.com/en/docs/agents/multi-agent-systems)
+ - [Anthropic: Tool use overview](https://docs.anthropic.com/en/docs/tool-use)
+ - [Anthropic: Model overview (choosing Haiku vs Sonnet vs Opus)](https://docs.anthropic.com/en/docs/about-claude/models)
@@ -0,0 +1,109 @@
+ # 1.3 — Configure Subagent Invocation, Context Passing, and Spawning
+
+ ## Concept
+
+ In multi-agent systems, an orchestrator agent spawns subagents to handle discrete subtasks. The **Task tool** in the Claude Agent SDK is the primary mechanism for this: the orchestrator calls `Task` with a description of the work, an optional set of allowed tools, and any context the subagent needs to execute independently. The SDK handles launching a fresh agent loop for that subagent, running it to completion, and returning the result to the orchestrator.
+
+ Tool scoping via `allowedTools` is critical for security and predictability. Each subagent should only have access to the tools it actually needs. A subagent responsible for reading files should not have write or shell execution tools. This principle of least privilege prevents runaway agents from taking destructive actions and makes the system easier to reason about. If `allowedTools` is omitted, the subagent inherits all tools available in the environment — a common source of unintended side effects.
+
+ Context isolation is the other essential discipline. Subagents do not share memory or conversation state with the orchestrator or with each other. Any information a subagent needs — user intent, prior results, relevant file contents, configuration — must be passed explicitly in the task prompt. Relying on shared global state or assuming a subagent "remembers" something from the parent conversation is a design error that produces silent, hard-to-debug failures. When you need to run multiple subagents over the same data, pass that data explicitly to each one. Parallel execution with `Promise.all` is both safe and efficient when subagents are truly independent, since each runs in its own isolated context.
+
+ ## Code Example
+
+ ```typescript
+ import Anthropic from "@anthropic-ai/sdk";
+
+ const client = new Anthropic();
+
+ // Orchestrator spawns two parallel subagents with scoped tools and explicit context
+ async function analyzeRepository(repoSummary: string): Promise<void> {
+   const [securityReport, qualityReport] = await Promise.all([
+     // Subagent 1: security review — read-only file access only
+     client.messages.create({
+       model: "claude-sonnet-4-6",
+       max_tokens: 4096,
+       // Scoped tool list: the raw-API equivalent of allowedTools. This subagent
+       // can read files but cannot write or execute anything. read_file is the
+       // user-defined tool referenced in the prompt below; its executor is not shown.
+       tools: [
+         {
+           name: "read_file",
+           description: "Read the contents of a single file by path (read-only).",
+           input_schema: {
+             type: "object" as const,
+             properties: {
+               path: { type: "string", description: "Path of the file to read." },
+             },
+             required: ["path"],
+           },
+         },
+       ],
+       messages: [
+         {
+           role: "user",
+           content: `You are a security reviewer. Analyze this repository summary for vulnerabilities.
+
+ Repository context:
+ ${repoSummary}
+
+ Use only read_file to inspect individual files. Do not write or execute anything.
+ Return a structured JSON report with findings.`,
+         },
+       ],
+     }),
+
+     // Subagent 2: code quality — read-only, no security concerns
+     client.messages.create({
+       model: "claude-sonnet-4-6",
+       max_tokens: 4096,
+       messages: [
+         {
+           role: "user",
+           content: `You are a code quality reviewer. Analyze this repository summary for maintainability issues.
+
+ Repository context:
+ ${repoSummary}
+
+ Focus on: naming conventions, file size, cyclomatic complexity, dead code.
+ Return a structured JSON report with findings.`,
+         },
+       ],
+     }),
+   ]);
+
+   // Aggregate results from both isolated subagents
+   console.log("Security findings:", securityReport.content);
+   console.log("Quality findings:", qualityReport.content);
+ }
+
+ // Explicit context passing — never rely on shared state
+ async function processChunks(chunks: string[]): Promise<string[]> {
+   return Promise.all(
+     chunks.map((chunk, index) =>
+       client.messages
+         .create({
+           model: "claude-haiku-4-5",
+           max_tokens: 1024,
+           messages: [
+             {
+               role: "user",
+               // Each subagent receives its full context explicitly
+               content: `Process chunk ${index + 1} of ${chunks.length}:
+
+ ${chunk}
+
+ Return a one-paragraph summary.`,
+             },
+           ],
+         })
+         .then((r) => (r.content[0].type === "text" ? r.content[0].text : ""))
+     )
+   );
+ }
+ ```
+
+ ## Common Mistakes
+
+ 1. **Omitting `allowedTools` on subagents** — Without explicit scoping, subagents inherit all available tools. A summarization subagent that accidentally has shell execution access can cause unintended side effects. Always declare the minimal set of tools each subagent requires.
+
+ 2. **Passing context by reference or shared state** — Subagents run in isolated loops with no access to the orchestrator's conversation history. Storing shared context in a global variable and expecting subagents to read it silently fails. Every piece of information a subagent needs must appear in its initial `messages` array.
+
+ 3. **Running independent subagents sequentially** — Using `await` in a loop when subagents do not depend on each other's results wastes wall-clock time proportional to the number of agents. Use `Promise.all` (or `Promise.allSettled` when partial failures are acceptable) to run truly independent subagents concurrently; a `Promise.allSettled` variant is sketched after this list.
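+
+ A minimal sketch of the `Promise.allSettled` variant, reusing the `client` defined in the example above; the placeholder string recorded for failed chunks is illustrative:
+
+ ```typescript
+ async function processChunksTolerant(chunks: string[]): Promise<string[]> {
+   const settled = await Promise.allSettled(
+     chunks.map((chunk, index) =>
+       client.messages
+         .create({
+           model: "claude-haiku-4-5",
+           max_tokens: 1024,
+           messages: [
+             {
+               role: "user",
+               content: `Summarize chunk ${index + 1} of ${chunks.length}:\n\n${chunk}`,
+             },
+           ],
+         })
+         .then((r) => (r.content[0].type === "text" ? r.content[0].text : ""))
+     )
+   );
+
+   // Keep successful summaries; mark failed subagents instead of rejecting everything
+   return settled.map((result, index) =>
+     result.status === "fulfilled"
+       ? result.value
+       : `Chunk ${index + 1} failed: ${String(result.reason)}`
+   );
+ }
+ ```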
+
+ ## References
+
+ - [Anthropic: Build multi-agent systems](https://docs.anthropic.com/en/docs/build-with-claude/agents)
+ - [Anthropic: Tool use overview](https://docs.anthropic.com/en/docs/build-with-claude/tool-use)
+ - [Anthropic: Model context and memory](https://docs.anthropic.com/en/docs/build-with-claude/memory-and-storage)
@@ -0,0 +1,120 @@
+ # 1.4 — Implement Multi-Step Workflows with Enforcement and Handoff Patterns
+
+ ## Concept
+
+ Multi-step agentic workflows require more than just chaining prompts together. Each stage may produce output that the next stage depends on, and the correctness of that handoff determines whether the entire pipeline succeeds. There are two fundamentally different ways to ensure a workflow progresses correctly: **prompt-based guidance** and **programmatic enforcement**.
+
+ Prompt-based guidance relies on instructions in the system or user prompt to steer the model — telling it to "always return valid JSON" or "only proceed if the previous step succeeded." This is convenient but fragile: the model may deviate under long context pressure, unexpected input, or ambiguous situations. The prompt provides no hard guarantees. Programmatic enforcement, by contrast, validates outputs in code after each model response before passing them to the next stage. If the output fails validation, the workflow halts or retries — the model's good intentions are irrelevant.
+
+ The structured handoff pattern bridges these stages: each step produces a typed, validated payload that the next step receives as its input. Rather than passing raw text between steps, you define an interface for what each stage emits and parse the model's output against that schema. This creates a clear contract at every boundary. The general rule is: use deterministic code for anything that must be correct (schema validation, precondition checks, routing logic), and use model flexibility for anything that requires judgment (summarization, classification, generation). Mixing the two — trusting the model to enforce its own constraints — is the most common source of silent workflow failures.
+
+ ## Code Example
+
+ ```typescript
+ import Anthropic from "@anthropic-ai/sdk";
+ import { z } from "zod";
+
+ const client = new Anthropic();
+
+ // --- Stage schemas define the handoff contract ---
+
+ const ResearchOutputSchema = z.object({
+   topic: z.string(),
+   keyFindings: z.array(z.string()).min(1),
+   confidence: z.enum(["low", "medium", "high"]),
+ });
+
+ const DraftOutputSchema = z.object({
+   title: z.string(),
+   body: z.string().min(100),
+   wordCount: z.number().int().positive(),
+ });
+
+ type ResearchOutput = z.infer<typeof ResearchOutputSchema>;
+ type DraftOutput = z.infer<typeof DraftOutputSchema>;
+
+ // --- Programmatic enforcement via a typed step runner ---
+
+ async function runStep<T>(
+   systemPrompt: string,
+   userMessage: string,
+   schema: z.ZodType<T>,
+   maxRetries = 2
+ ): Promise<T> {
+   for (let attempt = 0; attempt <= maxRetries; attempt++) {
+     const response = await client.messages.create({
+       model: "claude-sonnet-4-5",
+       max_tokens: 1024,
+       system: systemPrompt,
+       messages: [{ role: "user", content: userMessage }],
+     });
+
+     const text = response.content
+       .filter((b) => b.type === "text")
+       .map((b) => b.text)
+       .join("");
+
+     // Programmatic enforcement: parse and validate before proceeding
+     try {
+       const parsed = JSON.parse(text);
+       return schema.parse(parsed); // throws ZodError if schema not satisfied
+     } catch (err) {
+       if (attempt === maxRetries) {
+         throw new Error(
+           `Step failed schema validation after ${maxRetries + 1} attempts: ${err}`
+         );
+       }
+       // Could inject the validation error back into the next attempt here (see the sketch after this block)
+     }
+   }
+   throw new Error("Unreachable");
+ }
+
+ // --- Structured handoff: each stage receives the previous stage's validated output ---
+
+ async function researchStage(topic: string): Promise<ResearchOutput> {
+   return runStep(
+     'You are a research assistant. Respond ONLY with a JSON object matching: { "topic": string, "keyFindings": string[], "confidence": "low"|"medium"|"high" }',
+     `Research this topic and return structured findings: ${topic}`,
+     ResearchOutputSchema
+   );
+ }
+
+ async function draftStage(research: ResearchOutput): Promise<DraftOutput> {
+   // The handoff: structured context from stage 1 becomes the input for stage 2
+   const handoffContext = JSON.stringify(research, null, 2);
+
+   return runStep(
+     'You are a technical writer. Respond ONLY with a JSON object matching: { "title": string, "body": string, "wordCount": number }',
+     `Using this research, write a short article:\n\n${handoffContext}`,
+     DraftOutputSchema
+   );
+ }
+
+ // --- Orchestrator: deterministic routing, not prompt-guided ---
+
+ async function runWorkflow(topic: string): Promise<DraftOutput> {
+   const research = await researchStage(topic);
+
+   // Deterministic gate: model judgment not trusted for routing decisions
+   if (research.confidence === "low") {
+     throw new Error(
+       `Research confidence too low for topic "${topic}". Aborting draft stage.`
+     );
+   }
+
+   return draftStage(research);
+ }
+ ```
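+
+ The retry above discards the validation error. A minimal sketch of the retry-with-error-feedback variant hinted at in the comment, under the same assumptions as `runStep` (the `runStepWithFeedback` name and feedback wording are illustrative):
+
+ ```typescript
+ async function runStepWithFeedback<T>(
+   systemPrompt: string,
+   userMessage: string,
+   schema: z.ZodType<T>,
+   maxRetries = 2
+ ): Promise<T> {
+   let lastError = "";
+
+   for (let attempt = 0; attempt <= maxRetries; attempt++) {
+     // On retries, show the model what was wrong with its previous output
+     const feedback = lastError
+       ? `\n\nYour previous response failed validation:\n${lastError}\nReturn corrected JSON only.`
+       : "";
+
+     const response = await client.messages.create({
+       model: "claude-sonnet-4-5",
+       max_tokens: 1024,
+       system: systemPrompt,
+       messages: [{ role: "user", content: userMessage + feedback }],
+     });
+
+     const text = response.content
+       .filter((b) => b.type === "text")
+       .map((b) => b.text)
+       .join("");
+
+     try {
+       return schema.parse(JSON.parse(text));
+     } catch (err) {
+       lastError = err instanceof Error ? err.message : String(err);
+     }
+   }
+
+   throw new Error(`Step failed schema validation after ${maxRetries + 1} attempts: ${lastError}`);
+ }
+ ```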
+
+ ## Common Mistakes
+
+ 1. **Trusting the model to self-validate** — Adding "ensure your output is valid JSON" to a prompt is not enforcement. Parse and validate in code; if the schema check fails, the step failed regardless of what the model intended.
+ 2. **Passing raw text between stages** — Handing the raw string output of one step directly into the next prompt makes the pipeline brittle. A single unexpected phrase or formatting quirk in stage one breaks stage two. Always parse into a typed structure at each boundary.
+ 3. **Using model output for routing decisions** — Letting the model decide whether to proceed to the next stage (e.g., "if you think the data is ready, say PROCEED") mixes concerns incorrectly. Routing logic belongs in deterministic code; use schema-validated fields like `confidence` or `status` to drive control flow.
+
+ ## References
+
+ - [Anthropic: Build with Claude — Workflows and orchestration](https://docs.anthropic.com/en/docs/build-with-claude/workflows)
+ - [Anthropic: Tool use overview](https://docs.anthropic.com/en/docs/build-with-claude/tool-use/overview)
+ - [Zod schema validation](https://zod.dev/)
@@ -0,0 +1,155 @@
+ # 1.5 — Apply Agent SDK Hooks for Tool Call Interception and Data Normalization
+
+ ## Concept
+
+ The Agent SDK exposes a hook system that lets you attach callbacks at key points in the agentic loop without modifying the core execution logic. Two intercept points are central to this: a pre-tool hook (**`PreToolUse`** in the Claude Agent SDK) fires immediately after Claude requests a tool but before the tool executes, and a post-tool hook (**`PostToolUse`**) fires after the tool returns but before the result is sent back to the model. The example below models the same pipeline with hand-rolled `beforeToolCall` and `afterToolCall` functions wrapped around a tool executor. These hooks receive the full tool call context — name, input arguments, and (for the post-tool hook) the raw output — giving you a structured intercept point for validation, transformation, and logging.
+
+ This architecture matters because it separates cross-cutting concerns from tool implementation. A tool that queries a database should not need to know about audit logging, input sanitization, or output schema normalization. Hooks handle those responsibilities once, centrally, and consistently. Every tool call passes through the same pipeline regardless of which tool is invoked or which part of your codebase registered it.
+
+ The distinction between **deterministic compliance** and **probabilistic compliance** is critical here. Probabilistic compliance means adding instructions to the system prompt such as "always return dates in ISO 8601 format" and hoping the model follows them. This works most of the time but fails silently when it doesn't. Deterministic compliance means enforcing the rule in code — in an `afterToolCall` hook that parses and reformats every date field in every tool output before the model ever sees it. The hook runs every time, unconditionally. For anything that must be reliably true — sanitized inputs, normalized schemas, redacted PII, validated ranges — deterministic enforcement via hooks is the correct approach.
+
+ ## Code Example
+
+ ```typescript
+ import Anthropic from "@anthropic-ai/sdk";
+
+ const client = new Anthropic();
+
+ // --- Hook definitions ---
+
+ function beforeToolCall(toolName: string, toolInput: Record<string, unknown>): Record<string, unknown> {
+   // Validate that required fields are present
+   if (toolName === "query_database" && typeof toolInput.table !== "string") {
+     throw new Error(`beforeToolCall: 'table' must be a string, got ${typeof toolInput.table}`);
+   }
+
+   // Normalize inputs deterministically — no model instruction needed
+   const normalized = { ...toolInput };
+   if (typeof normalized.limit === "number") {
+     normalized.limit = Math.min(normalized.limit, 100); // enforce hard cap
+   }
+
+   return normalized;
+ }
+
+ function afterToolCall(
+   toolName: string,
+   rawOutput: unknown
+ ): Record<string, unknown> {
+   if (typeof rawOutput !== "object" || rawOutput === null) {
+     return { result: rawOutput };
+   }
+
+   const output = rawOutput as Record<string, unknown>;
+
+   // Normalize date fields to ISO 8601 — deterministic, not instructional
+   const normalized = Object.fromEntries(
+     Object.entries(output).map(([key, value]) => {
+       if (key.endsWith("_at") || key.endsWith("_date")) {
+         const parsed = new Date(value as string);
+         return [key, isNaN(parsed.getTime()) ? value : parsed.toISOString()];
+       }
+       return [key, value];
+     })
+   );
+
+   // Redact sensitive fields before they reach the model context
+   const { password, secret, api_key, ...safe } = normalized as Record<string, unknown>;
+   void password; void secret; void api_key; // explicitly discarded
+
+   return safe;
+ }
+
+ // --- Tool executor that applies hooks ---
+
+ async function executeToolWithHooks(
+   toolName: string,
+   rawInput: Record<string, unknown>,
+   tools: Record<string, (input: Record<string, unknown>) => Promise<unknown>>
+ ): Promise<Record<string, unknown>> {
+   const validatedInput = beforeToolCall(toolName, rawInput);
+
+   const tool = tools[toolName];
+   if (!tool) throw new Error(`Unknown tool: ${toolName}`);
+
+   const rawOutput = await tool(validatedInput);
+   return afterToolCall(toolName, rawOutput);
+ }
+
+ // --- Usage in the agentic loop ---
+
+ async function runAgentWithHooks(userMessage: string) {
+   const tools: Anthropic.Tool[] = [
+     {
+       name: "query_database",
+       description: "Query a database table",
+       input_schema: {
+         type: "object" as const,
+         properties: {
+           table: { type: "string" },
+           limit: { type: "number" },
+         },
+         required: ["table"],
+       },
+     },
+   ];
+
+   const toolImpls: Record<string, (input: Record<string, unknown>) => Promise<unknown>> = {
+     // Flat demo row so the afterToolCall hook can normalize created_at and strip password
+     query_database: async (input) => ({
+       id: 1,
+       created_at: "2024-01-15",
+       password: "hunter2",
+       table: input.table,
+     }),
+   };
+
+   const messages: Anthropic.MessageParam[] = [
+     { role: "user", content: userMessage },
+   ];
+
+   while (true) {
+     const response = await client.messages.create({
+       model: "claude-sonnet-4-5",
+       max_tokens: 1024,
+       tools,
+       messages,
+     });
+
+     if (response.stop_reason === "end_turn") {
+       return response;
+     }
+
+     const toolResults: Anthropic.ToolResultBlockParam[] = [];
+
+     for (const block of response.content) {
+       if (block.type === "tool_use") {
+         const normalizedOutput = await executeToolWithHooks(
+           block.name,
+           block.input as Record<string, unknown>,
+           toolImpls
+         );
+         toolResults.push({
+           type: "tool_result",
+           tool_use_id: block.id,
+           content: JSON.stringify(normalizedOutput),
+         });
+       }
+     }
+
+     messages.push({ role: "assistant", content: response.content });
+     messages.push({ role: "user", content: toolResults });
+   }
+ }
+ ```
+
+ ## Common Mistakes
+
+ 1. **Using system prompt instructions instead of hooks for data contracts.** Writing "always return dates as ISO 8601" in the system prompt is probabilistic — the model may comply, but it is not guaranteed. If downstream systems depend on date format consistency, enforce it in `afterToolCall` where the transformation is unconditional.
+
+ 2. **Mutating the input object passed to `beforeToolCall`.** Modifying the original input in-place creates hidden side effects and makes debugging difficult. Always return a new object (`{ ...toolInput, limit: cap }`) rather than reassigning properties on the argument directly.
+
+ 3. **Swallowing hook errors silently.** A `beforeToolCall` hook that catches a validation failure and returns a default value instead of throwing allows invalid tool calls to proceed undetected. Throw explicitly with a descriptive message so the failure surfaces in logs and halts execution at the correct point.
+
+ ## References
+
+ - [Anthropic Agent SDK — Hooks](https://docs.anthropic.com/en/docs/agents-and-tools/agent-sdk/hooks)
+ - [Tool Use Overview](https://docs.anthropic.com/en/docs/tool-use)
+ - [Building Effective Agents](https://www.anthropic.com/research/building-effective-agents)