agent-scaffold-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. agent_scaffold/__init__.py +8 -0
  2. agent_scaffold/__main__.py +6 -0
  3. agent_scaffold/_bundled_deployments/__init__.py +15 -0
  4. agent_scaffold/_bundled_deployments/docs/cross-cutting/README.md +15 -0
  5. agent_scaffold/_bundled_deployments/docs/cross-cutting/auth-jwt.md +235 -0
  6. agent_scaffold/_bundled_deployments/docs/cross-cutting/logging-structured.md +196 -0
  7. agent_scaffold/_bundled_deployments/docs/cross-cutting/observability.md +259 -0
  8. agent_scaffold/_bundled_deployments/docs/cross-cutting/rate-limiting.md +171 -0
  9. agent_scaffold/_bundled_deployments/docs/cross-cutting/testing-strategy.md +261 -0
  10. agent_scaffold/_bundled_deployments/docs/frameworks/README.md +22 -0
  11. agent_scaffold/_bundled_deployments/docs/frameworks/crewai.md +91 -0
  12. agent_scaffold/_bundled_deployments/docs/frameworks/langgraph.md +79 -0
  13. agent_scaffold/_bundled_deployments/docs/frameworks/mastra.md +74 -0
  14. agent_scaffold/_bundled_deployments/docs/frameworks/pydantic-ai.md +77 -0
  15. agent_scaffold/_bundled_deployments/docs/frameworks/vercel-ai-sdk.md +83 -0
  16. agent_scaffold/_bundled_deployments/docs/patterns/README.md +26 -0
  17. agent_scaffold/_bundled_deployments/docs/patterns/memory.md +82 -0
  18. agent_scaffold/_bundled_deployments/docs/patterns/multi-agent-flat.md +72 -0
  19. agent_scaffold/_bundled_deployments/docs/patterns/multi-agent-hierarchical.md +83 -0
  20. agent_scaffold/_bundled_deployments/docs/patterns/parallel-calls.md +73 -0
  21. agent_scaffold/_bundled_deployments/docs/patterns/plan-execute-reflect.md +77 -0
  22. agent_scaffold/_bundled_deployments/docs/patterns/prompt-chaining.md +73 -0
  23. agent_scaffold/_bundled_deployments/docs/patterns/rag.md +84 -0
  24. agent_scaffold/_bundled_deployments/docs/patterns/react.md +77 -0
  25. agent_scaffold/_bundled_deployments/docs/patterns/routing-tool-use.md +69 -0
  26. agent_scaffold/_bundled_deployments/docs/recipes/README.md +39 -0
  27. agent_scaffold/_bundled_deployments/docs/recipes/code-review-agent.md +518 -0
  28. agent_scaffold/_bundled_deployments/docs/recipes/content-pipeline.md +525 -0
  29. agent_scaffold/_bundled_deployments/docs/recipes/customer-support-triage.md +1679 -0
  30. agent_scaffold/_bundled_deployments/docs/recipes/docs-rag-qa.md +1254 -0
  31. agent_scaffold/_bundled_deployments/docs/recipes/hierarchical-agent.md +554 -0
  32. agent_scaffold/_bundled_deployments/docs/recipes/memory-assistant.md +499 -0
  33. agent_scaffold/_bundled_deployments/docs/recipes/ops-crew.md +457 -0
  34. agent_scaffold/_bundled_deployments/docs/recipes/parallel-enricher.md +457 -0
  35. agent_scaffold/_bundled_deployments/docs/recipes/research-assistant.md +1096 -0
  36. agent_scaffold/_bundled_deployments/docs/stack/README.md +19 -0
  37. agent_scaffold/_bundled_deployments/docs/stack/api-fastapi.md +112 -0
  38. agent_scaffold/_bundled_deployments/docs/stack/api-hono.md +108 -0
  39. agent_scaffold/_bundled_deployments/docs/stack/cache-redis.md +85 -0
  40. agent_scaffold/_bundled_deployments/docs/stack/eval-deepeval-ragas-promptfoo.md +164 -0
  41. agent_scaffold/_bundled_deployments/docs/stack/llm-claude.md +105 -0
  42. agent_scaffold/_bundled_deployments/docs/stack/relational-postgres.md +122 -0
  43. agent_scaffold/_bundled_deployments/docs/stack/tool-protocol-mcp.md +275 -0
  44. agent_scaffold/_bundled_deployments/docs/stack/tracing-langfuse.md +108 -0
  45. agent_scaffold/_bundled_deployments/docs/stack/vector-qdrant.md +121 -0
  46. agent_scaffold/cache.py +32 -0
  47. agent_scaffold/cli.py +512 -0
  48. agent_scaffold/config.py +117 -0
  49. agent_scaffold/context.py +253 -0
  50. agent_scaffold/contract.py +141 -0
  51. agent_scaffold/discovery.py +112 -0
  52. agent_scaffold/generator.py +213 -0
  53. agent_scaffold/languages/__init__.py +0 -0
  54. agent_scaffold/languages/python.yaml +28 -0
  55. agent_scaffold/languages/typescript.yaml +25 -0
  56. agent_scaffold/prompts/__init__.py +0 -0
  57. agent_scaffold/prompts/repair.md +9 -0
  58. agent_scaffold/prompts/system.md +21 -0
  59. agent_scaffold/prompts/user_template.md +43 -0
  60. agent_scaffold/validator.py +133 -0
  61. agent_scaffold/writer.py +171 -0
  62. agent_scaffold_cli-0.1.1.dist-info/METADATA +147 -0
  63. agent_scaffold_cli-0.1.1.dist-info/RECORD +66 -0
  64. agent_scaffold_cli-0.1.1.dist-info/WHEEL +4 -0
  65. agent_scaffold_cli-0.1.1.dist-info/entry_points.txt +2 -0
  66. agent_scaffold_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,259 @@
1
+ # Cross-cutting: Observability
2
+
3
+ **Concern:** Trace every LLM call, tool invocation, and agent step so you can debug, optimize, and audit agent behavior.
4
+ **Library:** Langfuse (self-hosted, MIT)
5
+ **Lives in:** Inline below (formerly `common/python/agent_common/observability/` and `common/typescript/src/observability/`)
6
+
7
+ ## What it provides
8
+
9
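+ - **Singleton client** -- `get_langfuse()` (Py) creates the client on the first call and returns that same instance afterwards (arguments passed on later calls are ignored); `createLangfuseClient()` (TS) stores the configuration for the app.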
10
+ - **Trace decorator** -- `@traced("name")` (Py) / `traced("name", fn)` (TS) wraps any function in a Langfuse trace span with automatic error capture.
11
+ - **Async support** -- The Python decorator auto-detects sync vs async functions. The TS version is async-native.
12
+ - **Error propagation** -- Exceptions are recorded on the span (level=ERROR, status_message) and re-raised. Tracing never swallows errors.
13
+
14
+ ## How to use
15
+
16
+ ### Python
17
+
18
+ ```python
19
+ from agent_common.observability import get_langfuse, traced
20
+
21
+ # Initialize (typically in app lifespan)
22
+ langfuse = get_langfuse(
23
+ public_key="pk-lf-local",
24
+ secret_key="sk-lf-local",
25
+ host="http://localhost:3000",
26
+ )
27
+
28
+ # Trace a function
29
+ @traced("answer_question")
30
+ async def answer_question(question: str) -> str:
31
+ # LLM call, tool use, etc.
32
+ return result
33
+ ```
34
+
35
+ ### TypeScript
36
+
37
+ ```typescript
38
+ import { createLangfuseClient, traced } from "@agent-deployments/common";
39
+
40
+ // Initialize
41
+ createLangfuseClient({
42
+ publicKey: "pk-lf-local",
43
+ secretKey: "sk-lf-local",
44
+ host: "http://localhost:3000",
45
+ });
46
+
47
+ // Trace a function
48
+ const answer = await traced("answer_question", async () => {
49
+ // LLM call, tool use, etc.
50
+ return result;
51
+ });
52
+ ```
53
+
54
+ ### Nesting spans
55
+
56
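+ Create child spans for granular visibility. In this example the spans are attached to the trace returned by `client.trace(...)`; note that the reference `@traced` decorator (see Reference Implementation) makes its own `client.trace(...)` call and does not pass that trace to the wrapped function: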
57
+
58
+ ```python
59
+ @traced("rag_pipeline")
60
+ async def rag_pipeline(question: str) -> str:
61
+ client = get_langfuse()
62
+ trace = client.trace(name="rag_pipeline")
63
+
64
+ # Child span for retrieval
65
+ retrieval_span = trace.span(name="retrieve_chunks")
66
+ chunks = await retrieve(question)
67
+ retrieval_span.end(output=f"{len(chunks)} chunks")
68
+
69
+ # Child span for generation
70
+ gen_span = trace.span(name="generate_answer")
71
+ answer = await generate(question, chunks)
72
+ gen_span.end(output=answer[:200])
73
+
74
+ return answer
75
+ ```
76
+
77
+ ## Tests
78
+
79
+ Test that the observability fixtures work with mocked Langfuse (Py). Test traced() wrapper behavior for both success and error paths (TS).
80
+
81
+ ## Configuration via env
82
+
83
+ | Var | Default | Effect |
84
+ |-----|---------|--------|
85
+ | `LANGFUSE_PUBLIC_KEY` | `pk-lf-local` | Project public key for the Langfuse API |
86
+ | `LANGFUSE_SECRET_KEY` | `sk-lf-local` | Project secret key for the Langfuse API |
87
+ | `LANGFUSE_HOST` | `http://localhost:3000` | Langfuse server URL |
88
+
89
+ These are set in each prototype's `.env.example` and validated at boot via `settings.py` / `config.ts`.
90
+
91
+ ## Viewing traces
92
+
93
+ With `docker compose up`, Langfuse is available at `http://localhost:3000`:
94
+
95
+ - Default login: `admin@local.dev` / `admin`
96
+ - Project: `default` (auto-created via init env vars in `docker-compose.base.yml`)
97
+ - Each request generates a trace with spans for agent steps, tool calls, and LLM invocations
98
+
99
+ ## Swapping to LangSmith
100
+
101
+ For teams already using LangChain/LangGraph heavily, LangSmith is a workable alternative:
102
+
103
+ 1. Replace `langfuse` dependency with `langsmith`
104
+ 2. Replace `get_langfuse()` / `@traced` with LangSmith's `@traceable` decorator
105
+ 3. Set `LANGCHAIN_TRACING_V2=true` and `LANGCHAIN_API_KEY` in env
106
+ 4. Remove Langfuse services from `docker-compose.yml`
107
+
108
+ This is a **multi-file swap** (common module + env config + docker-compose).
109
+
110
+ ## Reference Implementation
111
+
112
+ <details>
113
+ <summary>Python — <code>langfuse.py</code></summary>
114
+
115
+ ```python
116
+ """Langfuse client singleton and trace decorator."""
117
+
118
+ import asyncio
119
+ import functools
120
+ from typing import Any, Callable
121
+
122
+ from langfuse import Langfuse
123
+
124
+ _client: Langfuse | None = None
125
+
126
+
127
+ def get_langfuse(
128
+ *,
129
+ public_key: str | None = None,
130
+ secret_key: str | None = None,
131
+ host: str = "http://localhost:3000",
132
+ ) -> Langfuse:
133
+ """Get or create the Langfuse singleton client."""
134
+ global _client
135
+ if _client is None:
136
+ _client = Langfuse(
137
+ public_key=public_key,
138
+ secret_key=secret_key,
139
+ host=host,
140
+ )
141
+ return _client
142
+
143
+
144
+ def traced(
145
+ name: str | None = None,
146
+ *,
147
+ metadata: dict[str, Any] | None = None,
148
+ ) -> Callable:
149
+ """Decorator that wraps a function in a Langfuse trace span."""
150
+
151
+ def decorator(fn: Callable) -> Callable:
152
+ span_name = name or fn.__name__
153
+
154
+ @functools.wraps(fn)
155
+ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
156
+ client = get_langfuse()
157
+ trace = client.trace(name=span_name, metadata=metadata or {})
158
+ span = trace.span(name=span_name)
159
+ try:
160
+ result = await fn(*args, **kwargs)
161
+ span.end(output=str(result)[:500])
162
+ return result
163
+ except Exception as exc:
164
+ span.end(level="ERROR", status_message=str(exc))
165
+ raise
166
+
167
+ @functools.wraps(fn)
168
+ def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
169
+ client = get_langfuse()
170
+ trace = client.trace(name=span_name, metadata=metadata or {})
171
+ span = trace.span(name=span_name)
172
+ try:
173
+ result = fn(*args, **kwargs)
174
+ span.end(output=str(result)[:500])
175
+ return result
176
+ except Exception as exc:
177
+ span.end(level="ERROR", status_message=str(exc))
178
+ raise
179
+
180
+ if asyncio.iscoroutinefunction(fn):
181
+ return async_wrapper
182
+ return sync_wrapper
183
+
184
+ return decorator
185
+ ```
186
+
187
+ </details>
188
+
189
+ <details>
190
+ <summary>TypeScript — <code>langfuse.ts</code></summary>
191
+
192
+ ```typescript
193
+ /**
194
+ * Langfuse client wrapper and trace utilities.
195
+ *
196
+ * Note: This is a lightweight wrapper. The actual Langfuse SDK should be
197
+ * installed in each prototype that needs it. This module provides the
198
+ * configuration shape and a traced() helper pattern.
199
+ */
200
+
201
+ export interface LangfuseConfig {
202
+ publicKey: string;
203
+ secretKey: string;
204
+ host?: string;
205
+ }
206
+
207
+ interface TraceSpan {
208
+ name: string;
209
+ startTime: number;
210
+ endTime?: number;
211
+ metadata?: Record<string, unknown>;
212
+ status?: "ok" | "error";
213
+ error?: string;
214
+ }
215
+
216
+ let _config: LangfuseConfig | null = null;
217
+
218
+ /**
219
+ * Initialize the Langfuse client configuration.
220
+ */
221
+ export function createLangfuseClient(config: LangfuseConfig): LangfuseConfig {
222
+ _config = config;
223
+ return _config;
224
+ }
225
+
226
+ /**
227
+ * Decorator-style wrapper that traces a function execution.
228
+ *
229
+ * Usage:
230
+ * const result = await traced("my-operation", async () => {
231
+ * return doSomething();
232
+ * });
233
+ */
234
+ export async function traced<T>(
235
+ name: string,
236
+ fn: () => Promise<T>,
237
+ metadata?: Record<string, unknown>,
238
+ ): Promise<T> {
239
+ const span: TraceSpan = {
240
+ name,
241
+ startTime: Date.now(),
242
+ metadata,
243
+ };
244
+
245
+ try {
246
+ const result = await fn();
247
+ span.endTime = Date.now();
248
+ span.status = "ok";
249
+ return result;
250
+ } catch (error) {
251
+ span.endTime = Date.now();
252
+ span.status = "error";
253
+ span.error = error instanceof Error ? error.message : String(error);
254
+ throw error;
255
+ }
256
+ }
257
+ ```
258
+
259
+ </details>
@@ -0,0 +1,171 @@
1
+ # Cross-cutting: Rate Limiting
2
+
3
+ **Concern:** Protect agent endpoints from abuse with per-user and per-IP request throttling.
4
+ **Library:** `slowapi` (Py) / custom fixed-window middleware (TS)
5
+ **Lives in:** Inline below (formerly `common/python/agent_common/ratelimit/` and `common/typescript/src/ratelimit/`)
6
+
7
+ ## What it provides
8
+
9
+ - **Python:** `build_limiter(redis_url, default_limit)` returns a configured `slowapi.Limiter` instance backed by Redis. Integrates with FastAPI via `app.state.limiter` and the `@limiter.limit()` decorator.
10
+ - **TypeScript:** `buildRateLimiter(config)` returns a function `(key: string) => RateLimitResult` that checks a per-key fixed-window counter (the window starts at a key's first request and resets after `windowSeconds`). Currently in-memory; swap to Redis for distributed deployments.
11
+
12
+ ## How to use
13
+
14
+ ### Python (FastAPI + slowapi)
15
+
16
+ ```python
17
+ from agent_common.ratelimit import build_limiter
18
+ from slowapi import _rate_limit_exceeded_handler
19
+ from slowapi.errors import RateLimitExceeded
20
+
21
+ limiter = build_limiter(redis_url="redis://localhost:6379", default_limit="60/minute")
22
+
23
+ app = FastAPI()
24
+ app.state.limiter = limiter
25
+ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
26
+
27
+ @app.post("/query")
28
+ @limiter.limit("30/minute") # Override default for this endpoint
29
+ async def query(request: Request):
30
+ ...
31
+ ```
32
+
33
+ `build_limiter` sets the key function to `get_remote_address` (client IP) and does not accept an override. For per-user limiting, construct the `Limiter` with a custom `key_func` that extracts the user ID from the JWT.
34
+
35
+ ### TypeScript (Hono)
36
+
37
+ ```typescript
38
+ import { buildRateLimiter } from "@agent-deployments/common";
39
+
40
+ const checkLimit = buildRateLimiter({
41
+ redisUrl: "redis://localhost:6379",
42
+ maxRequests: 60,
43
+ windowSeconds: 60,
44
+ });
45
+
46
+ app.use("*", async (c, next) => {
47
+ const key = c.req.header("x-user-id") ?? c.req.header("x-forwarded-for") ?? "anon";
48
+ const result = checkLimit(key);
49
+
50
+ if (!result.allowed) {
51
+ return c.json({ error: "Rate limit exceeded" }, 429);
52
+ }
53
+
54
+ c.header("X-RateLimit-Remaining", String(result.remaining));
55
+ await next();
56
+ });
57
+ ```
58
+
59
+ ## Configuration via env
60
+
61
+ | Var | Default | Effect |
62
+ |-----|---------|--------|
63
+ | `REDIS_URL` | `redis://localhost:6379` | Redis instance for rate limit counters (Py) |
64
+ | Default limit | `60/minute` | Global default; override per-endpoint |
65
+
66
+ ## Suggested limits for agent endpoints
67
+
68
+ | Endpoint type | Suggested limit | Rationale |
69
+ |--------------|----------------|-----------|
70
+ | `/query` (LLM call) | 10-30/minute | LLM calls are expensive and slow |
71
+ | `/documents` (ingest) | 5/minute | Ingestion triggers chunking + embedding |
72
+ | `/health` | Unlimited | Monitoring probes |
73
+
74
+ ## Tests
75
+
76
+ Test limiter creation with Redis URL (Py). Test window behavior, allow/deny, and reset (TS).
77
+
78
+ ## Production considerations
79
+
80
+ - The Python implementation is **production-ready** -- slowapi + Redis handles distributed rate limiting across multiple app instances.
81
+ - The TypeScript implementation is **in-memory** -- fine for single-instance dev, but must be swapped to a Redis-backed store (e.g., `hono-rate-limiter` with `ioredis`) for multi-instance production.
82
+ - Add `Retry-After` and `X-RateLimit-*` headers so clients can back off gracefully.
83
+
84
+ ## Reference Implementation
85
+
86
+ <details>
87
+ <summary>Python — <code>slowapi_setup.py</code></summary>
88
+
89
+ ```python
90
+ """Rate limiter setup using slowapi + Redis."""
91
+
92
+ from slowapi import Limiter
93
+ from slowapi.util import get_remote_address
94
+
95
+
96
+ def build_limiter(
97
+ redis_url: str = "redis://localhost:6379",
98
+ *,
99
+ default_limit: str = "60/minute",
100
+ ) -> Limiter:
101
+ """Build a configured slowapi Limiter backed by Redis."""
102
+ return Limiter(
103
+ key_func=get_remote_address,
104
+ default_limits=[default_limit],
105
+ storage_uri=redis_url,
106
+ )
107
+ ```
108
+
109
+ </details>
110
+
111
+ <details>
112
+ <summary>TypeScript — <code>ratelimit.ts</code></summary>
113
+
114
+ ```typescript
115
+ /**
116
+ * Rate limiting utilities for Hono-based prototypes.
117
+ */
118
+
119
+ export interface RateLimitConfig {
120
+ /** Redis URL for distributed rate limiting */
121
+ redisUrl: string;
122
+ /** Max requests per window */
123
+ maxRequests: number;
124
+ /** Window size in seconds */
125
+ windowSeconds: number;
126
+ }
127
+
128
+ interface RateLimitResult {
129
+ allowed: boolean;
130
+ remaining: number;
131
+ resetAt: number;
132
+ }
133
+
134
+ /**
135
+ * Build a rate limiter function.
136
+ *
137
+ * Returns a function that checks whether a given key (e.g., user ID or IP)
138
+ * is within its rate limit. Uses a simple in-memory fixed window for now;
139
+ * Redis-backed implementation should be added per prototype.
140
+ */
141
+ export function buildRateLimiter(config: RateLimitConfig) {
142
+ const windows = new Map<string, { count: number; resetAt: number }>();
143
+
144
+ return (key: string): RateLimitResult => {
145
+ const now = Date.now();
146
+ const entry = windows.get(key);
147
+
148
+ if (!entry || now >= entry.resetAt) {
149
+ windows.set(key, {
150
+ count: 1,
151
+ resetAt: now + config.windowSeconds * 1000,
152
+ });
153
+ return {
154
+ allowed: true,
155
+ remaining: config.maxRequests - 1,
156
+ resetAt: now + config.windowSeconds * 1000,
157
+ };
158
+ }
159
+
160
+ entry.count++;
161
+ const allowed = entry.count <= config.maxRequests;
162
+ return {
163
+ allowed,
164
+ remaining: Math.max(0, config.maxRequests - entry.count),
165
+ resetAt: entry.resetAt,
166
+ };
167
+ };
168
+ }
169
+ ```
170
+
171
+ </details>
@@ -0,0 +1,261 @@
1
+ # Cross-cutting: Testing Strategy
2
+
3
+ **Concern:** Three-tier test strategy that validates agent behavior without flaky LLM-dependent suites blocking CI.
4
+ **Library:** `pytest` + `deepeval` (Py) / `vitest` (TS)
5
+ **Lives in:** Inline below (formerly `common/python/agent_common/testing/` and `common/typescript/src/testing/`)
6
+
7
+ ## The three tiers
8
+
9
+ ```
10
+ ┌──────────────────────────────────────────────┐
11
+ │ Tier 3: Eval (golden datasets) │ main branch only, real LLM
12
+ │ Faithfulness, relevancy, correctness │
13
+ ├──────────────────────────────────────────────┤
14
+ │ Tier 2: Integration (real LLM) │ main branch only, ANTHROPIC_API_KEY
15
+ │ End-to-end agent flow, actual model calls │
16
+ ├──────────────────────────────────────────────┤
17
+ │ Tier 1: Unit (mocked LLM) │ every PR, fast, deterministic
18
+ │ Schema validation, tool logic, API routes │
19
+ └──────────────────────────────────────────────┘
20
+ ```
21
+
22
+ | Tier | Runs on | LLM | Speed | What it validates |
23
+ |------|---------|-----|-------|-------------------|
24
+ | Unit | Every PR | Mocked | < 10s | Schemas, tool functions, route handlers, chunking logic |
25
+ | Integration | Main only | Real | 30-60s | Full agent pipeline with actual model calls |
26
+ | Eval | Main only | Real | 1-5min | Quality metrics on golden datasets |
27
+
28
+ ## Directory layout
29
+
30
+ Every prototype follows this structure:
31
+
32
+ ```
33
+ # Python
34
+ tests/
35
+ ├── __init__.py
36
+ ├── unit/
37
+ │ ├── __init__.py
38
+ │ ├── test_api.py # Route handler tests
39
+ │ ├── test_schemas.py # Request/response validation
40
+ │ └── test_tools.py # Tool function logic
41
+ ├── integration/
42
+ │ └── __init__.py
43
+ └── evals/
44
+ └── __init__.py
45
+
46
+ # TypeScript
47
+ tests/
48
+ ├── unit/
49
+ │ ├── api.test.ts
50
+ │ ├── schemas.test.ts
51
+ │ └── tools.test.ts
52
+ ```
53
+
54
+ ## Shared test fixtures
55
+
56
+ The shared testing module provides mock LLM helpers so unit tests never hit a real model (see Reference Implementation below for the full source):
57
+
58
+ ### Python
59
+
60
+ ```python
61
+ from agent_common.testing import mock_llm_response, mock_llm_client
62
+
63
+ # Single mock response
64
+ response = mock_llm_response("The answer is 42", model="claude-sonnet-4-6")
65
+ assert response.choices[0].message.content == "The answer is 42"
66
+
67
+ # Mock client that cycles through predefined responses
68
+ client = mock_llm_client(["Response 1", "Response 2"])
69
+ result = await client.chat.completions.create()
70
+ assert result.choices[0].message.content == "Response 1"
71
+ ```
72
+
73
+ ### TypeScript
74
+
75
+ ```typescript
76
+ import { mockLlmResponse, mockLlmClient } from "@agent-deployments/common";
77
+
78
+ const response = mockLlmResponse("The answer is 42");
79
+ expect(response.choices[0].message.content).toBe("The answer is 42");
80
+
81
+ const client = mockLlmClient(["Response 1", "Response 2"]);
82
+ const result = await client.chat.completions.create();
83
+ expect(result.choices[0].message.content).toBe("Response 1");
84
+ ```
85
+
86
+ ## Running tests
87
+
88
+ ```bash
89
+ # Unit tests (every PR)
90
+ make test-unit PROTOTYPE=docs-rag-qa TRACK=python
91
+
92
+ # Integration tests (needs ANTHROPIC_API_KEY)
93
+ make test-integration PROTOTYPE=docs-rag-qa TRACK=python
94
+
95
+ # Eval suite (needs ANTHROPIC_API_KEY)
96
+ make eval PROTOTYPE=docs-rag-qa TRACK=python
97
+
98
+ # All tests
99
+ make test PROTOTYPE=docs-rag-qa TRACK=python
100
+ ```
101
+
102
+ ## CI behavior
103
+
104
+ Defined in `.github/workflows/ci.yml`:
105
+
106
+ - **On PR:** Unit tests run. Integration and eval are skipped (no API key, saves cost).
107
+ - **On main:** Unit + integration + eval all run with `ANTHROPIC_API_KEY` from GitHub Secrets.
108
+ - **Exit code 5** (no tests collected) is treated as success via `|| test $? -eq 5`, handling prototypes where integration/eval tests haven't been written yet.
109
+
110
+ ## Eval datasets
111
+
112
+ Each prototype includes `eval/dataset.jsonl` with golden input/output pairs:
113
+
114
+ ```jsonl
115
+ {"input": "What is MCP?", "expected_output": "MCP is the Model Context Protocol...", "metadata": {}}
116
+ ```
117
+
118
+ ## Security testing (Promptfoo)
119
+
120
+ Each prototype includes `eval/promptfoo.yaml` for red-team scans:
121
+
122
+ ```yaml
123
+ redteam:
124
+ plugins:
125
+ - prompt-injection
126
+ - jailbreak
127
+ - pii
128
+ ```
129
+
130
+ Run via `make security PROTOTYPE=<name>`. Runs on main branch in CI.
131
+
132
+ ## Tests
133
+
134
+ Validate that mock fixtures produce correct response shapes and that the mock client cycles through predefined responses.
135
+
136
+ ## Reference Implementation
137
+
138
+ <details>
139
+ <summary>Python — <code>fixtures.py</code></summary>
140
+
141
+ ```python
142
+ """Shared pytest fixtures and test utilities for agent-deployments prototypes."""
143
+
144
+ from typing import Any
145
+ from unittest.mock import AsyncMock, MagicMock
146
+
147
+
148
+ def mock_llm_response(content: str = "Hello from mock LLM", **kwargs: Any) -> MagicMock:
149
+ """Create a mock LLM response object."""
150
+ message = MagicMock()
151
+ message.content = content
152
+ message.role = "assistant"
153
+ message.tool_calls = kwargs.get("tool_calls", [])
154
+
155
+ choice = MagicMock()
156
+ choice.message = message
157
+ choice.finish_reason = kwargs.get("finish_reason", "stop")
158
+
159
+ response = MagicMock()
160
+ response.choices = [choice]
161
+ response.model = kwargs.get("model", "mock-model")
162
+ response.usage = MagicMock(
163
+ prompt_tokens=kwargs.get("prompt_tokens", 10),
164
+ completion_tokens=kwargs.get("completion_tokens", 20),
165
+ total_tokens=kwargs.get("total_tokens", 30),
166
+ )
167
+
168
+ return response
169
+
170
+
171
+ def mock_llm_client(responses: list[str] | None = None) -> AsyncMock:
172
+ """Create a mock async LLM client that returns predefined responses."""
173
+ _responses = responses or ["Mock response"]
174
+ _call_count = 0
175
+
176
+ async def _create(**kwargs: Any) -> MagicMock:
177
+ nonlocal _call_count
178
+ content = _responses[_call_count % len(_responses)]
179
+ _call_count += 1
180
+ return mock_llm_response(content, **kwargs)
181
+
182
+ client = AsyncMock()
183
+ client.chat.completions.create = _create
184
+ return client
185
+ ```
186
+
187
+ </details>
188
+
189
+ <details>
190
+ <summary>TypeScript — <code>fixtures.ts</code></summary>
191
+
192
+ ```typescript
193
+ /**
194
+ * Shared test utilities for agent-deployments prototypes.
195
+ */
196
+
197
+ export interface MockLlmResponse {
198
+ choices: Array<{
199
+ message: { role: string; content: string; tool_calls?: unknown[] };
200
+ finish_reason: string;
201
+ }>;
202
+ model: string;
203
+ usage: {
204
+ prompt_tokens: number;
205
+ completion_tokens: number;
206
+ total_tokens: number;
207
+ };
208
+ }
209
+
210
+ /**
211
+ * Create a mock LLM response object.
212
+ */
213
+ export function mockLlmResponse(
214
+ content = "Hello from mock LLM",
215
+ options: {
216
+ model?: string;
217
+ finishReason?: string;
218
+ toolCalls?: unknown[];
219
+ } = {},
220
+ ): MockLlmResponse {
221
+ return {
222
+ choices: [
223
+ {
224
+ message: {
225
+ role: "assistant",
226
+ content,
227
+ tool_calls: options.toolCalls ?? [],
228
+ },
229
+ finish_reason: options.finishReason ?? "stop",
230
+ },
231
+ ],
232
+ model: options.model ?? "mock-model",
233
+ usage: {
234
+ prompt_tokens: 10,
235
+ completion_tokens: 20,
236
+ total_tokens: 30,
237
+ },
238
+ };
239
+ }
240
+
241
+ /**
242
+ * Create a mock LLM client that returns predefined responses.
243
+ */
244
+ export function mockLlmClient(responses: string[] = ["Mock response"]) {
245
+ let callCount = 0;
246
+
247
+ return {
248
+ chat: {
249
+ completions: {
250
+ create: async (): Promise<MockLlmResponse> => {
251
+ const content = responses[callCount % responses.length] ?? "";
252
+ callCount++;
253
+ return mockLlmResponse(content);
254
+ },
255
+ },
256
+ },
257
+ };
258
+ }
259
+ ```
260
+
261
+ </details>
@@ -0,0 +1,22 @@
1
+ # Frameworks
2
+
3
+ Agent frameworks covered in this repo. Each file answers: **"How do I implement the pattern?"**
4
+
5
+ | Framework | Language | Best for | Used in |
6
+ |-----------|----------|----------|---------|
7
+ | [LangGraph](langgraph.md) | Python | Stateful graphs, multi-step, multi-agent | research-assistant, code-review, memory, hierarchical |
8
+ | [Pydantic AI](pydantic-ai.md) | Python | Single agents, typed tools, simple ReAct | customer-support, docs-rag-qa, research-assistant |
9
+ | [CrewAI](crewai.md) | Python | Multi-agent crews | ops-crew |
10
+ | [Mastra](mastra.md) | TypeScript | Workflows, memory, multi-agent | Not yet used (documented as TS option) |
11
+ | [Vercel AI SDK](vercel-ai-sdk.md) | TypeScript | Lightweight agents, streaming | All TS tracks |
12
+
13
+ ## How to pick a framework
14
+
15
+ **Python track:**
16
+ - Simple agent with tools → **Pydantic AI** (least boilerplate)
17
+ - Complex state, multi-step, checkpointing → **LangGraph** (best state management)
18
+ - Team of collaborating agents → **CrewAI** (purpose-built for crews)
19
+
20
+ **TypeScript track:**
21
+ - Most use cases → **Vercel AI SDK** (lightweight, production-proven)
22
+ - Need workflows, memory, or multi-agent → **Mastra** (batteries included)