pytest-resilience-agent 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ """pytest-resilience-agent: pytest plugin for LLM resilience testing.
2
+
3
+ Generates and runs tests that prove your LLM application keeps working when
4
+ infrastructure breaks: gateway timeouts, model brownouts, MCP server errors,
5
+ rate limits, partial outages.
6
+ """
7
+
8
+ __version__ = "0.2.0"
@@ -0,0 +1,174 @@
1
+ """Chaos scenario controller - real implementation.
2
+
3
+ The controller owns a single ``respx.MockRouter`` and one scenario object
4
+ per requested name. On enter, it starts the mock and applies every
5
+ scenario. On exit, it reverts each scenario (so cleanup is deterministic)
6
+ and stops the mock.
7
+
8
+ OpenTelemetry: every scenario activation / deactivation / call interception
9
+ emits a span on the ``pytest-resilience-agent`` tracer so downstream
10
+ observability tooling sees the chaos events alongside real spans.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass, field
16
+ from typing import Any
17
+
18
+ import pytest
19
+ import respx
20
+ from opentelemetry import trace
21
+
22
+ from pytest_resilience_agent.scenarios import (
23
+ Scenario,
24
+ build_scenario,
25
+ registered_scenarios,
26
+ )
27
+
28
+ _TRACER = trace.get_tracer("pytest-resilience-agent")
29
+
30
+
31
@dataclass
class ChaosEvent:
    """A single chaos occurrence, kept so the test report can list it."""

    # Name of the scenario this event belongs to.
    scenario: str
    # Human-readable description of what happened.
    detail: str
    # Optional extra key/value data; every instance gets its own fresh dict.
    metadata: dict[str, Any] = field(default_factory=dict)
38
+
39
+
40
class ChaosController:
    """Apply a set of chaos scenarios against a target HTTP host for one test.

    Two mutually exclusive modes are supported:

    * ``scenarios=[...]`` - a single window: every named scenario is applied
      by ``enter()`` and reverted by ``exit()``.
    * ``turns=[[...], [...]]`` - one scenario list per conversation turn:
      ``enter()`` applies turn 0 and ``next_turn()`` swaps in the next one.

    Default target URL is ``https://gateway.local/v1/chat/completions``.
    Override with ``target_gateway_url`` / ``target_lark_url`` when the test
    fixtures know real URLs.
    """

    DEFAULT_GATEWAY_URL = "https://gateway.local/v1/chat/completions"
    DEFAULT_LARK_URL = "https://lark.local"

    def __init__(
        self,
        scenarios: list[str] | None = None,
        target_gateway_url: str | None = None,
        target_lark_url: str | None = None,
        turns: list[list[str]] | None = None,
    ) -> None:
        """Validate the marker arguments and prepare (but do not start) the mock.

        Args:
            scenarios: Scenario names for single-window mode.
            target_gateway_url: Gateway URL to target; falls back to
                ``DEFAULT_GATEWAY_URL`` when empty or ``None``.
            target_lark_url: Lark URL to target; falls back to
                ``DEFAULT_LARK_URL`` when empty or ``None``.
            turns: Per-turn scenario name lists for multi-turn mode.

        Raises:
            pytest.UsageError: If both ``scenarios`` and ``turns`` are given,
                if ``turns`` is empty, or if ``turns`` names a scenario that
                is not in ``registered_scenarios()``.
        """
        if scenarios and turns is not None:
            raise pytest.UsageError(
                "resilience marker accepts either scenarios= or turns=, not both"
            )
        if turns is not None:
            if not turns:
                raise pytest.UsageError("turns= must list at least one turn")
            known = set(registered_scenarios())
            unknown = [n for turn in turns for n in turn if n not in known]
            if unknown:
                raise pytest.UsageError(
                    f"unknown chaos scenario(s) in turns=: {sorted(set(unknown))}. "
                    f"Registered: {sorted(known)}"
                )
        self.scenario_names = list(scenarios) if scenarios else []
        self.turns = turns
        self.target_gateway_url = target_gateway_url or self.DEFAULT_GATEWAY_URL
        self.target_lark_url = target_lark_url or self.DEFAULT_LARK_URL
        self.events: list[ChaosEvent] = []
        self._mock = respx.mock(assert_all_called=False, assert_all_mocked=False)
        # Scenario objects that are currently applied (cleared on revert).
        self._scenarios: list[Scenario] = []
        self._turn_index = 0

    @property
    def current_turn(self) -> int:
        """Zero-based index of the active conversation turn (0 in single-window mode)."""
        return self._turn_index

    def _apply_scenarios(self, names: list[str]) -> None:
        """Build, apply and record one set of scenarios against the live mock."""
        for name in names:
            target = self._target_for(name)
            scenario = build_scenario(name, self._mock, target)
            result = scenario.apply()
            # Only track the scenario once apply() has returned, so a failed
            # apply() is never reverted later.
            self._scenarios.append(scenario)
            self.events.append(
                ChaosEvent(
                    scenario=result.scenario,
                    detail=result.detail,
                    metadata=result.metadata,
                )
            )
            with _TRACER.start_as_current_span(f"chaos.apply.{name}") as span:
                for k, v in result.metadata.items():
                    span.set_attribute(f"chaos.{k}", v)

    def _apply_turn(self, index: int) -> None:
        """Activate the scenarios for one conversation turn and mark the boundary."""
        assert self.turns is not None
        self._turn_index = index
        names = list(self.turns[index])
        with _TRACER.start_as_current_span(f"chaos.turn.{index}") as span:
            span.set_attribute("chaos.turn.scenarios", names)
            # Apply inside the turn span so the chaos.apply.* spans nest under it.
            self._apply_scenarios(names)

    def _revert_current(self) -> None:
        """Revert every currently applied scenario, newest first, and record each result."""
        for scenario in reversed(self._scenarios):
            result = scenario.revert()
            self.events.append(
                ChaosEvent(
                    scenario=result.scenario,
                    detail=result.detail,
                    metadata=result.metadata,
                )
            )
            with _TRACER.start_as_current_span(f"chaos.revert.{result.scenario}") as span:
                for k, v in result.metadata.items():
                    span.set_attribute(f"chaos.{k}", v)
        self._scenarios.clear()

    def next_turn(self) -> None:
        """Advance to the next conversation turn.

        Reverts this turn's chaos, drops the mock's routes and call history,
        then applies the next turn's scenarios. Each turn builds brand-new
        Scenario instances, and clearing the mock removes the previous turn's
        routes and accumulated call records, so a long conversation does not
        grow them unbounded.

        Raises:
            pytest.UsageError: If the controller was not created with
                ``turns=`` or the last defined turn is already active.
        """
        if self.turns is None:
            raise pytest.UsageError("next_turn() requires turns= on the resilience marker")
        if self._turn_index + 1 >= len(self.turns):
            raise pytest.UsageError(
                f"next_turn() advanced past the last defined turn "
                f"(have {len(self.turns)} turns, already at turn {self._turn_index})"
            )
        self._revert_current()
        self._mock.reset()  # drop accumulated call records
        self._mock.clear()  # drop the previous turn's routes
        self._apply_turn(self._turn_index + 1)

    def enter(self) -> None:
        """Start respx and install scenarios for turn 0 (or the single window).

        If installing a scenario fails, the scenarios applied so far are
        reverted and the mock is stopped before the exception propagates, so
        a failed setup never leaves the mock router running.
        """
        self._mock.start()
        try:
            if self.turns is not None:
                self._apply_turn(0)
            else:
                self._apply_scenarios(self.scenario_names)
        except BaseException:
            # Roll back the partial setup; callers that never got a working
            # controller cannot be expected to call exit().
            try:
                self._revert_current()
            finally:
                self._mock.stop()
            raise

    def exit(self) -> None:
        """Revert every scenario in LIFO order and stop respx.

        The mock is stopped even when a scenario's revert raises.
        """
        try:
            self._revert_current()
        finally:
            self._mock.stop()

    def record(self, scenario: str, detail: str, **metadata: Any) -> None:
        """Append a custom chaos event from inside user code."""
        self.events.append(ChaosEvent(scenario=scenario, detail=detail, metadata=metadata))

    def _target_for(self, scenario_name: str) -> str:
        """Pick gateway URL or Lark URL depending on which layer the scenario hits."""
        # Only "mcp_error" is routed to the Lark URL; everything else hits the gateway.
        if scenario_name == "mcp_error":
            return self.target_lark_url
        return self.target_gateway_url
172
+
173
+
174
+ __all__ = ["ChaosController", "ChaosEvent", "registered_scenarios"]
@@ -0,0 +1,199 @@
1
+ """Command-line tool entry point.
2
+
3
+ Subcommands:
4
+
5
+ - ``discover`` list failing tests via Lark MCP (or local mock)
6
+ - ``generate`` synthesize resilience test files from those failures
7
+ - ``run`` execute the generated tests via pytest
8
+ - ``report`` push resolution status back to Lark
9
+ - ``scenarios`` print the built-in chaos scenario registry
10
+
11
+ Install with ``pip install -e .`` and call ``pytest-resilience-agent``.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import os
18
+ import subprocess
19
+ import sys
20
+ from pathlib import Path
21
+
22
+ from rich.console import Console
23
+ from rich.table import Table
24
+
25
+ from pytest_resilience_agent.generator import generate_test
26
+ from pytest_resilience_agent.lark_mcp import LarkMCPClient
27
+ from pytest_resilience_agent.scenarios import registered_scenarios
28
+
29
# Lark MCP endpoint used when neither --lark-url nor LARK_MCP_URL supplies one.
_DEFAULT_LARK_URL = "http://localhost:8801"
# Project name the subcommands fall back to when --project is omitted.
_DEFAULT_PROJECT = "demo"
# Default directory for `generate --out` and `run --path`.
_DEFAULT_GENERATED_DIR = Path("generated_resilience_tests")
32
+
33
+
34
def _console() -> Console:
    """Create the rich ``Console`` that the CLI subcommands print through.

    The console is built with ``force_terminal=True``.
    NOTE(review): the original author describes this as making UTF-8 output
    work cleanly on Windows cp1252 terminals - confirm against rich's docs.
    """
    cli_console = Console(force_terminal=True)
    return cli_console
37
+
38
+
39
def _lark_url(args: argparse.Namespace) -> str:
    """Resolve the Lark MCP base URL for a parsed command line.

    Precedence: the ``--lark-url`` value, then the ``LARK_MCP_URL``
    environment variable, then the built-in default. Empty values are
    skipped, exactly like missing ones.
    """
    for candidate in (args.lark_url, os.environ.get("LARK_MCP_URL")):
        if candidate:
            return candidate
    return _DEFAULT_LARK_URL
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # discover
45
+ # ---------------------------------------------------------------------------
46
+
47
+
48
def cmd_discover(args: argparse.Namespace) -> int:
    """Fetch the project's failing tests from the Lark MCP server and print them.

    Returns 0 in every non-exceptional case, including when the server
    reports no failing tests.
    """
    console = _console()
    lark = LarkMCPClient(base_url=_lark_url(args))
    try:
        failing = lark.list_failing_tests(args.project)
    finally:
        # Release the client whether or not the lookup succeeded.
        lark.close()

    table = Table(title=f"Failing tests in {args.project!r}")
    for heading, style in (("Name", "cyan"), ("Last failure", "red"), ("Path", "dim")):
        table.add_column(heading, style=style)
    for entry in failing:
        table.add_row(entry.name, entry.last_failure or "", entry.path)
    console.print(table)

    if not failing:
        console.print("[yellow]No failing tests reported.[/]")
    return 0
66
+
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # generate
70
+ # ---------------------------------------------------------------------------
71
+
72
+
73
def cmd_generate(args: argparse.Namespace) -> int:
    """Write one resilience test file per failing test reported by Lark.

    Returns 0 both when files were written and when there was nothing to
    generate.
    """
    console = _console()
    lark = LarkMCPClient(base_url=_lark_url(args))
    try:
        failing = lark.list_failing_tests(args.project)
    finally:
        # Release the client whether or not the lookup succeeded.
        lark.close()

    if not failing:
        console.print("[yellow]No failing tests, nothing to generate.[/]")
        return 0

    destination = Path(args.out)
    table = Table(title=f"Generated {len(failing)} resilience test(s)")
    for heading, style in (("Test name", "cyan"), ("Scenarios", "green"), ("File", "dim")):
        table.add_column(heading, style=style)
    for entry in failing:
        artefact = generate_test(entry.name, entry.last_failure or "", destination)
        table.add_row(entry.name, ", ".join(artefact.scenarios), str(artefact.file_path))
    console.print(table)
    console.print(f"[green]Wrote {len(failing)} file(s) under {destination}[/]")
    return 0
95
+
96
+
97
+ # ---------------------------------------------------------------------------
98
+ # run
99
+ # ---------------------------------------------------------------------------
100
+
101
+
102
def cmd_run(args: argparse.Namespace) -> int:
    """Run pytest on ``args.path`` in a child interpreter.

    Returns 2 when the path does not exist; otherwise the child process's
    return code. ``--resilience-record=<path>`` is forwarded only when
    ``args.record`` is set.
    """
    target = Path(args.path)
    if not target.exists():
        _console().print(f"[red]No such path: {target}[/]")
        return 2

    record_flag = [f"--resilience-record={args.record}"] if args.record else []
    command = [sys.executable, "-X", "utf8", "-m", "pytest", "-v", str(target), *record_flag]
    return subprocess.run(command).returncode
113
+
114
+
115
+ # ---------------------------------------------------------------------------
116
+ # report
117
+ # ---------------------------------------------------------------------------
118
+
119
+
120
def cmd_report(args: argparse.Namespace) -> int:
    """Report one test as resolved to Lark, then print a confirmation.

    Returns 0 once the report call has completed.
    """
    lark = LarkMCPClient(base_url=_lark_url(args))
    try:
        resolution = {
            "project": args.project,
            "test_name": args.test_name,
            "pytest_path": args.pytest_path,
        }
        lark.report_resolved(**resolution)
    finally:
        # Release the client whether or not the report succeeded.
        lark.close()
    _console().print(f"[green]Reported resolution for {args.test_name}[/]")
    return 0
133
+
134
+
135
+ # ---------------------------------------------------------------------------
136
+ # scenarios
137
+ # ---------------------------------------------------------------------------
138
+
139
+
140
def cmd_scenarios(args: argparse.Namespace) -> int:
    """Print a one-column table of every registered chaos scenario name.

    ``args`` is accepted for a uniform subcommand signature and is not read.
    Always returns 0.
    """
    console = _console()
    listing = Table(title="Registered chaos scenarios")
    listing.add_column("Name", style="cyan")
    for scenario_name in registered_scenarios():
        listing.add_row(scenario_name)
    console.print(listing)
    return 0
149
+
150
+
151
+ # ---------------------------------------------------------------------------
152
+ # parser
153
+ # ---------------------------------------------------------------------------
154
+
155
+
156
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser: one global option plus five required subcommands.

    Each subparser stores its handler in ``func`` via ``set_defaults`` so
    the caller can dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(prog="pytest-resilience-agent")
    root.add_argument(
        "--lark-url",
        default=None,
        help="Lark MCP base URL (env LARK_MCP_URL, default http://localhost:8801)",
    )
    commands = root.add_subparsers(dest="command", required=True)
    default_dir = str(_DEFAULT_GENERATED_DIR)

    discover = commands.add_parser("discover", help="list failing tests via Lark")
    discover.add_argument("--project", default=_DEFAULT_PROJECT)
    discover.set_defaults(func=cmd_discover)

    generate = commands.add_parser(
        "generate", help="generate resilience tests from Lark failures"
    )
    generate.add_argument("--project", default=_DEFAULT_PROJECT)
    generate.add_argument("--out", default=default_dir)
    generate.set_defaults(func=cmd_generate)

    run = commands.add_parser("run", help="pytest the generated tests")
    run.add_argument("--path", default=default_dir)
    run.add_argument("--record", default=None, help="JSON timeline path")
    run.set_defaults(func=cmd_run)

    report = commands.add_parser("report", help="push a resolution back to Lark")
    report.add_argument("--project", default=_DEFAULT_PROJECT)
    report.add_argument("--test-name", required=True)
    report.add_argument("--pytest-path", required=True)
    report.set_defaults(func=cmd_report)

    scenarios = commands.add_parser("scenarios", help="list registered chaos scenarios")
    scenarios.set_defaults(func=cmd_scenarios)

    return root
190
+
191
+
192
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and run the selected subcommand.

    ``argv=None`` lets argparse read the process arguments. A handler that
    returns a falsy value (for example ``None``) is reported as exit code 0.
    """
    namespace = build_parser().parse_args(argv)
    exit_code = namespace.func(namespace)
    return int(exit_code or 0)


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,75 @@
1
+ """Thin client around TrueFoundry AI Gateway.
2
+
3
+ The gateway is an OpenAI-compatible proxy that handles fallbacks, retries,
4
+ and multi-model routing on its side. We just send chat completions through
5
+ it and let the gateway config decide what happens when upstream models error.
6
+
7
+ Docs: https://www.truefoundry.com/docs/ai-gateway/intro-to-llm-gateway
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ from typing import Any
14
+
15
+ import httpx
16
+
17
+
18
@dataclass
class ChatReply:
    """The small result object that ``AIGatewayClient.chat`` hands back."""

    # Text of the first choice's message ("" when there is none).
    content: str
    # Model name reported in the response, or the requested one as fallback.
    model: str
    # The full decoded JSON response body.
    raw: dict[str, Any]
25
+
26
+
27
class AIGatewayClient:
    """Send chat completions through a TrueFoundry-style AI Gateway.

    The gateway is OpenAI-compatible, so callers can keep the OpenAI SDK
    interface in their own code and swap the base URL. We keep a thin
    httpx client here to make timeout and error injection easy for tests.

    The client can be used as a context manager; leaving the ``with`` block
    calls ``close()``.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str | None = None,
        timeout: float = 30.0,
    ) -> None:
        """Create the underlying ``httpx.Client``.

        Args:
            base_url: Gateway base URL; trailing slashes are stripped.
            api_key: When truthy, sent as an ``Authorization: Bearer`` header.
            timeout: Timeout passed to ``httpx.Client``.
        """
        self.base_url = base_url.rstrip("/")
        self._timeout = timeout
        headers: dict[str, str] = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            # Version segment matches the package release (0.2.0); it was
            # previously left at 0.1.0.
            "User-Agent": "pytest-resilience-agent/0.2.0 (httpx)",
        }
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        self._client = httpx.Client(headers=headers, timeout=timeout)

    def __enter__(self) -> AIGatewayClient:
        """Return the client itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the underlying httpx client; exceptions are not suppressed."""
        self.close()

    def chat(
        self,
        messages: list[dict[str, str]],
        model: str = "gateway-default",
        **kwargs: Any,
    ) -> ChatReply:
        """Send a chat completion request through the gateway.

        The gateway's own config decides fallback chain and retries.

        Args:
            messages: Chat messages, sent as the ``messages`` field.
            model: Sent as the ``model`` field.
            **kwargs: Extra fields merged into the JSON payload.

        Returns:
            A ``ChatReply`` with the first choice's message content (``""``
            when there are no choices or the content is missing/null), the
            model reported by the response (falling back to ``model``), and
            the raw decoded body.

        Raises:
            httpx.HTTPError: If the request fails or the response status is
                an error (via ``raise_for_status``).
        """
        payload = {"model": model, "messages": messages, **kwargs}
        response = self._client.post(f"{self.base_url}/chat/completions", json=payload)
        response.raise_for_status()
        data = response.json()
        choices = data.get("choices") or []
        content = ""
        if choices:
            # A choice may carry "message": null; treat that like a missing
            # message instead of failing on None.get(...).
            message = choices[0].get("message") or {}
            content = message.get("content") or ""
        return ChatReply(content=content, model=data.get("model", model), raw=data)

    def close(self) -> None:
        """Close the underlying httpx client."""
        self._client.close()
@@ -0,0 +1,118 @@
1
+ """Generate resilience test files from observed failures.
2
+
3
+ Mirrors the Lark sponsor challenge example ("coding agent setup that
4
+ listens for failing tests and creates PRs to fix them"). For each
5
+ failing test reported by Lark, the generator picks the relevant chaos
6
+ scenario(s) and emits a runnable pytest file that reproduces the
7
+ failure under controlled conditions.
8
+
9
+ The mapping is deterministic and rule-based on the failure string.
10
+ That keeps generation auditable: the engineer can read the rule that
11
+ matched and tweak it. A future iteration can swap in an LLM-based
12
+ classifier behind the same Generator interface.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import re
18
+ from dataclasses import dataclass
19
+ from pathlib import Path
20
+
21
# Rule table for ``pick_scenarios``. Every rule whose pattern is found in the
# failure text contributes its scenarios (matching does NOT stop at the first
# hit); the order here only fixes the order of the resulting list.
_RULES: list[tuple[re.Pattern[str], list[str]]] = [
    (re.compile(r"\b429\b|too many requests", re.I), ["rate_limit"]),
    (re.compile(r"\b502\b|bad gateway", re.I), ["llm_5xx"]),
    (re.compile(r"\b503\b|service unavailable", re.I), ["partial_outage"]),
    # Also catches "timed out", "time-out" and "time out", not only "timeout".
    (re.compile(r"\b504\b|timed?[\s_-]?out", re.I), ["llm_timeout"]),
    (re.compile(r"\b402\b|quota|cost", re.I), ["cost_exceeded"]),
    (re.compile(r"connect.*(error|refused|reset)", re.I), ["network_blip"]),
    (re.compile(r"empty|truncated|stream.*drop", re.I), ["stream_stall"]),
    (re.compile(r"mcp|tool.*error|jsonrpc", re.I), ["mcp_error"]),
    (re.compile(r"wrong.*model|model.*mismatch", re.I), ["wrong_model_returned"]),
]


@dataclass
class GeneratedTest:
    """Result of generating one test file from a Lark failure."""

    # Name of the failing test as reported (unsanitised).
    test_name: str
    # Scenario names chosen for the generated test.
    scenarios: list[str]
    # Where the generated file was written.
    file_path: Path
    # Full text of the generated file.
    source: str


def pick_scenarios(failure_text: str) -> list[str]:
    """Return the scenario names matching a free-text failure.

    Every rule in ``_RULES`` whose pattern is found in ``failure_text``
    contributes its scenarios, in rule order, so a text such as
    ``"504 after connect error"`` yields more than one scenario.

    Falls back to ``llm_5xx`` if nothing matches; that is the most generic
    transient-failure scenario and a sane default.
    """
    matched = [
        name
        for pattern, names in _RULES
        if pattern.search(failure_text)
        for name in names
    ]
    return matched or ["llm_5xx"]
56
+
57
+
58
# Source template for a generated test module. ``test_name`` and
# ``failure_text`` land inside the module docstring, so callers must pass them
# through ``_docstring_safe`` first.
_TEMPLATE = '''"""Auto-generated resilience test for {test_name}.

Generated by pytest-resilience-agent from a Lark-reported failure.

Original failure text:
{failure_text}

Chosen scenarios: {scenarios}
"""

from __future__ import annotations

import httpx
import pytest


@pytest.mark.resilience(scenarios={scenarios!r})
def {function_name}(chaos):
    """Reproduce the original failure under controlled chaos.

    The chaos fixture installs the named scenarios at the HTTP layer.
    Replace the body below with the call from your real agent code; the
    boilerplate POST below just exercises the gateway to prove the
    scenarios are active.
    """
    with httpx.Client() as client:
        response = client.post(chaos.target_gateway_url, json={{"q": 1}})
    # When the scenarios are correct, the response status / shape
    # mirrors the original failure. Add an assertion on the contract
    # your real agent should hold:
    # assert agent.handle_response(response).fallback_used
    assert response is not None
'''


def _docstring_safe(text: str) -> str:
    """Escape *text* so it can be embedded in a triple-double-quoted literal.

    Backslashes and double quotes are backslash-escaped and NUL characters
    are written as an escape sequence, so the text can neither terminate
    the surrounding docstring nor be reinterpreted as escape sequences.
    """
    # Backslashes first, otherwise the escapes added for quotes get doubled.
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    return escaped.replace("\x00", "\\x00")


def generate_test(
    test_name: str,
    failure_text: str,
    out_dir: Path,
) -> GeneratedTest:
    """Write a resilience test file for one failure. Return the artefact.

    Args:
        test_name: Reported test name; sanitised into the file and function
            name (non ``[a-zA-Z0-9_]`` runs become ``_``, ``test_`` prefix
            added when missing).
        failure_text: Free-text failure used to pick scenarios; also quoted
            in the generated module docstring.
        out_dir: Directory to write into; created if missing.

    Returns:
        A ``GeneratedTest`` describing the written file.
    """
    out_dir = Path(out_dir)
    scenarios = pick_scenarios(failure_text)
    safe_name = re.sub(r"[^a-zA-Z0-9_]+", "_", test_name).strip("_") or "auto_test"
    if not safe_name.startswith("test_"):
        safe_name = f"test_{safe_name}"
    file_path = out_dir / f"{safe_name}_resilience.py"
    function_name = safe_name + "_resilience"
    # Both values come from an external service; escape them so a stray
    # triple quote or backslash cannot break out of the generated docstring.
    source = _TEMPLATE.format(
        test_name=_docstring_safe(repr(test_name)),
        failure_text=_docstring_safe(failure_text),
        scenarios=scenarios,
        function_name=function_name,
    )
    out_dir.mkdir(parents=True, exist_ok=True)
    file_path.write_text(source, encoding="utf-8")
    return GeneratedTest(
        test_name=test_name,
        scenarios=scenarios,
        file_path=file_path,
        source=source,
    )