pytest-resilience-agent 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytest_resilience_agent/__init__.py +8 -0
- pytest_resilience_agent/chaos.py +174 -0
- pytest_resilience_agent/cli.py +199 -0
- pytest_resilience_agent/gateway.py +75 -0
- pytest_resilience_agent/generator.py +118 -0
- pytest_resilience_agent/lark_mcp.py +131 -0
- pytest_resilience_agent/plugin.py +179 -0
- pytest_resilience_agent/scenarios.py +471 -0
- pytest_resilience_agent-0.2.0.dist-info/METADATA +278 -0
- pytest_resilience_agent-0.2.0.dist-info/RECORD +13 -0
- pytest_resilience_agent-0.2.0.dist-info/WHEEL +4 -0
- pytest_resilience_agent-0.2.0.dist-info/entry_points.txt +5 -0
- pytest_resilience_agent-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""pytest-resilience-agent: pytest plugin for LLM resilience testing.
|
|
2
|
+
|
|
3
|
+
Generates and runs tests that prove your LLM application keeps working when
|
|
4
|
+
infrastructure breaks: gateway timeouts, model brownouts, MCP server errors,
|
|
5
|
+
rate limits, partial outages.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.2.0"
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""Chaos scenario controller - real implementation.
|
|
2
|
+
|
|
3
|
+
The controller owns a single ``respx.MockRouter`` and one scenario object
|
|
4
|
+
per requested name. On enter, it starts the mock and applies every
|
|
5
|
+
scenario. On exit, it reverts each scenario (so cleanup is deterministic)
|
|
6
|
+
and stops the mock.
|
|
7
|
+
|
|
8
|
+
OpenTelemetry: every scenario activation / deactivation / call interception
|
|
9
|
+
emits a span on the ``pytest-resilience-agent`` tracer so downstream
|
|
10
|
+
observability tooling sees the chaos events alongside real spans.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
import pytest
|
|
19
|
+
import respx
|
|
20
|
+
from opentelemetry import trace
|
|
21
|
+
|
|
22
|
+
from pytest_resilience_agent.scenarios import (
|
|
23
|
+
Scenario,
|
|
24
|
+
build_scenario,
|
|
25
|
+
registered_scenarios,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
_TRACER = trace.get_tracer("pytest-resilience-agent")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class ChaosEvent:
    """A single chaos occurrence kept so it can be surfaced in the test report."""

    # Name of the scenario the event belongs to.
    scenario: str
    # Description text for the event.
    detail: str
    # Extra key/value data; every instance gets its own fresh dict.
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ChaosController:
    """Apply a set of chaos scenarios against a target HTTP host for one test.

    The controller works in one of two modes, chosen at construction time:

    * single-window mode (``scenarios=``): every named scenario is applied by
      ``enter()`` and stays installed until ``exit()``;
    * multi-turn mode (``turns=``): ``enter()`` applies turn 0 and every
      ``next_turn()`` call swaps in the scenarios of the following turn.

    Default target URL is ``https://gateway.local/v1/chat/completions``.
    Override with ``target_gateway_url`` / ``target_lark_url`` when the test
    fixtures know real URLs.
    """

    DEFAULT_GATEWAY_URL = "https://gateway.local/v1/chat/completions"
    DEFAULT_LARK_URL = "https://lark.local"

    def __init__(
        self,
        scenarios: list[str] | None = None,
        target_gateway_url: str | None = None,
        target_lark_url: str | None = None,
        turns: list[list[str]] | None = None,
    ) -> None:
        """Validate the marker arguments and prepare an (unstarted) respx router.

        Args:
            scenarios: scenario names for single-window mode.
            target_gateway_url: URL used for every scenario except ``mcp_error``;
                falls back to ``DEFAULT_GATEWAY_URL`` when falsy.
            target_lark_url: URL used for the ``mcp_error`` scenario; falls back
                to ``DEFAULT_LARK_URL`` when falsy.
            turns: one list of scenario names per conversation turn.

        Raises:
            pytest.UsageError: if both ``scenarios`` and ``turns`` are given, if
                ``turns`` is empty, or if ``turns`` names an unregistered scenario.
        """
        if scenarios and turns is not None:
            raise pytest.UsageError(
                "resilience marker accepts either scenarios= or turns=, not both"
            )
        if turns is not None:
            if not turns:
                raise pytest.UsageError("turns= must list at least one turn")
            known = set(registered_scenarios())
            unknown = [n for turn in turns for n in turn if n not in known]
            if unknown:
                raise pytest.UsageError(
                    f"unknown chaos scenario(s) in turns=: {sorted(set(unknown))}. "
                    f"Registered: {sorted(known)}"
                )
        self.scenario_names = list(scenarios) if scenarios else []
        self.turns = turns
        self.target_gateway_url = target_gateway_url or self.DEFAULT_GATEWAY_URL
        self.target_lark_url = target_lark_url or self.DEFAULT_LARK_URL
        self.events: list[ChaosEvent] = []
        self._mock = respx.mock(assert_all_called=False, assert_all_mocked=False)
        # True only between a successful ``_mock.start()`` and the matching stop,
        # so the router is stopped exactly once whichever path shuts it down.
        self._mock_started = False
        self._scenarios: list[Scenario] = []
        self._turn_index = 0

    @property
    def current_turn(self) -> int:
        """Zero-based index of the active conversation turn (0 in single-window mode)."""
        return self._turn_index

    def _apply_scenarios(self, names: list[str]) -> None:
        """Build, apply and record one set of scenarios against the live mock."""
        for name in names:
            target = self._target_for(name)
            scenario = build_scenario(name, self._mock, target)
            result = scenario.apply()
            self._scenarios.append(scenario)
            self.events.append(
                ChaosEvent(
                    scenario=result.scenario,
                    detail=result.detail,
                    metadata=result.metadata,
                )
            )
            with _TRACER.start_as_current_span(f"chaos.apply.{name}") as span:
                for k, v in result.metadata.items():
                    span.set_attribute(f"chaos.{k}", v)

    def _apply_turn(self, index: int) -> None:
        """Activate the scenarios for one conversation turn and mark the boundary."""
        # Internal invariant (callers check ``turns`` first); kept for type narrowing.
        assert self.turns is not None
        self._turn_index = index
        names = list(self.turns[index])
        with _TRACER.start_as_current_span(f"chaos.turn.{index}") as span:
            span.set_attribute("chaos.turn.scenarios", names)
            # Apply inside the turn span so the chaos.apply.* spans nest under it.
            self._apply_scenarios(names)

    def _revert_current(self) -> None:
        """Revert (record stats for) every scenario active in the current turn."""
        for scenario in reversed(self._scenarios):
            result = scenario.revert()
            self.events.append(
                ChaosEvent(
                    scenario=result.scenario,
                    detail=result.detail,
                    metadata=result.metadata,
                )
            )
            with _TRACER.start_as_current_span(f"chaos.revert.{result.scenario}") as span:
                for k, v in result.metadata.items():
                    span.set_attribute(f"chaos.{k}", v)
        self._scenarios.clear()

    def _stop_mock(self) -> None:
        """Stop respx if, and only if, this controller currently has it started."""
        if self._mock_started:
            # Flip the flag first so a failing stop() is not retried later.
            self._mock_started = False
            self._mock.stop()

    def next_turn(self) -> None:
        """Advance to the next conversation turn: revert this turn's chaos, drop the
        mock's routes and call history, then apply the next turn's scenarios.

        Counters reset because each turn builds brand-new Scenario instances (each
        starts at zero calls). Clearing the mock removes the previous turn's routes
        and the accumulated call records, so a long conversation does not grow them
        unbounded.

        Raises:
            pytest.UsageError: in single-window mode, or when already on the last turn.
        """
        if self.turns is None:
            raise pytest.UsageError("next_turn() requires turns= on the resilience marker")
        if self._turn_index + 1 >= len(self.turns):
            raise pytest.UsageError(
                f"next_turn() advanced past the last defined turn "
                f"(have {len(self.turns)} turns, already at turn {self._turn_index})"
            )
        self._revert_current()
        self._mock.reset()  # drop accumulated call records
        self._mock.clear()  # drop the previous turn's routes
        self._apply_turn(self._turn_index + 1)

    def enter(self) -> None:
        """Start respx and install scenarios for turn 0 (or the single window).

        If installing a scenario fails, respx is stopped again before the error
        propagates so a half-initialised controller does not leave the HTTP
        layer patched.
        """
        self._mock.start()
        self._mock_started = True
        try:
            if self.turns is not None:
                self._apply_turn(0)
            else:
                self._apply_scenarios(self.scenario_names)
        except BaseException:
            self._stop_mock()
            raise

    def exit(self) -> None:
        """Revert every scenario in LIFO order and stop respx.

        respx is stopped even when a scenario's ``revert()`` raises; the original
        exception still propagates.
        """
        try:
            self._revert_current()
        finally:
            self._stop_mock()

    def record(self, scenario: str, detail: str, **metadata: Any) -> None:
        """Append a custom chaos event from inside user code."""
        self.events.append(ChaosEvent(scenario=scenario, detail=detail, metadata=metadata))

    def _target_for(self, scenario_name: str) -> str:
        """Pick gateway URL or Lark URL depending on which layer the scenario hits."""
        if scenario_name == "mcp_error":
            return self.target_lark_url
        return self.target_gateway_url
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
__all__ = ["ChaosController", "ChaosEvent", "registered_scenarios"]
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""Command-line tool entry point.
|
|
2
|
+
|
|
3
|
+
Subcommands:
|
|
4
|
+
|
|
5
|
+
- ``discover`` list failing tests via Lark MCP (or local mock)
|
|
6
|
+
- ``generate`` synthesize resilience test files from those failures
|
|
7
|
+
- ``run`` execute the generated tests via pytest
|
|
8
|
+
- ``report`` push resolution status back to Lark
|
|
9
|
+
- ``scenarios`` print the built-in chaos scenario registry
|
|
10
|
+
|
|
11
|
+
Install with ``pip install -e .`` and call ``pytest-resilience-agent``.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import os
|
|
18
|
+
import subprocess
|
|
19
|
+
import sys
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from rich.console import Console
|
|
23
|
+
from rich.table import Table
|
|
24
|
+
|
|
25
|
+
from pytest_resilience_agent.generator import generate_test
|
|
26
|
+
from pytest_resilience_agent.lark_mcp import LarkMCPClient
|
|
27
|
+
from pytest_resilience_agent.scenarios import registered_scenarios
|
|
28
|
+
|
|
29
|
+
_DEFAULT_LARK_URL = "http://localhost:8801"
|
|
30
|
+
_DEFAULT_PROJECT = "demo"
|
|
31
|
+
_DEFAULT_GENERATED_DIR = Path("generated_resilience_tests")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _console() -> Console:
    """Create the rich ``Console`` used by the subcommands.

    ``force_terminal=True`` switches terminal control codes on instead of
    letting rich auto-detect whether the output stream is a terminal.
    """
    console = Console(force_terminal=True)
    return console
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _lark_url(args: argparse.Namespace) -> str:
    """Resolve the Lark MCP base URL.

    Precedence: the ``--lark-url`` flag, then the ``LARK_MCP_URL`` environment
    variable, then the built-in default. Empty values count as "not set".
    """
    if args.lark_url:
        return args.lark_url
    from_env = os.environ.get("LARK_MCP_URL")
    if from_env:
        return from_env
    return _DEFAULT_LARK_URL
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# discover
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def cmd_discover(args: argparse.Namespace) -> int:
    """Print a table of the failing tests Lark reports for ``args.project``.

    The Lark client is closed whether or not the lookup succeeds. Always
    returns exit code 0 when the lookup itself does not raise.
    """
    out = _console()
    lark = LarkMCPClient(base_url=_lark_url(args))
    try:
        failing = lark.list_failing_tests(args.project)
    finally:
        lark.close()

    listing = Table(title=f"Failing tests in {args.project!r}")
    for heading, style in (("Name", "cyan"), ("Last failure", "red"), ("Path", "dim")):
        listing.add_column(heading, style=style)
    for entry in failing:
        listing.add_row(entry.name, entry.last_failure or "", entry.path)
    out.print(listing)

    # The (empty) table is still printed; this line just makes the result explicit.
    if not failing:
        out.print("[yellow]No failing tests reported.[/]")
    return 0
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# generate
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def cmd_generate(args: argparse.Namespace) -> int:
    """Fetch failing tests from Lark and write one resilience test file per failure.

    Files go under ``args.out``. Prints a summary table and returns 0; when
    Lark reports no failures nothing is written.
    """
    out = _console()
    lark = LarkMCPClient(base_url=_lark_url(args))
    try:
        failing = lark.list_failing_tests(args.project)
    finally:
        lark.close()

    if not failing:
        out.print("[yellow]No failing tests, nothing to generate.[/]")
        return 0

    destination = Path(args.out)
    summary = Table(title=f"Generated {len(failing)} resilience test(s)")
    for heading, style in (("Test name", "cyan"), ("Scenarios", "green"), ("File", "dim")):
        summary.add_column(heading, style=style)
    for entry in failing:
        artefact = generate_test(entry.name, entry.last_failure or "", destination)
        summary.add_row(entry.name, ", ".join(artefact.scenarios), str(artefact.file_path))

    out.print(summary)
    out.print(f"[green]Wrote {len(failing)} file(s) under {destination}[/]")
    return 0
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
# run
|
|
99
|
+
# ---------------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def cmd_run(args: argparse.Namespace) -> int:
    """Run pytest, in a child interpreter, against ``args.path``.

    Returns 2 when the path does not exist, otherwise pytest's own exit code.
    ``args.record``, when set, is forwarded as ``--resilience-record=<path>``.
    """
    target = Path(args.path)
    if not target.exists():
        _console().print(f"[red]No such path: {target}[/]")
        return 2

    # ``-X utf8`` turns on Python's UTF-8 mode for the child process.
    pytest_cmd = [sys.executable, "-X", "utf8", "-m", "pytest", "-v", str(target)]
    if args.record:
        pytest_cmd += [f"--resilience-record={args.record}"]
    return subprocess.run(pytest_cmd).returncode
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# ---------------------------------------------------------------------------
|
|
116
|
+
# report
|
|
117
|
+
# ---------------------------------------------------------------------------
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def cmd_report(args: argparse.Namespace) -> int:
    """Tell Lark that ``args.test_name`` is resolved, then confirm on the console.

    The Lark client is closed even if the report call raises. Returns 0.
    """
    resolution = {
        "project": args.project,
        "test_name": args.test_name,
        "pytest_path": args.pytest_path,
    }
    lark = LarkMCPClient(base_url=_lark_url(args))
    try:
        lark.report_resolved(**resolution)
    finally:
        lark.close()
    _console().print(f"[green]Reported resolution for {args.test_name}[/]")
    return 0
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
# scenarios
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def cmd_scenarios(args: argparse.Namespace) -> int:
    """Print a one-column table of every registered chaos scenario name.

    ``args`` is accepted only so the function fits the subcommand signature.
    Returns 0.
    """
    registry = Table(title="Registered chaos scenarios")
    registry.add_column("Name", style="cyan")
    for scenario_name in registered_scenarios():
        registry.add_row(scenario_name)
    _console().print(registry)
    return 0
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
# parser
|
|
153
|
+
# ---------------------------------------------------------------------------
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser: one global ``--lark-url`` flag plus five subcommands.

    Each subparser stores its handler in ``func`` so ``main`` can dispatch
    without knowing the command names.
    """
    root = argparse.ArgumentParser(prog="pytest-resilience-agent")
    root.add_argument(
        "--lark-url",
        default=None,
        help="Lark MCP base URL (env LARK_MCP_URL, default http://localhost:8801)",
    )
    commands = root.add_subparsers(dest="command", required=True)

    discover = commands.add_parser("discover", help="list failing tests via Lark")
    discover.add_argument("--project", default=_DEFAULT_PROJECT)
    discover.set_defaults(func=cmd_discover)

    generate = commands.add_parser(
        "generate", help="generate resilience tests from Lark failures"
    )
    generate.add_argument("--project", default=_DEFAULT_PROJECT)
    generate.add_argument("--out", default=str(_DEFAULT_GENERATED_DIR))
    generate.set_defaults(func=cmd_generate)

    run = commands.add_parser("run", help="pytest the generated tests")
    run.add_argument("--path", default=str(_DEFAULT_GENERATED_DIR))
    run.add_argument("--record", default=None, help="JSON timeline path")
    run.set_defaults(func=cmd_run)

    report = commands.add_parser("report", help="push a resolution back to Lark")
    report.add_argument("--project", default=_DEFAULT_PROJECT)
    report.add_argument("--test-name", required=True)
    report.add_argument("--pytest-path", required=True)
    report.set_defaults(func=cmd_report)

    scenarios = commands.add_parser("scenarios", help="list registered chaos scenarios")
    scenarios.set_defaults(func=cmd_scenarios)

    return root
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` (``sys.argv[1:]`` when ``None``) and run the chosen subcommand.

    Returns the handler's result as an int; a falsy result becomes 0.
    """
    args = build_parser().parse_args(argv)
    exit_code = args.func(args)
    if not exit_code:
        return 0
    return int(exit_code)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
if __name__ == "__main__":
|
|
199
|
+
sys.exit(main())
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Thin client around TrueFoundry AI Gateway.
|
|
2
|
+
|
|
3
|
+
The gateway is an OpenAI-compatible proxy that handles fallbacks, retries,
|
|
4
|
+
and multi-model routing on its side. We just send chat completions through
|
|
5
|
+
it and let the gateway config decide what happens when upstream models error.
|
|
6
|
+
|
|
7
|
+
Docs: https://www.truefoundry.com/docs/ai-gateway/intro-to-llm-gateway
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import httpx
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ChatReply:
    """The small result object handed back by ``AIGatewayClient.chat``."""

    # Text content taken from the first choice ("" when there is none).
    content: str
    # Model name from the response, or the requested model if the response omits it.
    model: str
    # The complete decoded JSON response body.
    raw: dict[str, Any]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class AIGatewayClient:
    """Send chat completions through a TrueFoundry-style AI Gateway.

    The gateway is OpenAI-compatible, so callers can keep the OpenAI SDK
    interface in their own code and swap the base URL. We keep a thin
    httpx client here to make timeout and error injection easy for tests.

    The client owns an ``httpx.Client``; release it with ``close()`` or by
    using the instance as a context manager.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str | None = None,
        timeout: float = 30.0,
    ) -> None:
        """Create the underlying httpx client.

        Args:
            base_url: gateway base URL; trailing slashes are stripped.
            api_key: when truthy, sent as an ``Authorization: Bearer`` header.
            timeout: timeout passed to ``httpx.Client``.
        """
        self.base_url = base_url.rstrip("/")
        self._timeout = timeout
        headers: dict[str, str] = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "User-Agent": self._user_agent(),
        }
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        self._client = httpx.Client(headers=headers, timeout=timeout)

    @staticmethod
    def _user_agent() -> str:
        """Build the User-Agent from the installed package version.

        Reading the version from the distribution metadata keeps the header
        from going stale on every release; ``unknown`` is used when the
        package is not installed (e.g. run straight from a checkout).
        """
        from importlib.metadata import PackageNotFoundError, version

        try:
            package_version = version("pytest-resilience-agent")
        except PackageNotFoundError:
            package_version = "unknown"
        return f"pytest-resilience-agent/{package_version} (httpx)"

    @staticmethod
    def _extract_content(data: dict[str, Any]) -> str:
        """Return the first choice's message content, or ``""`` if absent.

        Missing keys and explicit JSON ``null`` values for ``choices``,
        ``message`` or ``content`` are all treated as "no content".
        """
        choices = data.get("choices") or []
        if not choices:
            return ""
        message = choices[0].get("message") or {}
        return message.get("content") or ""

    def chat(
        self,
        messages: list[dict[str, str]],
        model: str = "gateway-default",
        **kwargs: Any,
    ) -> ChatReply:
        """Send a chat completion request through the gateway.

        The gateway's own config decides fallback chain and retries.
        If the gateway itself is unreachable, raises ``httpx.HTTPError``;
        non-success status codes raise via ``raise_for_status()``.

        Args:
            messages: chat messages placed in the request payload.
            model: model name to request; also used for the reply when the
                response carries no ``model`` field.
            **kwargs: extra top-level payload fields, merged last.
        """
        payload = {"model": model, "messages": messages, **kwargs}
        response = self._client.post(f"{self.base_url}/chat/completions", json=payload)
        response.raise_for_status()
        data = response.json()
        return ChatReply(
            content=self._extract_content(data),
            model=data.get("model", model),
            raw=data,
        )

    def close(self) -> None:
        """Close the underlying httpx client."""
        self._client.close()

    def __enter__(self) -> AIGatewayClient:
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the client on leaving the ``with`` block; never suppresses errors."""
        self.close()
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Generate resilience test files from observed failures.
|
|
2
|
+
|
|
3
|
+
Mirrors the Lark sponsor challenge example ("coding agent setup that
|
|
4
|
+
listens for failing tests and creates PRs to fix them"). For each
|
|
5
|
+
failing test reported by Lark, the generator picks the relevant chaos
|
|
6
|
+
scenario(s) and emits a runnable pytest file that reproduces the
|
|
7
|
+
failure under controlled conditions.
|
|
8
|
+
|
|
9
|
+
The mapping is deterministic and rule-based on the failure string.
|
|
10
|
+
That keeps generation auditable: the engineer can read the rule that
|
|
11
|
+
matched and tweak it. A future iteration can swap in an LLM-based
|
|
12
|
+
classifier behind the same Generator interface.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import re
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
# Order matters: first match wins.
|
|
22
|
+
_RULES: list[tuple[re.Pattern[str], list[str]]] = [
|
|
23
|
+
(re.compile(r"\b429\b|too many requests", re.I), ["rate_limit"]),
|
|
24
|
+
(re.compile(r"\b502\b|bad gateway", re.I), ["llm_5xx"]),
|
|
25
|
+
(re.compile(r"\b503\b|service unavailable", re.I), ["partial_outage"]),
|
|
26
|
+
(re.compile(r"\b504\b|timeout", re.I), ["llm_timeout"]),
|
|
27
|
+
(re.compile(r"\b402\b|quota|cost", re.I), ["cost_exceeded"]),
|
|
28
|
+
(re.compile(r"connect.*(error|refused|reset)", re.I), ["network_blip"]),
|
|
29
|
+
(re.compile(r"empty|truncated|stream.*drop", re.I), ["stream_stall"]),
|
|
30
|
+
(re.compile(r"mcp|tool.*error|jsonrpc", re.I), ["mcp_error"]),
|
|
31
|
+
(re.compile(r"wrong.*model|model.*mismatch", re.I), ["wrong_model_returned"]),
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
|
|
36
|
+
class GeneratedTest:
|
|
37
|
+
"""Result of generating one test file from a Lark failure."""
|
|
38
|
+
|
|
39
|
+
test_name: str
|
|
40
|
+
scenarios: list[str]
|
|
41
|
+
file_path: Path
|
|
42
|
+
source: str
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def pick_scenarios(failure_text: str) -> list[str]:
|
|
46
|
+
"""Return the scenario names that best match a free-text failure.
|
|
47
|
+
|
|
48
|
+
Falls back to ``llm_5xx`` if nothing matches; that is the most generic
|
|
49
|
+
transient-failure scenario and a sane default.
|
|
50
|
+
"""
|
|
51
|
+
matched: list[str] = []
|
|
52
|
+
for pattern, scenarios in _RULES:
|
|
53
|
+
if pattern.search(failure_text):
|
|
54
|
+
matched.extend(scenarios)
|
|
55
|
+
return matched or ["llm_5xx"]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
_TEMPLATE = '''"""Auto-generated resilience test for {test_name!r}.
|
|
59
|
+
|
|
60
|
+
Generated by pytest-resilience-agent from a Lark-reported failure.
|
|
61
|
+
|
|
62
|
+
Original failure text:
|
|
63
|
+
{failure_text}
|
|
64
|
+
|
|
65
|
+
Chosen scenarios: {scenarios}
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
from __future__ import annotations
|
|
69
|
+
|
|
70
|
+
import httpx
|
|
71
|
+
import pytest
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@pytest.mark.resilience(scenarios={scenarios!r})
|
|
75
|
+
def {function_name}(chaos):
|
|
76
|
+
"""Reproduce the original failure under controlled chaos.
|
|
77
|
+
|
|
78
|
+
The chaos fixture installs the named scenarios at the HTTP layer.
|
|
79
|
+
Replace the body below with the call from your real agent code; the
|
|
80
|
+
boilerplate POST below just exercises the gateway to prove the
|
|
81
|
+
scenarios are active.
|
|
82
|
+
"""
|
|
83
|
+
with httpx.Client() as client:
|
|
84
|
+
response = client.post(chaos.target_gateway_url, json={{"q": 1}})
|
|
85
|
+
# When the scenarios are correct, the response status / shape
|
|
86
|
+
# mirrors the original failure. Add an assertion on the contract
|
|
87
|
+
# your real agent should hold:
|
|
88
|
+
# assert agent.handle_response(response).fallback_used
|
|
89
|
+
assert response is not None
|
|
90
|
+
'''
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def generate_test(
|
|
94
|
+
test_name: str,
|
|
95
|
+
failure_text: str,
|
|
96
|
+
out_dir: Path,
|
|
97
|
+
) -> GeneratedTest:
|
|
98
|
+
"""Write a resilience test file for one failure. Return the artefact."""
|
|
99
|
+
scenarios = pick_scenarios(failure_text)
|
|
100
|
+
safe_name = re.sub(r"[^a-zA-Z0-9_]+", "_", test_name).strip("_") or "auto_test"
|
|
101
|
+
if not safe_name.startswith("test_"):
|
|
102
|
+
safe_name = f"test_{safe_name}"
|
|
103
|
+
file_path = out_dir / f"{safe_name}_resilience.py"
|
|
104
|
+
function_name = safe_name + "_resilience"
|
|
105
|
+
source = _TEMPLATE.format(
|
|
106
|
+
test_name=test_name,
|
|
107
|
+
failure_text=failure_text,
|
|
108
|
+
scenarios=scenarios,
|
|
109
|
+
function_name=function_name,
|
|
110
|
+
)
|
|
111
|
+
out_dir.mkdir(parents=True, exist_ok=True)
|
|
112
|
+
file_path.write_text(source, encoding="utf-8")
|
|
113
|
+
return GeneratedTest(
|
|
114
|
+
test_name=test_name,
|
|
115
|
+
scenarios=scenarios,
|
|
116
|
+
file_path=file_path,
|
|
117
|
+
source=source,
|
|
118
|
+
)
|