playagent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 emartai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,114 @@
1
+ Metadata-Version: 2.4
2
+ Name: playagent
3
+ Version: 0.1.0
4
+ Summary: Agent testing SDK. Instrument, assert, classify.
5
+ Author: emartai
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/emartai/playagent
8
+ Project-URL: Documentation, https://github.com/emartai/playagent#readme
9
+ Project-URL: Repository, https://github.com/emartai/playagent
10
+ Project-URL: Issues, https://github.com/emartai/playagent/issues
11
+ Keywords: ai,agents,testing,llm,openai,anthropic
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Requires-Python: >=3.11
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: typer>=0.15.0
21
+ Requires-Dist: rich>=13.0.0
22
+ Requires-Dist: python-dotenv>=1.0.0
23
+ Provides-Extra: openai
24
+ Requires-Dist: openai>=1.0.0; extra == "openai"
25
+ Provides-Extra: anthropic
26
+ Requires-Dist: anthropic>=0.25.0; extra == "anthropic"
27
+ Provides-Extra: all
28
+ Requires-Dist: openai>=1.0.0; extra == "all"
29
+ Requires-Dist: anthropic>=0.25.0; extra == "all"
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
32
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
33
+ Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
34
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
35
+ Dynamic: license-file
36
+
37
+ <p align="center">
38
+ <picture>
39
+ <source media="(prefers-color-scheme: dark)" srcset="assets/logo-dark.svg">
40
+ <source media="(prefers-color-scheme: light)" srcset="assets/logo-light.svg">
41
+ <img alt="PlayAgent" src="assets/logo-dark.svg" width="80">
42
+ </picture>
43
+ </p>
44
+
45
+ <p align="center">Test your AI agents. Catch failures before your users do.</p>
46
+
47
+ <p align="center">
48
+ <a href="https://github.com/emartai/playagent/actions/workflows/ci.yml"><img alt="Build" src="https://img.shields.io/github/actions/workflow/status/emartai/playagent/ci.yml?style=flat-square&label=build&color=%2334d399"></a>
49
+ <a href="https://pypi.org/project/playagent/"><img alt="PyPI" src="https://img.shields.io/pypi/v/playagent?style=flat-square&color=%2334d399"></a>
50
+ <a href="LICENSE"><img alt="License" src="https://img.shields.io/badge/license-MIT-%2334d399?style=flat-square"></a>
51
+ </p>
52
+
53
+ ---
54
+
55
+ **Quick Links:** [Quickstart](#quickstart) · [Assertions](docs/assertions.md) · [Failure Types](docs/failure-types.md) · [Contributing](docs/contributing.md)
56
+
57
+ ---
58
+
59
+ ```bash
60
+ pip install "playagent[all]"
61
+ ```
62
+
63
+ ```python
64
+ from playagent import record
65
+ from playagent.adapters.openai import OpenAI
66
+
67
+ client = OpenAI()
68
+
69
+ @record
70
+ def run_agent(user_input: str):
71
+ return client.chat.completions.create(
72
+ model="gpt-4o",
73
+ messages=[{"role": "user", "content": user_input}],
74
+ )
75
+ ```
76
+
77
+ ```text
78
+ session sess_a1b2c3d4
79
+ agent run_agent
80
+ started 2026-04-05 09:14:32
81
+ duration 3.24s
82
+ status passed
83
+
84
+ ──────────────────────── turn 1 ─────────────────────────
85
+ model gpt-4o
86
+ latency 812ms
87
+
88
+ ▸ user
89
+ What's the weather in Lagos today?
90
+
91
+ ▸ assistant
92
+ I'll look that up for you.
93
+ ⬡ tool call get_weather
94
+ location "Lagos, NG"
95
+ units "celsius"
96
+ ```
97
+
98
+ | Command | What it does |
99
+ |---|---|
100
+ | `playagent trace list` | Lists recent trace sessions. |
101
+ | `playagent trace view <trace_id>` | Shows turn-by-turn trace details. |
102
+ | `playagent report` | Shows aggregate pass/fail and failure breakdowns. |
103
+ | `playagent report --format json` | Emits report stats as JSON for CI pipelines. |
104
+ | `playagent --version` | Prints installed PlayAgent version. |
105
+
106
+ ## Why PlayAgent
107
+
108
+ - You stay local-first. PlayAgent writes to SQLite on your machine; nothing is sent to a hosted dashboard by default.
109
+ - You can test behavior, not only outputs. Assertions check tool-call order, parameters, and call counts directly.
110
+ - If you already use LangSmith, PlayAgent is a smaller option for local SDK-level checks; if you need hosted traces, collaboration, and observability dashboards, LangSmith is the better fit.
111
+
112
+ ## License
113
+
114
+ MIT
@@ -0,0 +1,78 @@
1
+ <p align="center">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="assets/logo-dark.svg">
4
+ <source media="(prefers-color-scheme: light)" srcset="assets/logo-light.svg">
5
+ <img alt="PlayAgent" src="assets/logo-dark.svg" width="80">
6
+ </picture>
7
+ </p>
8
+
9
+ <p align="center">Test your AI agents. Catch failures before your users do.</p>
10
+
11
+ <p align="center">
12
+ <a href="https://github.com/emartai/playagent/actions/workflows/ci.yml"><img alt="Build" src="https://img.shields.io/github/actions/workflow/status/emartai/playagent/ci.yml?style=flat-square&label=build&color=%2334d399"></a>
13
+ <a href="https://pypi.org/project/playagent/"><img alt="PyPI" src="https://img.shields.io/pypi/v/playagent?style=flat-square&color=%2334d399"></a>
14
+ <a href="LICENSE"><img alt="License" src="https://img.shields.io/badge/license-MIT-%2334d399?style=flat-square"></a>
15
+ </p>
16
+
17
+ ---
18
+
19
+ **Quick Links:** [Quickstart](#quickstart) · [Assertions](docs/assertions.md) · [Failure Types](docs/failure-types.md) · [Contributing](docs/contributing.md)
20
+
21
+ ---
22
+
23
+ ```bash
24
+ pip install "playagent[all]"
25
+ ```
26
+
27
+ ```python
28
+ from playagent import record
29
+ from playagent.adapters.openai import OpenAI
30
+
31
+ client = OpenAI()
32
+
33
+ @record
34
+ def run_agent(user_input: str):
35
+ return client.chat.completions.create(
36
+ model="gpt-4o",
37
+ messages=[{"role": "user", "content": user_input}],
38
+ )
39
+ ```
40
+
41
+ ```text
42
+ session sess_a1b2c3d4
43
+ agent run_agent
44
+ started 2026-04-05 09:14:32
45
+ duration 3.24s
46
+ status passed
47
+
48
+ ──────────────────────── turn 1 ─────────────────────────
49
+ model gpt-4o
50
+ latency 812ms
51
+
52
+ ▸ user
53
+ What's the weather in Lagos today?
54
+
55
+ ▸ assistant
56
+ I'll look that up for you.
57
+ ⬡ tool call get_weather
58
+ location "Lagos, NG"
59
+ units "celsius"
60
+ ```
61
+
62
+ | Command | What it does |
63
+ |---|---|
64
+ | `playagent trace list` | Lists recent trace sessions. |
65
+ | `playagent trace view <trace_id>` | Shows turn-by-turn trace details. |
66
+ | `playagent report` | Shows aggregate pass/fail and failure breakdowns. |
67
+ | `playagent report --format json` | Emits report stats as JSON for CI pipelines. |
68
+ | `playagent --version` | Prints installed PlayAgent version. |
69
+
70
+ ## Why PlayAgent
71
+
72
+ - You stay local-first. PlayAgent writes to SQLite on your machine; nothing is sent to a hosted dashboard by default.
73
+ - You can test behavior, not only outputs. Assertions check tool-call order, parameters, and call counts directly.
74
+ - If you already use LangSmith, PlayAgent is a smaller option for local SDK-level checks; if you need hosted traces, collaboration, and observability dashboards, LangSmith is the better fit.
75
+
76
+ ## License
77
+
78
+ MIT
@@ -0,0 +1,61 @@
1
+ """Public API surface for PlayAgent."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+
8
+ def _maybe_print_welcome() -> None:
9
+ """Print a one-time welcome message and initialize DB on first import."""
10
+ import os
11
+
12
+ playagent_dir = Path.home() / ".playagent"
13
+ if playagent_dir.exists() or os.environ.get("PLAYAGENT_NO_WELCOME"):
14
+ return
15
+ try:
16
+ playagent_dir.mkdir(parents=True, exist_ok=True)
17
+ print( # noqa: T201
18
+ f"PlayAgent v0.1.0 — creating local database at {playagent_dir / 'db.sqlite'}"
19
+ )
20
+ from playagent.storage import get_db_path, init_db
21
+
22
+ init_db(get_db_path())
23
+ except Exception: # noqa: BLE001
24
+ pass # Never crash on welcome message
25
+
26
+
27
+ _maybe_print_welcome()
28
+
29
+ from playagent.assertions import ( # noqa: E402
30
+ AssertionFailedError,
31
+ assert_trace,
32
+ contains,
33
+ equals,
34
+ matches,
35
+ starts_with,
36
+ )
37
+ from playagent.classifier import classify_trace, evaluate, score_goal # noqa: E402
38
+ from playagent.core import get_current_session_id, record, trace, validate_environment # noqa: E402
39
+ from playagent.models import ClassificationResult, Session, ToolCall, TraceEvent # noqa: E402
40
+
41
+ __version__ = "0.1.0"
42
+
43
+ __all__ = [
44
+ "record",
45
+ "trace",
46
+ "get_current_session_id",
47
+ "validate_environment",
48
+ "assert_trace",
49
+ "contains",
50
+ "starts_with",
51
+ "matches",
52
+ "equals",
53
+ "AssertionFailedError",
54
+ "classify_trace",
55
+ "score_goal",
56
+ "evaluate",
57
+ "Session",
58
+ "TraceEvent",
59
+ "ToolCall",
60
+ "ClassificationResult",
61
+ ]
@@ -0,0 +1,293 @@
1
+ """Anthropic drop-in adapters that capture request/response traces into PlayAgent storage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import time
7
+ from collections.abc import Callable
8
+ from typing import Any
9
+
10
+ try:
11
+ import anthropic
12
+ except ImportError as exc: # pragma: no cover
13
+ # Scenario: user did `pip install playagent` without the anthropic extra.
14
+ raise ImportError(
15
+ "Anthropic adapter requires the anthropic package. "
16
+ "Install it with: pip install playagent[anthropic]"
17
+ ) from exc
18
+
19
+ from playagent.core import get_current_session_id
20
+ from playagent.models import Session, ToolCall, TraceEvent, generate_id, now_utc
21
+ from playagent.storage import get_turn_count, save_session, save_trace_event, update_session
22
+
23
+
24
def _create_standalone_session() -> Session | None:
    """Build and save a "standalone" session for calls made outside a traced run.

    Returns:
        The saved session, or ``None`` (after logging a warning) when creating
        or saving it raised.
    """
    try:
        created = now_utc()
        standalone = Session(
            id=generate_id("sess"),
            agent_name="standalone",
            status="running",
            started_at=created,
            ended_at=None,
            metadata={},
            error=None,
            classification=None,
        )
        save_session(standalone)
    except Exception as exc:  # noqa: BLE001
        # Instrumentation problems are reported, never raised to the caller.
        logging.warning("PlayAgent failed to create standalone session: %s", exc)
        return None
    return standalone
43
+
44
+
45
+ def _finalize_standalone_session(
46
+ session: Session | None, status: str, error: str | None = None
47
+ ) -> None:
48
+ """Mark standalone sessions complete so lifecycle mirrors decorator-based sessions."""
49
+ if session is None:
50
+ return
51
+ try:
52
+ session.status = status
53
+ session.error = error
54
+ session.ended_at = now_utc()
55
+ update_session(session)
56
+ except Exception as exc: # noqa: BLE001
57
+ logging.warning("PlayAgent failed to finalize standalone session: %s", exc)
58
+
59
+
60
def _resolve_session() -> tuple[str | None, Session | None]:
    """Pick the session a trace event should be attached to.

    Returns:
        ``(session_id, standalone_session)``. When a session is already active
        its id is returned with ``None``. Otherwise a standalone session is
        created and returned alongside its id, or ``(None, None)`` if that
        creation failed.
    """
    active_id = get_current_session_id()
    if active_id is None:
        fallback = _create_standalone_session()
        return (None, None) if fallback is None else (fallback.id, fallback)
    return active_id, None
70
+
71
+
72
+ def _extract_response_content(content_blocks: Any) -> str:
73
+ """Extract the first text block from Anthropic content blocks."""
74
+ if not content_blocks:
75
+ return ""
76
+ for block in content_blocks:
77
+ if getattr(block, "type", None) == "text":
78
+ return str(getattr(block, "text", "") or "")
79
+ return ""
80
+
81
+
82
+ def _extract_tool_calls(content_blocks: Any) -> list[ToolCall]:
83
+ """Extract tool_use blocks from Anthropic content blocks."""
84
+ calls: list[ToolCall] = []
85
+ if not content_blocks:
86
+ return calls
87
+
88
+ for block in content_blocks:
89
+ if getattr(block, "type", None) != "tool_use":
90
+ continue
91
+ block_input = getattr(block, "input", None)
92
+ args = block_input if isinstance(block_input, dict) else {}
93
+ calls.append(
94
+ ToolCall(
95
+ name=str(getattr(block, "name", "") or ""),
96
+ arguments=args,
97
+ result=None,
98
+ call_id=getattr(block, "id", None),
99
+ )
100
+ )
101
+ return calls
102
+
103
+
104
+ def _map_finish_reason(stop_reason: str | None) -> str:
105
+ """Map Anthropic stop reasons into unified TraceEvent finish reasons."""
106
+ if stop_reason == "end_turn":
107
+ return "stop"
108
+ if stop_reason == "tool_use":
109
+ return "tool_calls"
110
+ return str(stop_reason or "stop")
111
+
112
+
113
def _safe_save_event(event: TraceEvent) -> None:
    """Save ``event``; on failure log a warning instead of raising."""
    try:
        save_trace_event(event)
    except Exception as save_error:  # noqa: BLE001
        logging.warning(
            "PlayAgent failed to save Anthropic trace event %s: %s",
            event.id,
            save_error,
        )
119
+
120
+
121
+ def _extract_model_and_messages(kwargs: dict[str, Any]) -> tuple[str, list[dict[str, Any]]]:
122
+ model = str(kwargs.get("model") or "")
123
+ messages_raw = kwargs.get("messages") or []
124
+ if isinstance(messages_raw, list):
125
+ messages = messages_raw
126
+ else:
127
+ messages = []
128
+ return model, messages
129
+
130
+
131
def _build_success_event(
    *,
    session_id: str,
    model: str,
    messages: list[dict[str, Any]],
    response: Any,
    latency_ms: int,
) -> TraceEvent:
    """Build the ``TraceEvent`` describing a successful Anthropic response.

    Text, tool calls, token usage and the stop reason are read off ``response``
    with ``getattr`` so missing attributes fall back to empty/zero values. The
    turn number is one more than the session's stored turn count.
    """
    blocks = getattr(response, "content", None)
    usage = getattr(response, "usage", None)

    text = _extract_response_content(blocks)
    calls = _extract_tool_calls(blocks)
    tokens_in = int(getattr(usage, "input_tokens", 0) or 0)
    tokens_out = int(getattr(usage, "output_tokens", 0) or 0)
    reason = _map_finish_reason(getattr(response, "stop_reason", None))

    next_turn = get_turn_count(session_id) + 1
    fields = dict(
        id=generate_id("evt"),
        session_id=session_id,
        turn=next_turn,
        model=model,
        provider="anthropic",
        messages=messages,
        response_content=text,
        tool_calls=calls,
        prompt_tokens=tokens_in,
        completion_tokens=tokens_out,
        finish_reason=reason,
        latency_ms=latency_ms,
        error=None,
        created_at=now_utc(),
    )
    return TraceEvent(**fields)
165
+
166
+
167
def _build_error_event(
    *,
    session_id: str,
    model: str,
    messages: list[dict[str, Any]],
    error: Exception,
    latency_ms: int,
) -> TraceEvent:
    """Build the ``TraceEvent`` describing a failed Anthropic call.

    The event carries no response content, tool calls or token counts; its
    finish reason is ``"error"`` and ``error`` holds ``str(error)``. The turn
    number is one more than the session's stored turn count.
    """
    next_turn = get_turn_count(session_id) + 1
    fields = dict(
        id=generate_id("evt"),
        session_id=session_id,
        turn=next_turn,
        model=model,
        provider="anthropic",
        messages=messages,
        response_content="",
        tool_calls=[],
        prompt_tokens=0,
        completion_tokens=0,
        finish_reason="error",
        latency_ms=latency_ms,
        error=str(error),
        created_at=now_utc(),
    )
    return TraceEvent(**fields)
192
+
193
+
194
def _instrument_sync_create(original_create: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap a synchronous ``messages.create`` callable so each call is traced.

    Args:
        original_create: The callable to delegate to.

    Returns:
        A wrapper with the same call signature. It records one trace event per
        call in the active session, or in a standalone session when none is
        active, then returns the original response or re-raises the original
        exception.

    Instrumentation failures on either path are logged as warnings and never
    replace the wrapped call's own result or exception.
    """
    import functools

    @functools.wraps(original_create)
    def wrapped_create(*args: Any, **kwargs: Any) -> Any:
        start = time.monotonic()
        model, messages = _extract_model_and_messages(kwargs)
        session_id, standalone_session = _resolve_session()

        try:
            response = original_create(*args, **kwargs)
        except Exception as error:
            latency_ms = int((time.monotonic() - start) * 1000)
            if session_id is not None:
                # Building the event reads the turn count from storage and can
                # raise; guard it so the caller still sees the original error.
                try:
                    error_event = _build_error_event(
                        session_id=session_id,
                        model=model,
                        messages=messages,
                        error=error,
                        latency_ms=latency_ms,
                    )
                    _safe_save_event(error_event)
                except Exception as exc:  # noqa: BLE001
                    logging.warning("PlayAgent failed to instrument Anthropic error: %s", exc)
            _finalize_standalone_session(standalone_session, status="failed", error=str(error))
            raise

        latency_ms = int((time.monotonic() - start) * 1000)
        if session_id is not None:
            try:
                # Prefer the model name reported by the response over the requested one.
                response_model = str(getattr(response, "model", None) or model)
                event = _build_success_event(
                    session_id=session_id,
                    model=response_model,
                    messages=messages,
                    response=response,
                    latency_ms=latency_ms,
                )
                _safe_save_event(event)
            except Exception as exc:  # noqa: BLE001
                logging.warning("PlayAgent failed to instrument Anthropic response: %s", exc)

        _finalize_standalone_session(standalone_session, status="passed", error=None)
        return response

    return wrapped_create
235
+
236
+
237
def _instrument_async_create(original_create: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap an asynchronous ``messages.create`` callable so each call is traced.

    Args:
        original_create: The awaitable-returning callable to delegate to.

    Returns:
        A coroutine function with the same call signature. It records one
        trace event per call in the active session, or in a standalone session
        when none is active, then returns the original response or re-raises
        the original exception.

    Instrumentation failures on either path are logged as warnings and never
    replace the wrapped call's own result or exception.
    """
    import functools

    @functools.wraps(original_create)
    async def wrapped_create(*args: Any, **kwargs: Any) -> Any:
        start = time.monotonic()
        model, messages = _extract_model_and_messages(kwargs)
        session_id, standalone_session = _resolve_session()

        try:
            response = await original_create(*args, **kwargs)
        except Exception as error:
            latency_ms = int((time.monotonic() - start) * 1000)
            if session_id is not None:
                # Building the event reads the turn count from storage and can
                # raise; guard it so the caller still sees the original error.
                try:
                    error_event = _build_error_event(
                        session_id=session_id,
                        model=model,
                        messages=messages,
                        error=error,
                        latency_ms=latency_ms,
                    )
                    _safe_save_event(error_event)
                except Exception as exc:  # noqa: BLE001
                    logging.warning(
                        "PlayAgent failed to instrument Anthropic async error: %s", exc
                    )
            _finalize_standalone_session(standalone_session, status="failed", error=str(error))
            raise

        latency_ms = int((time.monotonic() - start) * 1000)
        if session_id is not None:
            try:
                # Prefer the model name reported by the response over the requested one.
                response_model = str(getattr(response, "model", None) or model)
                event = _build_success_event(
                    session_id=session_id,
                    model=response_model,
                    messages=messages,
                    response=response,
                    latency_ms=latency_ms,
                )
                _safe_save_event(event)
            except Exception as exc:  # noqa: BLE001
                logging.warning("PlayAgent failed to instrument Anthropic async response: %s", exc)

        _finalize_standalone_session(standalone_session, status="passed", error=None)
        return response

    return wrapped_create
278
+
279
+
280
class Anthropic(anthropic.Anthropic):
    """Drop-in replacement for ``anthropic.Anthropic`` that records traces.

    Construction arguments are forwarded unchanged; afterwards the client's
    ``messages.create`` is replaced by an instrumented wrapper.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        messages_resource = self.messages
        messages_resource.create = _instrument_sync_create(messages_resource.create)
286
+
287
+
288
class AsyncAnthropic(anthropic.AsyncAnthropic):
    """Drop-in replacement for ``anthropic.AsyncAnthropic`` that records traces.

    Construction arguments are forwarded unchanged; afterwards the client's
    ``messages.create`` is replaced by an instrumented async wrapper.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        messages_resource = self.messages
        messages_resource.create = _instrument_async_create(messages_resource.create)