PyPI - playagent - Versions diffs - 0.1.0__tar.gz - Mend

playagent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

playagent-0.1.0/LICENSE +21 -0
playagent-0.1.0/PKG-INFO +114 -0
playagent-0.1.0/README.md +78 -0
playagent-0.1.0/playagent/__init__.py +61 -0
playagent-0.1.0/playagent/adapters/anthropic.py +293 -0
playagent-0.1.0/playagent/adapters/openai.py +282 -0
playagent-0.1.0/playagent/assertions.py +195 -0
playagent-0.1.0/playagent/classifier.py +272 -0
playagent-0.1.0/playagent/cli.py +375 -0
playagent-0.1.0/playagent/core.py +179 -0
playagent-0.1.0/playagent/models.py +98 -0
playagent-0.1.0/playagent/storage.py +535 -0
playagent-0.1.0/playagent.egg-info/PKG-INFO +114 -0
playagent-0.1.0/playagent.egg-info/SOURCES.txt +25 -0
playagent-0.1.0/playagent.egg-info/dependency_links.txt +1 -0
playagent-0.1.0/playagent.egg-info/entry_points.txt +2 -0
playagent-0.1.0/playagent.egg-info/requires.txt +19 -0
playagent-0.1.0/playagent.egg-info/top_level.txt +1 -0
playagent-0.1.0/pyproject.toml +64 -0
playagent-0.1.0/setup.cfg +4 -0
playagent-0.1.0/tests/test_adapters.py +321 -0
playagent-0.1.0/tests/test_assertions.py +122 -0
playagent-0.1.0/tests/test_classifier.py +304 -0
playagent-0.1.0/tests/test_cli.py +330 -0
playagent-0.1.0/tests/test_core.py +151 -0
playagent-0.1.0/tests/test_integration.py +249 -0
playagent-0.1.0/tests/test_storage.py +146 -0

playagent-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 emartai
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

playagent-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,114 @@
+Metadata-Version: 2.4
+Name: playagent
+Version: 0.1.0
+Summary: Agent testing SDK. Instrument, assert, classify.
+Author: emartai
+License: MIT
+Project-URL: Homepage, https://github.com/emartai/playagent
+Project-URL: Documentation, https://github.com/emartai/playagent#readme
+Project-URL: Repository, https://github.com/emartai/playagent
+Project-URL: Issues, https://github.com/emartai/playagent/issues
+Keywords: ai,agents,testing,llm,openai,anthropic
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: typer>=0.15.0
+Requires-Dist: rich>=13.0.0
+Requires-Dist: python-dotenv>=1.0.0
+Provides-Extra: openai
+Requires-Dist: openai>=1.0.0; extra == "openai"
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.25.0; extra == "anthropic"
+Provides-Extra: all
+Requires-Dist: openai>=1.0.0; extra == "all"
+Requires-Dist: anthropic>=0.25.0; extra == "all"
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
+Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
+Requires-Dist: ruff>=0.4.0; extra == "dev"
+Dynamic: license-file
+<p align="center">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="assets/logo-dark.svg">
+    <source media="(prefers-color-scheme: light)" srcset="assets/logo-light.svg">
+    <img alt="PlayAgent" src="assets/logo-dark.svg" width="80">
+  </picture>
+</p>
+<p align="center">Test your AI agents. Catch failures before your users do.</p>
+<p align="center">
+  <a href="https://github.com/emartai/playagent/actions/workflows/ci.yml"><img alt="Build" src="https://img.shields.io/github/actions/workflow/status/emartai/playagent/ci.yml?style=flat-square&label=build&color=%2334d399"></a>
+  <a href="https://pypi.org/project/playagent/"><img alt="PyPI" src="https://img.shields.io/pypi/v/playagent?style=flat-square&color=%2334d399"></a>
+  <a href="LICENSE"><img alt="License" src="https://img.shields.io/badge/license-MIT-%2334d399?style=flat-square"></a>
+</p>
+---
+**Quick Links:** [Quickstart](#quickstart) · [Assertions](docs/assertions.md) · [Failure Types](docs/failure-types.md) · [Contributing](docs/contributing.md)
+---
+```bash
+pip install playagent[all]
+```
+```python
+from playagent import record
+from playagent.adapters.openai import OpenAI
+client = OpenAI()
+@record
+def run_agent(user_input: str):
+    return client.chat.completions.create(
+        model="gpt-4o",
+        messages=[{"role": "user", "content": user_input}],
+    )
+```
+```text
+  session   sess_a1b2c3d4
+  agent     run_agent
+  started   2026-04-05 09:14:32
+  duration  3.24s
+  status    passed
+──────────────────────── turn 1 ─────────────────────────
+  model     gpt-4o
+  latency   812ms
+  ▸ user
+    What's the weather in Lagos today?
+  ▸ assistant
+    I'll look that up for you.
+    ⬡ tool call  get_weather
+      location  "Lagos, NG"
+      units     "celsius"
+```
+| Command | What it does |
+|---|---|
+| `playagent trace list` | Lists recent trace sessions. |
+| `playagent trace view <trace_id>` | Shows turn-by-turn trace details. |
+| `playagent report` | Shows aggregate pass/fail and failure breakdowns. |
+| `playagent report --format json` | Emits report stats as JSON for CI pipelines. |
+| `playagent --version` | Prints installed PlayAgent version. |
+## Why PlayAgent
+- You stay local-first. PlayAgent writes to SQLite on your machine; nothing is sent to a hosted dashboard by default.
+- You can test behavior, not only outputs. Assertions check tool-call order, parameters, and call counts directly.
+- If you already use LangSmith, PlayAgent is a smaller option for local SDK-level checks; if you need hosted traces, collaboration, and observability dashboards, LangSmith is the better fit.
+## License
+MIT

playagent-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,78 @@
+<p align="center">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="assets/logo-dark.svg">
+    <source media="(prefers-color-scheme: light)" srcset="assets/logo-light.svg">
+    <img alt="PlayAgent" src="assets/logo-dark.svg" width="80">
+  </picture>
+</p>
+<p align="center">Test your AI agents. Catch failures before your users do.</p>
+<p align="center">
+  <a href="https://github.com/emartai/playagent/actions/workflows/ci.yml"><img alt="Build" src="https://img.shields.io/github/actions/workflow/status/emartai/playagent/ci.yml?style=flat-square&label=build&color=%2334d399"></a>
+  <a href="https://pypi.org/project/playagent/"><img alt="PyPI" src="https://img.shields.io/pypi/v/playagent?style=flat-square&color=%2334d399"></a>
+  <a href="LICENSE"><img alt="License" src="https://img.shields.io/badge/license-MIT-%2334d399?style=flat-square"></a>
+</p>
+---
+**Quick Links:** [Quickstart](#quickstart) · [Assertions](docs/assertions.md) · [Failure Types](docs/failure-types.md) · [Contributing](docs/contributing.md)
+---
+## Quickstart
+```bash
+pip install playagent[all]
+```
+```python
+from playagent import record
+from playagent.adapters.openai import OpenAI
+client = OpenAI()
+@record
+def run_agent(user_input: str):
+    return client.chat.completions.create(
+        model="gpt-4o",
+        messages=[{"role": "user", "content": user_input}],
+    )
+```
+```text
+  session   sess_a1b2c3d4
+  agent     run_agent
+  started   2026-04-05 09:14:32
+  duration  3.24s
+  status    passed
+──────────────────────── turn 1 ─────────────────────────
+  model     gpt-4o
+  latency   812ms
+  ▸ user
+    What's the weather in Lagos today?
+  ▸ assistant
+    I'll look that up for you.
+    ⬡ tool call  get_weather
+      location  "Lagos, NG"
+      units     "celsius"
+```
+| Command | What it does |
+|---|---|
+| `playagent trace list` | Lists recent trace sessions. |
+| `playagent trace view <trace_id>` | Shows turn-by-turn trace details. |
+| `playagent report` | Shows aggregate pass/fail and failure breakdowns. |
+| `playagent report --format json` | Emits report stats as JSON for CI pipelines. |
+| `playagent --version` | Prints installed PlayAgent version. |
+## Why PlayAgent
+- You stay local-first. PlayAgent writes to SQLite on your machine; nothing is sent to a hosted dashboard by default.
+- You can test behavior, not only outputs. Assertions check tool-call order, parameters, and call counts directly.
+- If you already use LangSmith, PlayAgent is a smaller option for local SDK-level checks; if you need hosted traces, collaboration, and observability dashboards, LangSmith is the better fit.
+## License
+MIT

playagent-0.1.0/playagent/__init__.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""Public API surface for PlayAgent."""
+from __future__ import annotations
+from pathlib import Path
+def _maybe_print_welcome() -> None:
+    """Print a one-time welcome message and initialize the DB on first import.
+    Does nothing when ``~/.playagent`` already exists or when the
+    ``PLAYAGENT_NO_WELCOME`` environment variable is set to a non-empty value.
+    Any ordinary exception raised during first-run setup is logged at DEBUG
+    level and otherwise ignored, so this hook does not break the import.
+    """
+    import logging
+    import os
+    if os.environ.get("PLAYAGENT_NO_WELCOME"):
+        return
+    try:
+        # Path.home() and exists() can raise as well (no resolvable home
+        # directory, permission errors), so they belong inside the guard.
+        playagent_dir = Path.home() / ".playagent"
+        if playagent_dir.exists():
+            return
+        playagent_dir.mkdir(parents=True, exist_ok=True)
+        print(  # noqa: T201
+            f"PlayAgent v0.1.0 — creating local database at {playagent_dir / 'db.sqlite'}"
+        )
+        from playagent.storage import get_db_path, init_db
+        init_db(get_db_path())
+    except Exception:  # noqa: BLE001
+        # Never crash on the welcome message; keep the cause discoverable in debug logs.
+        logging.getLogger(__name__).debug("PlayAgent first-run setup failed", exc_info=True)
+# Runs at import time, before the submodule imports below (hence their E402 suppressions).
+_maybe_print_welcome()
+from playagent.assertions import (  # noqa: E402
+    AssertionFailedError,
+    assert_trace,
+    contains,
+    equals,
+    matches,
+    starts_with,
+)
+from playagent.classifier import classify_trace, evaluate, score_goal  # noqa: E402
+from playagent.core import get_current_session_id, record, trace, validate_environment  # noqa: E402
+from playagent.models import ClassificationResult, Session, ToolCall, TraceEvent  # noqa: E402
+__version__ = "0.1.0"
+# Names re-exported as the package's public API.
+__all__ = [
+    # From playagent.core
+    "record",
+    "trace",
+    "get_current_session_id",
+    "validate_environment",
+    # From playagent.assertions
+    "assert_trace",
+    "contains",
+    "starts_with",
+    "matches",
+    "equals",
+    "AssertionFailedError",
+    # From playagent.classifier
+    "classify_trace",
+    "score_goal",
+    "evaluate",
+    # From playagent.models
+    "Session",
+    "TraceEvent",
+    "ToolCall",
+    "ClassificationResult",
+]

playagent-0.1.0/playagent/adapters/anthropic.py ADDED Viewed

@@ -0,0 +1,293 @@
+"""Anthropic drop-in adapters that capture request/response traces into PlayAgent storage."""
+from __future__ import annotations
+import logging
+import time
+from collections.abc import Callable
+from typing import Any
+try:
+    import anthropic
+except ImportError as exc:  # pragma: no cover
+    # Scenario: user did `pip install playagent` without the anthropic extra.
+    raise ImportError(
+        "Anthropic adapter requires the anthropic package. "
+        "Install it with: pip install playagent[anthropic]"
+    ) from exc
+from playagent.core import get_current_session_id
+from playagent.models import Session, ToolCall, TraceEvent, generate_id, now_utc
+from playagent.storage import get_turn_count, save_session, save_trace_event, update_session
+def _create_standalone_session() -> Session | None:
+    """Persist a fresh ``standalone`` session for calls made outside any active trace.
+    Returns the saved session, or ``None`` (after logging a warning) when
+    building or saving it fails.
+    """
+    try:
+        begun = now_utc()
+        standalone = Session(
+            id=generate_id("sess"),
+            agent_name="standalone",
+            status="running",
+            started_at=begun,
+            ended_at=None,
+            metadata={},
+            error=None,
+            classification=None,
+        )
+        save_session(standalone)
+    except Exception as failure:  # noqa: BLE001
+        logging.warning("PlayAgent failed to create standalone session: %s", failure)
+        return None
+    else:
+        return standalone
+def _finalize_standalone_session(
+    session: Session | None, status: str, error: str | None = None
+) -> None:
+    """Close out a standalone session by recording its final status, error and end time.
+    A ``None`` session is a no-op. Failures while updating are logged as
+    warnings and not propagated.
+    """
+    if session is not None:
+        try:
+            session.status = status
+            session.error = error
+            session.ended_at = now_utc()
+            update_session(session)
+        except Exception as failure:  # noqa: BLE001
+            logging.warning("PlayAgent failed to finalize standalone session: %s", failure)
+def _resolve_session() -> tuple[str | None, Session | None]:
+    """Return ``(session_id, standalone_session)`` for the current call.
+    With an active session the second item is ``None``. Otherwise a standalone
+    session is created and returned alongside its id, or ``(None, None)`` if
+    that creation failed.
+    """
+    active_id = get_current_session_id()
+    if active_id is None:
+        fallback = _create_standalone_session()
+        return (None, None) if fallback is None else (fallback.id, fallback)
+    return active_id, None
+def _extract_response_content(content_blocks: Any) -> str:
+    """Return the text of the first ``text`` block, or ``""`` when there is none."""
+    first_text = next(
+        (blk for blk in content_blocks or () if getattr(blk, "type", None) == "text"),
+        None,
+    )
+    if first_text is None:
+        return ""
+    return str(getattr(first_text, "text", "") or "")
+def _extract_tool_calls(content_blocks: Any) -> list[ToolCall]:
+    """Convert every ``tool_use`` content block into a ``ToolCall``, in order.
+    Non-dict ``input`` values become an empty argument dict; ``result`` is
+    always ``None`` here.
+    """
+    if not content_blocks:
+        return []
+    def as_tool_call(blk: Any) -> ToolCall:
+        payload = getattr(blk, "input", None)
+        return ToolCall(
+            name=str(getattr(blk, "name", "") or ""),
+            arguments=payload if isinstance(payload, dict) else {},
+            result=None,
+            call_id=getattr(blk, "id", None),
+        )
+    return [
+        as_tool_call(blk)
+        for blk in content_blocks
+        if getattr(blk, "type", None) == "tool_use"
+    ]
+def _map_finish_reason(stop_reason: str | None) -> str:
+    """Translate an Anthropic stop reason into the unified TraceEvent finish reason.
+    ``end_turn`` becomes ``stop`` and ``tool_use`` becomes ``tool_calls``; any
+    other value passes through as a string, with empty/``None`` mapped to ``stop``.
+    """
+    match stop_reason:
+        case "end_turn":
+            return "stop"
+        case "tool_use":
+            return "tool_calls"
+        case _:
+            return str(stop_reason or "stop")
+def _safe_save_event(event: TraceEvent) -> None:
+    """Save a trace event on a best-effort basis.
+    A failure while saving is logged as a warning and never raised to the caller.
+    """
+    try:
+        save_trace_event(event)
+    except Exception as failure:  # noqa: BLE001
+        logging.warning(
+            "PlayAgent failed to save Anthropic trace event %s: %s", event.id, failure
+        )
+def _extract_model_and_messages(kwargs: dict[str, Any]) -> tuple[str, list[dict[str, Any]]]:
+    """Pull the model name and message list out of the ``create`` keyword arguments.
+    A missing/falsy model yields ``""``; a missing, falsy or non-list
+    ``messages`` value yields an empty list.
+    """
+    requested_model = str(kwargs.get("model") or "")
+    candidate = kwargs.get("messages") or []
+    return requested_model, (candidate if isinstance(candidate, list) else [])
+def _build_success_event(
+    *,
+    session_id: str,
+    model: str,
+    messages: list[dict[str, Any]],
+    response: Any,
+    latency_ms: int,
+) -> TraceEvent:
+    """Build the TraceEvent describing a successful Anthropic response.
+    Text, tool calls, token usage and stop reason are read from ``response``;
+    the turn number is one more than the session's current turn count.
+    """
+    blocks = getattr(response, "content", None)
+    usage_info = getattr(response, "usage", None)
+    def token_count(attr: str) -> int:
+        # Missing or None usage counters are recorded as zero.
+        return int(getattr(usage_info, attr, 0) or 0)
+    text = _extract_response_content(blocks)
+    calls = _extract_tool_calls(blocks)
+    tokens_in = token_count("input_tokens")
+    tokens_out = token_count("output_tokens")
+    reason = _map_finish_reason(getattr(response, "stop_reason", None))
+    next_turn = get_turn_count(session_id) + 1
+    return TraceEvent(
+        id=generate_id("evt"),
+        session_id=session_id,
+        turn=next_turn,
+        model=model,
+        provider="anthropic",
+        messages=messages,
+        response_content=text,
+        tool_calls=calls,
+        prompt_tokens=tokens_in,
+        completion_tokens=tokens_out,
+        finish_reason=reason,
+        latency_ms=latency_ms,
+        error=None,
+        created_at=now_utc(),
+    )
+def _build_error_event(
+    *,
+    session_id: str,
+    model: str,
+    messages: list[dict[str, Any]],
+    error: Exception,
+    latency_ms: int,
+) -> TraceEvent:
+    """Build the TraceEvent describing a failed Anthropic call.
+    The event carries no content, tool calls or token counts; its finish reason
+    is ``error`` and ``error`` holds the stringified exception.
+    """
+    next_turn = get_turn_count(session_id) + 1
+    failure_text = str(error)
+    return TraceEvent(
+        id=generate_id("evt"),
+        session_id=session_id,
+        turn=next_turn,
+        model=model,
+        provider="anthropic",
+        messages=messages,
+        response_content="",
+        tool_calls=[],
+        prompt_tokens=0,
+        completion_tokens=0,
+        finish_reason="error",
+        latency_ms=latency_ms,
+        error=failure_text,
+        created_at=now_utc(),
+    )
+def _instrument_sync_create(original_create: Callable[..., Any]) -> Callable[..., Any]:
+    """Wrap a synchronous ``create`` callable so every call is recorded as a trace event.
+    The wrapper forwards all arguments unchanged, returns the original response,
+    and re-raises the original exception on failure. Recording problems are
+    logged as warnings and never surface to the caller.
+    """
+    def wrapped_create(*args: Any, **kwargs: Any) -> Any:
+        start = time.monotonic()
+        model, messages = _extract_model_and_messages(kwargs)
+        session_id, standalone_session = _resolve_session()
+        try:
+            response = original_create(*args, **kwargs)
+        except Exception as error:
+            latency_ms = int((time.monotonic() - start) * 1000)
+            if session_id is not None:
+                # Building the event reads the turn count from storage. Guard it so a
+                # storage failure cannot replace the provider error or skip finalization.
+                try:
+                    error_event = _build_error_event(
+                        session_id=session_id,
+                        model=model,
+                        messages=messages,
+                        error=error,
+                        latency_ms=latency_ms,
+                    )
+                    _safe_save_event(error_event)
+                except Exception as exc:  # noqa: BLE001
+                    logging.warning("PlayAgent failed to instrument Anthropic error: %s", exc)
+            _finalize_standalone_session(standalone_session, status="failed", error=str(error))
+            raise
+        latency_ms = int((time.monotonic() - start) * 1000)
+        if session_id is not None:
+            try:
+                # Prefer the model name reported by the response over the requested one.
+                response_model = str(getattr(response, "model", None) or model)
+                event = _build_success_event(
+                    session_id=session_id,
+                    model=response_model,
+                    messages=messages,
+                    response=response,
+                    latency_ms=latency_ms,
+                )
+                _safe_save_event(event)
+            except Exception as exc:  # noqa: BLE001
+                logging.warning("PlayAgent failed to instrument Anthropic response: %s", exc)
+        _finalize_standalone_session(standalone_session, status="passed", error=None)
+        return response
+    return wrapped_create
+def _instrument_async_create(original_create: Callable[..., Any]) -> Callable[..., Any]:
+    """Wrap an awaitable ``create`` callable so every call is recorded as a trace event.
+    The coroutine wrapper forwards all arguments unchanged, returns the awaited
+    response, and re-raises the original exception on failure. Recording
+    problems are logged as warnings and never surface to the caller.
+    """
+    async def wrapped_create(*args: Any, **kwargs: Any) -> Any:
+        start = time.monotonic()
+        model, messages = _extract_model_and_messages(kwargs)
+        session_id, standalone_session = _resolve_session()
+        try:
+            response = await original_create(*args, **kwargs)
+        except Exception as error:
+            latency_ms = int((time.monotonic() - start) * 1000)
+            if session_id is not None:
+                # Building the event reads the turn count from storage. Guard it so a
+                # storage failure cannot replace the provider error or skip finalization.
+                try:
+                    error_event = _build_error_event(
+                        session_id=session_id,
+                        model=model,
+                        messages=messages,
+                        error=error,
+                        latency_ms=latency_ms,
+                    )
+                    _safe_save_event(error_event)
+                except Exception as exc:  # noqa: BLE001
+                    logging.warning(
+                        "PlayAgent failed to instrument Anthropic async error: %s", exc
+                    )
+            _finalize_standalone_session(standalone_session, status="failed", error=str(error))
+            raise
+        latency_ms = int((time.monotonic() - start) * 1000)
+        if session_id is not None:
+            try:
+                # Prefer the model name reported by the response over the requested one.
+                response_model = str(getattr(response, "model", None) or model)
+                event = _build_success_event(
+                    session_id=session_id,
+                    model=response_model,
+                    messages=messages,
+                    response=response,
+                    latency_ms=latency_ms,
+                )
+                _safe_save_event(event)
+            except Exception as exc:  # noqa: BLE001
+                logging.warning("PlayAgent failed to instrument Anthropic async response: %s", exc)
+        _finalize_standalone_session(standalone_session, status="passed", error=None)
+        return response
+    return wrapped_create
+class Anthropic(anthropic.Anthropic):
+    """Drop-in ``anthropic.Anthropic`` subclass whose ``messages.create`` records traces."""
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        """Build the underlying client, then swap in the instrumented ``create``."""
+        super().__init__(*args, **kwargs)
+        messages_api = self.messages
+        messages_api.create = _instrument_sync_create(messages_api.create)
+class AsyncAnthropic(anthropic.AsyncAnthropic):
+    """Drop-in ``anthropic.AsyncAnthropic`` subclass whose ``messages.create`` records traces."""
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        """Build the underlying async client, then swap in the instrumented ``create``."""
+        super().__init__(*args, **kwargs)
+        messages_api = self.messages
+        messages_api.create = _instrument_async_create(messages_api.create)