flashlite 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flashlite/_spinner.py ADDED
@@ -0,0 +1,91 @@
1
+ """Terminal spinner for user-visible progress during async waits.
2
+
3
+ Provides a lightweight, non-blocking spinner that renders to stderr
4
+ when — and only when — the output is an interactive terminal. Multiple
5
+ concurrent ``Spinner`` instances (e.g. from ``complete_many``) are
6
+ gracefully collapsed so only one animation is visible at a time.
7
+ """
8
+
9
+ import asyncio
10
+ import sys
11
+ import time
12
+
13
+ # Braille-dot frames — smooth and compact.
14
+ _FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
15
+ _INTERVAL = 0.08 # seconds between frame updates
16
+
17
+ # ANSI helpers
18
+ _CYAN = "\033[36m"
19
+ _DIM = "\033[2m"
20
+ _RESET = "\033[0m"
21
+ _CLEAR_LINE = "\r\033[K"
22
+
23
+ # Module-level guard — only one spinner renders at a time.
24
+ _active: bool = False
25
+
26
+
27
+ class Spinner:
28
+ """Async context manager that shows a terminal spinner on stderr.
29
+
30
+ The spinner only appears when stderr is a TTY **and** no other
31
+ ``Spinner`` is already active, making it safe for concurrent use
32
+ inside ``complete_many``.
33
+
34
+ Args:
35
+ message: Text displayed next to the spinner.
36
+ delay: Grace period (seconds) before the spinner appears.
37
+ If the wrapped operation finishes within this window the
38
+ spinner is never rendered, avoiding flicker for fast calls.
39
+ """
40
+
41
+ def __init__(self, message: str = "Working...", *, delay: float = 0.3) -> None:
42
+ self.message = message
43
+ self.delay = delay
44
+ self._task: asyncio.Task[None] | None = None
45
+ self._owns_active = False
46
+ self._start: float = 0.0
47
+
48
+ # -- internal -----------------------------------------------------
49
+
50
+ async def _render(self) -> None:
51
+ """Background coroutine that draws frames until cancelled."""
52
+ await asyncio.sleep(self.delay)
53
+ idx = 0
54
+ while True:
55
+ elapsed = time.monotonic() - self._start
56
+ frame = _FRAMES[idx % len(_FRAMES)]
57
+ sys.stderr.write(
58
+ f"{_CLEAR_LINE}{_CYAN}{frame}{_RESET} {self.message} "
59
+ f"{_DIM}({elapsed:.1f}s){_RESET}"
60
+ )
61
+ sys.stderr.flush()
62
+ idx += 1
63
+ await asyncio.sleep(_INTERVAL)
64
+
65
+ @staticmethod
66
+ def _clear() -> None:
67
+ sys.stderr.write(_CLEAR_LINE)
68
+ sys.stderr.flush()
69
+
70
+ # -- context manager ----------------------------------------------
71
+
72
+ async def __aenter__(self) -> "Spinner":
73
+ global _active # noqa: PLW0603
74
+ if sys.stderr.isatty() and not _active:
75
+ _active = True
76
+ self._owns_active = True
77
+ self._start = time.monotonic()
78
+ self._task = asyncio.create_task(self._render())
79
+ return self
80
+
81
+ async def __aexit__(self, *_: object) -> None:
82
+ global _active # noqa: PLW0603
83
+ if self._task is not None:
84
+ self._task.cancel()
85
+ try:
86
+ await self._task
87
+ except asyncio.CancelledError:
88
+ pass
89
+ self._clear()
90
+ if self._owns_active:
91
+ _active = False
flashlite/client.py CHANGED
@@ -7,6 +7,7 @@ from typing import Any, TypeVar, overload
7
7
 
8
8
  from pydantic import BaseModel
9
9
 
10
+ from ._spinner import Spinner
10
11
  from .cache import CacheBackend, MemoryCache
11
12
  from .config import FlashliteConfig, load_env_files
12
13
  from .conversation import ContextManager, Conversation
@@ -223,7 +224,8 @@ class Flashlite:
223
224
  if self._config.log_requests:
224
225
  logger.info(f"Completion request: model={request.model}")
225
226
 
226
- response = await core_complete(request)
227
+ async with Spinner(f"Waiting for {request.model}...", delay=0.2):
228
+ response = await core_complete(request)
227
229
 
228
230
  if self._config.log_requests:
229
231
  logger.info(
@@ -395,10 +397,12 @@ class Flashlite:
395
397
  else:
396
398
  extra_kwargs["tools"] = tools_to_openai(tools)
397
399
 
398
- # Build request
400
+ # Build request (template/variables stored for middleware traceability)
399
401
  request = CompletionRequest(
400
402
  model=resolved_model,
401
403
  messages=final_messages,
404
+ template=template,
405
+ variables=variables,
402
406
  temperature=temperature,
403
407
  max_tokens=max_tokens,
404
408
  max_completion_tokens=max_completion_tokens,
@@ -1,67 +1,134 @@
1
1
  """Multi-agent conversation support for agent-to-agent interactions."""
2
2
 
3
+ import logging
4
+ import re
5
+ import time
3
6
  from dataclasses import dataclass, field
4
- from typing import TYPE_CHECKING, Any
7
+ from typing import TYPE_CHECKING, Any, TypeVar, overload
5
8
 
9
+ from pydantic import BaseModel
10
+
11
+ from ..core.messages import assistant_message, system_message, user_message
12
+ from ..structured import (
13
+ StructuredOutputError,
14
+ format_validation_error_for_retry,
15
+ schema_to_prompt,
16
+ validate_response,
17
+ )
6
18
  from ..types import CompletionResponse
7
19
 
8
20
  if TYPE_CHECKING:
9
21
  from ..client import Flashlite
10
22
 
23
+ T = TypeVar("T", bound=BaseModel)
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ _INVALID_NAME_CHARS = re.compile(r"[\s<|\\/>]+")
28
+
29
+
30
+ def _sanitize_name(name: str) -> str:
31
+ """Sanitize a display name for use in the OpenAI message ``name`` field.
32
+
33
+ The API requires names to match ``^[^\\s<|\\\\/>]+$``. This helper
34
+ replaces any run of invalid characters with ``_`` and strips leading/
35
+ trailing underscores so that human-friendly display names like
36
+ ``"Character Voice"`` become ``"Character_Voice"``.
37
+ """
38
+ return _INVALID_NAME_CHARS.sub("_", name).strip("_")
39
+
11
40
 
12
41
  @dataclass
13
42
  class Agent:
14
43
  """
15
44
  An agent with a name, persona, and optional model override.
16
45
 
46
+ Agents can define their system prompt either as a raw string or as a
47
+ Jinja template (rendered at speak-time via the client's TemplateEngine).
48
+ Agents can also have private context that only they see.
49
+
17
50
  Attributes:
18
- name: Display name for the agent (used in transcript and message attribution)
19
- system_prompt: The agent's personality, instructions, and behavior guidelines
51
+ name: Display name (used in transcript and message attribution)
52
+ system_prompt: The agent's personality/instructions (raw string)
20
53
  model: Optional model override (uses MultiAgentChat default if None)
54
+ system_template: Jinja template name (alternative to system_prompt)
55
+ system_variables: Variables for template rendering
56
+ private_context: Static context only this agent sees (injected as system message)
21
57
 
22
- Example:
23
- agent = Agent(
24
- name="Scientist",
25
- system_prompt="You are a curious scientist who loves experiments.",
26
- model="gpt-4o", # Optional: use specific model for this agent
27
- )
58
+ Examples:
59
+ # Raw system prompt
60
+ Agent(name="Scientist", system_prompt="You are a curious scientist.")
61
+
62
+ # Jinja template
63
+ Agent(name="Analyst", system_template="analyst_persona",
64
+ system_variables={"domain": "finance"})
65
+
66
+ # With private context
67
+ Agent(name="Judge", system_prompt="You are a debate judge.",
68
+ private_context="Score on: clarity (1-5), evidence (1-5).")
28
69
  """
29
70
 
30
71
  name: str
31
- system_prompt: str
72
+ system_prompt: str | None = None
32
73
  model: str | None = None
74
+ # Jinja template support (alternative to system_prompt)
75
+ system_template: str | None = None
76
+ system_variables: dict[str, Any] | None = None
77
+ # Private context only this agent sees
78
+ private_context: str | None = None
79
+
80
+ def __post_init__(self) -> None:
81
+ if not self.system_prompt and not self.system_template:
82
+ raise ValueError(
83
+ f"Agent '{self.name}' must have either system_prompt or system_template"
84
+ )
85
+ if self.system_prompt and self.system_template:
86
+ raise ValueError(
87
+ f"Agent '{self.name}' cannot have both system_prompt and system_template"
88
+ )
33
89
 
34
90
 
35
91
@dataclass
class ChatMessage:
    """A message in the multi-agent conversation.

    Attributes:
        agent_name: Who sent this message
        content: The message content
        metadata: Additional metadata (tokens, latency, model, etc.)
        visible_to: If set, only these agents can see this message.
            None means all agents can see it.
    """

    # Speaker's display name — a registered agent or an injected source
    # such as "Moderator".
    agent_name: str
    # Raw message text produced by the model or injected by the caller.
    content: str
    # Per-message bookkeeping (model, token counts, latency_ms, ...).
    metadata: dict[str, Any] = field(default_factory=dict)
    # Visibility allow-list; None means every agent sees the message.
    visible_to: list[str] | None = None
42
107
 
43
108
 
44
109
  class MultiAgentChat:
45
110
  """
46
111
  Manages conversations between multiple AI agents.
47
112
 
48
- This class enables agent-to-agent conversations where multiple AI agents
49
- can discuss, debate, or collaborate. Each agent maintains its own persona
50
- and sees the conversation from its perspective.
113
+ Integrates with flashlite's templating, logging, structured outputs,
114
+ and observability features.
51
115
 
52
116
  Key features:
53
117
  - Multiple agents with different personas and optionally different models
118
+ - Jinja template support for agent system prompts
119
+ - Per-message visibility control (private whispers to specific agents)
120
+ - Structured output support via Pydantic models (per-turn, flexible)
54
121
  - Automatic context building from each agent's perspective
122
+ - Conversation-level logging and per-agent stats
55
123
  - Round-robin or directed turn-taking
56
- - Full conversation transcript with metadata
57
- - Support for injecting external messages (moderator, user input)
58
124
 
59
125
  How it works:
60
- - Each agent has a system prompt defining their persona
126
+ - Each agent has a system prompt (raw or Jinja template) defining their persona
61
127
  - When an agent speaks, they see:
62
128
  - Their own previous messages as "assistant" role
63
- - Other agents' messages as "user" role with name attribution
64
- - This creates natural back-and-forth conversation
129
+ - Other agents' messages as "user" role with the ``name`` field for attribution
130
+ - Only messages they are allowed to see (filtered by ``visible_to``)
131
+ - Private context on an agent is injected as a system message only they see
65
132
 
66
133
  Example:
67
134
  client = Flashlite(default_model="gpt-4o-mini")
@@ -80,15 +147,24 @@ class MultiAgentChat:
80
147
  # Start with a topic
81
148
  chat.add_message("Moderator", "Discuss: Will AI help or hurt jobs?")
82
149
 
150
+ # Whisper private info to one agent
151
+ chat.add_message("Moderator", "Secret: focus on healthcare jobs.",
152
+ visible_to=["Optimist"])
153
+
83
154
  # Have agents take turns
84
- await chat.speak("Optimist") # Optimist responds
85
- await chat.speak("Skeptic") # Skeptic responds to Optimist
86
- await chat.speak("Optimist") # Continue the debate
155
+ await chat.speak("Optimist")
156
+ await chat.speak("Skeptic")
157
+
158
+ # Structured output from a judge
159
+ class Score(BaseModel):
160
+ winner: str
161
+ reasoning: str
162
+
163
+ result = await chat.speak("Judge", response_model=Score)
87
164
 
88
- # Or use round-robin for structured turns
165
+ # Round-robin for structured turns
89
166
  await chat.round_robin(rounds=2)
90
167
 
91
- # Get formatted transcript
92
168
  print(chat.format_transcript())
93
169
  """
94
170
 
@@ -109,6 +185,8 @@ class MultiAgentChat:
109
185
  self._agents: dict[str, Agent] = {}
110
186
  self._transcript: list[ChatMessage] = []
111
187
 
188
+ # -- Agent management ------------------------------------------------
189
+
112
190
  def add_agent(self, agent: Agent) -> "MultiAgentChat":
113
191
  """
114
192
  Add an agent to the chat.
@@ -120,8 +198,8 @@ class MultiAgentChat:
120
198
  Self for method chaining
121
199
 
122
200
  Example:
123
- chat.add_agent(Agent("Alice", "You are helpful."))
124
- .add_agent(Agent("Bob", "You are curious."))
201
+ chat.add_agent(Agent("Alice", system_prompt="You are helpful."))
202
+ .add_agent(Agent("Bob", system_prompt="You are curious."))
125
203
  """
126
204
  self._agents[agent.name] = agent
127
205
  return self
@@ -141,82 +219,151 @@ class MultiAgentChat:
141
219
  return True
142
220
  return False
143
221
 
222
+ # -- Message injection -----------------------------------------------
223
+
144
224
  def add_message(
145
225
  self,
146
226
  agent_name: str,
147
227
  content: str,
148
228
  metadata: dict[str, Any] | None = None,
229
+ visible_to: list[str] | None = None,
149
230
  ) -> "MultiAgentChat":
150
231
  """
151
232
  Manually add a message to the transcript.
152
233
 
153
- Useful for:
154
- - Injecting moderator or facilitator prompts
155
- - Adding user input to the conversation
156
- - Simulating agent messages for testing
234
+ Useful for injecting moderator prompts, user input, or private
235
+ whispers to specific agents.
157
236
 
158
237
  Args:
159
238
  agent_name: Name to attribute the message to
160
239
  content: Message content
161
240
  metadata: Optional metadata to attach
241
+ visible_to: If set, only these agents can see this message.
242
+ None means all agents see it.
162
243
 
163
244
  Returns:
164
245
  Self for method chaining
246
+
247
+ Examples:
248
+ # Public message everyone sees
249
+ chat.add_message("Moderator", "New topic: climate change.")
250
+
251
+ # Private whisper only the Adversary sees
252
+ chat.add_message("GameMaster", "Secret: the key is in the library.",
253
+ visible_to=["Adversary"])
165
254
  """
166
255
  self._transcript.append(
167
256
  ChatMessage(
168
257
  agent_name=agent_name,
169
258
  content=content,
170
259
  metadata=metadata or {},
260
+ visible_to=visible_to,
171
261
  )
172
262
  )
263
+ logger.debug(
264
+ "Message injected from '%s'%s",
265
+ agent_name,
266
+ f" (visible_to={visible_to})" if visible_to else "",
267
+ )
173
268
  return self
174
269
 
270
+ # -- Speaking --------------------------------------------------------
271
+
272
+ @overload
175
273
  async def speak(
176
274
  self,
177
275
  agent_name: str,
276
+ *,
277
+ additional_context: str | None = ...,
278
+ response_model: None = ...,
279
+ structured_retries: int = ...,
280
+ visible_to: list[str] | None = ...,
281
+ **kwargs: Any,
282
+ ) -> str: ...
283
+
284
+ @overload
285
+ async def speak(
286
+ self,
287
+ agent_name: str,
288
+ *,
289
+ additional_context: str | None = ...,
290
+ response_model: type[T] = ...,
291
+ structured_retries: int = ...,
292
+ visible_to: list[str] | None = ...,
293
+ **kwargs: Any,
294
+ ) -> T: ...
295
+
296
+ async def speak(
297
+ self,
298
+ agent_name: str,
299
+ *,
178
300
  additional_context: str | None = None,
301
+ response_model: type[T] | None = None,
302
+ structured_retries: int = 1,
303
+ visible_to: list[str] | None = None,
179
304
  **kwargs: Any,
180
- ) -> str:
305
+ ) -> str | T:
181
306
  """
182
307
  Have an agent respond to the conversation.
183
308
 
184
309
  The agent sees the full conversation history from their perspective:
185
310
  - Their own previous messages appear as "assistant" messages
186
311
  - Other agents' messages appear as "user" messages with name attribution
312
+ - Messages with ``visible_to`` set are filtered by visibility
187
313
 
188
314
  Args:
189
315
  agent_name: Name of the agent to speak
190
316
  additional_context: Optional extra context/instruction for this turn
317
+ response_model: Pydantic model class for structured output parsing.
318
+ When provided, returns a validated model instance.
319
+ Can change per call for flexible per-turn schemas.
320
+ structured_retries: Number of retries for structured output validation
321
+ visible_to: If set, only these agents see this agent's response.
322
+ None means all agents see it.
191
323
  **kwargs: Additional kwargs passed to client.complete()
192
324
 
193
325
  Returns:
194
- The agent's response content
326
+ The agent's response content (str), or a validated Pydantic model
327
+ instance if response_model is provided.
195
328
 
196
329
  Raises:
197
330
  ValueError: If agent_name is not found
331
+ StructuredOutputError: If structured output validation fails
332
+ after all retries are exhausted
198
333
  """
199
334
  if agent_name not in self._agents:
200
335
  raise ValueError(
201
- f"Unknown agent: {agent_name}. Available agents: {list(self._agents.keys())}"
336
+ f"Unknown agent: {agent_name}. "
337
+ f"Available agents: {list(self._agents.keys())}"
202
338
  )
203
339
 
204
340
  agent = self._agents[agent_name]
341
+ start_time = time.perf_counter()
205
342
 
206
343
  # Build messages from this agent's perspective
207
344
  messages = self._build_messages_for(agent)
208
345
 
209
346
  # Add any additional context as a user message
210
347
  if additional_context:
211
- messages.append({"role": "user", "content": additional_context})
348
+ messages.append(user_message(additional_context))
212
349
 
213
- # Make completion
350
+ # Handle structured output: inject schema into system prompt
351
+ extra_kwargs = dict(kwargs)
352
+ if response_model is not None:
353
+ messages, extra_kwargs = self._inject_schema(
354
+ messages, extra_kwargs, response_model, agent
355
+ )
356
+
357
+ # Make completion (without response_model so we get CompletionResponse
358
+ # and can store raw content in the transcript)
214
359
  response: CompletionResponse = await self._client.complete(
215
360
  model=agent.model or self._default_model,
216
361
  messages=messages,
217
- **kwargs,
362
+ **extra_kwargs,
218
363
  )
219
364
 
365
+ latency_ms = (time.perf_counter() - start_time) * 1000
366
+
220
367
  # Record in transcript with metadata
221
368
  self._transcript.append(
222
369
  ChatMessage(
@@ -225,35 +372,184 @@ class MultiAgentChat:
225
372
  metadata={
226
373
  "model": response.model,
227
374
  "tokens": response.usage.total_tokens if response.usage else None,
375
+ "input_tokens": (
376
+ response.usage.input_tokens if response.usage else None
377
+ ),
378
+ "output_tokens": (
379
+ response.usage.output_tokens if response.usage else None
380
+ ),
381
+ "latency_ms": round(latency_ms, 1),
228
382
  },
383
+ visible_to=visible_to,
229
384
  )
230
385
  )
231
386
 
387
+ logger.info(
388
+ "%s spoke (model=%s, tokens=%s, %.1fms)%s",
389
+ agent_name,
390
+ response.model,
391
+ response.usage.total_tokens if response.usage else "N/A",
392
+ latency_ms,
393
+ f" [visible_to={visible_to}]" if visible_to else "",
394
+ )
395
+
396
+ # Validate structured output if requested
397
+ if response_model is not None:
398
+ return self._validate_structured(
399
+ response=response,
400
+ response_model=response_model,
401
+ messages=messages,
402
+ extra_kwargs=extra_kwargs,
403
+ agent=agent,
404
+ structured_retries=structured_retries,
405
+ visible_to=visible_to,
406
+ )
407
+
232
408
  return response.content
233
409
 
234
- def _build_messages_for(self, agent: Agent) -> list[dict[str, str]]:
410
+ # -- Internal helpers ------------------------------------------------
411
+
412
+ def _inject_schema(
413
+ self,
414
+ messages: list[dict[str, Any]],
415
+ extra_kwargs: dict[str, Any],
416
+ response_model: type[BaseModel],
417
+ agent: Agent,
418
+ ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
419
+ """Inject structured output schema into the system prompt and kwargs."""
420
+ schema_prompt = schema_to_prompt(response_model)
421
+
422
+ # Append schema to the system message
423
+ if messages and messages[0].get("role") == "system":
424
+ messages[0] = {
425
+ **messages[0],
426
+ "content": messages[0]["content"] + "\n\n" + schema_prompt,
427
+ }
428
+ else:
429
+ messages.insert(0, system_message(schema_prompt))
430
+
431
+ # Enable JSON mode for supported providers
432
+ if "response_format" not in extra_kwargs:
433
+ resolved_model = (agent.model or self._default_model or "").lower()
434
+ if any(
435
+ p in resolved_model
436
+ for p in ["gpt-4", "gpt-3.5", "claude", "gemini", "mistral"]
437
+ ):
438
+ extra_kwargs["response_format"] = {"type": "json_object"}
439
+
440
+ return messages, extra_kwargs
441
+
442
    async def _validate_structured(
        self,
        response: CompletionResponse,
        response_model: type[T],
        messages: list[dict[str, Any]],
        extra_kwargs: dict[str, Any],
        agent: Agent,
        structured_retries: int,
        visible_to: list[str] | None,
    ) -> T:
        """Validate structured output with retry support.

        Tries to parse ``response`` into ``response_model``; on failure the
        validation error is fed back to the model (assistant turn carrying
        the bad output, then a corrective user turn) and the completion is
        re-requested, up to ``structured_retries`` additional attempts. The
        last transcript entry is replaced with each corrected response.

        Raises:
            StructuredOutputError: If validation still fails after all
                retries are exhausted.
        """
        last_error: StructuredOutputError | None = None
        # Copy so retry turns don't mutate the caller's message list.
        current_messages = list(messages)

        for attempt in range(structured_retries + 1):
            try:
                return validate_response(response, response_model)
            except StructuredOutputError as e:
                last_error = e
                logger.warning(
                    "%s structured output validation failed (attempt %d): %s",
                    agent.name,
                    attempt + 1,
                    e,
                )
                if attempt < structured_retries:
                    # Ask the model to fix its response
                    error_feedback = format_validation_error_for_retry(e)
                    current_messages.append(assistant_message(response.content))
                    current_messages.append(user_message(error_feedback))

                    response = await self._client.complete(
                        model=agent.model or self._default_model,
                        messages=current_messages,
                        **extra_kwargs,
                    )
                    # Update transcript with corrected response.
                    # NOTE(review): the replacement reuses the original
                    # attempt's metadata (tokens/latency), so stats will
                    # under-count retry cost — confirm this is intended.
                    self._transcript[-1] = ChatMessage(
                        agent_name=agent.name,
                        content=response.content,
                        metadata=self._transcript[-1].metadata,
                        visible_to=visible_to,
                    )

        # Reached only after the final attempt failed.
        raise last_error  # type: ignore[misc]
487
+
488
+ def _resolve_system_prompt(self, agent: Agent) -> str:
489
+ """
490
+ Resolve an agent's system prompt from raw string or Jinja template.
491
+
492
+ Args:
493
+ agent: The agent to resolve the prompt for
494
+
495
+ Returns:
496
+ The rendered system prompt string
497
+
498
+ Raises:
499
+ ValueError: If template engine is not configured
500
+ """
501
+ if agent.system_template:
502
+ engine = self._client.template_engine
503
+ if engine is None:
504
+ raise ValueError(
505
+ f"Agent '{agent.name}' uses system_template but no template "
506
+ "engine is configured. Pass template_dir to the Flashlite client "
507
+ "or call client.register_template()."
508
+ )
509
+ return engine.render(agent.system_template, agent.system_variables)
510
+ return agent.system_prompt or ""
511
+
512
+ def _build_messages_for(self, agent: Agent) -> list[dict[str, Any]]:
235
513
  """
236
514
  Build the message history from a specific agent's perspective.
237
515
 
238
- The agent's own messages become "assistant" role (what they said).
239
- Other agents' messages become "user" role with speaker attribution.
516
+ - System prompt (from raw string or Jinja template)
517
+ - Private context (if any, as an additional system message)
518
+ - Transcript messages filtered by visibility:
519
+ - Agent's own messages become "assistant" role with ``name`` field
520
+ - Other agents' messages become "user" role with ``name`` field
240
521
  """
241
- messages: list[dict[str, str]] = []
522
+ messages: list[dict[str, Any]] = []
242
523
 
243
524
  # System prompt for this agent
244
- messages.append({"role": "system", "content": agent.system_prompt})
525
+ prompt = self._resolve_system_prompt(agent)
526
+ messages.append(system_message(prompt))
527
+
528
+ # Private context (static, only this agent sees)
529
+ if agent.private_context:
530
+ messages.append(system_message(agent.private_context))
245
531
 
246
- # Add conversation history
532
+ # Conversation history, filtered by visibility
247
533
  for msg in self._transcript:
534
+ # Check visibility
535
+ if msg.visible_to is not None and agent.name not in msg.visible_to:
536
+ continue
537
+
248
538
  if msg.agent_name == agent.name:
249
539
  # Agent's own previous messages
250
- messages.append({"role": "assistant", "content": msg.content})
540
+ messages.append(
541
+ assistant_message(msg.content, name=_sanitize_name(agent.name))
542
+ )
251
543
  else:
252
- # Other agents'/sources' messages - prefix with speaker name
253
- messages.append({"role": "user", "content": f"[{msg.agent_name}]: {msg.content}"})
544
+ # Other agents'/sources' messages with name attribution
545
+ messages.append(
546
+ user_message(msg.content, name=_sanitize_name(msg.agent_name))
547
+ )
254
548
 
255
549
  return messages
256
550
 
551
+ # -- Batch speaking --------------------------------------------------
552
+
257
553
  async def round_robin(
258
554
  self,
259
555
  rounds: int = 1,
@@ -271,13 +567,20 @@ class MultiAgentChat:
271
567
  Returns:
272
568
  List of all responses in order
273
569
  """
274
- responses = []
570
+ responses: list[str] = []
275
571
  agent_names = list(self._agents.keys())
276
572
 
277
- for _ in range(rounds):
573
+ for round_num in range(1, rounds + 1):
574
+ logger.info(
575
+ "Round %d/%d started (agents: %s)",
576
+ round_num,
577
+ rounds,
578
+ ", ".join(agent_names),
579
+ )
278
580
  for name in agent_names:
279
581
  response = await self.speak(name, **kwargs)
280
582
  responses.append(response)
583
+ logger.info("Round %d/%d complete", round_num, rounds)
281
584
 
282
585
  return responses
283
586
 
@@ -296,12 +599,14 @@ class MultiAgentChat:
296
599
  Returns:
297
600
  List of responses in order
298
601
  """
299
- responses = []
602
+ responses: list[str] = []
300
603
  for name in agent_sequence:
301
604
  response = await self.speak(name, **kwargs)
302
605
  responses.append(response)
303
606
  return responses
304
607
 
608
+ # -- Transcript access -----------------------------------------------
609
+
305
610
  @property
306
611
  def transcript(self) -> list[ChatMessage]:
307
612
  """Get a copy of the conversation transcript."""
@@ -317,30 +622,90 @@ class MultiAgentChat:
317
622
  """Get list of agent names."""
318
623
  return list(self._agents.keys())
319
624
 
320
- def format_transcript(self, include_metadata: bool = False) -> str:
625
+ @property
626
+ def stats(self) -> dict[str, Any]:
627
+ """
628
+ Get per-agent statistics from the conversation.
629
+
630
+ Returns a dict with total and per-agent breakdowns of tokens,
631
+ latency, and message counts.
632
+ """
633
+ agent_stats: dict[str, dict[str, Any]] = {}
634
+ total_tokens = 0
635
+ total_messages = 0
636
+
637
+ for msg in self._transcript:
638
+ name = msg.agent_name
639
+ if name not in agent_stats:
640
+ agent_stats[name] = {
641
+ "messages": 0,
642
+ "total_tokens": 0,
643
+ "input_tokens": 0,
644
+ "output_tokens": 0,
645
+ "total_latency_ms": 0.0,
646
+ }
647
+ stats = agent_stats[name]
648
+ stats["messages"] += 1
649
+ total_messages += 1
650
+
651
+ tokens = msg.metadata.get("tokens")
652
+ if tokens is not None:
653
+ stats["total_tokens"] += tokens
654
+ total_tokens += tokens
655
+
656
+ input_t = msg.metadata.get("input_tokens")
657
+ if input_t is not None:
658
+ stats["input_tokens"] += input_t
659
+
660
+ output_t = msg.metadata.get("output_tokens")
661
+ if output_t is not None:
662
+ stats["output_tokens"] += output_t
663
+
664
+ latency = msg.metadata.get("latency_ms")
665
+ if latency is not None:
666
+ stats["total_latency_ms"] += latency
667
+
668
+ return {
669
+ "total_messages": total_messages,
670
+ "total_tokens": total_tokens,
671
+ "by_agent": agent_stats,
672
+ }
673
+
674
    def format_transcript(
        self,
        include_metadata: bool = False,
        include_private: bool = False,
    ) -> str:
        """
        Format the transcript as a readable string.

        Args:
            include_metadata: Whether to include metadata like tokens used
            include_private: Whether to show visibility annotations

        Returns:
            Formatted transcript string
        """
        lines: list[str] = []
        for msg in self._transcript:
            # Header: "[Name]" plus an optional visibility annotation.
            header = f"[{msg.agent_name}]"
            if include_private and msg.visible_to is not None:
                header += f" (visible_to: {', '.join(msg.visible_to)})"
            header += ":"
            lines.append(header)
            # Indent content for readability
            for line in msg.content.split("\n"):
                lines.append(f" {line}")
            if include_metadata and msg.metadata:
                # `is not None` keeps legitimate zero values in the output.
                meta_str = ", ".join(
                    f"{k}={v}" for k, v in msg.metadata.items() if v is not None
                )
                if meta_str:
                    lines.append(f" ({meta_str})")
            # Blank separator line between messages.
            lines.append("")
        return "\n".join(lines)
342
707
 
343
- def get_messages_for(self, agent_name: str) -> list[dict[str, str]]:
708
+ def get_messages_for(self, agent_name: str) -> list[dict[str, Any]]:
344
709
  """
345
710
  Get the messages list as a specific agent would see it.
346
711
 
@@ -374,5 +739,6 @@ class MultiAgentChat:
374
739
 
375
740
  def __repr__(self) -> str:
376
741
  return (
377
- f"MultiAgentChat(agents={list(self._agents.keys())}, messages={len(self._transcript)})"
742
+ f"MultiAgentChat(agents={list(self._agents.keys())}, "
743
+ f"messages={len(self._transcript)})"
378
744
  )
@@ -5,6 +5,7 @@ import logging
5
5
  import time
6
6
  from dataclasses import dataclass, field
7
7
 
8
+ from .._spinner import Spinner
8
9
  from ..types import CompletionRequest, CompletionResponse, RateLimitConfig, RateLimitError
9
10
  from .base import CompletionHandler, Middleware
10
11
 
@@ -52,6 +53,18 @@ class TokenBucket:
52
53
  Raises:
53
54
  RateLimitError: If timeout exceeded
54
55
  """
56
+ # Clamp to capacity so a single oversized request can never deadlock.
57
+ # The request still pays the refill-wait cost for `capacity` tokens,
58
+ # which preserves rate-limiting backpressure.
59
+ effective = min(tokens, self.capacity)
60
+ if effective < tokens:
61
+ logger.warning(
62
+ "Requested %d tokens exceeds bucket capacity %d — "
63
+ "clamping to capacity to avoid deadlock",
64
+ int(tokens),
65
+ int(self.capacity),
66
+ )
67
+
55
68
  start_time = time.monotonic()
56
69
  deadline = start_time + timeout if timeout else None
57
70
 
@@ -59,12 +72,12 @@ class TokenBucket:
59
72
  while True:
60
73
  self._refill()
61
74
 
62
- if self.tokens >= tokens:
63
- self.tokens -= tokens
75
+ if self.tokens >= effective:
76
+ self.tokens -= effective
64
77
  return time.monotonic() - start_time
65
78
 
66
79
  # Calculate wait time for enough tokens
67
- tokens_needed = tokens - self.tokens
80
+ tokens_needed = effective - self.tokens
68
81
  wait_time = tokens_needed / self.rate
69
82
 
70
83
  # Check timeout
@@ -146,21 +159,31 @@ class RateLimitMiddleware(Middleware):
146
159
 
147
160
  # Acquire RPM token before making request
148
161
  if self._rpm_bucket:
149
- wait_time = await self._rpm_bucket.acquire()
162
+ async with Spinner("Waiting on rate limit (RPM)...", delay=0.4):
163
+ wait_time = await self._rpm_bucket.acquire()
150
164
  if wait_time > 0.1: # Only log significant waits
151
- logger.debug(f"Rate limit: waited {wait_time:.2f}s for RPM token")
165
+ logger.info(
166
+ "⏳ Rate limit backpressure: waited %.2fs for RPM capacity", wait_time
167
+ )
152
168
 
153
169
  # Make the request
154
170
  response = await next_handler(request)
155
171
 
156
172
  # For TPM limiting, consume tokens based on actual usage
157
- # This is post-hoc - we can't know token count before the request
173
+ # This is post-hoc — we can't know token count before the request
158
174
  if self._tpm_bucket and response.usage:
159
175
  total_tokens = response.usage.total_tokens
160
176
  if total_tokens > 0:
161
- # Don't block on TPM - just record the usage
162
177
  # This creates backpressure for subsequent requests
163
- await self._tpm_bucket.acquire(tokens=float(total_tokens))
178
+ async with Spinner("Waiting on rate limit (TPM)...", delay=0.4):
179
+ wait_time = await self._tpm_bucket.acquire(tokens=float(total_tokens))
180
+ if wait_time > 0.1:
181
+ logger.info(
182
+ "⏳ Rate limit backpressure: waited %.2fs for TPM capacity "
183
+ "(%d tokens used)",
184
+ wait_time,
185
+ total_tokens,
186
+ )
164
187
 
165
188
  return response
166
189
 
@@ -354,11 +354,23 @@ class InspectLogger:
354
354
  sample_id = self._sample_count
355
355
  self._sample_count += 1
356
356
 
357
- # Convert messages to Inspect format
358
- input_messages = [
359
- {"role": msg.get("role", "user"), "content": msg.get("content", "")}
360
- for msg in request.messages
361
- ]
357
+ # Convert messages to Inspect format (preserve name field for multi-agent)
358
+ input_messages = []
359
+ for msg in request.messages:
360
+ inspect_msg: dict[str, Any] = {
361
+ "role": msg.get("role", "user"),
362
+ "content": msg.get("content", ""),
363
+ }
364
+ if msg.get("name"):
365
+ inspect_msg["name"] = msg["name"]
366
+ input_messages.append(inspect_msg)
367
+
368
+ # Build metadata, including template info for traceability
369
+ entry_metadata = dict(metadata or {})
370
+ if request.template is not None:
371
+ entry_metadata["template"] = request.template
372
+ if request.variables is not None:
373
+ entry_metadata["variables"] = request.variables
362
374
 
363
375
  entry = InspectLogEntry(
364
376
  eval_id=self._eval_id,
@@ -373,7 +385,7 @@ class InspectLogger:
373
385
  "total": response.usage.total_tokens if response.usage else 0,
374
386
  },
375
387
  timestamp=datetime.now(UTC).isoformat(),
376
- metadata=metadata or {},
388
+ metadata=entry_metadata,
377
389
  )
378
390
 
379
391
  json_str = json.dumps(entry.to_dict())
@@ -175,6 +175,10 @@ class StructuredLogger:
175
175
 
176
176
  # Build parameters dict
177
177
  params: dict[str, Any] = {}
178
+ if request.template is not None:
179
+ params["template"] = request.template
180
+ if request.variables is not None:
181
+ params["variables"] = request.variables
178
182
  if request.temperature is not None:
179
183
  params["temperature"] = request.temperature
180
184
  if request.max_tokens is not None:
flashlite/types.py CHANGED
@@ -58,7 +58,7 @@ class CompletionRequest:
58
58
  """A request to complete a chat conversation."""
59
59
 
60
60
  model: str
61
- messages: Messages = {}
61
+ messages: Messages = field(default_factory=list)
62
62
  template: str | None = None
63
63
  variables: dict[str, Any] | None = None
64
64
  temperature: float | None = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flashlite
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Batteries-included wrapper for litellm with rate limiting, retries, templating, and more
5
5
  Author-email: ndalton12 <niall.dalton12@gmail.com>
6
6
  License-File: LICENSE.md
@@ -1,8 +1,9 @@
1
1
  flashlite/__init__.py,sha256=RlXjsK7zvZXStMvfz4FGqBxTWHev9VkyHYy-35TuTuM,3585
2
- flashlite/client.py,sha256=zQH_eLWZxnkX9acwI-y9c3uxeGybA-C0I9UPU6HrzvI,25081
2
+ flashlite/_spinner.py,sha256=9KHXD1MW33P2VM-sUe7NZJYz48auJXLspwy6unjMjSE,3019
3
+ flashlite/client.py,sha256=1UYWpWEfbrZe4mkz2-qaztjUM38TJS6swKbZ_OSgThw,25309
3
4
  flashlite/config.py,sha256=3RMEIAejBPlBG_VOgD8mpZKEDNZvK0k0cVv3vMM9kW8,4818
4
5
  flashlite/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- flashlite/types.py,sha256=mrqhUlnnYV8-FBSKxbxc2ZPy-aLH1hgwToCwqqlipYU,6473
6
+ flashlite/types.py,sha256=OqtgtnZaEWRYfaI6esqzv4-HKwu76Y-RDu3-Mhsae9w,6498
6
7
  flashlite/cache/__init__.py,sha256=T8O7oiZ0U181_tacJzfK6IGEAt1m3NdaIlBjq9wmB44,325
7
8
  flashlite/cache/base.py,sha256=IaDAI4EzewhJe0quh2JQK9-BxQxGxUDwrsd9BCaHFFc,5663
8
9
  flashlite/cache/disk.py,sha256=pGPI7eJW6RqVCQC4laTYhQr0iU-AkjA4aFFYt-wg8ls,8777
@@ -10,7 +11,7 @@ flashlite/cache/memory.py,sha256=_A4F7NTR9da2KDQW7fcKnUWrC-W_JpaYmb3d6rovX3w,441
10
11
  flashlite/conversation/__init__.py,sha256=zSgC4G697mx3T5bKn8WUEkSaSkMQQeHJsfyLdRUM30w,694
11
12
  flashlite/conversation/context.py,sha256=NQMLi5_WiN1zDYaPZTO9uJG_dJ3JJiVmAFfGAPM4X6c,10164
12
13
  flashlite/conversation/manager.py,sha256=dSQDgtzNt_6T8S1sHSAXKcS3DoBQ2vI9Ig1PZKaTh48,11644
13
- flashlite/conversation/multi_agent.py,sha256=t1jZD1VS3NOcAJtjQTMtvjEZCVTlFGy3SOxE_jjAtuo,11591
14
+ flashlite/conversation/multi_agent.py,sha256=tt5gNzUXqnbvp84_aWtcoCIlShPWSCzVa_Jt48Xuvy8,25427
14
15
  flashlite/core/__init__.py,sha256=nWbMMPED_HsD62hkIYv45DDR6zX2_cDWCMPDTNfqSu4,315
15
16
  flashlite/core/completion.py,sha256=NTtAJzJ3ba0N0xVs8lCN5htme0SWEMxYroGjI63crw4,3847
16
17
  flashlite/core/messages.py,sha256=-EUtEjFjSNY1Lzfrynb9xtYw4FZRKnfFoYQqgsUcQZQ,3848
@@ -18,12 +19,12 @@ flashlite/middleware/__init__.py,sha256=T8Z4uSqjkuAcf5u5FuUBNfKyL5sqp4Iw4sov_xiU
18
19
  flashlite/middleware/base.py,sha256=LC_IL96jWWPdE0o_PBGPvSylmyLmob20LBVvGkfUS3g,2691
19
20
  flashlite/middleware/cache.py,sha256=R1YwAZBg5YJGTiqgNWdkl7VSN1xpmqmupTSBQnpyH-s,4032
20
21
  flashlite/middleware/logging.py,sha256=D3x8X1l1LN1Um_qOWuELyO8Fgo9WulFJTIx6s94Ure4,4919
21
- flashlite/middleware/rate_limit.py,sha256=nf0-Ul0CGnX0VRKtxB2dfoplkBin3P2cMLrbks76lcg,7059
22
+ flashlite/middleware/rate_limit.py,sha256=a0L0tnnX60ouJ7rLIoHs7JNX59Q5kqlf0kQgzP4FMlw,8091
22
23
  flashlite/middleware/retry.py,sha256=_3Lz9Gmes2sNk6rO10WamH6yrwJy8TQi-esIl8NIMag,4832
23
24
  flashlite/observability/__init__.py,sha256=VHdYteU9KmVkgSHrkA-Ssz6_qoi9uL-2JFDhSH5sgwI,949
24
25
  flashlite/observability/callbacks.py,sha256=yz1oZh7f7WVxvKmt7XyHbj4WDC2xnvM3SJiTSxfAkoQ,4897
25
- flashlite/observability/inspect_compat.py,sha256=IrsdEiV-qn_wOlgAvWLcIJ_7WxU0Bpq7DcHaS_KWXPw,16366
26
- flashlite/observability/logging.py,sha256=UxBH2RN8rNcGZHYgC_QYiuEpaIRXEQFs1OjiKjxbuf0,9273
26
+ flashlite/observability/inspect_compat.py,sha256=S2D2h_w_qD7xsd6cPMwt3-kbt76NYWbR35h2BKR5m24,16913
27
+ flashlite/observability/logging.py,sha256=qH0ky22nePzjVQIPPhsALcA4VIf7mkP_wMwg69fYM2s,9458
27
28
  flashlite/observability/metrics.py,sha256=blRx5N3uN4ilnPpxBe7k_uDhYV3GmQWXoKPLVxnk8_s,7466
28
29
  flashlite/structured/__init__.py,sha256=9k5bwkzFo_JD3WZ1Tm4iyZqoZ1A51EIINI8N1H2_2ew,750
29
30
  flashlite/structured/outputs.py,sha256=Q_isfrtKJGybBadGMKmfo5UJ5vMaUQRCRgFpjGWZOF8,5070
@@ -35,7 +36,7 @@ flashlite/templating/registry.py,sha256=wp8RaibHKNyu5q4tCdOXJ0B4tey7bv-c0qb9h1a7
35
36
  flashlite/tools/__init__.py,sha256=zpQ5KyvZwZaVvaulnpMmL_JjCnMfD08nD_foI95TjVg,1791
36
37
  flashlite/tools/definitions.py,sha256=cqyk6GR1qeMkTPFqsadnJc-YkCG15QVafiaf-OjGYNU,11519
37
38
  flashlite/tools/execution.py,sha256=iQC7V3R5Tx19suISnnuaDpjpgl8wURwOHmKZbsHL16s,10814
38
- flashlite-0.1.1.dist-info/METADATA,sha256=2BbX7EfrpyvJh-SjiM0noo-gzyDd8bmNchpfLpjLx4A,4293
39
- flashlite-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
40
- flashlite-0.1.1.dist-info/licenses/LICENSE.md,sha256=z2KZcyoH16ayjxlbeBM01uD-bXn1WTcKFab5ZKBhfJE,1068
41
- flashlite-0.1.1.dist-info/RECORD,,
39
+ flashlite-0.2.0.dist-info/METADATA,sha256=a1YS4nT7UJJD98ibIlguAfWdhUc-2SDN9xQ9jBmjjSA,4293
40
+ flashlite-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
41
+ flashlite-0.2.0.dist-info/licenses/LICENSE.md,sha256=z2KZcyoH16ayjxlbeBM01uD-bXn1WTcKFab5ZKBhfJE,1068
42
+ flashlite-0.2.0.dist-info/RECORD,,