PyPI - persona-runtime - Versions diffs - 0.1.0__py3-none-any.whl - Mend

persona-runtime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

persona_runtime/__init__.py +42 -0
persona_runtime/agentic/__init__.py +46 -0
persona_runtime/agentic/compactor.py +134 -0
persona_runtime/agentic/errors.py +51 -0
persona_runtime/agentic/events.py +214 -0
persona_runtime/agentic/loop.py +691 -0
persona_runtime/agentic/run.py +117 -0
persona_runtime/agentic/step.py +81 -0
persona_runtime/ambiguity.py +422 -0
persona_runtime/errors.py +44 -0
persona_runtime/logging.py +474 -0
persona_runtime/loop.py +1113 -0
persona_runtime/openrouter_subscription.py +197 -0
persona_runtime/prompt.py +510 -0
persona_runtime/py.typed +0 -0
persona_runtime/question_author.py +143 -0
persona_runtime/questions.py +204 -0
persona_runtime/router.py +28 -0
persona_runtime/routing/__init__.py +62 -0
persona_runtime/routing/classifiers.py +107 -0
persona_runtime/routing/heuristic.py +263 -0
persona_runtime/routing/latency.py +116 -0
persona_runtime/routing/layer1.py +126 -0
persona_runtime/routing/nvidia_models.py +113 -0
persona_runtime/routing/protocol.py +112 -0
persona_runtime/routing/scoring.py +199 -0
persona_runtime/routing/types.py +141 -0
persona_runtime/routing/unified.py +235 -0
persona_runtime/task_detector.py +317 -0
persona_runtime/tier.py +616 -0
persona_runtime-0.1.0.dist-info/METADATA +171 -0
persona_runtime-0.1.0.dist-info/RECORD +34 -0
persona_runtime-0.1.0.dist-info/WHEEL +4 -0
persona_runtime-0.1.0.dist-info/licenses/LICENSE +77 -0

persona_runtime/__init__.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""Persona runtime — conversation loop, router, and agentic engine.
+The public surface spec 06 (agentic loop) and spec 08 (API) import:
+- :class:`ConversationLoop` — orchestrates one turn (the keystone).
+- :class:`PromptBuilder` + :class:`RetrievedContext` — prompt assembly.
+- :class:`Router` — rule-based tier selection.
+- :class:`TierConfig` / :class:`TierRegistry` / :func:`tier_registry_from_env`
+  — tier configuration and the lazily-cached backend registry.
+- :class:`TurnLog` / :class:`TurnLogWriter` / :class:`JSONLTurnLogWriter` /
+  :class:`MemoryTurnLogWriter` — per-turn telemetry.
+- :exc:`TierNotConfiguredError` — the one runtime domain exception (D-05-2).
+"""
+from __future__ import annotations
+from persona_runtime.errors import TierNotConfiguredError
+from persona_runtime.logging import (
+    JSONLTurnLogWriter,
+    MemoryTurnLogWriter,
+    TurnLog,
+    TurnLogWriter,
+)
+from persona_runtime.loop import ConversationLoop
+from persona_runtime.prompt import PromptBuilder, RetrievedContext
+from persona_runtime.router import Router
+from persona_runtime.tier import TierConfig, TierRegistry, tier_registry_from_env
+__all__ = [
+    "ConversationLoop",
+    "JSONLTurnLogWriter",
+    "MemoryTurnLogWriter",
+    "PromptBuilder",
+    "RetrievedContext",
+    "Router",
+    "TierConfig",
+    "TierNotConfiguredError",
+    "TierRegistry",
+    "TurnLog",
+    "TurnLogWriter",
+    "tier_registry_from_env",
+]

persona_runtime/agentic/__init__.py ADDED Viewed

@@ -0,0 +1,46 @@
+"""The agentic loop — plan-act-reflect execution for end-to-end tasks (spec 06).
+When a chat turn is not enough ("draft a complaint about my landlord refusing to
+fix mould"), :class:`AgenticLoop` runs the *simplest possible* agent loop: one
+model decides at each step whether to call a tool, ask the user a question, or
+produce a final answer — no multi-agent orchestration, no graph-of-thought
+(architecture §5.2). The value is in the error-handling and budget management
+around the loop, not the loop itself.
+The public surface spec 08 (the API, which exposes ``/v1/runs``) imports:
+- :class:`AgenticLoop` — the plan-act-reflect engine (lands in T06).
+- :class:`Run` / :class:`RunStatus` / :class:`Step` / :class:`StepType` — the
+  serialisable run/step data model (T02).
+- :class:`CancelToken` — caller-held cancellation control (T02).
+- :class:`RunEvent` — the SSE event the API serialises for the run viewer (T03).
+- :exc:`MaxStepsReachedError` / :exc:`RunCancelledError` — the two agentic
+  terminal exception types (defined, but the loop returns a ``Run`` rather than
+  raising; D-06-2).
+Spec 08 owns what the loop does not (mirrors D-S05-4 / D-05-4): it persists the
+``Run`` per-step, supplies the ``user_respond`` blocking callback, serialises
+``RunEvent``\\ s to SSE, and owns the ``TierRegistry`` lifecycle.
+"""
+from __future__ import annotations
+from persona_runtime.agentic.compactor import StepHistoryCompactor
+from persona_runtime.agentic.errors import MaxStepsReachedError, RunCancelledError
+from persona_runtime.agentic.events import RunEvent
+from persona_runtime.agentic.loop import AgenticLoop
+from persona_runtime.agentic.run import CancelToken, Run, RunStatus
+from persona_runtime.agentic.step import Step, StepType
+__all__ = [
+    "AgenticLoop",
+    "CancelToken",
+    "MaxStepsReachedError",
+    "Run",
+    "RunCancelledError",
+    "RunEvent",
+    "RunStatus",
+    "Step",
+    "StepHistoryCompactor",
+    "StepType",
+]

persona_runtime/agentic/compactor.py ADDED Viewed

@@ -0,0 +1,134 @@
+"""Step-history compaction for the agentic loop (spec §6).
+An agentic run's context grows with every step — tool results can be large (a
+``web_fetch`` returning 4000 chars for each of four URLs is 16K characters of tool results
+alone). The :class:`StepHistoryCompactor` keeps the context within the tier's
+budget by summarising earlier step history when it crosses 80% of the budget,
+while preserving the run's invariants verbatim: the **persona block + task
+description** (the floor, ``context[0]``) and the **most recent steps**.
+The async-bridge (D-06-4 — kept LOCAL, no shared ``_bridge.py``): the small-tier
+summary needs an ``await``, but :meth:`compact_if_needed` is sync-shaped. The
+*loop* owns the async call — it asks :meth:`should_compact` whether compaction
+will fire, pre-computes the summary by awaiting the small tier, and passes the
+resolved ``summary`` string in. This reuses the D-05-X *idiom* (predict →
+pre-compute → sync callee) but NOT its machinery: the conversation manager keys
+off a turn-count boundary and is stateful; this compactor keys off a token
+threshold and is stateless (a run is one pass). The shared element is a pattern,
+documented here, not a function.
+"""
+from __future__ import annotations
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING
+from persona.schema.conversation import ConversationMessage
+from persona.skills import count_tokens
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+__all__ = ["StepHistoryCompactor"]
+# Fraction of the tier budget above which compaction fires (spec §6). Compared
+# with a strict ``>`` against ``int(budget * _COMPACT_THRESHOLD)``.
+_COMPACT_THRESHOLD = 0.8
+# Messages from the tail kept verbatim — "recent 2 steps" (spec §6). A step
+# contributes at most a couple of messages (an assistant turn + its tool
+# results), so keeping the last few trailing messages preserves recent steps.
+# This is a minimum: the kept tail grows when its boundary is walked back over
+# leading ``tool`` messages (see ``StepHistoryCompactor._recent_start``).
+_KEEP_RECENT_MESSAGES = 4
+def _render(messages: Sequence[ConversationMessage]) -> str:
+    """Render messages to the text form used for token counting (mirrors loop.py)."""
+    return "\n".join(f"{m.role}: {m.content}" for m in messages)
+class StepHistoryCompactor:
+    """Compacts an agentic run's step history at the tier budget (spec §6).
+    Stateless — each :meth:`compact_if_needed` call recomputes from the current
+    context. The persona block + task description (``context[0]``) and the most
+    recent messages are never summarised; only the middle step history is.
+    """
+    def should_compact(self, context: Sequence[ConversationMessage], budget: int) -> bool:
+        """True if ``context`` exceeds 80% of ``budget`` and has a compactable middle.
+        The loop calls this BEFORE :meth:`compact_if_needed` so it knows whether
+        to pre-compute the (async) small-tier summary. Returns ``False`` when the
+        context is small enough OR when there is no middle to summarise (the
+        floor + recent tail already account for every message).
+        """
+        if budget <= 0:
+            return False
+        if len(context) <= 1 + _KEEP_RECENT_MESSAGES:
+            return False
+        return count_tokens(_render(context)) > int(budget * _COMPACT_THRESHOLD)
+    def compact_if_needed(
+        self,
+        context: list[ConversationMessage],
+        budget: int,
+        *,
+        summary: str | None,
+    ) -> list[ConversationMessage]:
+        """Return a compacted context, or ``context`` unchanged if under budget.
+        Args:
+            context: The run's working context — ``[floor, *step_messages]``
+                where ``floor`` (index 0) is the persona block + task +
+                agentic-instructions system message.
+            budget: The tier's context-window budget in tokens.
+            summary: The pre-computed summary of the middle step history (the
+                loop awaits the small tier and passes the result here; D-06-4).
+                ``None`` means "no summary available" → no-op (the loop passes a
+                string exactly when :meth:`should_compact` returned ``True``).
+        Returns:
+            ``[floor, summary_message, *recent_messages]`` when compaction fires,
+            else ``context`` unchanged. The floor and the recent tail are
+            byte-identical to the input (acceptance #8).
+        """
+        if summary is None or not self.should_compact(context, budget):
+            return context
+        floor = context[0]
+        recent = context[self._recent_start(context) :]
+        summary_message = ConversationMessage(
+            role="system",
+            content=f"Earlier in this run: {summary}",
+            created_at=datetime.now(UTC),
+            metadata={"kind": "step_compaction"},
+        )
+        return [floor, summary_message, *recent]
+    @staticmethod
+    def _recent_start(context: Sequence[ConversationMessage]) -> int:
+        """Index where the verbatim recent tail begins.
+        Never index 0 (the floor), and never on a dangling ``tool`` message: a
+        ``tool`` result must keep the preceding assistant ``tool_calls`` message
+        in the same context window, or native providers (OpenAI/DeepSeek) reject
+        the request ("'tool' must follow a message with 'tool_calls'"). We walk
+        the boundary back over any leading ``tool`` messages so the kept tool-call
+        group stays intact. Spec 11 soak finding.
+        """
+        start = max(1, len(context) - _KEEP_RECENT_MESSAGES)
+        while start > 1 and context[start].role == "tool":
+            start -= 1
+        return start
+    def middle_to_summarise(
+        self, context: Sequence[ConversationMessage]
+    ) -> list[ConversationMessage]:
+        """The slice the loop should summarise: everything between floor and recent tail.
+        The loop renders this, awaits the small-tier summariser on it, and passes
+        the resulting string back as ``summary``. Returns ``[]`` when there is no
+        middle (the caller then passes ``summary=None``).
+        """
+        if len(context) <= 1 + _KEEP_RECENT_MESSAGES:
+            return []
+        start = self._recent_start(context)
+        return list(context[1:start])

persona_runtime/agentic/errors.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""Agentic-loop domain exceptions (D-06-2).
+The agentic loop is orchestration over already-reviewed components (specs 01–05),
+so it adds the smallest possible exception surface — two classes for its two
+genuinely-new terminal concepts:
+- :class:`MaxStepsReachedError` — the loop ran out of steps without a final answer.
+- :class:`RunCancelledError` — the caller cancelled the run.
+**Both are DEFINED but the loop itself does not raise them.** Max-steps and
+cancellation are normal terminal *outcomes*, modelled as
+:class:`~persona_runtime.agentic.run.RunStatus` values; the loop sets the status,
+produces (for max-steps) a best-effort summary, and *returns* the
+:class:`~persona_runtime.agentic.run.Run`. These exception types exist so the
+composition root (spec 08) can choose to surface those outcomes as raised errors
+to an HTTP caller if it prefers — the loop hands back a persistable ``Run`` either
+way (mirrors D-05-2's "``MaxToolRoundsExceeded`` deliberately not raised").
+Everything else (provider 429s, tool-not-allowed, schema mismatches) is a
+spec-01/02/03 domain exception that propagates unchanged — no parallel runtime
+vocabulary (hexagonal architecture, ENGINEERING_STANDARDS.md §1.2).
+"""
+from __future__ import annotations
+from persona.errors import PersonaError
+__all__ = ["MaxStepsReachedError", "RunCancelledError"]
+class MaxStepsReachedError(PersonaError):
+    """A run reached ``max_steps`` without producing a final answer.
+    Defined for callers that prefer to surface max-steps as an exception; the
+    :class:`~persona_runtime.agentic.loop.AgenticLoop` does **not** raise it
+    (it sets ``RunStatus.MAX_STEPS_REACHED``, generates a best-effort summary,
+    and returns the ``Run``). Carries ``context`` with ``max_steps`` and
+    ``run_id`` so an operator can see which run exhausted its budget.
+    NOTE(review): this class adds no fields or validation of its own, so
+    nothing here enforces those ``context`` keys — the raiser must supply them;
+    confirm the expected shape against ``PersonaError``.
+    """
+class RunCancelledError(PersonaError):
+    """A run was cancelled via its :class:`~persona_runtime.agentic.run.CancelToken`.
+    Defined for callers that prefer to surface cancellation as an exception;
+    the :class:`~persona_runtime.agentic.loop.AgenticLoop` does **not** raise it
+    (it sets ``RunStatus.CANCELLED`` at the step boundary and returns the
+    ``Run`` cleanly, with no half-executed step — acceptance #6). Carries
+    ``context`` with ``run_id`` and the ``step`` at which cancellation took
+    effect.
+    NOTE(review): this class adds no fields or validation of its own, so
+    nothing here enforces those ``context`` keys — the raiser must supply them;
+    confirm the expected shape against ``PersonaError``.
+    """

persona_runtime/agentic/events.py ADDED Viewed

@@ -0,0 +1,214 @@
+"""`RunEvent` — the SSE event stream for the run viewer (spec §8).
+The :meth:`AgenticLoop.run` ``on_event`` callback receives :class:`RunEvent`
+objects that the API (spec 08) serialises to SSE; each event type maps to a
+visual element in the run viewer (spec 09). The loop never constructs a
+``RunEvent`` by hand — it calls one of the typed classmethod constructors, which
+are the single place each event's ``type`` string and ``data`` payload shape are
+defined (DRY).
+`RunEvent` is frozen Pydantic v2 (D-06-1): it crosses the spec-08 SSE
+serialisation boundary. The ``data`` payload is ``dict[str, Any]`` so events can
+carry structured detail (tool names, output text); the constructors are
+responsible for building **JSON-safe** payloads (tool calls are rendered to
+name/args dicts, never raw model objects) so ``model_dump_json`` always succeeds.
+"""
+from __future__ import annotations
+from datetime import UTC, datetime  # noqa: TC003 — Pydantic needs runtime access
+from typing import TYPE_CHECKING, Any
+from pydantic import BaseModel, ConfigDict, Field, field_validator
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+    from persona.schema.tools import ToolCall, ToolResult
+    from persona_runtime.agentic.run import Run
+    from persona_runtime.questions import QuestionOption
+__all__ = ["RunEvent"]
+class RunEvent(BaseModel):
+    """One event in a run's lifecycle, serialised to SSE by the API (spec §8).
+    Attributes:
+        type: The event kind — one of ``started``, ``tier``, ``thinking``,
+            ``tool_calling``, ``tool_result``, ``asking_user``,
+            ``user_responded``, ``reasoning``, ``completed``, ``cancelled``,
+            ``max_steps``, ``error``, ``finished``.
+        step: The zero-based step index the event belongs to (``-1`` for
+            run-level events that precede the first step, e.g. ``started``).
+        data: Event-type-specific JSON-safe payload built by the constructor.
+        timestamp: tz-aware UTC time the event was emitted.
+    """
+    model_config = ConfigDict(frozen=True, extra="forbid")
+    type: str
+    step: int
+    data: dict[str, Any] = Field(default_factory=dict)
+    timestamp: datetime
+    @field_validator("timestamp", mode="after")
+    @classmethod
+    def _timestamp_must_be_tz_aware(cls, value: datetime) -> datetime:
+        if value.tzinfo is None:
+            msg = "naive datetime not allowed on RunEvent.timestamp; use datetime.now(UTC)"
+            raise ValueError(msg)
+        return value.astimezone(UTC)
+    # Section: typed constructors (the single place each payload shape lives)
+    @classmethod
+    def started(cls, task: str) -> RunEvent:
+        """The run has begun executing ``task``."""
+        return cls(type="started", step=-1, data={"task": task}, timestamp=datetime.now(UTC))
+    @classmethod
+    def tier(cls, tier: str) -> RunEvent:
+        """The model tier chosen for this turn/step (run-level; ``step=-1``).
+        Used by the chat SSE stream (``ConversationLoop.turn``) to surface the
+        router's actual tier choice — and available to the run viewer too. One
+        event vocabulary across both streams.
+        """
+        return cls(type="tier", step=-1, data={"tier": tier}, timestamp=datetime.now(UTC))
+    @classmethod
+    def thinking(cls, step: int) -> RunEvent:
+        """The model is generating the next action for ``step``."""
+        return cls(type="thinking", step=step, data={}, timestamp=datetime.now(UTC))
+    @classmethod
+    def tool_calling(cls, step: int, tool_calls: list[ToolCall]) -> RunEvent:
+        """The model requested tool dispatches this step (JSON-safe call list)."""
+        calls = [{"name": c.name, "call_id": c.call_id, "args": c.args} for c in tool_calls]
+        names = ", ".join(c.name for c in tool_calls)
+        return cls(
+            type="tool_calling",
+            step=step,
+            data={"tool_names": names, "tool_calls": calls},
+            timestamp=datetime.now(UTC),
+        )
+    @classmethod
+    def tool_result(cls, step: int, tool_name: str, result: ToolResult) -> RunEvent:
+        """A tool dispatch completed (success or ``is_error=True``).
+        D-F4-X-event-kind-for-produced-files (Spec F4 Phase 5 T02b — Option A):
+        forward structured ``produced_files`` from ``ToolResult.data`` onto
+        the event payload when present. The sandbox tool factory at
+        ``packages/core/src/persona/sandbox/tool.py:269-279`` populates
+        ``result.data["produced_files"]`` as ``list[{path, size_bytes,
+        media_type}]``; pre-amendment this constructor dropped it.
+        Additive (back-compat): pre-existing frames lacked the field; the
+        F4 frontend dispatcher reads it when present and falls back to a
+        result-block render when absent. **One edit covers both chat SSE
+        AND RunEvent transports** because this constructor is the single
+        place each event's payload shape is defined (see module docstring
+        lines 7-8) — chat ``_sse(ev.type, ev.data)`` (bare payload, D-09-1)
+        and run ``model_dump_json(event)`` (envelope with ``.data`` nested)
+        both observe the same upstream shape.
+        Empty ``produced_files: []`` is omitted from the payload (absence
+        IS the back-compat shape; renderers treat absence as "no files").
+        """
+        data: dict[str, Any] = {
+            "tool_name": tool_name,
+            "is_error": result.is_error,
+            "content": result.content,
+        }
+        if result.data is not None:
+            pf = result.data.get("produced_files")
+            if isinstance(pf, list) and pf:
+                data["produced_files"] = pf
+        return cls(
+            type="tool_result",
+            step=step,
+            data=data,
+            timestamp=datetime.now(UTC),
+        )
+    @classmethod
+    def asking_user(
+        cls,
+        step: int,
+        question: str,
+        *,
+        options: Sequence[QuestionOption] | None = None,
+        allow_free_form: bool = True,
+    ) -> RunEvent:
+        """The persona asked the user a question.
+        Spec 21 (D-21-9): additively carries the 3+1 proactive-question shape.
+        When ``options`` is ``None`` (the model-initiated ``[ASK_USER]`` path and
+        every pre-spec-21 frame) the payload is the bare ``{"question": ...}`` —
+        byte-identical to the original shape, so existing renderers and the
+        web ``AskingUserData`` type are unaffected. When ``options`` is present
+        the payload adds the predefined options + free-form flag and the web
+        renders the 3-button + free-form UI (T12). Absence IS the back-compat
+        shape — exactly the ``produced_files`` precedent above.
+        Args:
+            step: The step index the question belongs to.
+            question: The question text.
+            options: The 3 predefined options, or ``None`` for a free-text ask.
+            allow_free_form: Whether a free-form answer is accepted (only
+                meaningful, and only emitted, when ``options`` is present).
+        """
+        data: dict[str, Any] = {"question": question}
+        if options is not None:
+            data["options"] = [{"label": o.label, "description": o.description} for o in options]
+            data["allow_free_form"] = allow_free_form
+        return cls(type="asking_user", step=step, data=data, timestamp=datetime.now(UTC))
+    @classmethod
+    def user_responded(cls, step: int) -> RunEvent:
+        """The user's answer was received and folded into context."""
+        return cls(type="user_responded", step=step, data={}, timestamp=datetime.now(UTC))
+    @classmethod
+    def reasoning(cls, step: int, content: str) -> RunEvent:
+        """Intermediate reasoning text (neither tool call, question, nor final)."""
+        return cls(
+            type="reasoning", step=step, data={"content": content}, timestamp=datetime.now(UTC)
+        )
+    @classmethod
+    def completed(cls, step: int, output: str) -> RunEvent:
+        """The model produced the final deliverable (``[FINAL]``)."""
+        return cls(
+            type="completed", step=step, data={"output": output}, timestamp=datetime.now(UTC)
+        )
+    @classmethod
+    def cancelled(cls, step: int) -> RunEvent:
+        """The run was cancelled at this step boundary."""
+        return cls(type="cancelled", step=step, data={}, timestamp=datetime.now(UTC))
+    @classmethod
+    def max_steps(cls, step: int, summary: str) -> RunEvent:
+        """The step budget was exhausted; ``summary`` is the best-effort output."""
+        return cls(
+            type="max_steps", step=step, data={"summary": summary}, timestamp=datetime.now(UTC)
+        )
+    @classmethod
+    def error(cls, step: int, message: str) -> RunEvent:
+        """An unrecoverable error terminated the run."""
+        return cls(type="error", step=step, data={"message": message}, timestamp=datetime.now(UTC))
+    @classmethod
+    def finished(cls, run: Run) -> RunEvent:
+        """The run is fully done (terminal); carries the final status + run id."""
+        return cls(
+            type="finished",
+            step=len(run.steps),
+            data={"run_id": run.id, "status": str(run.status)},
+            timestamp=datetime.now(UTC),
+        )