persona-runtime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ """Persona runtime — conversation loop, router, and agentic engine.
2
+
3
+ The public surface spec 06 (agentic loop) and spec 08 (API) import:
4
+
5
+ - :class:`ConversationLoop` — orchestrates one turn (the keystone).
6
+ - :class:`PromptBuilder` + :class:`RetrievedContext` — prompt assembly.
7
+ - :class:`Router` — rule-based tier selection.
8
+ - :class:`TierConfig` / :class:`TierRegistry` / :func:`tier_registry_from_env`
9
+ — tier configuration and the lazily-cached backend registry.
10
+ - :class:`TurnLog` / :class:`TurnLogWriter` / :class:`JSONLTurnLogWriter` /
11
+ :class:`MemoryTurnLogWriter` — per-turn telemetry.
12
+ - :exc:`TierNotConfiguredError` — the one runtime domain exception (D-05-2).
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from persona_runtime.errors import TierNotConfiguredError
18
+ from persona_runtime.logging import (
19
+ JSONLTurnLogWriter,
20
+ MemoryTurnLogWriter,
21
+ TurnLog,
22
+ TurnLogWriter,
23
+ )
24
+ from persona_runtime.loop import ConversationLoop
25
+ from persona_runtime.prompt import PromptBuilder, RetrievedContext
26
+ from persona_runtime.router import Router
27
+ from persona_runtime.tier import TierConfig, TierRegistry, tier_registry_from_env
28
+
29
+ __all__ = [
30
+ "ConversationLoop",
31
+ "JSONLTurnLogWriter",
32
+ "MemoryTurnLogWriter",
33
+ "PromptBuilder",
34
+ "RetrievedContext",
35
+ "Router",
36
+ "TierConfig",
37
+ "TierNotConfiguredError",
38
+ "TierRegistry",
39
+ "TurnLog",
40
+ "TurnLogWriter",
41
+ "tier_registry_from_env",
42
+ ]
@@ -0,0 +1,46 @@
1
+ """The agentic loop — plan-act-reflect execution for end-to-end tasks (spec 06).
2
+
3
+ When a chat turn is not enough ("draft a complaint about my landlord refusing to
4
+ fix mould"), :class:`AgenticLoop` runs the *simplest possible* agent loop: one
5
+ model decides at each step whether to call a tool, ask the user a question, or
6
+ produce a final answer — no multi-agent orchestration, no graph-of-thought
7
+ (architecture §5.2). The value is in the error-handling and budget management
8
+ around the loop, not the loop itself.
9
+
10
+ The public surface spec 08 (the API, which exposes ``/v1/runs``) imports:
11
+
12
+ - :class:`AgenticLoop` — the plan-act-reflect engine (lands in T06).
13
+ - :class:`Run` / :class:`RunStatus` / :class:`Step` / :class:`StepType` — the
14
+ serialisable run/step data model (T02).
15
+ - :class:`CancelToken` — caller-held cancellation control (T02).
16
+ - :class:`RunEvent` — the SSE event the API serialises for the run viewer (T03).
17
+ - :exc:`MaxStepsReachedError` / :exc:`RunCancelledError` — the two agentic
18
+ terminal exception types (defined, but the loop returns a ``Run`` rather than
19
+ raising; D-06-2).
20
+
21
+ Spec 08 owns what the loop does not (mirrors D-S05-4 / D-05-4): it persists the
22
+ ``Run`` per-step, supplies the ``user_respond`` blocking callback, serialises
23
+ ``RunEvent``\\ s to SSE, and owns the ``TierRegistry`` lifecycle.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ from persona_runtime.agentic.compactor import StepHistoryCompactor
29
+ from persona_runtime.agentic.errors import MaxStepsReachedError, RunCancelledError
30
+ from persona_runtime.agentic.events import RunEvent
31
+ from persona_runtime.agentic.loop import AgenticLoop
32
+ from persona_runtime.agentic.run import CancelToken, Run, RunStatus
33
+ from persona_runtime.agentic.step import Step, StepType
34
+
35
+ __all__ = [
36
+ "AgenticLoop",
37
+ "CancelToken",
38
+ "MaxStepsReachedError",
39
+ "Run",
40
+ "RunCancelledError",
41
+ "RunEvent",
42
+ "RunStatus",
43
+ "Step",
44
+ "StepHistoryCompactor",
45
+ "StepType",
46
+ ]
@@ -0,0 +1,134 @@
1
+ """Step-history compaction for the agentic loop (spec §6).
2
+
3
+ An agentic run's context grows with every step — tool results can be large (a
4
+ ``web_fetch`` returning 4000 chars across four URLs is 16K characters of tool results
5
+ alone). The :class:`StepHistoryCompactor` keeps the context within the tier's
6
+ budget by summarising earlier step history when it crosses 80% of the budget,
7
+ while preserving the run's invariants verbatim: the **persona block + task
8
+ description** (the floor, ``context[0]``) and the **most recent steps**.
9
+
10
+ The async-bridge (D-06-4 — kept LOCAL, no shared ``_bridge.py``): the small-tier
11
+ summary needs an ``await``, but :meth:`compact_if_needed` is sync-shaped. The
12
+ *loop* owns the async call — it asks :meth:`should_compact` whether compaction
13
+ will fire, pre-computes the summary by awaiting the small tier, and passes the
14
+ resolved ``summary`` string in. This reuses the D-05-X *idiom* (predict →
15
+ pre-compute → sync callee) but NOT its machinery: the conversation manager keys
16
+ off a turn-count boundary and is stateful; this compactor keys off a token
17
+ threshold and is stateless (a run is one pass). The shared element is a pattern,
18
+ documented here, not a function.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from datetime import UTC, datetime
24
+ from typing import TYPE_CHECKING
25
+
26
+ from persona.schema.conversation import ConversationMessage
27
+ from persona.skills import count_tokens
28
+
29
+ if TYPE_CHECKING:
30
+ from collections.abc import Sequence
31
+
32
+ __all__ = ["StepHistoryCompactor"]
33
+
34
# Fraction of the tier budget above which compaction fires (spec §6).
# ``StepHistoryCompactor.should_compact`` compares the token count of the
# rendered context against ``int(budget * _COMPACT_THRESHOLD)``.
_COMPACT_THRESHOLD = 0.8
# Messages from the tail kept verbatim — "recent 2 steps" (spec §6). A step
# contributes at most a couple of messages (an assistant turn + its tool
# results), so keeping the last few trailing messages preserves recent steps.
# This is a starting boundary, not a hard cap: ``_recent_start`` moves the
# boundary earlier when it would otherwise land on a ``tool`` message, so the
# kept tail can be longer than this value.
_KEEP_RECENT_MESSAGES = 4
40
+
41
+
42
def _render(messages: Sequence[ConversationMessage]) -> str:
    """Flatten ``messages`` into the newline-joined text that gets token-counted.

    Each message becomes one ``"<role>: <content>"`` entry; entries are joined
    with a single newline (same rendering as loop.py). An empty sequence yields
    the empty string.
    """
    rendered_lines = [f"{message.role}: {message.content}" for message in messages]
    return "\n".join(rendered_lines)
45
+
46
+
47
class StepHistoryCompactor:
    """Compacts an agentic run's step history at the tier budget (spec §6).

    Stateless — each :meth:`compact_if_needed` call recomputes from the current
    context. The persona block + task description (``context[0]``) and the most
    recent messages are never summarised; only the middle step history is.
    """

    def should_compact(self, context: Sequence[ConversationMessage], budget: int) -> bool:
        """True if ``context`` exceeds 80% of ``budget`` and has a compactable middle.

        The loop calls this BEFORE :meth:`compact_if_needed` so it knows whether
        to pre-compute the (async) small-tier summary. Returns ``False`` when the
        context is small enough OR when there is no middle to summarise (the
        floor + recent tail already account for every message).

        Args:
            context: The run's working context, ``[floor, *step_messages]``.
            budget: The tier's context-window budget in tokens; a non-positive
                budget never triggers compaction.

        Returns:
            ``True`` only when :meth:`middle_to_summarise` would be non-empty
            and the rendered context is above the threshold.
        """
        if budget <= 0:
            return False
        if len(context) <= 1 + _KEEP_RECENT_MESSAGES:
            return False
        # The tool-group walk-back in ``_recent_start`` can pull the tail
        # boundary all the way to index 1, leaving ``context[1:1]`` — an empty
        # middle. Compacting then would only ADD a summary message without
        # dropping anything, so report "nothing to compact" instead.
        if self._recent_start(context) <= 1:
            return False
        return count_tokens(_render(context)) > int(budget * _COMPACT_THRESHOLD)

    def compact_if_needed(
        self,
        context: list[ConversationMessage],
        budget: int,
        *,
        summary: str | None,
    ) -> list[ConversationMessage]:
        """Return a compacted context, or ``context`` unchanged if under budget.

        Args:
            context: The run's working context — ``[floor, *step_messages]``
                where ``floor`` (index 0) is the persona block + task +
                agentic-instructions system message.
            budget: The tier's context-window budget in tokens.
            summary: The pre-computed summary of the middle step history (the
                loop awaits the small tier and passes the result here; D-06-4).
                ``None`` means "no summary available" → no-op (the loop passes a
                string exactly when :meth:`should_compact` returned ``True``).

        Returns:
            ``[floor, summary_message, *recent_messages]`` when compaction fires,
            else ``context`` unchanged (the same list object). The floor and the
            recent tail are byte-identical to the input (acceptance #8).
        """
        if summary is None or not self.should_compact(context, budget):
            return context

        floor = context[0]
        recent = context[self._recent_start(context) :]
        # The summary stands in for everything between the floor and the tail.
        summary_message = ConversationMessage(
            role="system",
            content=f"Earlier in this run: {summary}",
            created_at=datetime.now(UTC),
            metadata={"kind": "step_compaction"},
        )
        return [floor, summary_message, *recent]

    @staticmethod
    def _recent_start(context: Sequence[ConversationMessage]) -> int:
        """Index where the verbatim recent tail begins.

        Never index 0 (the floor), and never on a dangling ``tool`` message: a
        ``tool`` result must keep the preceding assistant ``tool_calls`` message
        in the same context window, or native providers (OpenAI/DeepSeek) reject
        the request ("'tool' must follow a message with 'tool_calls'"). We walk
        the boundary back over any leading ``tool`` messages so the kept tool-call
        group stays intact. Spec 11 soak finding.

        Returns:
            An index ``>= 1``. A result of exactly ``1`` means the tail starts
            right after the floor, i.e. there is no middle to summarise.
        """
        start = max(1, len(context) - _KEEP_RECENT_MESSAGES)
        # ``start > 1`` is checked first, so ``context[start]`` is only read
        # when ``start`` came from ``len(context) - _KEEP_RECENT_MESSAGES``
        # (or lower) and is therefore a valid index.
        while start > 1 and context[start].role == "tool":
            start -= 1
        return start

    def middle_to_summarise(
        self, context: Sequence[ConversationMessage]
    ) -> list[ConversationMessage]:
        """The slice the loop should summarise: everything between floor and recent tail.

        The loop renders this, awaits the small-tier summariser on it, and passes
        the resulting string back as ``summary``. Returns ``[]`` when there is no
        middle (the caller then passes ``summary=None``) — either because the
        context is too short or because the tool-group walk-back moved the tail
        boundary up against the floor.
        """
        if len(context) <= 1 + _KEEP_RECENT_MESSAGES:
            return []
        start = self._recent_start(context)
        return list(context[1:start])
@@ -0,0 +1,51 @@
1
+ """Agentic-loop domain exceptions (D-06-2).
2
+
3
+ The agentic loop is orchestration over already-reviewed components (specs 01–05),
4
+ so it adds the smallest possible exception surface — two classes for its two
5
+ genuinely-new terminal concepts:
6
+
7
+ - :class:`MaxStepsReachedError` — the loop ran out of steps without a final answer.
8
+ - :class:`RunCancelledError` — the caller cancelled the run.
9
+
10
+ **Both are DEFINED but the loop itself does not raise them.** Max-steps and
11
+ cancellation are normal terminal *outcomes*, modelled as
12
+ :class:`~persona_runtime.agentic.run.RunStatus` values; the loop sets the status,
13
+ produces (for max-steps) a best-effort summary, and *returns* the
14
+ :class:`~persona_runtime.agentic.run.Run`. These exception types exist so the
15
+ composition root (spec 08) can choose to surface those outcomes as raised errors
16
+ to an HTTP caller if it prefers — the loop hands back a persistable ``Run`` either
17
+ way (mirrors D-05-2's "``MaxToolRoundsExceeded`` deliberately not raised").
18
+
19
+ Everything else (provider 429s, tool-not-allowed, schema mismatches) is a
20
+ spec-01/02/03 domain exception that propagates unchanged — no parallel runtime
21
+ vocabulary (hexagonal architecture, ENGINEERING_STANDARDS.md §1.2).
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from persona.errors import PersonaError
27
+
28
+ __all__ = ["MaxStepsReachedError", "RunCancelledError"]
29
+
30
+
31
class MaxStepsReachedError(PersonaError):
    """Signals that a run used up ``max_steps`` before reaching a final answer.

    The :class:`~persona_runtime.agentic.loop.AgenticLoop` never raises this
    itself: on exhaustion it sets ``RunStatus.MAX_STEPS_REACHED``, generates a
    best-effort summary, and returns the ``Run``. The type exists for callers
    that would rather surface that outcome as an exception.

    Carries ``context`` with ``max_steps`` and ``run_id`` so an operator can see
    which run exhausted its budget.
    """
40
+
41
+
42
class RunCancelledError(PersonaError):
    """Signals that a run was stopped through its :class:`CancelToken`.

    The :class:`~persona_runtime.agentic.loop.AgenticLoop` never raises this
    itself: it sets ``RunStatus.CANCELLED`` at the step boundary and returns the
    ``Run`` cleanly, with no half-executed step (acceptance #6). The type exists
    for callers that would rather surface cancellation as an exception.

    Carries ``context`` with ``run_id`` and the ``step`` at which cancellation
    took effect.
    """
@@ -0,0 +1,214 @@
1
+ """`RunEvent` — the SSE event stream for the run viewer (spec §8).
2
+
3
+ The :meth:`AgenticLoop.run` ``on_event`` callback receives :class:`RunEvent`
4
+ objects that the API (spec 08) serialises to SSE; each event type maps to a
5
+ visual element in the run viewer (spec 09). The loop never constructs a
6
+ ``RunEvent`` by hand — it calls one of the typed classmethod constructors, which
7
+ are the single place each event's ``type`` string and ``data`` payload shape are
8
+ defined (DRY).
9
+
10
+ `RunEvent` is frozen Pydantic v2 (D-06-1): it crosses the spec-08 SSE
11
+ serialisation boundary. The ``data`` payload is ``dict[str, Any]`` so events can
12
+ carry structured detail (tool names, output text); the constructors are
13
+ responsible for building **JSON-safe** payloads (tool calls are rendered to
14
+ name/args dicts, never raw model objects) so ``model_dump_json`` always succeeds.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from datetime import UTC, datetime # noqa: TC003 — Pydantic needs runtime access
20
+ from typing import TYPE_CHECKING, Any
21
+
22
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
23
+
24
+ if TYPE_CHECKING:
25
+ from collections.abc import Sequence
26
+
27
+ from persona.schema.tools import ToolCall, ToolResult
28
+
29
+ from persona_runtime.agentic.run import Run
30
+ from persona_runtime.questions import QuestionOption
31
+
32
+ __all__ = ["RunEvent"]
33
+
34
+
35
class RunEvent(BaseModel):
    """A single lifecycle event of a run, serialised to SSE by the API (spec §8).

    Attributes:
        type: The event kind — one of ``started``, ``tier``, ``thinking``,
            ``tool_calling``, ``tool_result``, ``asking_user``,
            ``user_responded``, ``reasoning``, ``completed``, ``cancelled``,
            ``max_steps``, ``error``, ``finished``.
        step: The zero-based step index the event belongs to (``-1`` for
            run-level events that precede the first step, e.g. ``started``).
        data: Event-type-specific JSON-safe payload built by the constructor.
        timestamp: tz-aware UTC time the event was emitted.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    type: str
    step: int
    data: dict[str, Any] = Field(default_factory=dict)
    timestamp: datetime

    @field_validator("timestamp", mode="after")
    @classmethod
    def _timestamp_must_be_tz_aware(cls, value: datetime) -> datetime:
        """Reject naive timestamps; convert aware ones to UTC."""
        if value.tzinfo is not None:
            return value.astimezone(UTC)
        msg = "naive datetime not allowed on RunEvent.timestamp; use datetime.now(UTC)"
        raise ValueError(msg)

    @classmethod
    def _stamped(cls, kind: str, step: int, payload: dict[str, Any]) -> RunEvent:
        """Build an event of ``kind`` for ``step``, stamped with the current UTC time."""
        return cls(type=kind, step=step, data=payload, timestamp=datetime.now(UTC))

    # Section: typed constructors — each event's ``type`` string and payload
    # shape is defined here and nowhere else.

    @classmethod
    def started(cls, task: str) -> RunEvent:
        """Run-level event (``step=-1``): execution of ``task`` has begun."""
        return cls._stamped("started", -1, {"task": task})

    @classmethod
    def tier(cls, tier: str) -> RunEvent:
        """Run-level event (``step=-1``) naming the model tier chosen for this turn/step.

        The chat SSE stream (``ConversationLoop.turn``) uses it to surface the
        router's actual tier choice, and the run viewer can use it too — one
        event vocabulary across both streams.
        """
        return cls._stamped("tier", -1, {"tier": tier})

    @classmethod
    def thinking(cls, step: int) -> RunEvent:
        """The model is generating the next action for ``step`` (empty payload)."""
        return cls._stamped("thinking", step, {})

    @classmethod
    def tool_calling(cls, step: int, tool_calls: list[ToolCall]) -> RunEvent:
        """The model requested tool dispatches this step.

        The payload holds the comma-joined tool names plus one
        ``{name, call_id, args}`` dict per call, so no raw model object is
        placed in ``data``.
        """
        rendered_calls = [
            {"name": call.name, "call_id": call.call_id, "args": call.args}
            for call in tool_calls
        ]
        joined_names = ", ".join(call.name for call in tool_calls)
        payload: dict[str, Any] = {"tool_names": joined_names, "tool_calls": rendered_calls}
        return cls._stamped("tool_calling", step, payload)

    @classmethod
    def tool_result(cls, step: int, tool_name: str, result: ToolResult) -> RunEvent:
        """A tool dispatch completed (success or ``is_error=True``).

        D-F4-X-event-kind-for-produced-files (Spec F4 Phase 5 T02b — Option A):
        structured ``produced_files`` found in ``ToolResult.data`` are forwarded
        onto the event payload. The sandbox tool factory at
        ``packages/core/src/persona/sandbox/tool.py:269-279`` populates
        ``result.data["produced_files"]`` as ``list[{path, size_bytes,
        media_type}]``; before this amendment the constructor dropped it.

        The change is additive (back-compat): older frames lacked the field, and
        the F4 frontend dispatcher reads it when present and falls back to a
        result-block render when absent. Because this constructor is the single
        place the payload shape is defined (see module docstring lines 7-8), one
        edit covers both transports — chat ``_sse(ev.type, ev.data)`` (bare
        payload, D-09-1) and run ``model_dump_json(event)`` (envelope with
        ``.data`` nested) observe the same upstream shape.

        An empty ``produced_files: []`` is left out of the payload: absence IS
        the back-compat shape, and renderers treat absence as "no files".
        """
        payload: dict[str, Any] = {
            "tool_name": tool_name,
            "is_error": result.is_error,
            "content": result.content,
        }
        produced = None if result.data is None else result.data.get("produced_files")
        # Forward only a non-empty list; anything else keeps the legacy shape.
        if isinstance(produced, list) and produced:
            payload["produced_files"] = produced
        return cls._stamped("tool_result", step, payload)

    @classmethod
    def asking_user(
        cls,
        step: int,
        question: str,
        *,
        options: Sequence[QuestionOption] | None = None,
        allow_free_form: bool = True,
    ) -> RunEvent:
        """The persona asked the user a question.

        Spec 21 (D-21-9): additively carries the 3+1 proactive-question shape.
        With ``options=None`` (the model-initiated ``[ASK_USER]`` path and every
        pre-spec-21 frame) the payload is the bare ``{"question": ...}`` —
        byte-identical to the original shape, so existing renderers and the web
        ``AskingUserData`` type are unaffected. With ``options`` present the
        payload adds the predefined options + free-form flag and the web renders
        the 3-button + free-form UI (T12). Absence IS the back-compat shape, the
        same convention ``tool_result`` uses for ``produced_files``.

        Args:
            step: The step index the question belongs to.
            question: The question text.
            options: The 3 predefined options, or ``None`` for a free-text ask.
            allow_free_form: Whether a free-form answer is accepted (only
                meaningful, and only emitted, when ``options`` is present).
        """
        payload: dict[str, Any] = {"question": question}
        if options is None:
            return cls._stamped("asking_user", step, payload)
        payload["options"] = [
            {"label": option.label, "description": option.description} for option in options
        ]
        payload["allow_free_form"] = allow_free_form
        return cls._stamped("asking_user", step, payload)

    @classmethod
    def user_responded(cls, step: int) -> RunEvent:
        """The user's answer was received and folded into context (empty payload)."""
        return cls._stamped("user_responded", step, {})

    @classmethod
    def reasoning(cls, step: int, content: str) -> RunEvent:
        """Intermediate reasoning text (neither tool call, question, nor final)."""
        return cls._stamped("reasoning", step, {"content": content})

    @classmethod
    def completed(cls, step: int, output: str) -> RunEvent:
        """The model produced the final deliverable (``[FINAL]``)."""
        return cls._stamped("completed", step, {"output": output})

    @classmethod
    def cancelled(cls, step: int) -> RunEvent:
        """The run was cancelled at this step boundary (empty payload)."""
        return cls._stamped("cancelled", step, {})

    @classmethod
    def max_steps(cls, step: int, summary: str) -> RunEvent:
        """The step budget was exhausted; ``summary`` is the best-effort output."""
        return cls._stamped("max_steps", step, {"summary": summary})

    @classmethod
    def error(cls, step: int, message: str) -> RunEvent:
        """An unrecoverable error terminated the run."""
        return cls._stamped("error", step, {"message": message})

    @classmethod
    def finished(cls, run: Run) -> RunEvent:
        """The run is fully done (terminal); carries the final status + run id.

        ``step`` is set to ``len(run.steps)`` and the status is rendered with
        ``str(run.status)``.
        """
        payload: dict[str, Any] = {"run_id": run.id, "status": str(run.status)}
        return cls._stamped("finished", len(run.steps), payload)