agentforge-core 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. agentforge_core/__init__.py +228 -0
  2. agentforge_core/_bm25.py +132 -0
  3. agentforge_core/config/__init__.py +62 -0
  4. agentforge_core/config/loader.py +239 -0
  5. agentforge_core/config/module_schemas.py +208 -0
  6. agentforge_core/config/schema.py +424 -0
  7. agentforge_core/contracts/__init__.py +52 -0
  8. agentforge_core/contracts/auth.py +33 -0
  9. agentforge_core/contracts/chat.py +118 -0
  10. agentforge_core/contracts/embedding.py +71 -0
  11. agentforge_core/contracts/evaluator.py +56 -0
  12. agentforge_core/contracts/finding.py +39 -0
  13. agentforge_core/contracts/graph_store.py +180 -0
  14. agentforge_core/contracts/guardrails.py +129 -0
  15. agentforge_core/contracts/llm.py +152 -0
  16. agentforge_core/contracts/memory.py +113 -0
  17. agentforge_core/contracts/migrator.py +120 -0
  18. agentforge_core/contracts/renderer.py +57 -0
  19. agentforge_core/contracts/reranker.py +91 -0
  20. agentforge_core/contracts/strategy.py +70 -0
  21. agentforge_core/contracts/task.py +73 -0
  22. agentforge_core/contracts/tool.py +71 -0
  23. agentforge_core/contracts/vector_store.py +151 -0
  24. agentforge_core/migrations/__init__.py +14 -0
  25. agentforge_core/migrations/discover.py +77 -0
  26. agentforge_core/migrations/template.py +34 -0
  27. agentforge_core/observability/__init__.py +18 -0
  28. agentforge_core/observability/tracing.py +37 -0
  29. agentforge_core/production/__init__.py +77 -0
  30. agentforge_core/production/budget.py +134 -0
  31. agentforge_core/production/exceptions.py +136 -0
  32. agentforge_core/production/fallback.py +321 -0
  33. agentforge_core/production/log_filter.py +49 -0
  34. agentforge_core/production/log_format.py +117 -0
  35. agentforge_core/production/run_context.py +108 -0
  36. agentforge_core/py.typed +0 -0
  37. agentforge_core/resolver/__init__.py +38 -0
  38. agentforge_core/resolver/discover.py +145 -0
  39. agentforge_core/resolver/resolve.py +168 -0
  40. agentforge_core/testing/__init__.py +45 -0
  41. agentforge_core/testing/conformance.py +1138 -0
  42. agentforge_core/values/__init__.py +103 -0
  43. agentforge_core/values/auth.py +20 -0
  44. agentforge_core/values/chat.py +131 -0
  45. agentforge_core/values/claim.py +30 -0
  46. agentforge_core/values/graph.py +136 -0
  47. agentforge_core/values/guardrails.py +49 -0
  48. agentforge_core/values/manifest.py +129 -0
  49. agentforge_core/values/messages.py +153 -0
  50. agentforge_core/values/module.py +40 -0
  51. agentforge_core/values/pipeline.py +43 -0
  52. agentforge_core/values/retrieval.py +53 -0
  53. agentforge_core/values/state.py +118 -0
  54. agentforge_core/values/vector.py +59 -0
  55. agentforge_core-0.2.1.dist-info/METADATA +66 -0
  56. agentforge_core-0.2.1.dist-info/RECORD +58 -0
  57. agentforge_core-0.2.1.dist-info/WHEEL +4 -0
  58. agentforge_core-0.2.1.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,136 @@
1
+ """AgentForge exception hierarchy.
2
+
3
+ Every exception the framework raises is a subclass of `AgentForgeError`.
4
+ This is the only place new top-level exception classes are defined; per
5
+ .claude/standards/coding.md, modules subclass these for their own
6
+ errors but never `raise Exception(...)`.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+
12
class AgentForgeError(Exception):
    """Root of the AgentForge exception hierarchy.

    Every error the framework raises derives from this class, so
    catching it handles any of them generically. Production code
    should prefer the most specific subclass that applies.
    """
18
+
19
+
20
+ # Locked names per the framework's public API; suppress N818 globally
21
+ # in this file so individual classes don't need per-line noqa.
22
+ # ruff: noqa: N818
23
+
24
+
25
class BudgetExceeded(AgentForgeError):
    """Signals that `BudgetPolicy.check` found a USD or token cap breached.

    The agent run stops immediately; whatever partial state exists
    is kept on `RunResult`.
    """
31
+
32
+
33
class GuardrailViolation(AgentForgeError):
    """Signals that a guardrail other than the budget tripped.

    Typical causes: the iteration cap was reached or the error-streak
    limit was hit. Kept separate from `BudgetExceeded` so callers can
    branch on which kind of limit ended the run.
    """
39
+
40
+
41
class ModuleError(AgentForgeError):
    """Signals that the resolver could not find a registered module by
    name while an agent was being constructed.

    Raised at startup rather than at runtime (P11 — fail at startup,
    not at runtime). The message tells the developer which package to
    install or which entry point is missing.
    """
49
+
50
+
51
class ProviderError(AgentForgeError):
    """Common base for failures that originate in an LLM / embedding
    provider.

    The concrete subclasses defined below cover the cross-provider
    failure modes a reasoning loop needs to tell apart. Provider
    drivers translate their SDK exceptions into one of them at the
    boundary. Callers catch `ProviderError` for general handling, or a
    specific subclass when retry / surfacing logic differs.
    """
60
+
61
+
62
class RateLimitError(ProviderError):
    """The request was throttled by the provider (HTTP 429 /
    `ThrottlingException`).

    Retryable with exponential backoff. When a `Retry-After` header is
    present, provider drivers honour it.
    """
68
+
69
+
70
class AuthenticationError(ProviderError):
    """The credentials were rejected by the provider (HTTP 401 / 403).

    Not retryable. The agent run terminates, and the developer fixes
    the credentials at the deployment layer.
    """
76
+
77
+
78
class ModelNotFoundError(ProviderError):
    """The requested model id is unknown to the provider.

    Not retryable. It surfaces on the first call instead of at
    construction, because most providers don't expose a synchronous
    "does this model exist" check.
    """
85
+
86
+
87
class ServiceError(ProviderError):
    """A transient server-side error came back from the provider
    (HTTP 5xx).

    Retryable. Drivers retry up to `max_retries` times, using bounded
    exponential backoff, before letting the error propagate.
    """
93
+
94
+
95
class TimeoutError(ProviderError):
    """The configured timeout elapsed before the provider answered.

    Retryable. This is not the stdlib `TimeoutError` (an `OSError`
    subclass): it derives from `ProviderError` so that the handler
    used for other provider failures catches it as well.
    """
102
+
103
+
104
class CapabilityNotSupported(AgentForgeError):
    """Signals that an optional capability was invoked on a driver
    which does not declare it.

    Capability negotiation is honest (ADR-0009): each driver declares
    the set it supports, and this exception fires when a consumer
    skipped the `supports(...)` check before calling.
    """
112
+
113
+
114
class A2ACallError(AgentForgeError):
    """Signals that an A2A call to a remote peer failed (feat-014).

    It wraps the underlying HTTP / transport error and, when they are
    available, carries the peer URL and the error code taken from the
    response body.
    """
120
+
121
+
122
class A2AAuthError(A2ACallError):
    """The supplied credentials were rejected by the peer (HTTP 401/403).

    Kept separate from plain `A2ACallError` so callers can branch on
    retry semantics: an auth failure is not retryable until the
    credential has been rotated.
    """
129
+
130
+
131
class A2ATimeout(A2ACallError):
    """The configured timeout elapsed before the A2A call completed.

    Retryable with backoff. Deriving from `A2ACallError` lets a generic
    A2A handler catch every transport failure at a single level.
    """
@@ -0,0 +1,321 @@
1
+ """`FallbackChain` — cross-provider failover wrapping multiple
2
+ `LLMClient`s (feat-007).
3
+
4
+ Implements the `LLMClient` ABC, so any strategy that accepts an
5
+ `LLMClient` accepts a chain transparently.
6
+
7
+ Usage:
8
+
9
+ from agentforge import Agent, FallbackChain
10
+
11
+ chain = FallbackChain(
12
+ [
13
+ "anthropic:claude-sonnet-4.7",
14
+ "bedrock:anthropic.claude-sonnet-4.7",
15
+ "openai:gpt-4o",
16
+ ],
17
+ retry_on=(RateLimitError, ProviderError),
18
+ attempts_per_provider=1,
19
+ )
20
+ agent = Agent(model=chain, tools=[...])
21
+
22
+ Behaviour:
23
+ - On `retry_on` exception → try next provider (after retrying the
24
+ current provider `attempts_per_provider` times).
25
+ - Last provider's exception bubbles up if every provider exhausts.
26
+ - `last_used_provider` tracks the index of the provider that
27
+ answered the most recent call (diagnostic only).
28
+ - `capabilities()` returns the **intersection** of every wrapped
29
+ provider's capabilities — a chain can only honestly claim what
30
+ every fallback can deliver.
31
+ - `call_with_cache` / `call_with_thinking` raise
32
+ `CapabilityNotSupported` unless every wrapped provider declares
33
+ the capability.
34
+ - `close()` cascades in reverse-construction order.
35
+
36
+ Out of scope (v0.1):
37
+ - Streaming (`stream`) — not yet supported by `FallbackChain`;
38
+ callers using streaming should pick a single provider.
39
+ - Provider-level retry backoff — providers handle their own
40
+ retries internally.
41
+ - Per-call `retry_on` override — chain-level configuration only.
42
+ """
43
+
44
+ from __future__ import annotations
45
+
46
+ import logging
47
+ from collections.abc import AsyncIterator
48
+ from typing import Any
49
+
50
+ from agentforge_core.contracts.llm import LLMClient
51
+ from agentforge_core.production.exceptions import (
52
+ CapabilityNotSupported,
53
+ ModuleError,
54
+ ProviderError,
55
+ RateLimitError,
56
+ )
57
+ from agentforge_core.resolver import Resolver, parse_model_string
58
+ from agentforge_core.values.messages import LLMResponse, Message, ToolSpec
59
+
60
+ log = logging.getLogger(__name__)
61
+
62
+ _DEFAULT_RETRY_ON: tuple[type[Exception], ...] = (RateLimitError, ProviderError)
63
+ _DEFAULT_ATTEMPTS_PER_PROVIDER = 1
64
+
65
+
66
class FallbackChain(LLMClient):
    """Wrap multiple `LLMClient`s with cross-provider failover.

    Args:
        providers: A non-empty list of providers. Each entry is
            either a model string (`"<provider>:<model_id>"`,
            resolved via the global `Resolver`) or a typed
            `LLMClient` instance.
        retry_on: Exception types that trigger a retry / fallback to
            the next provider. Default: `(RateLimitError, ProviderError)`.
            Matching follows `except` semantics, so subclasses match
            too: with the default, *every* `ProviderError` subclass —
            including `AuthenticationError` and `ModelNotFoundError` —
            falls through to the next provider. Pass a narrower tuple
            such as `(RateLimitError, ServiceError)` to let those
            bubble immediately. Exceptions that match nothing in
            `retry_on` always bubble immediately. A single exception
            class or any iterable of classes is also accepted and is
            normalised to a tuple.
        attempts_per_provider: How many times to try the *current*
            provider before moving to the next. Default 1 (no
            retry; first failure → next provider).

    Raises:
        ValueError: empty providers list, non-positive
            `attempts_per_provider`, or an unrecognised provider
            string.
        TypeError: a provider entry is neither `str` nor `LLMClient`,
            or `retry_on` contains something that is not an exception
            class.
    """

    def __init__(
        self,
        providers: list[str | LLMClient],
        *,
        retry_on: tuple[type[Exception], ...] = _DEFAULT_RETRY_ON,
        attempts_per_provider: int = _DEFAULT_ATTEMPTS_PER_PROVIDER,
    ) -> None:
        if not providers:
            msg = "FallbackChain requires at least one provider"
            raise ValueError(msg)
        if attempts_per_provider < 1:
            msg = f"attempts_per_provider must be >= 1, got {attempts_per_provider}"
            raise ValueError(msg)
        # Validate `retry_on` before any client is constructed: a bad
        # value would otherwise only surface inside the `except` clause
        # of the first failing call, masking the real provider error.
        retry_types = self._normalise_retry_on(retry_on)
        self._clients: list[LLMClient] = [_resolve_provider(p) for p in providers]
        self._retry_on = retry_types
        self._attempts_per_provider = attempts_per_provider
        self._last_used_provider: int | None = None

    @staticmethod
    def _normalise_retry_on(retry_on: Any) -> tuple[type[BaseException], ...]:
        """Return `retry_on` as a tuple of exception classes.

        Takes `Any` so the runtime guards stay reachable for
        type-checkers; the public `__init__` signature is the typed gate.

        Raises:
            TypeError: `retry_on` is not iterable, or an entry is not a
                `BaseException` subclass.
        """
        candidates = (retry_on,) if isinstance(retry_on, type) else tuple(retry_on)
        for candidate in candidates:
            if not (isinstance(candidate, type) and issubclass(candidate, BaseException)):
                msg = f"retry_on entries must be exception classes, got {candidate!r}"
                raise TypeError(msg)
        return candidates

    # ------------------------------------------------------------------
    # Diagnostics
    # ------------------------------------------------------------------

    @property
    def last_used_provider(self) -> int | None:
        """Index (0-based) of the provider that answered the most
        recent successful call. `None` until the first call succeeds;
        a failed call leaves the previous value untouched."""
        return self._last_used_provider

    @property
    def providers(self) -> tuple[LLMClient, ...]:
        """Resolved providers in chain order. Useful for tests."""
        return tuple(self._clients)

    # ------------------------------------------------------------------
    # LLMClient surface
    # ------------------------------------------------------------------

    async def call(
        self,
        system: str,
        messages: list[Message],
        tools: list[ToolSpec] | None = None,
    ) -> LLMResponse:
        """Run `call` on the first provider that answers, falling back
        along the chain on `retry_on` exceptions."""
        return await self._dispatch_with_fallback("call", system, messages, tools=tools)

    async def close(self) -> None:
        """Close every wrapped provider, last-constructed first.

        Exceptions raised by an individual `close()` are logged and
        swallowed so that one failing provider cannot prevent the
        remaining ones from being closed; the goal is best-effort
        cleanup, not failure.
        """
        for client in reversed(self._clients):
            try:
                await client.close()
            except Exception:
                log.exception(
                    "FallbackChain: error closing %s; continuing",
                    type(client).__name__,
                )

    def capabilities(self) -> set[str]:
        """Intersection of every wrapped provider's capabilities.

        A chain can only honestly claim a capability that every
        fallback can deliver — otherwise a fallback might fail to
        honour a feature the caller relied on declaring.
        """
        if not self._clients:
            return set()
        first, *rest = self._clients
        # `set.intersection` accepts any iterable, so a driver that
        # returns a frozenset (or list) does not break the chain.
        return set(first.capabilities()).intersection(
            *(client.capabilities() for client in rest)
        )

    # ------------------------------------------------------------------
    # Optional capabilities — capability-intersection rule
    # ------------------------------------------------------------------

    def _require_chain_capability(self, capability: str) -> None:
        """Raise `CapabilityNotSupported` unless every wrapped provider
        declares `capability`."""
        if capability in self.capabilities():
            return
        msg = (
            f"FallbackChain does not support {capability!r}. Every "
            "wrapped provider must declare the capability for the "
            f"chain to honour it; check chain.supports({capability!r}) "
            "before calling."
        )
        raise CapabilityNotSupported(msg)

    async def call_with_cache(
        self,
        system: str,
        messages: list[Message],
        tools: list[ToolSpec] | None = None,
        *,
        cache_breakpoints: list[int],
    ) -> LLMResponse:
        """Cached call with fallback.

        Raises:
            CapabilityNotSupported: some wrapped provider does not
                declare `'caching'`.
        """
        self._require_chain_capability("caching")
        return await self._dispatch_with_fallback(
            "call_with_cache",
            system,
            messages,
            tools=tools,
            cache_breakpoints=cache_breakpoints,
        )

    async def call_with_thinking(
        self,
        system: str,
        messages: list[Message],
        tools: list[ToolSpec] | None = None,
        *,
        thinking_budget_tokens: int,
    ) -> LLMResponse:
        """Extended-thinking call with fallback.

        Raises:
            CapabilityNotSupported: some wrapped provider does not
                declare `'thinking'`.
        """
        self._require_chain_capability("thinking")
        return await self._dispatch_with_fallback(
            "call_with_thinking",
            system,
            messages,
            tools=tools,
            thinking_budget_tokens=thinking_budget_tokens,
        )

    def stream(
        self,
        system: str,  # noqa: ARG002 — interface compatibility; we raise unconditionally
        messages: list[Message],  # noqa: ARG002
        tools: list[ToolSpec] | None = None,  # noqa: ARG002
    ) -> AsyncIterator[Any]:
        """Streaming is not supported on `FallbackChain` in v0.1.

        Streaming with cross-provider fallback semantics is genuinely
        harder than the unary call: events from provider N might
        partially arrive before a fallback to N+1 kicks in, leaving
        the caller with incoherent partial output. Callers needing
        streaming should pick a single provider.

        Raises:
            CapabilityNotSupported: always.
        """
        msg = (
            "FallbackChain does not support 'streaming' in v0.1. "
            "Pick a single provider for streaming use cases."
        )
        raise CapabilityNotSupported(msg)

    # ------------------------------------------------------------------
    # Internal — fallback dispatch
    # ------------------------------------------------------------------

    async def _dispatch_with_fallback(
        self,
        method_name: str,
        *args: Any,
        **kwargs: Any,
    ) -> LLMResponse:
        """Iterate providers; for each, try `attempts_per_provider`
        times; on `retry_on` exception move to the next provider.

        The last provider's final exception is re-raised unchanged if
        every provider is exhausted. Exceptions that do not match
        `retry_on` propagate immediately from whichever provider
        raised them.
        """
        total = len(self._clients)
        attempts = self._attempts_per_provider
        for index, client in enumerate(self._clients):
            method = getattr(client, method_name)
            is_last_provider = index + 1 == total
            for attempt in range(attempts):
                try:
                    response: LLMResponse = await method(*args, **kwargs)
                except self._retry_on as exc:
                    provider_exhausted = attempt + 1 == attempts
                    if not provider_exhausted:
                        next_step = "retrying"
                    elif is_last_provider:
                        next_step = "no providers left"
                    else:
                        next_step = "trying next provider"
                    log.warning(
                        "FallbackChain: provider %d/%d (%s) raised %s (attempt %d/%d); %s",
                        index + 1,
                        total,
                        type(client).__name__,
                        type(exc).__name__,
                        attempt + 1,
                        attempts,
                        next_step,
                    )
                    if provider_exhausted and is_last_provider:
                        # Re-raise in place: keeps the original traceback
                        # and does not rely on an `assert`, which `-O`
                        # would strip.
                        raise
                else:
                    self._last_used_provider = index
                    return response
        # Only reachable if the chain holds no providers (or no attempts),
        # which `__init__` rejects; fail loudly instead of returning None.
        msg = "FallbackChain has no providers to dispatch to"
        raise ProviderError(msg)
279
+
280
+
281
+ # ----------------------------------------------------------------------
282
+ # Helpers
283
+ # ----------------------------------------------------------------------
284
+
285
+
286
def _resolve_provider(provider: Any) -> LLMClient:
    """Return an `LLMClient` for one `FallbackChain` provider entry.

    An existing `LLMClient` is returned as-is. A `str` model spec is
    split with `parse_model_string`, its provider class is looked up
    through the global resolver, and the class is instantiated with
    the parsed `model_id`.

    The parameter is typed `Any` (not `str | LLMClient`) so the
    runtime `isinstance` guards remain reachable for type-checkers —
    the public `FallbackChain.__init__` signature is the typed gate;
    this internal helper hardens against accidental mistypes.

    Raises:
        TypeError: `provider` is neither `str` nor `LLMClient`, or the
            resolved class does not produce an `LLMClient`.
        ValueError: the resolver has no provider registered under the
            parsed name.
    """
    # Ready-made clients pass straight through.
    if isinstance(provider, LLMClient):
        return provider
    if not isinstance(provider, str):
        got = type(provider).__name__
        msg = f"FallbackChain providers must be str or LLMClient, got {got}"
        raise TypeError(msg)

    provider_name, model_id = parse_model_string(provider)
    try:
        client_cls = Resolver.global_().resolve("providers", provider_name)
    except ModuleError as exc:
        package = f"agentforge-{provider_name}"
        msg = (
            f"FallbackChain: no LLM provider registered for {provider_name!r}. "
            f"Install {package} (e.g. `uv add {package}`) or pass a typed "
            f"LLMClient instance instead of the {provider!r} string."
        )
        raise ValueError(msg) from exc

    client = client_cls(model_id=model_id)
    if isinstance(client, LLMClient):
        return client
    # The entry point resolved to something that is not an LLM driver.
    msg = (
        f"FallbackChain: resolved provider {provider_name!r} "
        f"({client_cls.__name__}) does not implement LLMClient."
    )
    raise TypeError(msg)
319
+
320
+
321
+ __all__ = ["FallbackChain"]
@@ -0,0 +1,49 @@
1
+ """`RunIdFilter` — attach `run_id` to every log record.
2
+
3
+ Auto-installed on the root logger by `Agent.__init__` (per ADR-0010,
4
+ P4). Idempotent — multiple installs do not accumulate filters.
5
+
6
+ Disable via `logging.run_id_filter: false` in `agentforge.yaml`.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+
13
+ from agentforge_core.production.run_context import _current_run
14
+
15
+ _FILTER_NAME = "agentforge.run_id_filter"
16
+
17
+
18
class RunIdFilter(logging.Filter):
    """Stamp each record it sees with a `run_id` attribute.

    The value is the `run_id` of whatever `_current_run.get()` returns,
    or `"-"` when that is `None`. The filter never rejects a record.
    """

    def __init__(self) -> None:
        super().__init__(name=_FILTER_NAME)

    def filter(self, record: logging.LogRecord) -> bool:
        active = _current_run.get()
        if active is None:
            record.run_id = "-"
        else:
            record.run_id = active.run_id
        # Always let the record through; this filter only annotates.
        return True
28
+
29
+
30
def install_run_id_filter(logger: logging.Logger | None = None) -> RunIdFilter:
    """Install `RunIdFilter` on `logger` (root by default), idempotent.

    The filter is attached to the logger itself *and* to every handler
    currently attached to it. A logger-level filter only runs for
    records logged directly on that logger; records propagated from
    descendant loggers (e.g. `agentforge.agent` → root) bypass it and
    would otherwise reach the handlers without a `run_id`.
    Handler-level filters run for every record the handler emits.

    Handlers added to `logger` after this call are not covered until
    the function is called again; repeated calls never attach a second
    `RunIdFilter` to the same logger or handler.

    Returns the live filter (the existing one if already installed).
    """
    target = logger if logger is not None else logging.getLogger()
    live = next((f for f in target.filters if isinstance(f, RunIdFilter)), None)
    if live is None:
        live = RunIdFilter()
        target.addFilter(live)
    for handler in target.handlers:
        if not any(isinstance(f, RunIdFilter) for f in handler.filters):
            handler.addFilter(live)
    return live


def uninstall_run_id_filter(logger: logging.Logger | None = None) -> None:
    """Remove `RunIdFilter` from `logger` (root by default) and from
    each of its handlers, if present."""
    target = logger if logger is not None else logging.getLogger()
    # Loggers and handlers share the `filters` / `removeFilter` API.
    for holder in (target, *target.handlers):
        for existing in list(holder.filters):
            if isinstance(existing, RunIdFilter):
                holder.removeFilter(existing)
@@ -0,0 +1,117 @@
1
+ """`JsonFormatter` — structured JSON log records for production.
2
+
3
+ Per feat-009 §4.5: `logging.format: "json"` switches `agentforge` to
4
+ emit one-JSON-object-per-line records, ready for ingestion by log
5
+ aggregators (Loki, CloudWatch, Datadog, etc.). Default stays `"text"`
6
+ to keep local development greppable.
7
+
8
+ The formatter respects whatever `RunIdFilter` added — `run_id` lands
9
+ on every record. Standard fields: `ts`, `level`, `logger`, `msg`,
10
+ `run_id`. Anything else attached to the record via `extra=` (or via
11
+ filters) is included verbatim.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import logging
18
+ from datetime import UTC, datetime
19
+ from typing import Any
20
+
21
+ _HANDLER_NAME = "agentforge.json_handler"
22
+
23
+ # LogRecord attributes set by stdlib that we don't want to leak into
24
+ # the JSON payload (already represented via dedicated fields, or
25
+ # internal).
26
+ _RESERVED: frozenset[str] = frozenset(
27
+ {
28
+ "args",
29
+ "asctime",
30
+ "created",
31
+ "exc_info",
32
+ "exc_text",
33
+ "filename",
34
+ "funcName",
35
+ "levelname",
36
+ "levelno",
37
+ "lineno",
38
+ "message",
39
+ "module",
40
+ "msecs",
41
+ "msg",
42
+ "name",
43
+ "pathname",
44
+ "process",
45
+ "processName",
46
+ "relativeCreated",
47
+ "stack_info",
48
+ "thread",
49
+ "threadName",
50
+ "taskName",
51
+ }
52
+ )
53
+
54
+
55
+ class JsonFormatter(logging.Formatter):
56
+ """Emit one JSON object per record.
57
+
58
+ Output shape:
59
+ {"ts": "2026-05-11T16:42:01.123Z",
60
+ "level": "INFO",
61
+ "logger": "agentforge.agent",
62
+ "msg": "the message",
63
+ "run_id": "01HX...",
64
+ ...any custom extras...}
65
+ """
66
+
67
+ def format(self, record: logging.LogRecord) -> str:
68
+ payload: dict[str, Any] = {
69
+ "ts": datetime.fromtimestamp(record.created, tz=UTC).isoformat().replace("+00:00", "Z"),
70
+ "level": record.levelname,
71
+ "logger": record.name,
72
+ "msg": record.getMessage(),
73
+ }
74
+ # `run_id` lands here when `RunIdFilter` installed it.
75
+ if hasattr(record, "run_id"):
76
+ payload["run_id"] = record.run_id
77
+ # Surface any extras the caller attached via `logger.info(..., extra={...})`.
78
+ for key, value in record.__dict__.items():
79
+ if key in _RESERVED or key in payload or key.startswith("_"):
80
+ continue
81
+ payload[key] = value
82
+ if record.exc_info:
83
+ payload["exc"] = self.formatException(record.exc_info)
84
+ return json.dumps(payload, default=str)
85
+
86
+
87
def install_json_formatter(
    logger: logging.Logger | None = None,
    *,
    level: int = logging.INFO,
) -> logging.Handler:
    """Attach a JSON-emitting `StreamHandler` to `logger` (root when
    omitted) and return it.

    Idempotent: if a handler named `_HANDLER_NAME` is already attached,
    that handler is returned untouched and `level` is ignored.
    Otherwise a new handler is created at `level`, and the logger's own
    level is set to `level` when it is `NOTSET` or stricter than
    `level`, so records at `level` are not dropped before reaching the
    handler.

    The returned handler lets callers adjust level / stream afterwards.
    """
    target = logging.getLogger() if logger is None else logger

    already = next(
        (h for h in target.handlers if getattr(h, "name", None) == _HANDLER_NAME),
        None,
    )
    if already is not None:
        return already

    json_handler = logging.StreamHandler()
    json_handler.name = _HANDLER_NAME
    json_handler.setLevel(level)
    json_handler.setFormatter(JsonFormatter())
    target.addHandler(json_handler)

    needs_lowering = target.level == logging.NOTSET or target.level > level
    if needs_lowering:
        target.setLevel(level)
    return json_handler
110
+
111
+
112
def uninstall_json_formatter(logger: logging.Logger | None = None) -> None:
    """Detach every handler named `_HANDLER_NAME` from `logger` (root
    when omitted). A no-op when none is attached."""
    target = logging.getLogger() if logger is None else logger
    # Collect first: `removeHandler` mutates `target.handlers`.
    ours = [h for h in target.handlers if getattr(h, "name", None) == _HANDLER_NAME]
    for handler in ours:
        target.removeHandler(handler)