cendor-sdk 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cendor/sdk/__init__.py ADDED
@@ -0,0 +1,143 @@
1
+ """cendor.sdk — a governed, provider-agnostic agent SDK.
2
+
3
+ *The second door into Cendor:* the simple, all-in-one governed agent SDK. Cost budgets,
4
+ tamper-evident audit, PII redaction, context governance, and record/replay testing are the
5
+ **foundation**, not plugins — composed through ``cendor-core``'s bus / interceptor / ``Sink`` /
6
+ ``Compressor`` seams, correlated by ``trace()``, with zero SDK-specific glue. An ungoverned
7
+ ``run()`` works on ``cendor-core`` alone.
8
+
9
+ ```python
10
+ from cendor.sdk import Agent, tool, run, budget, guard, Policy, AuditLog
11
+
12
+ @tool
13
+ def get_weather(city: str) -> str:
14
+ "Current weather for a city."
15
+ return f"Sunny in {city}"
16
+
17
+ agent = Agent(name="assistant", model="gpt-4o", tools=[get_weather])
18
+ log = AuditLog(system="support", path="audit.jsonl")
19
+ with budget(usd=0.25, on_exceed="block"), guard(Policy.default(), audit=log):
20
+ result = run(agent, "What's the weather in Paris?", audit=log)
21
+ print(result.output, result.cost)
22
+ ```
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ # --- audit + redaction (cendor-acttrace, re-exported) -------------------------------------------
28
+ from cendor.acttrace import AuditLog, Policy, verify
29
+
30
+ # --- correlation (cendor-core) ------------------------------------------------------------------
31
+ from cendor.core import current_trace_id, trace
32
+
33
+ # --- budgets + attribution (cendor-tokenguard, re-exported) -------------------------------------
34
+ from cendor.tokenguard import BudgetExceeded, budget, configure, report, track
35
+
36
+ from ._governance import guard
37
+
38
+ # --- the SDK -----------------------------------------------------------------------------------
39
+ from .a2a import A2AClient, A2AServer
40
+ from .agent import Agent
41
+ from .checkpoint import Checkpointer
42
+ from .embeddings import aembed, embed
43
+ from .eval import EvalCase, EvalReport, EvalResult, evaluate
44
+ from .foundry import FoundryAdapter
45
+ from .hitl import require_approval
46
+ from .mcp import get_mcp_prompt, load_mcp_prompts, load_mcp_resources, load_mcp_tools
47
+ from .memory import Session, SQLiteSessionStore, SummarizingSession, llm_summarizer
48
+ from .orchestration import (
49
+ Handoff,
50
+ handoff,
51
+ parallel,
52
+ parallel_async,
53
+ sequential,
54
+ supervisor,
55
+ )
56
+ from .otel import span_tree
57
+ from .pricing import register_model_price
58
+ from .providers import ParsedResponse, ToolInvocation
59
+ from .rag import Hit, VectorIndex
60
+ from .resilience import RetryPolicy
61
+ from .result import (
62
+ Result,
63
+ RunComplete,
64
+ Step,
65
+ StreamEvent,
66
+ TextDelta,
67
+ ToolCallEvent,
68
+ ToolResultEvent,
69
+ )
70
+ from .result import Run as Run
71
+ from .runner import Runner, run
72
+ from .tools import Tool, tool
73
+
74
+ __version__ = "1.0.0"
75
+
76
+ __all__ = [
77
+ # agent + loop
78
+ "Agent",
79
+ "tool",
80
+ "Tool",
81
+ "run",
82
+ "Runner",
83
+ "Session",
84
+ # embeddings + RAG
85
+ "embed",
86
+ "aembed",
87
+ "VectorIndex",
88
+ "Hit",
89
+ # orchestration (Phase 2)
90
+ "handoff",
91
+ "Handoff",
92
+ "sequential",
93
+ "parallel",
94
+ "parallel_async",
95
+ "supervisor",
96
+ # interop (Phase 3)
97
+ "load_mcp_tools",
98
+ "load_mcp_prompts",
99
+ "get_mcp_prompt",
100
+ "load_mcp_resources",
101
+ "A2AServer",
102
+ "A2AClient",
103
+ "FoundryAdapter",
104
+ "span_tree",
105
+ "require_approval",
106
+ # hardening + eval (Phase 4)
107
+ "RetryPolicy",
108
+ "Checkpointer",
109
+ "SQLiteSessionStore",
110
+ "SummarizingSession",
111
+ "llm_summarizer",
112
+ "evaluate",
113
+ "EvalCase",
114
+ "EvalReport",
115
+ "EvalResult",
116
+ # result model
117
+ "Result",
118
+ "Run",
119
+ "Step",
120
+ "ParsedResponse",
121
+ "ToolInvocation",
122
+ # streaming events (run.stream / run.astream)
123
+ "StreamEvent",
124
+ "TextDelta",
125
+ "ToolCallEvent",
126
+ "ToolResultEvent",
127
+ "RunComplete",
128
+ # governance (the real tokenguard/acttrace objects, re-exported)
129
+ "budget",
130
+ "track",
131
+ "report",
132
+ "configure",
133
+ "register_model_price",
134
+ "BudgetExceeded",
135
+ "guard",
136
+ "Policy",
137
+ "AuditLog",
138
+ "verify",
139
+ # correlation
140
+ "trace",
141
+ "current_trace_id",
142
+ "__version__",
143
+ ]
@@ -0,0 +1,42 @@
1
+ """Thin governance wiring the SDK owns: ``guard`` as a context manager.
2
+
3
+ ``acttrace.guard()`` returns a bare pre-call interceptor (you install it on core's interceptor
4
+ seam yourself). The SDK exposes ``guard`` as a **context manager** so it reads like ``budget()`` /
5
+ ``track()`` and composes in one ``with`` line — the whole reason it's in the re-export surface
6
+ (plan §4). It installs the acttrace interceptor for the duration and removes it on exit; the actual
7
+ redact/block/flag logic and the audit recording are 100% acttrace's, riding core's seam.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from contextlib import contextmanager
13
+ from typing import Any
14
+
15
+ from cendor.core.instrument import add_interceptor, remove_interceptor
16
+
17
+
18
@contextmanager
def guard(policy: Any = None, *, audit: Any = None, on_block: Any = None) -> Any:
    """Run the ``with`` body under an ``acttrace`` policy guard.

    The interceptor is built by ``cendor.acttrace.guard``, registered through
    ``add_interceptor`` before the body runs, and always unregistered through
    ``remove_interceptor`` afterwards — including when the body raises.

    Args:
        policy: Passed positionally to ``acttrace.guard``.
        audit: Passed as ``audit=``.
        on_block: Passed as ``on_block=`` only when it is not ``None``; otherwise the
            keyword is omitted so ``acttrace.guard`` applies its own default.

    Yields:
        The installed interceptor object.

    ```python
    from cendor.sdk import guard, Policy, AuditLog
    log = AuditLog(system="support", path="audit.jsonl")
    with guard(Policy.gdpr(), audit=log):
        run(agent, "email me at alice@example.com", audit=log)
    ```
    """
    # Imported at call time rather than at module import time.
    from cendor.acttrace import guard as _acttrace_guard

    options: dict[str, Any] = {"audit": audit}
    if on_block is not None:
        options["on_block"] = on_block
    installed = _acttrace_guard(policy, **options)

    add_interceptor(installed)
    try:
        yield installed
    finally:
        remove_interceptor(installed)
cendor/sdk/a2a.py ADDED
@@ -0,0 +1,151 @@
1
+ """A2A: expose a governed ``cendor.sdk`` agent over the Agent-to-Agent protocol (plan §7 Phase 3).
2
+
3
+ A minimal, dependency-free implementation of A2A's JSON-RPC ``message/send`` plus the agent card.
4
+ ``A2AServer.handle(request)`` runs the agent and returns an A2A message result (with governance
5
+ metadata: trace id, cost); ``A2AClient`` calls a server **in-process** (no socket) for tests and
6
+ embedding. ``serve()`` is an optional local HTTP server (stdlib only — local-first, never required).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import uuid
13
+ from typing import Any
14
+
15
+ from .agent import Agent
16
+ from .runner import run
17
+
18
+
19
def _text_of_message(message: dict) -> str:
    """Return the text parts of an A2A message joined with newlines.

    A part counts as text when it is a dict whose ``kind`` is ``"text"`` or absent.
    The message can come straight from a decoded request body, so malformed shapes are
    tolerated instead of raising: a non-dict message, a non-iterable ``parts`` value,
    non-dict entries inside ``parts`` and ``"text": null`` all contribute nothing.

    Args:
        message: The A2A message object (normally a dict with a ``parts`` list).

    Returns:
        The non-empty text fragments joined by ``"\\n"``; ``""`` when there are none.
    """
    if not isinstance(message, dict):
        return ""
    try:
        parts = list(message.get("parts") or [])
    except TypeError:  # ``parts`` was a truthy non-iterable, e.g. a number
        return ""

    fragments: list[str] = []
    for part in parts:
        if not isinstance(part, dict) or part.get("kind", "text") != "text":
            continue
        text = part.get("text")
        if text is None:  # avoid str(None) -> "None" leaking into the prompt
            continue
        fragments.append(str(text))
    return "\n".join(fragment for fragment in fragments if fragment)
23
+
24
+
25
def _message_result(text: str, metadata: dict) -> dict:
    """Wrap ``text`` as an A2A agent message carrying ``metadata``.

    Args:
        text: The reply text; becomes the single ``text`` part.
        metadata: Stored as-is (same object) under ``"metadata"``.

    Returns:
        A dict with a fresh random ``messageId`` (uuid4 hex), role ``"agent"``,
        one text part, kind ``"message"`` and the given metadata.
    """
    text_part = {"kind": "text", "text": text}
    return dict(
        messageId=uuid.uuid4().hex,
        role="agent",
        parts=[text_part],
        kind="message",
        metadata=metadata,
    )
33
+
34
+
35
class A2AServer:
    """Serve one agent over A2A. In-process via :meth:`handle`; over HTTP via :func:`serve`."""

    def __init__(self, agent: Agent, *, audit: Any = None) -> None:
        """Bind the server to ``agent``; ``audit`` is forwarded to every ``run`` call."""
        self.agent = agent
        self.audit = audit

    def agent_card(self) -> dict:
        """The A2A agent card advertising this agent's identity and skills.

        The description is the agent's ``instructions`` (or a generic sentence when those
        are empty) and each entry of ``agent.toolset`` is listed as one skill.
        """
        return {
            "name": self.agent.name,
            "description": self.agent.instructions or f"The {self.agent.name} agent.",
            "version": "1.0.0",
            "protocolVersion": "0.2",
            "capabilities": {"streaming": False},
            "defaultInputModes": ["text/plain"],
            "defaultOutputModes": ["text/plain"],
            "skills": [
                {
                    "id": t.name,
                    "name": t.name,
                    "description": t.description,
                }
                for t in self.agent.toolset
            ],
        }

    @staticmethod
    def _rpc_error(rpc_id: Any, code: int, message: str) -> dict:
        """Build a JSON-RPC 2.0 error response envelope."""
        return {
            "jsonrpc": "2.0",
            "id": rpc_id,
            "error": {"code": code, "message": message},
        }

    def handle(self, request: dict) -> dict:
        """Dispatch a JSON-RPC A2A request. Supports ``message/send``.

        Structurally invalid input is answered with a JSON-RPC error object rather than
        an exception, because requests may arrive from a decoded HTTP body:

        * ``-32600`` — the request itself is not a JSON object;
        * ``-32601`` — any method other than ``message/send``;
        * ``-32602`` — ``params`` or ``params.message`` is present but not an object.

        Exceptions raised by the agent run itself are not caught here.

        Args:
            request: The decoded JSON-RPC request.

        Returns:
            A JSON-RPC response whose ``result`` is an A2A message with ``trace_id``,
            ``cost_usd`` and ``agents`` in its metadata, or an ``error`` response.
        """
        if not isinstance(request, dict):
            return self._rpc_error(None, -32600, "invalid request: expected a JSON object")

        rpc_id = request.get("id")
        method = request.get("method")
        if method != "message/send":
            return self._rpc_error(rpc_id, -32601, f"method not found: {method}")

        # Missing/empty params or message keep the original behaviour (run on empty text).
        params = request.get("params") or {}
        if not isinstance(params, dict):
            return self._rpc_error(rpc_id, -32602, "invalid params: expected an object")
        message = params.get("message") or {}
        if not isinstance(message, dict):
            return self._rpc_error(rpc_id, -32602, "invalid params: message must be an object")

        text = _text_of_message(message)
        result = run(self.agent, text, audit=self.audit)
        metadata = {
            "trace_id": result.trace_id,
            "cost_usd": str(result.cost.amount),
            "agents": result.agents,
        }
        return {
            "jsonrpc": "2.0",
            "id": rpc_id,
            "result": _message_result(str(result.output), metadata),
        }
85
+
86
+
87
class A2AClient:
    """Call an :class:`A2AServer` in-process (no network) — the offline/embedded path."""

    def __init__(self, server: A2AServer) -> None:
        """Wrap ``server``; every call goes straight to its ``handle`` method."""
        self.server = server

    def card(self) -> dict:
        """Return the server's agent card."""
        return self.server.agent_card()

    def _call(self, text: str) -> dict:
        """Send ``text`` and return the ``result`` object, raising on an error reply.

        Shared by :meth:`send` and :meth:`send_full` so both report a JSON-RPC error the
        same way instead of one of them failing with a bare ``KeyError``.

        Raises:
            RuntimeError: If the response carries an ``error`` member.
        """
        response = self.server.handle(self._request(text))
        if "error" in response:
            raise RuntimeError(f"A2A error: {response['error']}")
        return response["result"]

    def send(self, text: str) -> str:
        """Send a user message and return the agent's text reply.

        Only parts whose ``kind`` is exactly ``"text"`` are joined (with newlines).

        Raises:
            RuntimeError: If the server answered with a JSON-RPC error.
        """
        parts = self._call(text)["parts"]
        return "\n".join(p.get("text", "") for p in parts if p.get("kind") == "text")

    def send_full(self, text: str) -> dict:
        """Send a message and return the full A2A message result (incl. governance metadata).

        Raises:
            RuntimeError: If the server answered with a JSON-RPC error.
        """
        return self._call(text)

    @staticmethod
    def _request(text: str) -> dict:
        """Build a ``message/send`` JSON-RPC request with a fresh random id."""
        return {
            "jsonrpc": "2.0",
            "id": uuid.uuid4().hex,
            "method": "message/send",
            "params": {"message": {"role": "user", "parts": [{"kind": "text", "text": text}]}},
        }
116
+
117
+
118
def serve(agent: Agent, *, host: str = "127.0.0.1", port: int = 0, audit: Any = None) -> Any:
    """Start a local A2A HTTP server (stdlib ``http.server``). Optional, opt-in; returns the server.

    The agent card is served at ``GET /.well-known/agent-card.json`` (and ``GET /``);
    JSON-RPC at ``POST /``. Call ``.serve_forever()`` (blocking) or run it in a thread;
    ``.shutdown()`` to stop.

    A request body that cannot be parsed (bad ``Content-Length``, invalid JSON or invalid
    UTF-8) is answered with JSON-RPC error ``-32700``; a body that parses to something
    other than an object is answered with ``-32600``. Both previously raised inside the
    handler, leaving the client without a response.

    Args:
        agent: The agent to expose.
        host: Interface to bind; defaults to loopback.
        port: TCP port; ``0`` lets the OS pick a free one.
        audit: Forwarded to the underlying :class:`A2AServer`.

    Returns:
        An unstarted ``ThreadingHTTPServer`` bound to ``(host, port)``.
    """
    from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer

    a2a = A2AServer(agent, audit=audit)

    def _rpc_error(code: int, message: str) -> dict:
        # The request id is unknown when the body could not be read as an object.
        return {"jsonrpc": "2.0", "id": None, "error": {"code": code, "message": message}}

    class Handler(BaseHTTPRequestHandler):
        def _send(self, code: int, payload: dict) -> None:
            body = json.dumps(payload).encode()
            self.send_response(code)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def do_GET(self) -> None:  # noqa: N802 - stdlib handler name
            if self.path.rstrip("/").endswith("agent-card.json") or self.path == "/":
                self._send(200, a2a.agent_card())
            else:
                self._send(404, {"error": "not found"})

        def do_POST(self) -> None:  # noqa: N802 - stdlib handler name
            try:
                # A negative length would make read() block until EOF; clamp it to zero.
                length = max(0, int(self.headers.get("Content-Length", 0)))
                request = json.loads(self.rfile.read(length) or b"{}")
            except ValueError:  # int() failure, JSONDecodeError and UnicodeDecodeError
                # Same 200 status this handler already uses for JSON-RPC error replies.
                self._send(200, _rpc_error(-32700, "parse error"))
                return
            if not isinstance(request, dict):
                self._send(200, _rpc_error(-32600, "invalid request: expected a JSON object"))
                return
            self._send(200, a2a.handle(request))

        def log_message(self, *args: Any) -> None:  # silence the default stderr logging
            return

    return ThreadingHTTPServer((host, port), Handler)
cendor/sdk/agent.py ADDED
@@ -0,0 +1,94 @@
1
+ """``Agent`` — a small, opinionated, provider-agnostic agent definition.
2
+
3
+ An ``Agent`` is declarative data: a name, a model id, instructions, tools, and a few knobs. The
4
+ loop lives in ``runner.py``; governance lives in the surrounding ``budget()``/``guard()`` contexts.
5
+ The provider is inferred from the model id (override with ``provider=``).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import Any
12
+
13
+ from .providers import Provider, resolve_provider
14
+ from .tools import Tool, as_tool
15
+
16
+
17
@dataclass
class Agent:
    """Declarative description of a provider-agnostic agent.

    Args:
        name: Short identifier used in results, audit decisions, and handoffs.
        model: Any core-supported model id (``"gpt-4o"``, ``"claude-opus-4-8"``, ``"gemini-…"``).
        instructions: The system prompt.
        tools: ``@tool``-decorated callables, plain functions, or ``Tool`` objects; each is
            normalised with ``as_tool`` when the agent is created.
        provider: Overrides the provider otherwise inferred from ``model``.
        output_type: Structured output — a dataclass, a Pydantic model, or a JSON-schema dict.
        max_turns: Upper bound on ReAct iterations (loop-termination guarantee).
        context_budget: If set, assemble the history to this token budget via ``contextkit``.
        temperature / max_tokens: Optional generation controls.
        extra: Extra provider request kwargs merged into every model call — the passthrough
            for things the SDK doesn't model first-class (``tool_choice``,
            ``reasoning_effort``, ``top_p``, ``stop``, ``seed``, ``response_format``,
            ``extra_body``, …). Merged at the top level of the request, matching the
            OpenAI/Anthropic/Ollama/HF/Azure shape.
        retriever: Optional ``query -> list[str]`` callable (e.g. ``VectorIndex.as_retriever()``).
            When set, context is retrieved for the run's query and injected as a system
            message before the call — "always-on" RAG. (For agentic retrieval, expose it as
            a tool instead.)
        handoffs: Names of peer agents this agent may transfer to (Phase 2).
        max_usd: Per-agent spend cap (enforced by the orchestrator, Phase 2).
        api_key / base_url / client: Optional client config, or an explicit instrumented client.
    """

    name: str
    model: str
    instructions: str = ""
    tools: list[Any] = field(default_factory=list)
    provider: str | None = None
    output_type: Any = None
    max_turns: int = 8
    context_budget: int | None = None
    temperature: float | None = None
    max_tokens: int | None = None
    extra: dict[str, Any] = field(default_factory=dict)
    retriever: Any = None  # Callable[[str], list[str]] — injected as context when set (RAG)
    handoffs: list[Any] = field(default_factory=list)
    max_usd: float | None = None  # per-agent spend cap (enforced by the orchestrator, Phase 2)
    api_key: str | None = None
    base_url: str | None = None
    client: Any = None

    # Derived state, rebuilt in __post_init__; excluded from __init__ and repr.
    _tools: list[Tool] = field(default_factory=list, init=False, repr=False)
    _tool_map: dict[str, Tool] = field(default_factory=dict, init=False, repr=False)

    def __post_init__(self) -> None:
        """Normalise ``tools`` into ``Tool`` objects and index them by name."""
        normalised = []
        for candidate in self.tools:
            normalised.append(as_tool(candidate))
        self._tools = normalised
        # Later entries win when two tools share a name.
        self._tool_map = dict((entry.name, entry) for entry in normalised)

    @property
    def provider_impl(self) -> Provider:
        """Provider implementation resolved from ``model`` and the optional ``provider``."""
        implementation = resolve_provider(self.model, self.provider)
        return implementation

    @property
    def toolset(self) -> list[Tool]:
        """The normalised ``Tool`` objects (the internal list itself, not a copy)."""
        return self._tools

    def get_tool(self, name: str) -> Tool | None:
        """Return the tool registered under ``name``, or ``None`` when there is none."""
        return self._tool_map.get(name)

    def config(self) -> dict[str, Any]:
        """Client construction config: the truthy ones of ``api_key`` / ``base_url``."""
        candidates = (("api_key", self.api_key), ("base_url", self.base_url))
        return {key: value for key, value in candidates if value}

    def add_tool(self, tool: Any) -> None:
        """Register an extra tool at runtime (used by MCP/handoff wiring in later phases)."""
        extra_tool = as_tool(tool)
        self._tools.append(extra_tool)
        self._tool_map[extra_tool.name] = extra_tool
@@ -0,0 +1,55 @@
1
+ """Checkpointed / resumable runs — local-first (plan §7 Phase 4).
2
+
3
+ A ``Checkpointer`` persists a run's conversation to a local JSON file after each turn, so a long
4
+ agent can resume after a crash or restart without re-doing completed work (already-run tools are in
5
+ the saved messages and are not re-executed). Local by default; no server.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+
15
class Checkpointer:
    """Persist and restore run state to a local JSON file."""

    def __init__(self, path: str) -> None:
        """Remember ``path`` (converted to a ``Path``); nothing is read or created yet."""
        self.path = Path(path)

    def load(self) -> dict[str, Any] | None:
        """The saved state (``{run_id, messages, done, output}``), or ``None`` if absent/bad.

        "Bad" covers every way the file can fail to yield a state object: it is missing or
        unreadable (``OSError``), is not valid UTF-8, is not valid JSON, or decodes to
        something other than a JSON object.
        """
        try:
            # No exists() pre-check: a missing file raises FileNotFoundError, an OSError.
            state = json.loads(self.path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            return None
        # Callers use ``.get`` on the result, so a list/str/number is as unusable as no file.
        return state if isinstance(state, dict) else None

    def save(self, state: dict[str, Any]) -> None:
        """Atomically write the run state (temp file + replace).

        Parent directories are created as needed. Values JSON cannot encode natively are
        written via ``str()`` (``default=str``).
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self.path.with_suffix(self.path.suffix + ".tmp")
        tmp.write_text(json.dumps(state, indent=2, default=str), encoding="utf-8")
        tmp.replace(self.path)

    def resumable_messages(self) -> list[dict] | None:
        """Saved messages to resume from, or ``None`` if there's no unfinished checkpoint."""
        state = self.load()
        if state and not state.get("done"):
            return list(state.get("messages") or [])
        return None

    def clear(self) -> None:
        """Delete the checkpoint file (e.g. after a successful, finished run).

        Best-effort: a missing file or any other ``OSError`` is deliberately ignored.
        """
        try:
            self.path.unlink()
        except OSError:
            pass
50
+
51
+
52
def _as_checkpointer(value: Any) -> Checkpointer | None:
    """Coerce ``value`` to a ``Checkpointer``.

    ``None`` and existing ``Checkpointer`` instances are returned unchanged; anything
    else is converted with ``str()`` and used as the checkpoint path.
    """
    if value is None:
        return None
    if isinstance(value, Checkpointer):
        return value
    return Checkpointer(str(value))
@@ -0,0 +1,132 @@
1
+ """Embeddings — governed, captured embedding calls (the RAG plumbing from the plan's §0).
2
+
3
+ ``embed(model, inputs)`` calls the provider's embeddings endpoint, returns the vectors, and emits a
4
+ governed ``LLMCall`` on ``cendor-core``'s bus — so the call's tokens + cost land in the *same* audit
5
+ / attribution / cost tree as chat calls (RAG embeddings were invisible beneath frameworks; owning
6
+ the call makes them first-class). Correlate them by wrapping in ``trace(...)`` like any run.
7
+
8
+ Note: this *captures* (records) the embedding call. Pre-call USD *blocking* of embeddings would need
9
+ core-level embeddings interception; use a ``tokens=`` budget or register a price to bound spend.
10
+
11
+ OpenAI-family providers (``openai`` / ``azure`` / ``foundry_local``) share ``embeddings.create``;
12
+ for others, call the provider's embedding client directly.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import time
18
+ import uuid
19
+ from datetime import UTC, datetime
20
+ from typing import Any
21
+
22
+ from cendor.core import bus, current_trace_id, prices
23
+ from cendor.core.types import LLMCall, Usage
24
+
25
+ from .providers import OpenAIChatProvider, get_provider, resolve_provider
26
+
27
+
28
def _texts(inputs: str | list[str]) -> list[str]:
    """Normalise ``inputs`` to a list: a lone string is wrapped, any other iterable is copied."""
    if isinstance(inputs, str):
        return [inputs]
    return list(inputs)
30
+
31
+
32
def _config(api_key: str | None, base_url: str | None) -> dict[str, Any]:
    """Collect client options, keeping only the truthy ones of ``api_key`` / ``base_url``."""
    candidates = (("api_key", api_key), ("base_url", base_url))
    return {option: value for option, value in candidates if value}
39
+
40
+
41
def _resolve(model: str, provider: str | None) -> OpenAIChatProvider:
    """Pick the provider used for an embeddings call.

    ``resolve_provider(model, provider)`` is tried first; if it raises ``ValueError`` the
    ``"openai"`` provider is used instead (embedding ids rarely prefix-infer, so default
    to the OpenAI shape).

    Raises:
        NotImplementedError: If the chosen provider is not an ``OpenAIChatProvider``.
    """
    try:
        chosen = resolve_provider(model, provider)
    except ValueError:
        chosen = get_provider("openai")

    if isinstance(chosen, OpenAIChatProvider):
        return chosen

    raise NotImplementedError(
        f"embed() supports OpenAI-family providers (openai/azure/foundry_local); got "
        f"{chosen.name!r}. Call that provider's embeddings client directly."
    )
52
+
53
+
54
def _vectors(resp: Any) -> list[list[float]]:
    """Extract one embedding list per item of an embeddings response.

    Both the response and its items may be attribute-style objects or plain dicts: the
    attribute is tried first and, when that yields ``None`` and the value is a dict, the
    key is looked up instead. An item without an embedding contributes ``[]``.
    """

    def lookup(container: Any, key: str, fallback: Any) -> Any:
        found = getattr(container, key, None)
        if found is None and isinstance(container, dict):
            found = container.get(key, fallback)
        return found

    rows = lookup(resp, "data", [])
    vectors: list[list[float]] = []
    for row in rows or []:
        values = lookup(row, "embedding", None)
        vectors.append([] if values is None else list(values))
    return vectors
65
+
66
+
67
def _input_tokens(resp: Any) -> int:
    """Read the input-token count from an embeddings response.

    The response and its ``usage`` member may each be an attribute-style object or a plain
    dict. ``prompt_tokens`` is preferred, ``total_tokens`` is the fallback, and 0 is
    returned when neither is available — the same rule for every combination of shapes.
    """
    usage = getattr(resp, "usage", None)
    if usage is None and isinstance(resp, dict):
        usage = resp.get("usage")
    if usage is None:
        return 0

    def read(key: str) -> Any:
        if isinstance(usage, dict):
            return usage.get(key)
        return getattr(usage, key, None)

    return int(read("prompt_tokens") or read("total_tokens") or 0)


def _emit(model: str, provider: str, resp: Any, start: float) -> None:
    """Record a finished embeddings call as an ``LLMCall`` on the bus.

    Args:
        model: Model id stored on the call and used for the price lookup.
        provider: Provider name stored on the call.
        resp: The provider response; only its ``usage`` is read here.
        start: ``time.perf_counter()`` value taken just before the request was sent.

    The call carries the current trace id, the elapsed time in milliseconds, the input
    token count (output tokens are always 0) and ``metadata["embedding"] = True``. When
    ``prices.estimate`` raises ``KeyError`` for the model, ``cost`` is ``None``; otherwise
    the estimate is stored and ``metadata["cost_estimated"]`` is set.
    """
    inp = _input_tokens(resp)
    call = LLMCall(
        id=uuid.uuid4().hex,
        provider=provider,
        model=model,
        messages=[],
        trace_id=current_trace_id(),
        ts=datetime.now(UTC),
    )
    call.latency_ms = (time.perf_counter() - start) * 1000.0
    call.usage = Usage(input_tokens=inp, output_tokens=0)
    try:
        call.cost = prices.estimate(model, inp, 0)
        call.metadata["cost_estimated"] = True
    except KeyError:
        call.cost = None
    call.metadata["embedding"] = True
    bus.emit(call)
91
+
92
+
93
def embed(
    model: str,
    inputs: str | list[str],
    *,
    provider: str | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
    dimensions: int | None = None,
) -> list[list[float]]:
    """Embed one or more texts synchronously and return one vector per input.

    The request goes through the resolved provider's sync client
    (``client.embeddings.create``); ``dimensions`` is sent only when given. After the
    response arrives the call is reported via ``_emit`` before the vectors are returned.
    """
    resolved = _resolve(model, provider)
    sync_client = resolved.client(async_=False, config=_config(api_key, base_url))

    request: dict[str, Any] = dict(model=model, input=_texts(inputs))
    if dimensions is not None:
        request.update(dimensions=dimensions)

    started = time.perf_counter()
    response = sync_client.embeddings.create(**request)
    _emit(model, resolved.name, response, started)
    return _vectors(response)
112
+
113
+
114
async def aembed(
    model: str,
    inputs: str | list[str],
    *,
    provider: str | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
    dimensions: int | None = None,
) -> list[list[float]]:
    """Async counterpart of :func:`embed`.

    Uses the resolved provider's async client and awaits ``client.embeddings.create``;
    ``dimensions`` is sent only when given. The call is reported via ``_emit`` before the
    vectors are returned.
    """
    resolved = _resolve(model, provider)
    async_client = resolved.client(async_=True, config=_config(api_key, base_url))

    request: dict[str, Any] = dict(model=model, input=_texts(inputs))
    if dimensions is not None:
        request.update(dimensions=dimensions)

    started = time.perf_counter()
    response = await async_client.embeddings.create(**request)
    _emit(model, resolved.name, response, started)
    return _vectors(response)
132
+ return _vectors(resp)