cendor-sdk 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cendor/sdk/__init__.py +143 -0
- cendor/sdk/_governance.py +42 -0
- cendor/sdk/a2a.py +151 -0
- cendor/sdk/agent.py +94 -0
- cendor/sdk/checkpoint.py +55 -0
- cendor/sdk/embeddings.py +132 -0
- cendor/sdk/eval.py +129 -0
- cendor/sdk/foundry.py +58 -0
- cendor/sdk/hitl.py +85 -0
- cendor/sdk/mcp.py +132 -0
- cendor/sdk/memory.py +238 -0
- cendor/sdk/orchestration.py +356 -0
- cendor/sdk/otel.py +112 -0
- cendor/sdk/pricing.py +73 -0
- cendor/sdk/providers.py +1243 -0
- cendor/sdk/py.typed +0 -0
- cendor/sdk/rag.py +111 -0
- cendor/sdk/resilience.py +94 -0
- cendor/sdk/result.py +170 -0
- cendor/sdk/runner.py +735 -0
- cendor/sdk/tools.py +264 -0
- cendor_sdk-1.0.0.dist-info/METADATA +238 -0
- cendor_sdk-1.0.0.dist-info/RECORD +25 -0
- cendor_sdk-1.0.0.dist-info/WHEEL +4 -0
- cendor_sdk-1.0.0.dist-info/licenses/LICENSE +201 -0
cendor/sdk/__init__.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""cendor.sdk — a governed, provider-agnostic agent SDK.
|
|
2
|
+
|
|
3
|
+
*The second door into Cendor:* the simple, all-in-one governed agent SDK. Cost budgets,
|
|
4
|
+
tamper-evident audit, PII redaction, context governance, and record/replay testing are the
|
|
5
|
+
**foundation**, not plugins — composed through ``cendor-core``'s bus / interceptor / ``Sink`` /
|
|
6
|
+
``Compressor`` seams, correlated by ``trace()``, with zero SDK-specific glue. An ungoverned
|
|
7
|
+
``run()`` works on ``cendor-core`` alone.
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from cendor.sdk import Agent, tool, run, budget, guard, Policy, AuditLog
|
|
11
|
+
|
|
12
|
+
@tool
|
|
13
|
+
def get_weather(city: str) -> str:
|
|
14
|
+
"Current weather for a city."
|
|
15
|
+
return f"Sunny in {city}"
|
|
16
|
+
|
|
17
|
+
agent = Agent(name="assistant", model="gpt-4o", tools=[get_weather])
|
|
18
|
+
log = AuditLog(system="support", path="audit.jsonl")
|
|
19
|
+
with budget(usd=0.25, on_exceed="block"), guard(Policy.default(), audit=log):
|
|
20
|
+
result = run(agent, "What's the weather in Paris?", audit=log)
|
|
21
|
+
print(result.output, result.cost)
|
|
22
|
+
```
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
# --- audit + redaction (cendor-acttrace, re-exported) -------------------------------------------
|
|
28
|
+
from cendor.acttrace import AuditLog, Policy, verify
|
|
29
|
+
|
|
30
|
+
# --- correlation (cendor-core) ------------------------------------------------------------------
|
|
31
|
+
from cendor.core import current_trace_id, trace
|
|
32
|
+
|
|
33
|
+
# --- budgets + attribution (cendor-tokenguard, re-exported) -------------------------------------
|
|
34
|
+
from cendor.tokenguard import BudgetExceeded, budget, configure, report, track
|
|
35
|
+
|
|
36
|
+
from ._governance import guard
|
|
37
|
+
|
|
38
|
+
# --- the SDK -----------------------------------------------------------------------------------
|
|
39
|
+
from .a2a import A2AClient, A2AServer
|
|
40
|
+
from .agent import Agent
|
|
41
|
+
from .checkpoint import Checkpointer
|
|
42
|
+
from .embeddings import aembed, embed
|
|
43
|
+
from .eval import EvalCase, EvalReport, EvalResult, evaluate
|
|
44
|
+
from .foundry import FoundryAdapter
|
|
45
|
+
from .hitl import require_approval
|
|
46
|
+
from .mcp import get_mcp_prompt, load_mcp_prompts, load_mcp_resources, load_mcp_tools
|
|
47
|
+
from .memory import Session, SQLiteSessionStore, SummarizingSession, llm_summarizer
|
|
48
|
+
from .orchestration import (
|
|
49
|
+
Handoff,
|
|
50
|
+
handoff,
|
|
51
|
+
parallel,
|
|
52
|
+
parallel_async,
|
|
53
|
+
sequential,
|
|
54
|
+
supervisor,
|
|
55
|
+
)
|
|
56
|
+
from .otel import span_tree
|
|
57
|
+
from .pricing import register_model_price
|
|
58
|
+
from .providers import ParsedResponse, ToolInvocation
|
|
59
|
+
from .rag import Hit, VectorIndex
|
|
60
|
+
from .resilience import RetryPolicy
|
|
61
|
+
from .result import (
|
|
62
|
+
Result,
|
|
63
|
+
RunComplete,
|
|
64
|
+
Step,
|
|
65
|
+
StreamEvent,
|
|
66
|
+
TextDelta,
|
|
67
|
+
ToolCallEvent,
|
|
68
|
+
ToolResultEvent,
|
|
69
|
+
)
|
|
70
|
+
from .result import Run as Run
|
|
71
|
+
from .runner import Runner, run
|
|
72
|
+
from .tools import Tool, tool
|
|
73
|
+
|
|
74
|
+
__version__ = "1.0.0"
|
|
75
|
+
|
|
76
|
+
__all__ = [
|
|
77
|
+
# agent + loop
|
|
78
|
+
"Agent",
|
|
79
|
+
"tool",
|
|
80
|
+
"Tool",
|
|
81
|
+
"run",
|
|
82
|
+
"Runner",
|
|
83
|
+
"Session",
|
|
84
|
+
# embeddings + RAG
|
|
85
|
+
"embed",
|
|
86
|
+
"aembed",
|
|
87
|
+
"VectorIndex",
|
|
88
|
+
"Hit",
|
|
89
|
+
# orchestration (Phase 2)
|
|
90
|
+
"handoff",
|
|
91
|
+
"Handoff",
|
|
92
|
+
"sequential",
|
|
93
|
+
"parallel",
|
|
94
|
+
"parallel_async",
|
|
95
|
+
"supervisor",
|
|
96
|
+
# interop (Phase 3)
|
|
97
|
+
"load_mcp_tools",
|
|
98
|
+
"load_mcp_prompts",
|
|
99
|
+
"get_mcp_prompt",
|
|
100
|
+
"load_mcp_resources",
|
|
101
|
+
"A2AServer",
|
|
102
|
+
"A2AClient",
|
|
103
|
+
"FoundryAdapter",
|
|
104
|
+
"span_tree",
|
|
105
|
+
"require_approval",
|
|
106
|
+
# hardening + eval (Phase 4)
|
|
107
|
+
"RetryPolicy",
|
|
108
|
+
"Checkpointer",
|
|
109
|
+
"SQLiteSessionStore",
|
|
110
|
+
"SummarizingSession",
|
|
111
|
+
"llm_summarizer",
|
|
112
|
+
"evaluate",
|
|
113
|
+
"EvalCase",
|
|
114
|
+
"EvalReport",
|
|
115
|
+
"EvalResult",
|
|
116
|
+
# result model
|
|
117
|
+
"Result",
|
|
118
|
+
"Run",
|
|
119
|
+
"Step",
|
|
120
|
+
"ParsedResponse",
|
|
121
|
+
"ToolInvocation",
|
|
122
|
+
# streaming events (run.stream / run.astream)
|
|
123
|
+
"StreamEvent",
|
|
124
|
+
"TextDelta",
|
|
125
|
+
"ToolCallEvent",
|
|
126
|
+
"ToolResultEvent",
|
|
127
|
+
"RunComplete",
|
|
128
|
+
# governance (the real tokenguard/acttrace objects, re-exported)
|
|
129
|
+
"budget",
|
|
130
|
+
"track",
|
|
131
|
+
"report",
|
|
132
|
+
"configure",
|
|
133
|
+
"register_model_price",
|
|
134
|
+
"BudgetExceeded",
|
|
135
|
+
"guard",
|
|
136
|
+
"Policy",
|
|
137
|
+
"AuditLog",
|
|
138
|
+
"verify",
|
|
139
|
+
# correlation
|
|
140
|
+
"trace",
|
|
141
|
+
"current_trace_id",
|
|
142
|
+
"__version__",
|
|
143
|
+
]
|
cendor/sdk/_governance.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Thin governance wiring the SDK owns: ``guard`` as a context manager.
|
|
2
|
+
|
|
3
|
+
``acttrace.guard()`` returns a bare pre-call interceptor (you install it on core's interceptor
|
|
4
|
+
seam yourself). The SDK exposes ``guard`` as a **context manager** so it reads like ``budget()`` /
|
|
5
|
+
``track()`` and composes in one ``with`` line — the whole reason it's in the re-export surface
|
|
6
|
+
(plan §4). It installs the acttrace interceptor for the duration and removes it on exit; the actual
|
|
7
|
+
redact/block/flag logic and the audit recording are 100% acttrace's, riding core's seam.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from contextlib import contextmanager
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from cendor.core.instrument import add_interceptor, remove_interceptor
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@contextmanager
def guard(policy: Any = None, *, audit: Any = None, on_block: Any = None) -> Any:
    """Keep an ``acttrace`` policy guard installed on core's interceptor seam while the block runs.

    The interceptor is built by ``cendor.acttrace.guard``, registered with ``add_interceptor``
    on entry, yielded to the ``with`` body, and always unregistered with ``remove_interceptor``
    on exit (even when the body raises). Per the ``Policy`` it redacts PII **before** the
    provider sees it, blocks disallowed content, and flags the rest; when ``audit`` is given,
    each action is recorded on the hash-chained log.

    Args:
        policy: The policy passed positionally to ``acttrace.guard``.
        audit: Optional audit log, always forwarded as ``audit=``.
        on_block: Optional value forwarded as ``on_block=`` only when it is not ``None``, so
            ``acttrace``'s own default applies otherwise.

    ```python
    from cendor.sdk import guard, Policy, AuditLog
    log = AuditLog(system="support", path="audit.jsonl")
    with guard(Policy.gdpr(), audit=log):
        run(agent, "email me at alice@example.com", audit=log)
    ```
    """
    # Imported lazily, at call time, exactly as the module has always done.
    from cendor.acttrace import guard as _build_interceptor

    options: dict[str, Any] = {"audit": audit}
    if on_block is not None:
        options["on_block"] = on_block
    hook = _build_interceptor(policy, **options)

    add_interceptor(hook)
    try:
        yield hook
    finally:
        remove_interceptor(hook)
|
cendor/sdk/a2a.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""A2A: expose a governed ``cendor.sdk`` agent over the Agent-to-Agent protocol (plan §7 Phase 3).
|
|
2
|
+
|
|
3
|
+
A minimal, dependency-free implementation of A2A's JSON-RPC ``message/send`` plus the agent card.
|
|
4
|
+
``A2AServer.handle(request)`` runs the agent and returns an A2A message result (with governance
|
|
5
|
+
metadata: trace id, cost); ``A2AClient`` calls a server **in-process** (no socket) for tests and
|
|
6
|
+
embedding. ``serve()`` is an optional local HTTP server (stdlib only — local-first, never required).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import uuid
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from .agent import Agent
|
|
16
|
+
from .runner import run
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _text_of_message(message: dict) -> str:
|
|
20
|
+
parts = message.get("parts") or []
|
|
21
|
+
texts = [str(p.get("text", "")) for p in parts if p.get("kind", "text") == "text"]
|
|
22
|
+
return "\n".join(t for t in texts if t)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _message_result(text: str, metadata: dict) -> dict:
|
|
26
|
+
return {
|
|
27
|
+
"messageId": uuid.uuid4().hex,
|
|
28
|
+
"role": "agent",
|
|
29
|
+
"parts": [{"kind": "text", "text": text}],
|
|
30
|
+
"kind": "message",
|
|
31
|
+
"metadata": metadata,
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class A2AServer:
    """Expose one agent over A2A: in-process through :meth:`handle`, over HTTP through :func:`serve`."""

    def __init__(self, agent: Agent, *, audit: Any = None) -> None:
        self.agent = agent
        self.audit = audit

    def agent_card(self) -> dict:
        """Build the A2A agent card: the agent's identity plus one skill entry per tool."""
        agent = self.agent
        card: dict = {
            "name": agent.name,
            # Fall back to a generic sentence when the agent has no instructions.
            "description": agent.instructions or f"The {agent.name} agent.",
            "version": "1.0.0",
            "protocolVersion": "0.2",
            "capabilities": {"streaming": False},
            "defaultInputModes": ["text/plain"],
            "defaultOutputModes": ["text/plain"],
        }
        skills = []
        for entry in agent.toolset:
            skills.append({"id": entry.name, "name": entry.name, "description": entry.description})
        card["skills"] = skills
        return card

    def handle(self, request: dict) -> dict:
        """Dispatch one JSON-RPC A2A request; only ``message/send`` is supported.

        For ``message/send`` the message text is run through the agent and the reply is
        returned as an A2A message whose metadata carries the run's trace id, cost and agents.
        Any other method yields a JSON-RPC ``-32601`` error response.
        """
        rpc_id = request.get("id")
        method = request.get("method")
        envelope: dict = {"jsonrpc": "2.0", "id": rpc_id}

        if method == "message/send":
            params = request.get("params") or {}
            prompt = _text_of_message(params.get("message") or {})
            outcome = run(self.agent, prompt, audit=self.audit)
            metadata = {
                "trace_id": outcome.trace_id,
                "cost_usd": str(outcome.cost.amount),
                "agents": outcome.agents,
            }
            envelope["result"] = _message_result(str(outcome.output), metadata)
        else:
            envelope["error"] = {"code": -32601, "message": f"method not found: {method}"}
        return envelope
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class A2AClient:
    """Call an :class:`A2AServer` in-process (no network) — the offline/embedded path."""

    def __init__(self, server: A2AServer) -> None:
        self.server = server

    def card(self) -> dict:
        """Return the server's agent card."""
        return self.server.agent_card()

    def send(self, text: str) -> str:
        """Send a user message and return the agent's text reply.

        Raises:
            RuntimeError: If the server answers with a JSON-RPC error.
        """
        parts = self._call(text)["parts"]
        return "\n".join(p.get("text", "") for p in parts if p.get("kind") == "text")

    def send_full(self, text: str) -> dict:
        """Send a message and return the full A2A message result (incl. governance metadata).

        Raises:
            RuntimeError: If the server answers with a JSON-RPC error (same as :meth:`send`).
        """
        return self._call(text)

    def _call(self, text: str) -> dict:
        """Send ``text`` as ``message/send`` and return the response's ``result`` object."""
        response = self.server.handle(self._request(text))
        # One shared check, so send() and send_full() fail the same way on an error response
        # instead of send_full() tripping over a missing "result" key.
        if "error" in response:
            raise RuntimeError(f"A2A error: {response['error']}")
        return response["result"]

    @staticmethod
    def _request(text: str) -> dict:
        """Build a JSON-RPC ``message/send`` request carrying ``text`` as a single text part."""
        return {
            "jsonrpc": "2.0",
            "id": uuid.uuid4().hex,
            "method": "message/send",
            "params": {"message": {"role": "user", "parts": [{"kind": "text", "text": text}]}},
        }
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def serve(agent: Agent, *, host: str = "127.0.0.1", port: int = 0, audit: Any = None) -> Any:
    """Create a local A2A HTTP server (stdlib ``http.server``). Optional, opt-in; returns the server.

    The agent card is served at ``GET /.well-known/agent-card.json`` (any path ending in
    ``agent-card.json``, and ``GET /``, return it too); JSON-RPC at ``POST``.
    Call ``.serve_forever()`` (blocking) or run it in a thread; ``.shutdown()`` to stop.

    Malformed POSTs get a JSON-RPC error body instead of an unhandled exception in the
    handler: an unusable ``Content-Length`` gets HTTP 400 with ``-32600``; a body that is not
    valid JSON gets ``-32700``; a JSON payload that is not an object gets ``-32600``.

    Args:
        agent: The agent to expose.
        host: Interface to bind; defaults to loopback.
        port: TCP port; ``0`` lets the OS choose a free one.
        audit: Optional audit log handed to the underlying :class:`A2AServer`.

    Returns:
        The ``ThreadingHTTPServer`` instance.
    """
    from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer

    a2a = A2AServer(agent, audit=audit)

    def _rpc_error(code: int, message: str) -> dict:
        # The request id is unknown when the request itself could not be read.
        return {"jsonrpc": "2.0", "id": None, "error": {"code": code, "message": message}}

    class Handler(BaseHTTPRequestHandler):
        def _send(self, code: int, payload: dict) -> None:
            body = json.dumps(payload).encode()
            self.send_response(code)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def do_GET(self) -> None:  # noqa: N802 - stdlib handler name
            if self.path.rstrip("/").endswith("agent-card.json") or self.path == "/":
                self._send(200, a2a.agent_card())
            else:
                self._send(404, {"error": "not found"})

        def do_POST(self) -> None:  # noqa: N802 - stdlib handler name
            try:
                length = int(self.headers.get("Content-Length", 0))
            except ValueError:
                length = -1
            # A negative length would make ``rfile.read`` block until the peer closes.
            if length < 0:
                self._send(400, _rpc_error(-32600, "invalid Content-Length"))
                return
            try:
                request = json.loads(self.rfile.read(length) or b"{}")
            except ValueError:  # JSONDecodeError and UnicodeDecodeError are both ValueErrors
                self._send(200, _rpc_error(-32700, "parse error"))
                return
            # ``A2AServer.handle`` reads the request with ``.get``; arrays/scalars are invalid.
            if not isinstance(request, dict):
                self._send(200, _rpc_error(-32600, "invalid request"))
                return
            self._send(200, a2a.handle(request))

        def log_message(self, *args: Any) -> None:  # silence the default stderr logging
            return

    return ThreadingHTTPServer((host, port), Handler)
|
cendor/sdk/agent.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""``Agent`` — a small, opinionated, provider-agnostic agent definition.
|
|
2
|
+
|
|
3
|
+
An ``Agent`` is declarative data: a name, a model id, instructions, tools, and a few knobs. The
|
|
4
|
+
loop lives in ``runner.py``; governance lives in the surrounding ``budget()``/``guard()`` contexts.
|
|
5
|
+
The provider is inferred from the model id (override with ``provider=``).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from .providers import Provider, resolve_provider
|
|
14
|
+
from .tools import Tool, as_tool
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
|
|
18
|
+
class Agent:
|
|
19
|
+
"""A provider-agnostic agent.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
name: A short identifier used in results, audit decisions, and handoffs.
|
|
23
|
+
model: Any core-supported model id (``"gpt-4o"``, ``"claude-opus-4-8"``, ``"gemini-…"``).
|
|
24
|
+
instructions: The system prompt.
|
|
25
|
+
tools: ``@tool``-decorated callables, plain functions, or ``Tool`` objects.
|
|
26
|
+
provider: Override the provider inferred from ``model``.
|
|
27
|
+
output_type: Structured output — a dataclass, a Pydantic model, or a JSON-schema dict.
|
|
28
|
+
max_turns: Upper bound on ReAct iterations (loop-termination guarantee).
|
|
29
|
+
context_budget: If set, assemble the history to this token budget via ``contextkit``.
|
|
30
|
+
temperature / max_tokens: Optional generation controls.
|
|
31
|
+
extra: Extra provider request kwargs merged into every model call — the passthrough for
|
|
32
|
+
things the SDK doesn't model first-class (``tool_choice``, ``reasoning_effort``,
|
|
33
|
+
``top_p``, ``stop``, ``seed``, ``response_format``, ``extra_body``, …). Merged at the
|
|
34
|
+
top level of the request, matching the OpenAI/Anthropic/Ollama/HF/Azure shape.
|
|
35
|
+
retriever: Optional ``query -> list[str]`` callable (e.g. ``VectorIndex.as_retriever()``).
|
|
36
|
+
When set, context is retrieved for the run's query and injected as a system message
|
|
37
|
+
before the call — "always-on" RAG. (For agentic retrieval, expose it as a tool instead.)
|
|
38
|
+
handoffs: Names of peer agents this agent may transfer to (Phase 2).
|
|
39
|
+
api_key / base_url / client: Optional client config, or an explicit instrumented client.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
name: str
|
|
43
|
+
model: str
|
|
44
|
+
instructions: str = ""
|
|
45
|
+
tools: list[Any] = field(default_factory=list)
|
|
46
|
+
provider: str | None = None
|
|
47
|
+
output_type: Any = None
|
|
48
|
+
max_turns: int = 8
|
|
49
|
+
context_budget: int | None = None
|
|
50
|
+
temperature: float | None = None
|
|
51
|
+
max_tokens: int | None = None
|
|
52
|
+
extra: dict[str, Any] = field(default_factory=dict)
|
|
53
|
+
retriever: Any = None # Callable[[str], list[str]] — injected as context when set (RAG)
|
|
54
|
+
handoffs: list[Any] = field(default_factory=list)
|
|
55
|
+
max_usd: float | None = None # per-agent spend cap (enforced by the orchestrator, Phase 2)
|
|
56
|
+
api_key: str | None = None
|
|
57
|
+
base_url: str | None = None
|
|
58
|
+
client: Any = None
|
|
59
|
+
|
|
60
|
+
_tools: list[Tool] = field(default_factory=list, init=False, repr=False)
|
|
61
|
+
_tool_map: dict[str, Tool] = field(default_factory=dict, init=False, repr=False)
|
|
62
|
+
|
|
63
|
+
def __post_init__(self) -> None:
|
|
64
|
+
self._tools = [as_tool(t) for t in self.tools]
|
|
65
|
+
self._tool_map = {t.name: t for t in self._tools}
|
|
66
|
+
|
|
67
|
+
@property
|
|
68
|
+
def provider_impl(self) -> Provider:
|
|
69
|
+
"""The resolved provider implementation for this agent's model."""
|
|
70
|
+
return resolve_provider(self.model, self.provider)
|
|
71
|
+
|
|
72
|
+
@property
|
|
73
|
+
def toolset(self) -> list[Tool]:
|
|
74
|
+
"""The agent's tools as ``Tool`` objects."""
|
|
75
|
+
return self._tools
|
|
76
|
+
|
|
77
|
+
def get_tool(self, name: str) -> Tool | None:
|
|
78
|
+
"""Look up a tool by name."""
|
|
79
|
+
return self._tool_map.get(name)
|
|
80
|
+
|
|
81
|
+
def config(self) -> dict[str, Any]:
|
|
82
|
+
"""Client construction config (api_key / base_url)."""
|
|
83
|
+
cfg: dict[str, Any] = {}
|
|
84
|
+
if self.api_key:
|
|
85
|
+
cfg["api_key"] = self.api_key
|
|
86
|
+
if self.base_url:
|
|
87
|
+
cfg["base_url"] = self.base_url
|
|
88
|
+
return cfg
|
|
89
|
+
|
|
90
|
+
def add_tool(self, tool: Any) -> None:
|
|
91
|
+
"""Register an extra tool at runtime (used by MCP/handoff wiring in later phases)."""
|
|
92
|
+
t = as_tool(tool)
|
|
93
|
+
self._tools.append(t)
|
|
94
|
+
self._tool_map[t.name] = t
|
cendor/sdk/checkpoint.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Checkpointed / resumable runs — local-first (plan §7 Phase 4).
|
|
2
|
+
|
|
3
|
+
A ``Checkpointer`` persists a run's conversation to a local JSON file after each turn, so a long
|
|
4
|
+
agent can resume after a crash or restart without re-doing completed work (already-run tools are in
|
|
5
|
+
the saved messages and are not re-executed). Local by default; no server.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Checkpointer:
    """Persist and restore run state to a local JSON file."""

    def __init__(self, path: str) -> None:
        self.path = Path(path)

    def load(self) -> dict[str, Any] | None:
        """The saved state (``{run_id, messages, done, output}``), or ``None`` if absent/bad.

        "Bad" covers an unreadable file, bytes that are not valid UTF-8, text that is not
        valid JSON, and JSON whose top-level value is not an object.
        """
        try:
            # A missing file raises FileNotFoundError (an OSError), so no separate exists()
            # check is needed. ValueError covers both JSONDecodeError and UnicodeDecodeError.
            state = json.loads(self.path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return None
        # Callers use ``.get`` on the result; anything but a dict is a corrupt checkpoint.
        return state if isinstance(state, dict) else None

    def save(self, state: dict[str, Any]) -> None:
        """Atomically write the run state (temp file + replace).

        Parent directories are created as needed. Values ``json`` cannot serialise are
        written as ``str(value)``.
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self.path.with_suffix(self.path.suffix + ".tmp")
        tmp.write_text(json.dumps(state, indent=2, default=str), encoding="utf-8")
        tmp.replace(self.path)

    def resumable_messages(self) -> list[dict] | None:
        """Saved messages to resume from, or ``None`` if there's no unfinished checkpoint."""
        state = self.load()
        if state and not state.get("done"):
            return list(state.get("messages") or [])
        return None

    def clear(self) -> None:
        """Delete the checkpoint file (e.g. after a successful, finished run)."""
        try:
            self.path.unlink()
        except OSError:
            # Deliberately best-effort: a missing or undeletable file is not an error here.
            pass
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _as_checkpointer(value: Any) -> Checkpointer | None:
|
|
53
|
+
if value is None or isinstance(value, Checkpointer):
|
|
54
|
+
return value
|
|
55
|
+
return Checkpointer(str(value))
|
cendor/sdk/embeddings.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Embeddings — governed, captured embedding calls (the RAG plumbing from the plan's §0).
|
|
2
|
+
|
|
3
|
+
``embed(model, inputs)`` calls the provider's embeddings endpoint, returns the vectors, and emits a
|
|
4
|
+
governed ``LLMCall`` on ``cendor-core``'s bus — so the call's tokens + cost land in the *same* audit
|
|
5
|
+
/ attribution / cost tree as chat calls (RAG embeddings were invisible beneath frameworks; owning
|
|
6
|
+
the call makes them first-class). Correlate them by wrapping in ``trace(...)`` like any run.
|
|
7
|
+
|
|
8
|
+
Note: this *captures* (records) the embedding call. Pre-call USD *blocking* of embeddings would need
|
|
9
|
+
core-level embeddings interception; use a ``tokens=`` budget or register a price to bound spend.
|
|
10
|
+
|
|
11
|
+
OpenAI-family providers (``openai`` / ``azure`` / ``foundry_local``) share ``embeddings.create``;
|
|
12
|
+
for others, call the provider's embedding client directly.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import time
|
|
18
|
+
import uuid
|
|
19
|
+
from datetime import UTC, datetime
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from cendor.core import bus, current_trace_id, prices
|
|
23
|
+
from cendor.core.types import LLMCall, Usage
|
|
24
|
+
|
|
25
|
+
from .providers import OpenAIChatProvider, get_provider, resolve_provider
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _texts(inputs: str | list[str]) -> list[str]:
|
|
29
|
+
return [inputs] if isinstance(inputs, str) else list(inputs)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _config(api_key: str | None, base_url: str | None) -> dict[str, Any]:
|
|
33
|
+
cfg: dict[str, Any] = {}
|
|
34
|
+
if api_key:
|
|
35
|
+
cfg["api_key"] = api_key
|
|
36
|
+
if base_url:
|
|
37
|
+
cfg["base_url"] = base_url
|
|
38
|
+
return cfg
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _resolve(model: str, provider: str | None) -> OpenAIChatProvider:
    """Pick the provider used for an embeddings call.

    Resolution goes through ``resolve_provider``; when that raises ``ValueError`` the
    ``"openai"`` provider is used instead.

    Raises:
        NotImplementedError: If the chosen provider is not an ``OpenAIChatProvider``.
    """
    try:
        chosen = resolve_provider(model, provider)
    except ValueError:
        # embedding ids rarely prefix-infer; default to OpenAI shape
        chosen = get_provider("openai")

    if isinstance(chosen, OpenAIChatProvider):
        return chosen
    raise NotImplementedError(
        f"embed() supports OpenAI-family providers (openai/azure/foundry_local); got "
        f"{chosen.name!r}. Call that provider's embeddings client directly."
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _vectors(resp: Any) -> list[list[float]]:
|
|
55
|
+
data = getattr(resp, "data", None)
|
|
56
|
+
if data is None and isinstance(resp, dict):
|
|
57
|
+
data = resp.get("data", [])
|
|
58
|
+
out: list[list[float]] = []
|
|
59
|
+
for d in data or []:
|
|
60
|
+
emb = getattr(d, "embedding", None)
|
|
61
|
+
if emb is None and isinstance(d, dict):
|
|
62
|
+
emb = d.get("embedding")
|
|
63
|
+
out.append(list(emb) if emb is not None else [])
|
|
64
|
+
return out
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _emit(model: str, provider: str, resp: Any, start: float) -> None:
    """Publish an ``LLMCall`` describing a finished embeddings request on the bus.

    The input-token count comes from ``resp.usage`` (``prompt_tokens``, falling back to
    ``total_tokens``) or, for a dict response, from ``resp["usage"]["prompt_tokens"]``;
    it is 0 when neither is available. Latency is measured from ``start`` (a
    ``time.perf_counter()`` reading). The cost comes from ``prices.estimate``; when that
    raises ``KeyError`` the cost is left as ``None``.
    """
    usage = getattr(resp, "usage", None)
    if usage is not None:
        tokens = getattr(usage, "prompt_tokens", None) or getattr(usage, "total_tokens", 0) or 0
    elif isinstance(resp, dict):
        tokens = (resp.get("usage") or {}).get("prompt_tokens", 0) or 0
    else:
        tokens = 0

    record = LLMCall(
        id=uuid.uuid4().hex,
        provider=provider,
        model=model,
        messages=[],
        trace_id=current_trace_id(),
        ts=datetime.now(UTC),
    )
    record.latency_ms = (time.perf_counter() - start) * 1000.0

    count = int(tokens)
    record.usage = Usage(input_tokens=count, output_tokens=0)
    try:
        record.cost = prices.estimate(model, count, 0)
        record.metadata["cost_estimated"] = True
    except KeyError:
        record.cost = None
    # Tag the record so it can be told apart from chat calls.
    record.metadata["embedding"] = True
    bus.emit(record)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def embed(
    model: str,
    inputs: str | list[str],
    *,
    provider: str | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
    dimensions: int | None = None,
) -> list[list[float]]:
    """Embed one text or a list of texts synchronously and return the resulting vectors.

    Calls ``embeddings.create`` on the resolved provider's client, emits an ``LLMCall`` for
    the request on the bus, and returns the vectors extracted from the response.
    ``dimensions`` is sent only when it is not ``None``.
    """
    backend = _resolve(model, provider)
    sdk_client = backend.client(async_=False, config=_config(api_key, base_url))

    optional = {} if dimensions is None else {"dimensions": dimensions}
    request: dict[str, Any] = {"model": model, "input": _texts(inputs), **optional}

    started = time.perf_counter()
    response = sdk_client.embeddings.create(**request)
    _emit(model, backend.name, response, started)
    return _vectors(response)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
async def aembed(
    model: str,
    inputs: str | list[str],
    *,
    provider: str | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
    dimensions: int | None = None,
) -> list[list[float]]:
    """Async counterpart of :func:`embed`: same request, awaited on the provider's async client."""
    backend = _resolve(model, provider)
    sdk_client = backend.client(async_=True, config=_config(api_key, base_url))

    optional = {} if dimensions is None else {"dimensions": dimensions}
    request: dict[str, Any] = {"model": model, "input": _texts(inputs), **optional}

    started = time.perf_counter()
    response = await sdk_client.embeddings.create(**request)
    _emit(model, backend.name, response, started)
    return _vectors(response)
|