agentkernel-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. agentkernel/__init__.py +7 -0
  2. agentkernel/__main__.py +5 -0
  3. agentkernel/agent.py +311 -0
  4. agentkernel/approval/__init__.py +23 -0
  5. agentkernel/approval/base.py +34 -0
  6. agentkernel/approval/cli.py +129 -0
  7. agentkernel/approval/policy.py +58 -0
  8. agentkernel/approval/risk.py +91 -0
  9. agentkernel/approval/sandbox.py +201 -0
  10. agentkernel/budget.py +64 -0
  11. agentkernel/checkpoint.py +50 -0
  12. agentkernel/cli.py +1482 -0
  13. agentkernel/config.py +224 -0
  14. agentkernel/context/__init__.py +17 -0
  15. agentkernel/context/manager.py +216 -0
  16. agentkernel/context/truncate.py +35 -0
  17. agentkernel/cron.py +146 -0
  18. agentkernel/curation.py +183 -0
  19. agentkernel/doctor.py +141 -0
  20. agentkernel/embeddings.py +132 -0
  21. agentkernel/evaluation.py +186 -0
  22. agentkernel/improvement.py +133 -0
  23. agentkernel/insights.py +141 -0
  24. agentkernel/kanban.py +114 -0
  25. agentkernel/knowledge.py +383 -0
  26. agentkernel/loops.py +145 -0
  27. agentkernel/mcp/__init__.py +23 -0
  28. agentkernel/mcp/client.py +181 -0
  29. agentkernel/mcp/config.py +59 -0
  30. agentkernel/mcp/tools.py +96 -0
  31. agentkernel/memory.py +1208 -0
  32. agentkernel/paths.py +73 -0
  33. agentkernel/plugins.py +76 -0
  34. agentkernel/profiles.py +70 -0
  35. agentkernel/progress.py +89 -0
  36. agentkernel/providers/__init__.py +35 -0
  37. agentkernel/providers/_http.py +157 -0
  38. agentkernel/providers/anthropic.py +282 -0
  39. agentkernel/providers/base.py +38 -0
  40. agentkernel/providers/credentials.py +65 -0
  41. agentkernel/providers/local.py +34 -0
  42. agentkernel/providers/openai.py +260 -0
  43. agentkernel/redaction.py +77 -0
  44. agentkernel/semantic_index.py +139 -0
  45. agentkernel/semantic_memory.py +253 -0
  46. agentkernel/skills.py +268 -0
  47. agentkernel/subagent.py +161 -0
  48. agentkernel/telemetry.py +199 -0
  49. agentkernel/templates/README.md +35 -0
  50. agentkernel/templates/SKILL.md +28 -0
  51. agentkernel/templates/eval-suite.toml +22 -0
  52. agentkernel/templates/loop.toml +29 -0
  53. agentkernel/templates/mcp-servers.toml +22 -0
  54. agentkernel/templates/profile.toml +29 -0
  55. agentkernel/templates/tool_module.py +64 -0
  56. agentkernel/tools/__init__.py +5 -0
  57. agentkernel/tools/base.py +100 -0
  58. agentkernel/tools/builtin/__init__.py +37 -0
  59. agentkernel/tools/builtin/checkpoint_tool.py +33 -0
  60. agentkernel/tools/builtin/clarify.py +60 -0
  61. agentkernel/tools/builtin/files.py +221 -0
  62. agentkernel/tools/builtin/kanban_tool.py +100 -0
  63. agentkernel/tools/builtin/search.py +225 -0
  64. agentkernel/tools/builtin/shell.py +67 -0
  65. agentkernel/tools/builtin/todo.py +106 -0
  66. agentkernel/tui/__init__.py +50 -0
  67. agentkernel/tui/app.py +594 -0
  68. agentkernel/types.py +127 -0
  69. agentkernel/worktree.py +64 -0
  70. agentkernel_cli-0.1.0.dist-info/METADATA +426 -0
  71. agentkernel_cli-0.1.0.dist-info/RECORD +74 -0
  72. agentkernel_cli-0.1.0.dist-info/WHEEL +4 -0
  73. agentkernel_cli-0.1.0.dist-info/entry_points.txt +2 -0
  74. agentkernel_cli-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,282 @@
1
+ """Anthropic Messages API adapter (design §5, §8.1, §9.3).
2
+
3
+ Wire shape: assistant ``tool_use`` content blocks; all tool results for a turn
4
+ go in a single ``user`` message of ``tool_result`` blocks keyed by
5
+ ``tool_use_id``. The stable prefix (system + tool defs) carries
6
+ ``cache_control: ephemeral`` on its final element so Anthropic serves it from
7
+ cache. No Anthropic dict escapes this module except inside ``CompletionResponse.raw``.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ from collections.abc import Callable, Iterable
14
+ from typing import Any
15
+
16
+ from agentkernel.providers._http import ProviderError, post_json_pooled, stream_sse
17
+ from agentkernel.providers.credentials import CredentialPool
18
+ from agentkernel.tools import ToolSpec
19
+ from agentkernel.types import CompletionResponse, Message, ToolCall, Usage
20
+
21
# Messages endpoint and the protocol version sent in the ``anthropic-version`` header.
API_URL = "https://api.anthropic.com/v1/messages"
API_VERSION = "2023-06-01"

# Context window used when the caller does not pass ``context_window``.
DEFAULT_CONTEXT_WINDOW = 200_000

# Cache marker attached to the last tool definition and to the system block.
_EPHEMERAL = {"type": "ephemeral"}
25
+
26
+
27
+ # --- translation: canonical -> wire (pure, offline-testable) ---------------
28
+
29
+
30
def render_tools(tools: list[ToolSpec]) -> list[dict[str, Any]]:
    """Translate tool specs into Anthropic tool definitions.

    The input order is kept as given (never sorted) so the rendered prefix
    stays byte-stable. The last definition is tagged with ``cache_control`` to
    mark the cacheable prefix boundary (design §9.3). An empty ``tools`` list
    yields an empty list with no marker.
    """
    rendered: list[dict[str, Any]] = []
    for spec in tools:
        rendered.append(
            {
                "name": spec.name,
                "description": spec.description,
                "input_schema": spec.parameters,
            }
        )
    if not rendered:
        return rendered
    rendered[-1]["cache_control"] = _EPHEMERAL
    return rendered
40
+
41
+
42
# Extended-thinking token budgets keyed by reasoning level.
_THINKING_BUDGET = {"low": 1024, "medium": 4096, "high": 8192}


def thinking_config(reasoning: str | None, max_tokens: int) -> dict[str, Any] | None:
    """Build the extended-thinking request block for a reasoning level.

    Returns None when ``reasoning`` is empty, or when fewer than 1024 tokens
    would remain for thinking after reserving 1024 tokens of ``max_tokens``
    for the reply (thinking is skipped rather than erroring). Otherwise the
    budget is the level's table entry (4096 for an unknown level), capped at
    ``max_tokens - 1024``.
    """
    if not reasoning:
        return None
    available = max_tokens - 1024
    if available >= 1024:
        wanted = _THINKING_BUDGET.get(reasoning, 4096)
        capped = wanted if wanted < available else available
        return {"type": "enabled", "budget_tokens": capped}
    return None
59
+
60
+
61
def render_system(system: str | None) -> list[dict[str, Any]] | None:
    """Wrap the system prompt in a single cache-marked text block.

    Returns None for a missing or empty prompt so the caller can leave the
    ``system`` field out of the request entirely.
    """
    if system:
        block: dict[str, Any] = {"type": "text", "text": system}
        block["cache_control"] = _EPHEMERAL
        return [block]
    return None
66
+
67
+
68
def render_messages(messages: list[Message]) -> list[dict[str, Any]]:
    """Translate canonical messages into Anthropic ``messages`` entries.

    * ``user`` messages pass their content through unchanged.
    * ``assistant`` messages without tool calls pass their content through;
      with tool calls they become a block list: an optional leading ``text``
      block (only when the content is non-empty) followed by one ``tool_use``
      block per call.
    * ``tool`` messages become one ``user`` message holding a ``tool_result``
      block per result (design §8.1).
    * Any other role (e.g. ``system``) is skipped; the system prompt travels in
      the request's ``system`` parameter instead.

    NOTE(review): thinking blocks are not representable here. Confirm the API
    accepts multi-turn tool use when extended thinking is enabled.
    """
    wire: list[dict[str, Any]] = []
    for msg in messages:
        role = msg.role
        if role == "user":
            wire.append({"role": "user", "content": msg.content})
            continue
        if role == "assistant":
            if not msg.tool_calls:
                wire.append({"role": "assistant", "content": msg.content})
                continue
            parts: list[dict[str, Any]] = (
                [{"type": "text", "text": msg.content}] if msg.content else []
            )
            for call in msg.tool_calls:
                parts.append(
                    {
                        "type": "tool_use",
                        "id": call.id,
                        "name": call.name,
                        "input": call.arguments,
                    }
                )
            wire.append({"role": "assistant", "content": parts})
            continue
        if role == "tool":
            results = [
                {
                    "type": "tool_result",
                    "tool_use_id": res.call_id,
                    "content": res.content,
                    "is_error": res.is_error,
                }
                for res in msg.tool_results
            ]
            wire.append({"role": "user", "content": results})
    return wire
108
+
109
+
110
+ # --- translation: wire -> canonical ----------------------------------------
111
+
112
+
113
def parse_response(data: dict[str, Any]) -> CompletionResponse:
    """Translate an Anthropic Messages response into a ``CompletionResponse``.

    ``text`` blocks are concatenated into the assistant content and
    ``tool_use`` blocks become ``ToolCall`` objects; every other block type
    (e.g. thinking) is ignored. A ``tool_use`` input that is not a JSON object
    is replaced by ``{}``.

    Fields that are present but JSON ``null`` (``usage``, its counters,
    ``content``, a block's ``text``, ``stop_reason``) are treated the same as
    missing fields, so the canonical types never receive ``None`` where a
    number or string is expected.

    Raises:
        KeyError: if a ``tool_use`` block lacks ``id`` or ``name``.
    """
    text_parts: list[str] = []
    tool_calls: list[ToolCall] = []
    for block in data.get("content") or []:
        btype = block.get("type")
        if btype == "text":
            text_parts.append(block.get("text") or "")
        elif btype == "tool_use":
            args = block.get("input")
            tool_calls.append(
                ToolCall(
                    id=block["id"],
                    name=block["name"],
                    arguments=args if isinstance(args, dict) else {},
                )
            )
    # ``dict.get(key, 0)`` still returns None for an explicit null, hence ``or 0``.
    u = data.get("usage") or {}
    usage = Usage(
        input_tokens=u.get("input_tokens") or 0,
        output_tokens=u.get("output_tokens") or 0,
        cache_read_tokens=u.get("cache_read_input_tokens") or 0,
        cache_write_tokens=u.get("cache_creation_input_tokens") or 0,
    )
    return CompletionResponse(
        message=Message(
            role="assistant", content="".join(text_parts), tool_calls=tool_calls
        ),
        usage=usage,
        stop_reason=data.get("stop_reason") or "",
        raw=data,
    )
144
+
145
+
146
def accumulate_stream(
    events: Iterable[dict[str, Any]],
    on_text: Callable[[str], None] | None = None,
) -> dict[str, Any]:
    """Fold Anthropic SSE events into a single non-streaming response dict.

    Text deltas are appended to their block and forwarded to ``on_text``.
    Thinking deltas are forwarded to ``on_text`` only (they are not part of
    the answer). ``tool_use`` blocks collect their ``partial_json`` fragments
    and are parsed once the stream ends; unparseable JSON becomes ``{}``.
    The result has the ``content`` / ``stop_reason`` / ``usage`` keys that
    ``parse_response`` reads.

    Null values are never allowed to overwrite data already gathered: a null
    ``stop_reason`` keeps the previous one and null usage counters are skipped.

    Raises:
        ProviderError: when the stream carries an ``error`` event, so a
            truncated reply is not mistaken for a complete one.
    """
    blocks: dict[int, dict[str, Any]] = {}
    usage: dict[str, Any] = {}
    stop_reason = ""

    def merge_usage(part: Any) -> None:
        # Later events may repeat a counter as null; keep the earlier number.
        if isinstance(part, dict):
            usage.update({k: v for k, v in part.items() if v is not None})

    for event in events:
        etype = event.get("type")
        if etype == "message_start":
            merge_usage((event.get("message") or {}).get("usage"))
        elif etype == "content_block_start":
            cb = event.get("content_block") or {}
            blocks[event.get("index", 0)] = {
                "type": cb.get("type"),
                "text": cb.get("text") or "",
                "id": cb.get("id"),
                "name": cb.get("name"),
                "json": "",
            }
        elif etype == "content_block_delta":
            # A delta without a preceding start is treated as a text block.
            block = blocks.setdefault(
                event.get("index", 0), {"type": "text", "text": "", "json": ""}
            )
            delta = event.get("delta") or {}
            dtype = delta.get("type")
            if dtype == "text_delta":
                text = delta.get("text") or ""
                block["text"] += text
                if on_text is not None and text:
                    on_text(text)
            elif dtype == "input_json_delta":
                block["json"] += delta.get("partial_json") or ""
            elif dtype == "thinking_delta":
                # Extended thinking: shown live but not part of the answer.
                thought = delta.get("thinking") or ""
                if on_text is not None and thought:
                    on_text(thought)
        elif etype == "message_delta":
            stop_reason = (event.get("delta") or {}).get("stop_reason") or stop_reason
            merge_usage(event.get("usage"))
        elif etype == "error":
            err = event.get("error")
            detail = err if isinstance(err, dict) else {}
            raise ProviderError(
                f"Anthropic stream error ({detail.get('type', 'unknown')}): "
                f"{detail.get('message', '')}"
            )

    content: list[dict[str, Any]] = []
    for _index, block in sorted(blocks.items()):
        if block.get("type") == "text":
            content.append({"type": "text", "text": block["text"]})
        elif block.get("type") == "tool_use":
            try:
                parsed = json.loads(block["json"] or "{}")
            except json.JSONDecodeError:
                parsed = {}
            content.append(
                {"type": "tool_use", "id": block["id"], "name": block["name"], "input": parsed}
            )
    return {"content": content, "stop_reason": stop_reason, "usage": usage}
203
+
204
+
205
class AnthropicProvider:
    """Provider adapter for the Anthropic Messages API.

    Holds the model name, its context window and a ``CredentialPool`` of API
    keys used to authenticate requests.
    """

    name = "anthropic"

    def __init__(
        self,
        model: str,
        *,
        api_key: str | None = None,
        context_window: int = DEFAULT_CONTEXT_WINDOW,
    ) -> None:
        """Bind the adapter to ``model``.

        An explicit ``api_key`` becomes a pool of one; otherwise the pool is
        read from ``ANTHROPIC_API_KEY`` via ``CredentialPool.from_env``.
        """
        self.model = model
        self.context_window = context_window
        self._pool = (
            CredentialPool([api_key])
            if api_key
            else CredentialPool.from_env("ANTHROPIC_API_KEY")
        )

    def with_model(self, model: str) -> AnthropicProvider:
        """Return a copy bound to ``model`` that shares this provider's credentials.

        The clone is created without running ``__init__`` and shares the same
        credential pool object. It is built from ``type(self)`` and the full
        instance ``__dict__`` so a subclass keeps its type and any attributes
        it added.
        """
        cls = type(self)
        clone = cls.__new__(cls)
        clone.__dict__.update(self.__dict__)
        clone.model = model
        return clone

    def complete(
        self,
        messages: list[Message],
        tools: list[ToolSpec],
        *,
        max_tokens: int,
        temperature: float = 1.0,
        system: str | None = None,
        reasoning: str | None = None,
        on_text: Callable[[str], None] | None = None,
    ) -> CompletionResponse:
        """Complete one turn against the Messages API.

        When ``on_text`` is given the request is first attempted as a stream
        using the pool's current key; a ``ProviderError`` from that attempt is
        swallowed and the same payload is re-sent through the non-streaming,
        pooled path.

        Raises:
            ProviderError: when the credential pool holds no key. Errors from
                the non-streaming request propagate from ``post_json_pooled``.
        """
        if self._pool.current() is None:
            raise ProviderError("ANTHROPIC_API_KEY is not set in the environment")
        thinking = thinking_config(reasoning, max_tokens)
        payload: dict[str, Any] = {
            "model": self.model,
            "max_tokens": max_tokens,
            # Extended thinking requires temperature 1; otherwise honor the caller.
            "temperature": 1.0 if thinking else temperature,
            "messages": render_messages(messages),
        }
        if thinking is not None:
            payload["thinking"] = thinking
        if tools:
            payload["tools"] = render_tools(tools)
        sys_blocks = render_system(system)
        if sys_blocks is not None:
            payload["system"] = sys_blocks

        def header_for_key(key: str | None) -> dict[str, str]:
            # Built per key so the pooled request path can retry with another key.
            return {
                "x-api-key": key or "",
                "anthropic-version": API_VERSION,
                "content-type": "application/json",
            }

        if on_text is not None:
            # Best-effort streaming with a non-streaming fallback on any fault.
            # NOTE(review): text already handed to ``on_text`` before a fault is
            # not retracted; the fallback result is what gets returned.
            try:
                events = stream_sse(
                    API_URL,
                    headers=header_for_key(self._pool.current()),
                    payload={**payload, "stream": True},
                )
                return parse_response(accumulate_stream(events, on_text))
            except ProviderError:
                pass

        return parse_response(
            post_json_pooled(
                API_URL, header_for_key=header_for_key, payload=payload, pool=self._pool
            )
        )
@@ -0,0 +1,38 @@
1
+ """The Provider protocol (design §5.1).
2
+
3
+ An adapter translates the canonical message/tool types to a provider's wire
4
+ format, calls the API, and translates the reply back into one
5
+ ``CompletionResponse``. No provider-specific object escapes an adapter except
6
+ inside ``CompletionResponse.raw``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Callable
12
+ from typing import TYPE_CHECKING, Protocol
13
+
14
+ from agentkernel.types import CompletionResponse, Message
15
+
16
+ if TYPE_CHECKING:
17
+ from agentkernel.tools import ToolSpec
18
+
19
+
20
class Provider(Protocol):
    """Structural interface every provider adapter satisfies (design §5.1)."""

    # Short identifier of the adapter.
    name: str
    # Total token capacity of the selected model.
    context_window: int

    def complete(
        self,
        messages: list[Message],
        tools: list[ToolSpec],
        *,
        max_tokens: int,
        temperature: float = 1.0,
        system: str | None = None,
        reasoning: str | None = None,
        on_text: Callable[[str], None] | None = None,
    ) -> CompletionResponse:
        """Run a single completion turn and return its canonical response.

        Passing ``on_text`` asks the adapter to stream and to invoke the
        callback with each text delta. The returned ``CompletionResponse`` is
        the same as the non-streaming result, so the loop contract does not
        change.
        """
        ...
@@ -0,0 +1,65 @@
1
+ """Credential pools for providers (design §18.5).
2
+
3
+ A provider can be given several API keys and rotate to the next one when the
4
+ current key is rate-limited or exhausted. Keys still come only from the
5
+ environment (design §11): a pool is read from one env var that may hold a
6
+ comma-separated list, plus numbered siblings ``<VAR>_1``, ``<VAR>_2``, …
7
+
8
+ A single key is just a pool of one, so existing single-key setups are unchanged.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import os
14
+
15
+
16
class CredentialPool:
    """Ordered, de-duplicated API keys with a cursor that can rotate."""

    def __init__(self, keys: list[str]) -> None:
        """Store ``keys`` stripped of whitespace, without blanks or duplicates."""
        stripped = ((raw or "").strip() for raw in keys)
        # dict.fromkeys drops duplicates while keeping first-seen order.
        self._keys: list[str] = [key for key in dict.fromkeys(stripped) if key]
        self._idx = 0
        self._exhausted: set[int] = set()

    @classmethod
    def from_env(cls, env_var: str, *, env: dict[str, str] | None = None) -> CredentialPool:
        """Build a pool from ``env_var`` (comma-separated) plus ``env_var_1..N``.

        Numbered siblings are read in order starting at 1, and the scan stops
        at the first one that is unset or empty. ``env`` defaults to
        ``os.environ``.
        """
        source = os.environ if env is None else env
        collected: list[str] = [
            piece.strip() for piece in (source.get(env_var) or "").split(",")
        ]
        suffix = 1
        sibling = source.get(f"{env_var}_{suffix}")
        while sibling:
            collected.append(sibling)
            suffix += 1
            sibling = source.get(f"{env_var}_{suffix}")
        return cls(collected)

    def __len__(self) -> int:
        return len(self._keys)

    def current(self) -> str | None:
        """Return the key under the cursor, or None for an empty pool."""
        if not self._keys:
            return None
        return self._keys[self._idx]

    def mark_exhausted(self) -> None:
        """Record the key under the cursor as exhausted (rate-limited) for this session."""
        if not self._keys:
            return
        self._exhausted.add(self._idx)

    def rotate(self) -> bool:
        """Move the cursor to the next key not marked exhausted.

        Every other position is tried once, in order, wrapping around; the
        current position itself is never a candidate. Returns False (cursor
        unchanged) when no such key exists.
        """
        total = len(self._keys)
        start = self._idx
        for offset in range(1, total):
            candidate = (start + offset) % total
            if candidate in self._exhausted:
                continue
            self._idx = candidate
            return True
        return False
@@ -0,0 +1,34 @@
1
+ """Local / OpenAI-compatible endpoint adapter (design §5.2).
2
+
3
+ Same wire shape as OpenAI (Ollama, vLLM, LM Studio, …) with a configurable
4
+ ``base_url`` and optional auth. No prompt caching is assumed, so the turn-2
5
+ cache check in M1 does not apply to this provider.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from agentkernel.providers.openai import OpenAIProvider
11
+
12
# Ollama's default endpoint.
DEFAULT_BASE_URL = "http://localhost:11434/v1"
# Context window used when the caller does not pass ``context_window``.
DEFAULT_CONTEXT_WINDOW = 8192
14
+
15
+
16
class LocalProvider(OpenAIProvider):
    """OpenAI-compatible adapter preconfigured for a local endpoint.

    It differs from the base adapter only in the arguments it passes up: the
    provider name is ``"local"``, no API key is required, keys (if any) are
    read from ``LOCAL_API_KEY``, and ``reasoning_effort`` is never sent.
    """

    def __init__(
        self,
        model: str,
        *,
        base_url: str = DEFAULT_BASE_URL,
        api_key: str | None = None,
        context_window: int = DEFAULT_CONTEXT_WINDOW,
    ) -> None:
        super().__init__(
            model,
            name="local",
            base_url=base_url,
            context_window=context_window,
            api_key=api_key,
            env_key="LOCAL_API_KEY",
            # Local endpoints commonly need no key.
            require_key=False,
            # Arbitrary local models may reject reasoning_effort.
            send_reasoning=False,
        )
@@ -0,0 +1,260 @@
1
+ """OpenAI Chat Completions adapter (design §5, §8.1).
2
+
3
+ Wire shape: assistant ``tool_calls`` array (arguments are JSON *strings*); each
4
+ tool result is its own ``role: "tool"`` message keyed by ``tool_call_id``.
5
+ OpenAI caches the prefix automatically, so there are no explicit cache markers —
6
+ ``cache_read_tokens`` is read back from ``usage.prompt_tokens_details``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from collections.abc import Callable, Iterable
13
+ from typing import Any
14
+
15
+ from agentkernel.providers._http import ProviderError, post_json_pooled, stream_sse
16
+ from agentkernel.providers.credentials import CredentialPool
17
+ from agentkernel.tools import ToolSpec
18
+ from agentkernel.types import CompletionResponse, Message, ToolCall, Usage
19
+
20
DEFAULT_BASE_URL = "https://api.openai.com/v1"
DEFAULT_CONTEXT_WINDOW = 128_000

# OpenAI ``finish_reason`` -> canonical stop reason; unmapped values pass through.
_STOP_REASONS = {
    "stop": "end_turn",
    "tool_calls": "tool_use",
    "length": "max_tokens",
}
24
+
25
+
26
+ # --- translation: canonical -> wire (pure, offline-testable) ---------------
27
+
28
+
29
def render_tools(tools: list[ToolSpec]) -> list[dict[str, Any]]:
    """Translate tool specs into OpenAI ``function`` tool definitions.

    One entry is produced per spec, in input order.
    """
    rendered: list[dict[str, Any]] = []
    for spec in tools:
        function = {
            "name": spec.name,
            "description": spec.description,
            "parameters": spec.parameters,
        }
        rendered.append({"type": "function", "function": function})
    return rendered
41
+
42
+
43
def render_messages(
    messages: list[Message], system: str | None = None
) -> list[dict[str, Any]]:
    """Translate canonical messages into OpenAI chat ``messages`` entries.

    A non-empty ``system`` prompt becomes the leading ``system`` message.
    ``user`` content passes through. ``assistant`` content is sent as ``None``
    when empty, and tool calls are attached with their arguments serialized to
    a JSON string. Each tool result becomes its own ``role: "tool"`` message
    keyed by ``tool_call_id`` (design §8.1). Messages with any other role
    (e.g. ``system``) are skipped.
    """

    def as_wire_call(call: Any) -> dict[str, Any]:
        # OpenAI expects tool-call arguments as a JSON string, not an object.
        return {
            "id": call.id,
            "type": "function",
            "function": {"name": call.name, "arguments": json.dumps(call.arguments)},
        }

    wire: list[dict[str, Any]] = (
        [{"role": "system", "content": system}] if system else []
    )
    for msg in messages:
        role = msg.role
        if role == "user":
            wire.append({"role": "user", "content": msg.content})
        elif role == "assistant":
            entry: dict[str, Any] = {"role": "assistant", "content": msg.content or None}
            if msg.tool_calls:
                entry["tool_calls"] = [as_wire_call(call) for call in msg.tool_calls]
            wire.append(entry)
        elif role == "tool":
            for res in msg.tool_results:
                wire.append(
                    {"role": "tool", "tool_call_id": res.call_id, "content": res.content}
                )
    return wire
75
+
76
+
77
+ # --- translation: wire -> canonical ----------------------------------------
78
+
79
+
80
def parse_response(data: dict[str, Any]) -> CompletionResponse:
    """Translate an OpenAI chat completion into a ``CompletionResponse``.

    Only the first choice is read. Tool-call ``arguments`` are decoded from
    their JSON string; an object is also accepted as-is. Anything that is
    malformed or does not decode to a JSON object becomes ``{}``, so
    ``ToolCall.arguments`` is always a dict. ``finish_reason`` is mapped
    through ``_STOP_REASONS``, with unmapped values passed through.

    Fields that are present but JSON ``null`` (``usage``, its counters,
    ``message``, ``finish_reason``) are treated like missing fields.

    Raises:
        KeyError, IndexError: if ``choices`` is missing or empty, or a tool
            call has no ``id``.
    """
    choice = data["choices"][0]
    msg = choice.get("message") or {}
    tool_calls: list[ToolCall] = []
    for tc in msg.get("tool_calls") or []:
        fn = tc.get("function") or {}
        raw_args = fn.get("arguments")
        if isinstance(raw_args, dict):
            args = raw_args
        else:
            try:
                args = json.loads(raw_args or "{}")
            except (TypeError, ValueError):
                args = {}  # malformed JSON surfaces as a validation error in §6
            if not isinstance(args, dict):
                # Valid JSON that is not an object (null, list, number, ...).
                args = {}
        tool_calls.append(
            ToolCall(id=tc["id"], name=fn.get("name") or "", arguments=args)
        )
    # ``dict.get(key, 0)`` still returns None for an explicit null, hence ``or 0``.
    u = data.get("usage") or {}
    cached = (u.get("prompt_tokens_details") or {}).get("cached_tokens") or 0
    usage = Usage(
        input_tokens=u.get("prompt_tokens") or 0,
        output_tokens=u.get("completion_tokens") or 0,
        cache_read_tokens=cached,
    )
    finish = choice.get("finish_reason") or ""
    return CompletionResponse(
        message=Message(
            role="assistant",
            content=msg.get("content") or "",
            tool_calls=tool_calls,
        ),
        usage=usage,
        stop_reason=_STOP_REASONS.get(finish, finish),
        raw=data,
    )
111
+
112
+
113
def accumulate_stream(
    events: Iterable[dict[str, Any]],
    on_text: Callable[[str], None] | None = None,
) -> dict[str, Any]:
    """Fold OpenAI streaming chunks into one non-streaming response dict.

    Content deltas are collected and forwarded to ``on_text``.
    ``reasoning_content`` deltas are forwarded to ``on_text`` only (they are
    not part of the answer). Tool-call deltas are merged per ``index``: ``id``
    and ``name`` are overwritten when present, ``arguments`` fragments are
    concatenated. The last non-empty ``usage`` and ``finish_reason`` win. The
    result has the shape ``parse_response`` consumes.
    """
    text_parts: list[str] = []
    calls_by_index: dict[int, dict[str, str]] = {}
    finish_reason = ""
    usage: dict[str, Any] = {}

    def emit(piece: str) -> None:
        if on_text is not None:
            on_text(piece)

    for chunk in events:
        chunk_usage = chunk.get("usage")
        if chunk_usage:
            usage = chunk_usage
        for choice in chunk.get("choices", []):
            delta = choice.get("delta", {})

            piece = delta.get("content")
            if piece:
                text_parts.append(piece)
                emit(piece)

            # Reasoning models (e.g. via LM Studio) stream their thinking on a
            # separate channel: show it live, keep it out of the answer.
            thought = delta.get("reasoning_content")
            if thought:
                emit(thought)

            for fragment in delta.get("tool_calls") or []:
                index = fragment.get("index", 0)
                if index not in calls_by_index:
                    calls_by_index[index] = {"id": "", "name": "", "arguments": ""}
                call = calls_by_index[index]
                call_id = fragment.get("id")
                if call_id:
                    call["id"] = call_id
                function = fragment.get("function", {})
                fn_name = function.get("name")
                if fn_name:
                    call["name"] = fn_name
                fn_args = function.get("arguments")
                if fn_args:
                    call["arguments"] += fn_args

            reason = choice.get("finish_reason")
            if reason:
                finish_reason = reason

    message: dict[str, Any] = {"content": "".join(text_parts) or None}
    if calls_by_index:
        wire_calls = []
        for index in sorted(calls_by_index):
            call = calls_by_index[index]
            wire_calls.append(
                {
                    "id": call["id"],
                    "type": "function",
                    "function": {"name": call["name"], "arguments": call["arguments"]},
                }
            )
        message["tool_calls"] = wire_calls
    return {
        "choices": [{"message": message, "finish_reason": finish_reason}],
        "usage": usage,
    }
165
+
166
+
167
class OpenAIProvider:
    """Provider adapter for OpenAI-style Chat Completions endpoints.

    The endpoint, provider name, key requirement, key environment variable and
    whether ``reasoning_effort`` is sent are all configurable, so subclasses
    can target other OpenAI-compatible servers.
    """

    def __init__(
        self,
        model: str,
        *,
        api_key: str | None = None,
        base_url: str = DEFAULT_BASE_URL,
        context_window: int = DEFAULT_CONTEXT_WINDOW,
        name: str = "openai",
        require_key: bool = True,
        env_key: str = "OPENAI_API_KEY",
        send_reasoning: bool = True,
    ) -> None:
        """Bind the adapter to ``model`` and an endpoint.

        An explicit ``api_key`` becomes a pool of one; otherwise the pool is
        read from ``env_key`` via ``CredentialPool.from_env``. Trailing slashes
        are stripped from ``base_url``.
        """
        self.name = name
        self.model = model
        self.context_window = context_window
        self._base_url = base_url.rstrip("/")
        self._require_key = require_key
        self._send_reasoning = send_reasoning
        self._pool = (
            CredentialPool([api_key]) if api_key else CredentialPool.from_env(env_key)
        )

    def with_model(self, model: str) -> OpenAIProvider:
        """Return a copy bound to ``model`` that shares this provider's credentials.

        Used to honor a profile's ``model_override`` for one run without
        rebuilding the credential pool or re-reading the environment. The
        clone is built from ``type(self)`` and the full instance ``__dict__``
        so a subclass instance stays an instance of its own class and keeps
        any attributes it added.
        """
        cls = type(self)
        clone = cls.__new__(cls)
        clone.__dict__.update(self.__dict__)
        clone.model = model
        return clone

    def complete(
        self,
        messages: list[Message],
        tools: list[ToolSpec],
        *,
        max_tokens: int,
        temperature: float = 1.0,
        system: str | None = None,
        reasoning: str | None = None,
        on_text: Callable[[str], None] | None = None,
    ) -> CompletionResponse:
        """Complete one turn against ``<base_url>/chat/completions``.

        When ``on_text`` is given the request is first attempted as a stream
        (with usage reporting requested) using the pool's current key; a
        ``ProviderError`` from that attempt is swallowed and the same payload
        is re-sent through the non-streaming, pooled path.

        Raises:
            ProviderError: when a key is required and the pool holds none.
                Errors from the non-streaming request propagate from
                ``post_json_pooled``.
        """
        if self._require_key and self._pool.current() is None:
            raise ProviderError(f"API key for provider {self.name!r} is not set")
        payload: dict[str, Any] = {
            "model": self.model,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "messages": render_messages(messages, system),
        }
        # reasoning_effort is honored by OpenAI reasoning models; only sent when a
        # profile asks for it, and never for local endpoints that may reject it.
        if reasoning and self._send_reasoning:
            payload["reasoning_effort"] = reasoning
        if tools:
            payload["tools"] = render_tools(tools)

        def header_for_key(key: str | None) -> dict[str, str]:
            # No Authorization header at all when there is no key (keyless endpoints).
            headers = {"content-type": "application/json"}
            if key:
                headers["Authorization"] = f"Bearer {key}"
            return headers

        url = f"{self._base_url}/chat/completions"

        if on_text is not None:
            # Best-effort streaming: on any transport/protocol fault, fall back to
            # the non-streaming path so the turn still completes correctly.
            try:
                events = stream_sse(
                    url,
                    headers=header_for_key(self._pool.current()),
                    payload={
                        **payload,
                        "stream": True,
                        "stream_options": {"include_usage": True},
                    },
                )
                return parse_response(accumulate_stream(events, on_text))
            except ProviderError:
                pass

        return parse_response(
            post_json_pooled(
                url, header_for_key=header_for_key, payload=payload, pool=self._pool
            )
        )