bareagent-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bareagent/__init__.py +10 -0
- bareagent/concurrency/__init__.py +6 -0
- bareagent/concurrency/background.py +97 -0
- bareagent/concurrency/notification.py +61 -0
- bareagent/concurrency/scheduler.py +136 -0
- bareagent/config.toml +299 -0
- bareagent/core/__init__.py +1 -0
- bareagent/core/config_paths.py +49 -0
- bareagent/core/context.py +127 -0
- bareagent/core/fileutil.py +103 -0
- bareagent/core/goal.py +214 -0
- bareagent/core/handlers/__init__.py +1 -0
- bareagent/core/handlers/bash.py +79 -0
- bareagent/core/handlers/file_edit.py +47 -0
- bareagent/core/handlers/file_read.py +270 -0
- bareagent/core/handlers/file_write.py +34 -0
- bareagent/core/handlers/glob_search.py +30 -0
- bareagent/core/handlers/goal.py +60 -0
- bareagent/core/handlers/grep_search.py +52 -0
- bareagent/core/handlers/memory.py +71 -0
- bareagent/core/handlers/plan.py +106 -0
- bareagent/core/handlers/search_utils.py +77 -0
- bareagent/core/handlers/skill.py +87 -0
- bareagent/core/handlers/subagent_send.py +70 -0
- bareagent/core/handlers/web_fetch.py +126 -0
- bareagent/core/handlers/web_search.py +165 -0
- bareagent/core/handlers/workflow.py +190 -0
- bareagent/core/loop.py +535 -0
- bareagent/core/retry.py +131 -0
- bareagent/core/sandbox.py +27 -0
- bareagent/core/schema.py +21 -0
- bareagent/core/tools.py +779 -0
- bareagent/core/workflow.py +517 -0
- bareagent/core/workflow_registry.py +219 -0
- bareagent/debug/__init__.py +0 -0
- bareagent/debug/interaction_log.py +263 -0
- bareagent/debug/viewer.html +1750 -0
- bareagent/debug/web_viewer.py +157 -0
- bareagent/hooks/__init__.py +32 -0
- bareagent/hooks/config.py +118 -0
- bareagent/hooks/engine.py +197 -0
- bareagent/hooks/errors.py +14 -0
- bareagent/hooks/events.py +22 -0
- bareagent/lsp/__init__.py +63 -0
- bareagent/lsp/config.py +134 -0
- bareagent/lsp/coord.py +118 -0
- bareagent/lsp/diagnostics.py +240 -0
- bareagent/lsp/errors.py +24 -0
- bareagent/lsp/manager.py +866 -0
- bareagent/lsp/tools.py +629 -0
- bareagent/lsp/workspace_edit.py +305 -0
- bareagent/main.py +4205 -0
- bareagent/mcp/__init__.py +69 -0
- bareagent/mcp/_sse.py +69 -0
- bareagent/mcp/client.py +341 -0
- bareagent/mcp/config.py +169 -0
- bareagent/mcp/errors.py +32 -0
- bareagent/mcp/manager.py +318 -0
- bareagent/mcp/protocol.py +187 -0
- bareagent/mcp/registry.py +557 -0
- bareagent/mcp/transport/__init__.py +15 -0
- bareagent/mcp/transport/base.py +149 -0
- bareagent/mcp/transport/http_legacy.py +192 -0
- bareagent/mcp/transport/http_streamable.py +217 -0
- bareagent/mcp/transport/stdio.py +202 -0
- bareagent/memory/__init__.py +1 -0
- bareagent/memory/compact.py +203 -0
- bareagent/memory/conversation_io.py +226 -0
- bareagent/memory/embedding.py +194 -0
- bareagent/memory/persistent.py +515 -0
- bareagent/memory/token_counter.py +67 -0
- bareagent/memory/token_tracker.py +262 -0
- bareagent/memory/transcript.py +100 -0
- bareagent/permission/__init__.py +1 -0
- bareagent/permission/guard.py +329 -0
- bareagent/permission/rules.py +19 -0
- bareagent/planning/__init__.py +19 -0
- bareagent/planning/agent_types.py +169 -0
- bareagent/planning/skill_gen.py +141 -0
- bareagent/planning/skill_store.py +173 -0
- bareagent/planning/skills.py +146 -0
- bareagent/planning/subagent.py +355 -0
- bareagent/planning/subagent_registry.py +77 -0
- bareagent/planning/tasks.py +348 -0
- bareagent/planning/todo.py +153 -0
- bareagent/planning/worktree.py +122 -0
- bareagent/provider/__init__.py +1 -0
- bareagent/provider/anthropic.py +348 -0
- bareagent/provider/base.py +136 -0
- bareagent/provider/factory.py +130 -0
- bareagent/provider/openai.py +881 -0
- bareagent/provider/presets.py +72 -0
- bareagent/provider/setup.py +356 -0
- bareagent/skills/.gitkeep +1 -0
- bareagent/skills/code-review/SKILL.md +68 -0
- bareagent/skills/git/SKILL.md +68 -0
- bareagent/skills/test/SKILL.md +70 -0
- bareagent/team/__init__.py +17 -0
- bareagent/team/autonomous.py +193 -0
- bareagent/team/mailbox.py +239 -0
- bareagent/team/manager.py +155 -0
- bareagent/team/protocols.py +129 -0
- bareagent/tracing/__init__.py +12 -0
- bareagent/tracing/_api.py +92 -0
- bareagent/tracing/_proxy.py +60 -0
- bareagent/tracing/composite.py +115 -0
- bareagent/tracing/json_file.py +115 -0
- bareagent/tracing/langfuse.py +139 -0
- bareagent/tracing/otel.py +107 -0
- bareagent/tracing/setup.py +85 -0
- bareagent/ui/__init__.py +24 -0
- bareagent/ui/console.py +167 -0
- bareagent/ui/prompt.py +78 -0
- bareagent/ui/protocol.py +24 -0
- bareagent/ui/stream.py +66 -0
- bareagent/ui/theme.py +240 -0
- bareagent_cli-0.1.0.dist-info/METADATA +331 -0
- bareagent_cli-0.1.0.dist-info/RECORD +121 -0
- bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
- bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
- bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import anthropic
|
|
6
|
+
|
|
7
|
+
from bareagent.provider.base import (
|
|
8
|
+
BaseLLMProvider,
|
|
9
|
+
CacheConfig,
|
|
10
|
+
LLMResponse,
|
|
11
|
+
StreamEvent,
|
|
12
|
+
ThinkingConfig,
|
|
13
|
+
ToolCall,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
_PROTECTED_KEYS = frozenset({"model", "messages", "tools", "system", "thinking", "max_tokens"})
|
|
17
|
+
|
|
18
|
+
# Content-block types that may carry a ``cache_control`` breakpoint. Thinking /
|
|
19
|
+
# redacted_thinking blocks must not, so the conversation breakpoint skips a
|
|
20
|
+
# trailing thinking block rather than risk an API error.
|
|
21
|
+
_CACHEABLE_BLOCK_TYPES = frozenset({"text", "image", "tool_use", "tool_result", "document"})
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AnthropicProvider(BaseLLMProvider):
|
|
25
|
+
def __init__(
    self,
    api_key: str,
    model: str,
    thinking_config: ThinkingConfig | None = None,
    cache_config: CacheConfig | None = None,
) -> None:
    """Create the SDK client and remember the per-provider settings.

    Args:
        api_key: Key handed to the Anthropic SDK client.
        model: Model identifier sent with every request.
        thinking_config: Extended-thinking settings; a default
            ``ThinkingConfig()`` is substituted when omitted.
        cache_config: Prompt-caching settings. ``None`` means caching is
            off, so no ``cache_control`` markers are added to requests.
    """
    if not thinking_config:
        thinking_config = ThinkingConfig()
    self.model = model
    self.thinking_config = thinking_config
    # Stored exactly as given; see ``_caching_enabled`` for how ``None`` and
    # ``enabled=False`` are interpreted.
    self.cache_config = cache_config
    # Retries belong to the application's retry layer (bareagent.core.retry),
    # so the SDK's own retries are switched off to avoid multiplying attempts.
    self.client = anthropic.Anthropic(api_key=api_key, max_retries=0)
|
|
40
|
+
|
|
41
|
+
def create(
    self,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    **kwargs: Any,
) -> LLMResponse:
    """Send one non-streaming request and return the normalized response.

    Args:
        messages: Conversation in the internal message format.
        tools: Tool definitions in the internal format.
        **kwargs: Extra request options forwarded to
            ``_build_request_params``.

    Returns:
        The SDK response converted by ``_parse_response``.
    """
    request = self._build_request_params(messages, tools, **kwargs)
    return self._parse_response(self.client.messages.create(**request))
|
|
50
|
+
|
|
51
|
+
def create_stream(
    self,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    **kwargs: Any,
):
    """Stream a response, yielding text and tool-call events as they arrive.

    This is a generator: it yields ``StreamEvent`` objects and its return
    value (available via ``StopIteration.value``) is the final normalized
    ``LLMResponse``.

    Args:
        messages: Conversation in the internal message format.
        tools: Tool definitions in the internal format.
        **kwargs: Extra request options forwarded to
            ``_build_request_params``.
    """
    request = self._build_request_params(messages, tools, **kwargs)
    with self.client.messages.stream(**request) as stream:
        for event in stream:
            kind = event.type
            if kind == "content_block_delta":
                # Only text deltas are surfaced; other delta kinds are ignored.
                if event.delta.type == "text_delta":
                    yield StreamEvent(type="text", text=event.delta.text)
            elif kind == "content_block_stop":
                # A finished tool_use block carries the complete call.
                finished = event.content_block
                if finished.type == "tool_use":
                    yield StreamEvent(
                        type="tool_call",
                        tool_call_id=finished.id,
                        name=finished.name,
                        input=dict(finished.input or {}),
                    )

    return self._parse_response(stream.get_final_message())
|
|
79
|
+
|
|
80
|
+
def _build_request_params(
    self,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    **kwargs: Any,
) -> dict[str, Any]:
    """Assemble the keyword arguments for an Anthropic messages request.

    Args:
        messages: Conversation in the internal format; ``system`` messages
            are pulled out into the request's ``system`` value.
        tools: Tool definitions in the internal format.
        **kwargs: Extra request options. ``max_tokens`` (default 8000) is
            honoured; any other key not listed in ``_PROTECTED_KEYS`` is
            copied into the request unchanged.

    Returns:
        A dict with ``model``, ``messages`` and ``max_tokens``, plus
        ``tools``, ``system`` and ``thinking`` when applicable.
    """
    system_prompt, anthropic_messages = self._convert_messages(messages)
    thinking_on = self.thinking_config.mode in {"enabled", "adaptive"}

    max_tokens = int(kwargs.get("max_tokens", 8000))
    if thinking_on:
        # Keep max_tokens strictly above the thinking budget.
        max_tokens = max(max_tokens, self.thinking_config.budget_tokens + 1)

    converted_tools = self._convert_tools(tools)
    system_value: str | list[dict[str, Any]] | None = system_prompt or None

    if self._caching_enabled():
        marker = self._cache_control()
        # Up to three breakpoints are placed: last tool, the system block,
        # and the last conversation block. The tool breakpoint gives a
        # tools-only cache segment independent of the system prompt.
        if converted_tools:
            converted_tools[-1] = {**converted_tools[-1], "cache_control": marker}
        if system_prompt:
            system_value = [
                {"type": "text", "text": system_prompt, "cache_control": marker}
            ]
        self._apply_conversation_breakpoint(anthropic_messages, marker)

    params: dict[str, Any] = {
        "model": self.model,
        "messages": anthropic_messages,
        "max_tokens": max_tokens,
    }
    if converted_tools:
        params["tools"] = converted_tools
    if system_value:
        params["system"] = system_value
    if thinking_on:
        # NOTE(review): budget_tokens is sent for both "enabled" and
        # "adaptive" -- confirm the API accepts it with type "adaptive".
        params["thinking"] = {
            "type": self.thinking_config.mode,
            "budget_tokens": self.thinking_config.budget_tokens,
        }

    # Caller extras are applied last but may not override computed keys.
    for key, value in kwargs.items():
        if key not in _PROTECTED_KEYS:
            params[key] = value
    return params
|
|
125
|
+
|
|
126
|
+
def _caching_enabled(self) -> bool:
|
|
127
|
+
return self.cache_config is not None and self.cache_config.enabled
|
|
128
|
+
|
|
129
|
+
def _cache_control(self) -> dict[str, Any]:
|
|
130
|
+
control: dict[str, Any] = {"type": "ephemeral"}
|
|
131
|
+
if self.cache_config is not None and self.cache_config.ttl == "1h":
|
|
132
|
+
control["ttl"] = "1h"
|
|
133
|
+
return control
|
|
134
|
+
|
|
135
|
+
def _apply_conversation_breakpoint(
|
|
136
|
+
self,
|
|
137
|
+
messages: list[dict[str, Any]],
|
|
138
|
+
cache_control: dict[str, Any],
|
|
139
|
+
) -> None:
|
|
140
|
+
"""Attach a ``cache_control`` breakpoint to the last message's last block.
|
|
141
|
+
|
|
142
|
+
This is the moving incremental-caching breakpoint: each request only
|
|
143
|
+
appends a couple of blocks since the previous one, so the 20-block
|
|
144
|
+
lookback reliably finds the prior cached prefix. The message dicts here
|
|
145
|
+
are freshly built by ``_convert_messages`` (not shared with the caller),
|
|
146
|
+
so in-place mutation is safe.
|
|
147
|
+
"""
|
|
148
|
+
if not messages:
|
|
149
|
+
return
|
|
150
|
+
last = messages[-1]
|
|
151
|
+
content = last.get("content")
|
|
152
|
+
if isinstance(content, str):
|
|
153
|
+
if content:
|
|
154
|
+
last["content"] = [
|
|
155
|
+
{"type": "text", "text": content, "cache_control": cache_control}
|
|
156
|
+
]
|
|
157
|
+
return
|
|
158
|
+
if isinstance(content, list) and content:
|
|
159
|
+
last_block = content[-1]
|
|
160
|
+
if last_block.get("type") in _CACHEABLE_BLOCK_TYPES:
|
|
161
|
+
content[-1] = {**last_block, "cache_control": cache_control}
|
|
162
|
+
|
|
163
|
+
def _convert_messages(
|
|
164
|
+
self,
|
|
165
|
+
messages: list[dict[str, Any]],
|
|
166
|
+
) -> tuple[str | None, list[dict[str, Any]]]:
|
|
167
|
+
system_parts: list[str] = []
|
|
168
|
+
converted: list[dict[str, Any]] = []
|
|
169
|
+
|
|
170
|
+
for message in messages:
|
|
171
|
+
role = message["role"]
|
|
172
|
+
content = message.get("content", "")
|
|
173
|
+
if role == "system":
|
|
174
|
+
text = self._stringify_content(content)
|
|
175
|
+
if text:
|
|
176
|
+
system_parts.append(text)
|
|
177
|
+
continue
|
|
178
|
+
|
|
179
|
+
converted.append(
|
|
180
|
+
{
|
|
181
|
+
"role": role,
|
|
182
|
+
"content": self._convert_message_content(content),
|
|
183
|
+
}
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
system_prompt = "\n\n".join(part for part in system_parts if part) or None
|
|
187
|
+
return system_prompt, converted
|
|
188
|
+
|
|
189
|
+
def _convert_message_content(self, content: Any) -> str | list[dict[str, Any]]:
|
|
190
|
+
if isinstance(content, str):
|
|
191
|
+
return content
|
|
192
|
+
if not isinstance(content, list):
|
|
193
|
+
return str(content)
|
|
194
|
+
|
|
195
|
+
converted_blocks: list[dict[str, Any]] = []
|
|
196
|
+
for block in content:
|
|
197
|
+
block_type = block.get("type")
|
|
198
|
+
if block_type == "text":
|
|
199
|
+
converted_blocks.append({"type": "text", "text": block.get("text", "")})
|
|
200
|
+
continue
|
|
201
|
+
if block_type == "tool_use":
|
|
202
|
+
converted_blocks.append(
|
|
203
|
+
{
|
|
204
|
+
"type": "tool_use",
|
|
205
|
+
"id": block.get("id", ""),
|
|
206
|
+
"name": block.get("name", ""),
|
|
207
|
+
"input": block.get("input", {}),
|
|
208
|
+
}
|
|
209
|
+
)
|
|
210
|
+
continue
|
|
211
|
+
if block_type == "tool_result":
|
|
212
|
+
result_block: dict[str, Any] = {
|
|
213
|
+
"type": "tool_result",
|
|
214
|
+
"tool_use_id": block.get("tool_use_id", ""),
|
|
215
|
+
"content": self._convert_tool_result_content(block.get("content", "")),
|
|
216
|
+
}
|
|
217
|
+
if block.get("is_error"):
|
|
218
|
+
result_block["is_error"] = True
|
|
219
|
+
converted_blocks.append(result_block)
|
|
220
|
+
continue
|
|
221
|
+
if block_type == "thinking" and block.get("signature"):
|
|
222
|
+
converted_blocks.append(
|
|
223
|
+
{
|
|
224
|
+
"type": "thinking",
|
|
225
|
+
"thinking": block.get("thinking", ""),
|
|
226
|
+
"signature": block["signature"],
|
|
227
|
+
}
|
|
228
|
+
)
|
|
229
|
+
continue
|
|
230
|
+
if block_type == "redacted_thinking":
|
|
231
|
+
converted_blocks.append(
|
|
232
|
+
{
|
|
233
|
+
"type": "redacted_thinking",
|
|
234
|
+
"data": block.get("data", ""),
|
|
235
|
+
}
|
|
236
|
+
)
|
|
237
|
+
continue
|
|
238
|
+
|
|
239
|
+
return converted_blocks
|
|
240
|
+
|
|
241
|
+
def _convert_tool_result_content(self, content: Any) -> str | list[dict[str, Any]]:
|
|
242
|
+
if isinstance(content, str):
|
|
243
|
+
return content
|
|
244
|
+
if isinstance(content, list):
|
|
245
|
+
blocks: list[dict[str, Any]] = []
|
|
246
|
+
for item in content:
|
|
247
|
+
if not isinstance(item, dict):
|
|
248
|
+
blocks.append({"type": "text", "text": self._stringify_content(item)})
|
|
249
|
+
continue
|
|
250
|
+
item_type = item.get("type")
|
|
251
|
+
if item_type == "text":
|
|
252
|
+
blocks.append({"type": "text", "text": item.get("text", "")})
|
|
253
|
+
continue
|
|
254
|
+
if item_type == "image":
|
|
255
|
+
# BareAgent's internal image shape is already Anthropic-native.
|
|
256
|
+
source = item.get("source")
|
|
257
|
+
if (
|
|
258
|
+
isinstance(source, dict)
|
|
259
|
+
and source.get("type") == "base64"
|
|
260
|
+
and source.get("data")
|
|
261
|
+
):
|
|
262
|
+
blocks.append(
|
|
263
|
+
{
|
|
264
|
+
"type": "image",
|
|
265
|
+
"source": {
|
|
266
|
+
"type": "base64",
|
|
267
|
+
"media_type": source.get("media_type", "image/png"),
|
|
268
|
+
"data": source.get("data", ""),
|
|
269
|
+
},
|
|
270
|
+
}
|
|
271
|
+
)
|
|
272
|
+
continue
|
|
273
|
+
blocks.append({"type": "text", "text": self._stringify_content(item)})
|
|
274
|
+
continue
|
|
275
|
+
blocks.append({"type": "text", "text": self._stringify_content(item)})
|
|
276
|
+
return blocks
|
|
277
|
+
return self._stringify_content(content)
|
|
278
|
+
|
|
279
|
+
def _convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
280
|
+
return [
|
|
281
|
+
{
|
|
282
|
+
"name": tool["name"],
|
|
283
|
+
"description": tool.get("description", ""),
|
|
284
|
+
"input_schema": tool.get("parameters", {"type": "object", "properties": {}}),
|
|
285
|
+
}
|
|
286
|
+
for tool in tools
|
|
287
|
+
]
|
|
288
|
+
|
|
289
|
+
def _parse_response(self, response: Any) -> LLMResponse:
    """Normalize an SDK response object into an ``LLMResponse``.

    Args:
        response: Object exposing ``content`` (blocks), ``stop_reason`` and
            ``usage``; every attribute is read defensively with ``getattr``.

    Returns:
        An ``LLMResponse`` with concatenated text, thinking joined by blank
        lines, the tool calls, token counts (missing or ``None`` counts
        become 0) and a dict copy of each recognized content block.
    """
    texts: list[str] = []
    thoughts: list[str] = []
    calls: list[ToolCall] = []
    blocks: list[dict[str, Any]] = []

    for item in getattr(response, "content", []):
        kind = getattr(item, "type", "")

        if kind == "text":
            chunk = getattr(item, "text", "")
            texts.append(chunk)
            blocks.append({"type": "text", "text": chunk})
            continue

        if kind == "thinking":
            thought = getattr(item, "thinking", "")
            signature = getattr(item, "signature", "")
            thoughts.append(thought)
            entry: dict[str, Any] = {"type": "thinking", "thinking": thought}
            # The signature is recorded only when present.
            if signature:
                entry["signature"] = signature
            blocks.append(entry)
            continue

        if kind == "redacted_thinking":
            blocks.append(
                {"type": "redacted_thinking", "data": getattr(item, "data", "")}
            )
            continue

        if kind == "tool_use":
            call_id = getattr(item, "id", "")
            call_name = getattr(item, "name", "")
            # One dict copy is shared by the ToolCall and the content block.
            call_input = dict(getattr(item, "input", {}) or {})
            calls.append(ToolCall(id=call_id, name=call_name, input=call_input))
            blocks.append(
                {
                    "type": "tool_use",
                    "id": call_id,
                    "name": call_name,
                    "input": call_input,
                }
            )

    usage = getattr(response, "usage", None)

    def count(field_name: str) -> int:
        return getattr(usage, field_name, 0) or 0

    return LLMResponse(
        text="".join(texts),
        tool_calls=calls,
        stop_reason=getattr(response, "stop_reason", "") or "",
        input_tokens=count("input_tokens"),
        output_tokens=count("output_tokens"),
        cache_creation_input_tokens=count("cache_creation_input_tokens"),
        cache_read_input_tokens=count("cache_read_input_tokens"),
        thinking="\n\n".join(part for part in thoughts if part),
        content_blocks=blocks,
    )
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from collections.abc import Generator
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any, Literal
|
|
8
|
+
|
|
9
|
+
from bareagent.core.fileutil import stringify
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(slots=True)
|
|
13
|
+
class ThinkingConfig:
|
|
14
|
+
"""Extended thinking settings shared across providers."""
|
|
15
|
+
|
|
16
|
+
mode: Literal["enabled", "adaptive", "disabled"] = "adaptive"
|
|
17
|
+
budget_tokens: int = 10000
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Accepted values for ThinkingConfig.mode.
VALID_THINKING_MODES: frozenset[str] = frozenset(("enabled", "adaptive", "disabled"))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(slots=True)
|
|
24
|
+
class CacheConfig:
|
|
25
|
+
"""Prompt-caching settings (Anthropic explicit ``cache_control`` breakpoints).
|
|
26
|
+
|
|
27
|
+
Provider-neutral by name, but today only the Anthropic provider acts on it:
|
|
28
|
+
OpenAI/DeepSeek cache automatically with no request-side knob. A ``None``
|
|
29
|
+
cache_config passed to a provider means "caching off" (byte-identical legacy
|
|
30
|
+
requests); ``factory.create_provider`` always supplies an instance so the app
|
|
31
|
+
defaults to caching ON.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
enabled: bool = True
|
|
35
|
+
ttl: Literal["5m", "1h"] = "5m"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Accepted values for CacheConfig.ttl.
VALID_CACHE_TTLS: frozenset[str] = frozenset(("5m", "1h"))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(slots=True)
|
|
42
|
+
class ToolCall:
|
|
43
|
+
id: str
|
|
44
|
+
name: str
|
|
45
|
+
input: dict[str, Any]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(slots=True)
|
|
49
|
+
class StreamEvent:
|
|
50
|
+
type: str
|
|
51
|
+
text: str = ""
|
|
52
|
+
tool_call_id: str = ""
|
|
53
|
+
name: str = ""
|
|
54
|
+
input: dict[str, Any] = field(default_factory=dict)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(slots=True)
|
|
58
|
+
class LLMResponse:
|
|
59
|
+
text: str
|
|
60
|
+
stop_reason: str
|
|
61
|
+
input_tokens: int
|
|
62
|
+
output_tokens: int
|
|
63
|
+
# Prompt-caching usage (additive, non-overlapping with input_tokens). For
|
|
64
|
+
# Anthropic these come straight off the wire; for OpenAI/DeepSeek the
|
|
65
|
+
# provider normalizes auto-cache hits into cache_read_input_tokens so the
|
|
66
|
+
# three fields carry one consistent meaning across providers:
|
|
67
|
+
# input_tokens = full-price input (1x)
|
|
68
|
+
# cache_read_input_tokens = served from cache (read discount)
|
|
69
|
+
# cache_creation_input_tokens = written to cache (write premium; Anthropic
|
|
70
|
+
# only — 0 for auto-caching providers)
|
|
71
|
+
cache_creation_input_tokens: int = 0
|
|
72
|
+
cache_read_input_tokens: int = 0
|
|
73
|
+
tool_calls: list[ToolCall] = field(default_factory=list)
|
|
74
|
+
thinking: str = ""
|
|
75
|
+
content_blocks: list[dict[str, Any]] = field(default_factory=list)
|
|
76
|
+
|
|
77
|
+
@property
|
|
78
|
+
def has_tool_calls(self) -> bool:
|
|
79
|
+
return len(self.tool_calls) > 0
|
|
80
|
+
|
|
81
|
+
def to_message(self) -> dict[str, Any]:
|
|
82
|
+
"""Convert the normalized response back into an assistant message."""
|
|
83
|
+
if self.content_blocks:
|
|
84
|
+
return {
|
|
85
|
+
"role": "assistant",
|
|
86
|
+
"content": [dict(block) for block in self.content_blocks],
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
if not self.tool_calls:
|
|
90
|
+
return {"role": "assistant", "content": self.text}
|
|
91
|
+
|
|
92
|
+
content: list[dict[str, Any]] = []
|
|
93
|
+
if self.text:
|
|
94
|
+
content.append({"type": "text", "text": self.text})
|
|
95
|
+
for tool_call in self.tool_calls:
|
|
96
|
+
content.append(
|
|
97
|
+
{
|
|
98
|
+
"type": "tool_use",
|
|
99
|
+
"id": tool_call.id,
|
|
100
|
+
"name": tool_call.name,
|
|
101
|
+
"input": tool_call.input,
|
|
102
|
+
}
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
return {"role": "assistant", "content": content}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class BaseLLMProvider(ABC):
    """Interface every LLM provider implements, plus a shared text helper."""

    @abstractmethod
    def create(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        **kwargs: Any,
    ) -> LLMResponse:
        """Perform one blocking request and return the normalized response."""

    @abstractmethod
    def create_stream(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        **kwargs: Any,
    ) -> Generator[StreamEvent, None, LLMResponse]:
        """Yield events while streaming; the generator's return value is the
        final normalized response."""

    def _stringify_content(self, content: Any) -> str:
        """Flatten message content into plain text.

        A list is rendered item by item: ``text`` blocks contribute their
        text, anything else its JSON encoding (non-serializable values fall
        back to ``str``). Empty pieces are skipped and the rest joined with
        newlines. Non-list content is delegated to ``stringify``.
        """
        if not isinstance(content, list):
            return stringify(content)

        pieces: list[str] = []
        for item in content:
            is_text_block = isinstance(item, dict) and item.get("type") == "text"
            if is_text_block:
                piece = str(item.get("text", ""))
            else:
                piece = json.dumps(item, ensure_ascii=False, default=str)
            if piece:
                pieces.append(piece)
        return "\n".join(pieces)
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any, Literal, cast
|
|
6
|
+
|
|
7
|
+
from bareagent.provider.anthropic import AnthropicProvider
|
|
8
|
+
from bareagent.provider.base import (
|
|
9
|
+
VALID_CACHE_TTLS,
|
|
10
|
+
VALID_THINKING_MODES,
|
|
11
|
+
BaseLLMProvider,
|
|
12
|
+
CacheConfig,
|
|
13
|
+
ThinkingConfig,
|
|
14
|
+
)
|
|
15
|
+
from bareagent.provider.openai import OpenAIProvider
|
|
16
|
+
from bareagent.provider.presets import resolve_preset
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _resolve_api_key(provider_config: Any) -> str:
|
|
20
|
+
"""Resolve the API key, preferring an explicit plaintext key.
|
|
21
|
+
|
|
22
|
+
Priority: ``provider_config.api_key`` (explicit plaintext, used as-is) ->
|
|
23
|
+
``provider_config.api_key_env`` (an ``sk-`` value is treated as plaintext,
|
|
24
|
+
otherwise it names an environment variable). Fixes non-``sk-`` prefixed
|
|
25
|
+
keys (qwen/glm) being misread as env var names.
|
|
26
|
+
"""
|
|
27
|
+
explicit_key = getattr(provider_config, "api_key", None)
|
|
28
|
+
if explicit_key:
|
|
29
|
+
return str(explicit_key)
|
|
30
|
+
|
|
31
|
+
api_key_env = getattr(provider_config, "api_key_env", "")
|
|
32
|
+
if not api_key_env:
|
|
33
|
+
raise ValueError(
|
|
34
|
+
"Provider config is missing both 'api_key' and 'api_key_env'. "
|
|
35
|
+
"Please provide the API key directly via 'api_key', or specify the "
|
|
36
|
+
"environment variable name that holds it via 'api_key_env'."
|
|
37
|
+
)
|
|
38
|
+
if api_key_env.startswith("sk-"):
|
|
39
|
+
return api_key_env
|
|
40
|
+
api_key = os.getenv(api_key_env)
|
|
41
|
+
if not api_key:
|
|
42
|
+
raise ValueError(f"Missing API key in environment variable: {api_key_env}")
|
|
43
|
+
return api_key
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def create_provider(config: Any) -> BaseLLMProvider:
    """Instantiate the provider described by ``config.provider``.

    The provider name is normalized (stripped, lower-cased) and looked up as
    a preset. Presets routed to ``"anthropic"`` produce an
    ``AnthropicProvider`` configured from ``config.thinking`` and
    ``config.cache``; every other preset produces an ``OpenAIProvider`` whose
    base URL is the configured one or, failing that, the preset default.

    Raises:
        ValueError: If the provider section is missing, the API key cannot
            be resolved, or the provider name matches no preset.
    """
    section = getattr(config, "provider", None)
    if section is None:
        raise ValueError("Config is missing a provider section.")

    provider_name = str(getattr(section, "name", "")).strip().lower()
    model = getattr(section, "model", "")
    # The key is resolved before the preset lookup, so key errors surface
    # first.
    api_key = _resolve_api_key(section)

    preset = resolve_preset(provider_name)
    if preset is None:
        raise ValueError(f"Unknown provider: {provider_name}")

    if preset.route != "anthropic":
        return OpenAIProvider(
            api_key=api_key,
            model=model,
            base_url=getattr(section, "base_url", None) or preset.default_base_url,
            wire_api=getattr(section, "wire_api", None),
        )

    return AnthropicProvider(
        api_key=api_key,
        model=model,
        thinking_config=_build_thinking_config(getattr(config, "thinking", None)),
        cache_config=_build_cache_config(getattr(config, "cache", None)),
    )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _validated_thinking_mode(mode: str) -> Literal["enabled", "adaptive", "disabled"]:
    """Return ``mode`` if it is a known thinking mode, else ``"adaptive"``.

    An unknown mode is reported with a warning on the root logger.
    """
    if mode in VALID_THINKING_MODES:
        return cast(Literal["enabled", "adaptive", "disabled"], mode)
    logging.warning("Invalid thinking mode %r, falling back to 'adaptive'", mode)
    return "adaptive"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _validated_cache_ttl(ttl: str) -> Literal["5m", "1h"]:
    """Return ``ttl`` if it is a known cache TTL, else ``"5m"``.

    An unknown TTL is reported with a warning on the root logger.
    """
    if ttl in VALID_CACHE_TTLS:
        return cast(Literal["5m", "1h"], ttl)
    logging.warning("Invalid cache ttl %r, falling back to '5m'", ttl)
    return "5m"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _build_cache_config(raw_config: Any) -> CacheConfig:
    """Turn whatever the config holds for ``cache`` into a ``CacheConfig``.

    Accepts ``None`` (for example when the config has no ``cache``
    attribute), an existing ``CacheConfig``, a dict, or any object exposing
    ``enabled`` / ``ttl`` attributes. ``None`` yields the default instance,
    which has caching enabled. In every other case ``enabled`` is coerced
    with ``bool`` and ``ttl`` is validated, falling back to ``"5m"``.
    """
    if raw_config is None:
        return CacheConfig()

    if isinstance(raw_config, CacheConfig):
        # The ttl of an existing instance is validated as-is (no str()).
        enabled = bool(raw_config.enabled)
        ttl = _validated_cache_ttl(raw_config.ttl)
    elif isinstance(raw_config, dict):
        enabled = bool(raw_config.get("enabled", True))
        ttl = _validated_cache_ttl(str(raw_config.get("ttl", "5m")))
    else:
        enabled = bool(getattr(raw_config, "enabled", True))
        ttl = _validated_cache_ttl(str(getattr(raw_config, "ttl", "5m")))

    return CacheConfig(enabled=enabled, ttl=ttl)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _build_thinking_config(raw_config: Any) -> ThinkingConfig:
    """Turn whatever the config holds for ``thinking`` into a ``ThinkingConfig``.

    Accepts ``None``, an existing ``ThinkingConfig``, a dict, or any object
    exposing ``mode`` / ``budget_tokens`` attributes. ``None`` yields the
    default instance. The mode is always validated (unknown values fall back
    to ``"adaptive"``); ``budget_tokens`` is coerced with ``int`` except when
    copied from an existing ``ThinkingConfig``.
    """
    if raw_config is None:
        return ThinkingConfig()

    # In each branch the mode is validated before the budget is read, so an
    # invalid-mode warning is emitted even if the budget conversion fails.
    if isinstance(raw_config, ThinkingConfig):
        mode = _validated_thinking_mode(raw_config.mode)
        budget = raw_config.budget_tokens
    elif isinstance(raw_config, dict):
        mode = _validated_thinking_mode(str(raw_config.get("mode", "adaptive")))
        budget = int(raw_config.get("budget_tokens", 10000))
    else:
        mode = _validated_thinking_mode(str(getattr(raw_config, "mode", "adaptive")))
        budget = int(getattr(raw_config, "budget_tokens", 10000))

    return ThinkingConfig(mode=mode, budget_tokens=budget)
|