kolega-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kolega_code/__init__.py +151 -0
- kolega_code/agent/__init__.py +42 -0
- kolega_code/agent/baseagent.py +998 -0
- kolega_code/agent/browseragent.py +123 -0
- kolega_code/agent/coder.py +157 -0
- kolega_code/agent/common.py +41 -0
- kolega_code/agent/compression.py +81 -0
- kolega_code/agent/context.py +112 -0
- kolega_code/agent/conversation.py +408 -0
- kolega_code/agent/generalagent.py +146 -0
- kolega_code/agent/investigationagent.py +123 -0
- kolega_code/agent/planningagent.py +187 -0
- kolega_code/agent/prompt_provider.py +196 -0
- kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
- kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
- kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
- kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
- kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
- kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
- kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
- kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
- kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
- kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
- kolega_code/agent/prompts.py +192 -0
- kolega_code/agent/tests/__init__.py +0 -0
- kolega_code/agent/tests/llm/__init__.py +0 -0
- kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
- kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
- kolega_code/agent/tests/llm/test_client.py +773 -0
- kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
- kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
- kolega_code/agent/tests/llm/test_exceptions.py +249 -0
- kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
- kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
- kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
- kolega_code/agent/tests/llm/test_model_specs.py +17 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
- kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
- kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
- kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
- kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
- kolega_code/agent/tests/services/__init__.py +1 -0
- kolega_code/agent/tests/services/test_browser.py +447 -0
- kolega_code/agent/tests/services/test_browser_parity.py +353 -0
- kolega_code/agent/tests/services/test_file_system.py +699 -0
- kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
- kolega_code/agent/tests/services/test_terminal.py +154 -0
- kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
- kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
- kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
- kolega_code/agent/tests/test_base_agent.py +1942 -0
- kolega_code/agent/tests/test_coder_attachments.py +330 -0
- kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
- kolega_code/agent/tests/test_commands.py +179 -0
- kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
- kolega_code/agent/tests/test_empty_message_handling.py +48 -0
- kolega_code/agent/tests/test_general_agent.py +242 -0
- kolega_code/agent/tests/test_html.py +320 -0
- kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
- kolega_code/agent/tests/test_planning_agent.py +227 -0
- kolega_code/agent/tests/test_prompt_provider.py +271 -0
- kolega_code/agent/tests/test_tool_registry.py +102 -0
- kolega_code/agent/tests/test_tools.py +549 -0
- kolega_code/agent/tests/tool_backend/__init__.py +0 -0
- kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
- kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
- kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
- kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
- kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
- kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
- kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
- kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
- kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
- kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
- kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
- kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
- kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
- kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
- kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
- kolega_code/agent/tool_backend/agent_tool.py +414 -0
- kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
- kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
- kolega_code/agent/tool_backend/base_tool.py +217 -0
- kolega_code/agent/tool_backend/browser_tool.py +271 -0
- kolega_code/agent/tool_backend/build_tool.py +93 -0
- kolega_code/agent/tool_backend/create_file_tool.py +52 -0
- kolega_code/agent/tool_backend/glob_tool.py +323 -0
- kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
- kolega_code/agent/tool_backend/memory_tool.py +79 -0
- kolega_code/agent/tool_backend/read_file_tool.py +119 -0
- kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
- kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
- kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
- kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
- kolega_code/agent/tool_backend/streaming_tool.py +47 -0
- kolega_code/agent/tool_backend/terminal_tool.py +643 -0
- kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
- kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
- kolega_code/agent/tools.py +1704 -0
- kolega_code/agent/utils/commands.py +94 -0
- kolega_code/cli/__init__.py +1 -0
- kolega_code/cli/app.py +2756 -0
- kolega_code/cli/config.py +280 -0
- kolega_code/cli/connection.py +49 -0
- kolega_code/cli/file_index.py +147 -0
- kolega_code/cli/main.py +564 -0
- kolega_code/cli/mentions.py +155 -0
- kolega_code/cli/messages.py +89 -0
- kolega_code/cli/provider_registry.py +96 -0
- kolega_code/cli/session_store.py +207 -0
- kolega_code/cli/settings.py +87 -0
- kolega_code/cli/skills.py +409 -0
- kolega_code/cli/slash_commands.py +108 -0
- kolega_code/cli/tests/__init__.py +1 -0
- kolega_code/cli/tests/test_app.py +4251 -0
- kolega_code/cli/tests/test_cli_config.py +171 -0
- kolega_code/cli/tests/test_connection.py +26 -0
- kolega_code/cli/tests/test_file_index.py +103 -0
- kolega_code/cli/tests/test_main.py +455 -0
- kolega_code/cli/tests/test_mentions.py +108 -0
- kolega_code/cli/tests/test_session_store.py +67 -0
- kolega_code/cli/tests/test_settings.py +62 -0
- kolega_code/cli/tests/test_skills.py +157 -0
- kolega_code/cli/tests/test_slash_commands.py +88 -0
- kolega_code/cli/theme.py +180 -0
- kolega_code/config.py +154 -0
- kolega_code/events.py +202 -0
- kolega_code/llm/client.py +300 -0
- kolega_code/llm/exceptions.py +285 -0
- kolega_code/llm/instrumented_client.py +520 -0
- kolega_code/llm/models.py +1368 -0
- kolega_code/llm/providers/__init__.py +0 -0
- kolega_code/llm/providers/anthropic.py +387 -0
- kolega_code/llm/providers/base.py +71 -0
- kolega_code/llm/providers/google.py +157 -0
- kolega_code/llm/providers/models.py +37 -0
- kolega_code/llm/providers/openai.py +363 -0
- kolega_code/llm/ratelimit.py +40 -0
- kolega_code/llm/specs.py +67 -0
- kolega_code/llm/tool_execution_ids.py +18 -0
- kolega_code/models/__init__.py +9 -0
- kolega_code/models/sandbox_terminal_state.py +47 -0
- kolega_code/runtime.py +50 -0
- kolega_code/sandbox/README.md +200 -0
- kolega_code/sandbox/__init__.py +21 -0
- kolega_code/sandbox/async_filesystem.py +475 -0
- kolega_code/sandbox/base.py +297 -0
- kolega_code/sandbox/browser.py +25 -0
- kolega_code/sandbox/event_loop.py +43 -0
- kolega_code/sandbox/filesystem.py +341 -0
- kolega_code/sandbox/local.py +118 -0
- kolega_code/sandbox/serializer.py +175 -0
- kolega_code/sandbox/terminal.py +868 -0
- kolega_code/sandbox/utils.py +216 -0
- kolega_code/services/base.py +255 -0
- kolega_code/services/browser.py +444 -0
- kolega_code/services/file_system.py +749 -0
- kolega_code/services/html.py +221 -0
- kolega_code/services/terminal.py +903 -0
- kolega_code/tools/__init__.py +22 -0
- kolega_code/tools/core.py +33 -0
- kolega_code/tools/definitions.py +81 -0
- kolega_code/tools/registry.py +73 -0
- kolega_code-0.1.0.dist-info/METADATA +157 -0
- kolega_code-0.1.0.dist-info/RECORD +171 -0
- kolega_code-0.1.0.dist-info/WHEEL +4 -0
- kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
- kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
from typing import Any, AsyncContextManager, Dict, List, Optional
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
import tiktoken
|
|
7
|
+
from openai import AsyncOpenAI, OpenAI
|
|
8
|
+
|
|
9
|
+
from ..models import ImageBlock, Message, MessageChunk, MessageHistory, ToolCall, ToolDefinition, ToolResult
|
|
10
|
+
from ..tool_execution_ids import ToolExecutionIdRegistry
|
|
11
|
+
from .base import BaseLLMProvider
|
|
12
|
+
from .models import GenerationParams, TokenCount
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class OpenAIStreamWrapper:
    """Async iterator / context manager around an OpenAI chat-completion stream.

    While chunks are consumed the wrapper accumulates the streamed text,
    tool-call fragments, the finish reason and usage figures, so that
    ``get_final_message`` can assemble the complete assistant message.
    """

    def __init__(self, openai_stream, requested_include_usage: bool = False):
        """Wrap ``openai_stream``.

        Args:
            openai_stream: Async iterator yielding OpenAI-style stream chunks.
            requested_include_usage: Whether the request asked the provider to
                include usage in the stream; only affects the wording of the
                warning logged when no usage was received.
        """
        self.openai_stream = openai_stream
        self.final_content = ""
        self.final_tool_calls = {}  # tool-call index -> accumulated tool-call object
        self.stop_reason = None
        self.usage_data = None
        self.tool_execution_ids = ToolExecutionIdRegistry()

        self._closed = False
        self._requested_include_usage = requested_include_usage

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the underlying stream and mark the wrapper as exhausted.

        Prefers ``aclose()`` and falls back to ``close()`` so streams that only
        expose ``close`` are released as well. Never suppresses the exception
        that caused the exit.
        """
        try:
            closer = getattr(self.openai_stream, "aclose", None)
            if closer is None:
                closer = getattr(self.openai_stream, "close", None)
            if callable(closer):
                outcome = closer()
                # ``close`` may be a plain function or a coroutine function.
                if hasattr(outcome, "__await__"):
                    await outcome
        finally:
            self._closed = True
        return False

    def __aiter__(self):
        return self

    async def __anext__(self):
        """Return the next chunk as a ``MessageChunk``, recording its contents.

        Raises:
            StopAsyncIteration: When the wrapper was closed or the underlying
                stream is exhausted.
        """
        if self._closed:
            raise StopAsyncIteration

        # StopAsyncIteration from the underlying stream propagates unchanged.
        chunk = await self.openai_stream.__anext__()

        # Some providers emit usage-only events with no choices; guard accesses.
        choices = getattr(chunk, "choices", None)
        if choices:
            self._record_choice(choices[0])

        # Capture usage data (normally carried by the final chunk).
        usage = getattr(chunk, "usage", None)
        if usage:
            self._record_usage(usage)

        if choices:
            return MessageChunk.from_openai(chunk)
        # Events without choices carry nothing to display.
        return MessageChunk(type="ignore", text="")

    def _record_choice(self, choice):
        """Fold one streamed choice into the accumulated content, tool calls and stop reason."""
        delta = getattr(choice, "delta", None)
        if delta is not None:
            text = getattr(delta, "content", None)
            if text:
                self.final_content += text

            for tool_call in getattr(delta, "tool_calls", None) or []:
                self._record_tool_call(tool_call)

        # Keep the last non-null finish reason; intermediate chunks report None.
        finish_reason = getattr(choice, "finish_reason", None)
        if finish_reason is not None:
            self.stop_reason = finish_reason

    def _record_tool_call(self, tool_call):
        """Store the first fragment for an index and append later argument fragments to it."""
        index = tool_call.index
        if index not in self.final_tool_calls:
            # The first fragment becomes the accumulator object for this index.
            self.final_tool_calls[index] = tool_call
            return

        stored = self.final_tool_calls[index]
        function = getattr(tool_call, "function", None)
        fragment = getattr(function, "arguments", None)
        if not fragment:
            return

        if getattr(stored, "function", None) is None:
            stored.function = function
            return

        # Always append: a fragment that happens to equal the text accumulated
        # so far is still new data and must not be dropped.
        stored.function.arguments = (stored.function.arguments or "") + fragment

    def _record_usage(self, usage):
        """Copy token usage (and cached prompt tokens, when reported) into ``usage_data``."""
        self.usage_data = {
            "prompt_tokens": usage.prompt_tokens,
            "completion_tokens": usage.completion_tokens,
            "total_tokens": usage.total_tokens,
        }

        # Capture cached prompt tokens if available (e.g., DashScope/Qwen).
        details = getattr(usage, "prompt_tokens_details", None)
        if details is None:
            return
        cached = getattr(details, "cached_tokens", None)
        if cached is None and isinstance(details, dict):
            cached = details.get("cached_tokens")
        if cached is not None:
            self.usage_data["cache_read_input_tokens"] = cached

    async def get_final_message(self):
        """Build the assistant ``Message`` from everything accumulated so far.

        Logs a warning when no usage figures were captured from the stream.
        """
        message = Message.from_openai_stream(
            role="assistant",
            content=self.final_content,
            tool_calls=self.final_tool_calls,
            stop_reason=self.stop_reason,
            tool_execution_ids=self.tool_execution_ids,
        )

        if self.usage_data:
            message.usage_metadata.update(self.usage_data)
            return message

        logger = logging.getLogger(__name__)
        if self._requested_include_usage:
            logger.warning(
                "OpenAIStreamWrapper: include_usage requested but provider emitted no usage; billing may be skipped"
            )
        else:
            logger.warning(
                "OpenAIStreamWrapper: no usage metadata captured from streaming response; billing may be skipped"
            )

        return message
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class OpenAIProvider(BaseLLMProvider):
    """LLM provider that talks to the OpenAI chat-completions API.

    ``base_url`` is forwarded to the OpenAI clients, so an alternative endpoint
    can be targeted with the same code path.
    """

    # Models for which ``max_tokens`` is sent as ``max_completion_tokens`` instead.
    models_max_completion_tokens = ["o3-mini", "o3", "o3-2025-04-16", "o4-mini"]

    def __init__(
        self,
        api_key: str,
        max_retries: int = 3,
        requests_per_minute: Optional[int] = None,
        tokens_per_minute: Optional[int] = None,
        base_url: Optional[str] = None,
    ):
        """Create the async and sync OpenAI clients for ``api_key`` / ``base_url``."""
        super().__init__(api_key, max_retries, requests_per_minute, tokens_per_minute, base_url)
        self.async_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
        self.sync_client = OpenAI(api_key=api_key, base_url=base_url)

    @property
    def retry_decorator(self):
        """Get retry decorator with configured max retries"""
        return self.get_retry_decorator()

    def _prepare_generation_params(self, params: Optional[GenerationParams] = None) -> Dict[str, Any]:
        """Convert common parameters to provider-specific format.

        Always returns a dict containing ``"model"`` (default ``"gpt-4o"``);
        temperature, max tokens, tools and thinking options are added only when
        set on ``params``.
        """
        generation_params: Dict[str, Any] = {
            "model": "gpt-4o",  # Default model
        }

        if params:
            if params.temperature is not None:
                generation_params["temperature"] = params.temperature
            if params.max_completion_tokens is not None:
                generation_params["max_tokens"] = params.max_completion_tokens
            if params.tools:
                generation_params["tools"] = [t.to_openai() for t in params.tools]
            if params.thinking:
                generation_params.update(self._prepare_thinking_params(params.thinking))

        return generation_params

    async def count_tokens(
        self,
        messages: MessageHistory,
        system: Optional[Message] = None,
        model: Optional[str] = None,
        tools: Optional[List[ToolDefinition]] = None,
        **kwargs,
    ) -> TokenCount:
        """Estimate the input token count for a conversation using tiktoken.

        Counts, per message, a fixed formatting overhead, the role, and the
        content (text blocks, image estimates, tool calls and tool results).
        Tool definitions are counted from their JSON form and scaled.

        Args:
            messages: Messages to count tokens for.
            system: Optional system message, counted before ``messages``.
            model: Accepted for interface compatibility; the ``cl100k_base``
                encoding is always used regardless of this value.
            tools: Optional tool definitions.

        Returns:
            TokenCount object with input token count
        """
        encoding = tiktoken.get_encoding("cl100k_base")
        num_tokens = 0

        # Combine system message with messages if provided
        all_messages = ([system] + list(messages)) if system else list(messages)

        for message in all_messages:
            # Every message follows <im_start>{role/name}\n{content}<im_end>\n format
            num_tokens += 4

            role = getattr(message, "role", None)
            if role:
                num_tokens += len(encoding.encode(role))

            content = getattr(message, "content", None)
            if isinstance(content, str):
                num_tokens += len(encoding.encode(content))
            elif isinstance(content, list):
                for item in content:
                    num_tokens += self._count_content_item_tokens(item, encoding)

        if tools:
            for tool in tools:
                json_tokens = len(encoding.encode(json.dumps(tool.to_openai())))
                # OpenAI uses highly optimized internal format (not JSON).
                # Empirically, their token count is ~79% of raw JSON token count,
                # so apply that scaling factor to match API behavior.
                num_tokens += int(json_tokens * 0.79)

        return TokenCount(input_tokens=num_tokens)

    def _count_content_item_tokens(self, item, encoding) -> int:
        """Return the estimated token count of one content-list item.

        The checks run in a fixed order: non-empty ``text`` attribute, dict
        with a ``"text"`` key, image block, tool call, tool result. Items
        matching none of these contribute zero.
        """
        text = getattr(item, "text", None)
        if text:
            return len(encoding.encode(text))
        if isinstance(item, dict) and "text" in item:
            return len(encoding.encode(item["text"]))

        # Image blocks, by type or by shape (``data`` + ``media_type``).
        if isinstance(item, ImageBlock) or (hasattr(item, "data") and hasattr(item, "media_type")):
            return self._estimate_image_tokens(len(item.data))

        if isinstance(item, ToolCall):
            # +2: minimal formatting overhead for tool calls
            return len(encoding.encode(json.dumps(item.to_openai()))) + 2

        if isinstance(item, ToolResult):
            count = 2  # minimal formatting overhead for tool results
            if isinstance(item.content, str):
                count += len(encoding.encode(item.content))
            elif isinstance(item.content, list):
                # Only text entries of a tool result are counted.
                for result_item in item.content:
                    result_text = getattr(result_item, "text", None)
                    if result_text:
                        count += len(encoding.encode(result_text))
            return count

        return 0

    def _estimate_image_tokens(self, base64_data_length: int) -> int:
        """Estimate image token cost based on base64 data length.

        OpenAI charges for images based on their dimensions after resizing.
        Since we don't decode images (performance), we estimate based on data size.

        Uses same formula as Anthropic for consistency:
        tokens ≈ 20 + sqrt(base64_length * 6)

        Examples (exact results of the formula):
        - 96 base64 chars: 44 tokens
        - 60,000 base64 chars: 620 tokens
        - 1,500,000 base64 chars: 3020 tokens

        Args:
            base64_data_length: Length of base64 encoded image data

        Returns:
            Estimated token count for the image
        """
        # Base cost of 20 tokens + square-root scaling across image sizes
        return 20 + int(math.sqrt(base64_data_length * 6))

    def _apply_token_limit_param(self, generation_params: Dict[str, Any]) -> None:
        """Rename ``max_tokens`` to ``max_completion_tokens`` for models listed in ``models_max_completion_tokens``."""
        if generation_params["model"] in self.models_max_completion_tokens and "max_tokens" in generation_params:
            generation_params["max_completion_tokens"] = generation_params.pop("max_tokens")

    @staticmethod
    def _usage_metadata(usage) -> Dict[str, Any]:
        """Extract token usage (plus cached prompt tokens, when reported) from a response usage object."""
        metadata = {
            "prompt_tokens": usage.prompt_tokens,
            "completion_tokens": usage.completion_tokens,
            "total_tokens": usage.total_tokens,
        }
        # Capture cached prompt tokens if available (e.g., DashScope/Qwen)
        details = getattr(usage, "prompt_tokens_details", None)
        if details is not None:
            cached = getattr(details, "cached_tokens", None)
            if cached is None and isinstance(details, dict):
                cached = details.get("cached_tokens")
            if cached is not None:
                metadata["cache_read_input_tokens"] = cached
        return metadata

    async def stream(
        self,
        messages: MessageHistory,
        system: Optional[Message] = None,
        params: Optional[GenerationParams] = None,
        **kwargs,
    ) -> AsyncContextManager:
        """Generate a streaming response from OpenAI.

        Extra ``kwargs`` override the prepared generation parameters. The
        request always sets ``stream=True`` and asks for usage in the stream
        via ``stream_options["include_usage"]``.

        Returns:
            An ``OpenAIStreamWrapper`` around the provider stream.
        """
        generation_params = self._prepare_generation_params(params)
        generation_params.update(kwargs)
        generation_params["stream"] = True

        # Ask provider to include usage in the final stream chunk when supported.
        # Work on a copy so a caller-supplied ``stream_options`` dict is not mutated.
        include_usage_requested = True
        try:
            stream_options = dict(generation_params.get("stream_options") or {})
        except (TypeError, ValueError):
            # Best-effort: leave a non-mapping ``stream_options`` value untouched.
            include_usage_requested = False
        else:
            stream_options["include_usage"] = True
            generation_params["stream_options"] = stream_options

        # Swap max_tokens for max_completion_tokens for o3-mini, etc.
        self._apply_token_limit_param(generation_params)

        # Combine system message with messages if provided
        if system:
            messages = MessageHistory([system] + messages)

        await self.rate_limiter.acquire()

        return OpenAIStreamWrapper(
            await self.async_client.chat.completions.create(messages=messages.to_openai(), **generation_params),
            requested_include_usage=include_usage_requested,
        )

    async def generate(
        self,
        messages: MessageHistory,
        system: Optional[Message] = None,
        params: Optional[GenerationParams] = None,
        **kwargs,
    ) -> Message:
        """Generate a complete (non-streaming) response from OpenAI.

        Extra ``kwargs`` override the prepared generation parameters. Usage
        figures from the response are copied into ``message.usage_metadata``;
        a warning is logged when the response carries none.

        Returns:
            The ``Message`` built from the first choice of the response.
        """
        generation_params = self._prepare_generation_params(params)
        generation_params.update(kwargs)

        # Swap max_tokens for max_completion_tokens for o3-mini, etc.
        self._apply_token_limit_param(generation_params)

        # Combine system message with messages if provided
        if system:
            messages = MessageHistory([system] + messages)

        await self.rate_limiter.acquire()
        response = await self.async_client.chat.completions.create(messages=messages.to_openai(), **generation_params)

        message = Message.from_openai(response.choices[0].message)

        usage = getattr(response, "usage", None)
        if usage:
            message.usage_metadata.update(self._usage_metadata(usage))
        else:
            logging.getLogger(__name__).warning(
                "OpenAIProvider.generate: response contains no usage metadata; billing may be skipped"
            )

        return message
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import time
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class RateLimiter:
    """Rate limiter for API requests and tokens.

    Keeps a 60-second sliding window of request timestamps and of token
    timestamps (one entry per token). ``acquire`` waits, while holding the
    lock, until the oldest entry of a full window expires.
    """

    def __init__(self, requests_per_minute: Optional[int] = None, tokens_per_minute: Optional[int] = None):
        """Configure the limits; ``None`` (or 0) disables the corresponding limit."""
        self.requests_per_minute = requests_per_minute
        self.tokens_per_minute = tokens_per_minute
        self.request_timestamps: List[float] = []
        self.token_timestamps: List[float] = []
        self.lock = asyncio.Lock()

    def _prune(self, now: float) -> None:
        """Drop timestamps that are 60 seconds old or older relative to ``now``."""
        self.request_timestamps = [ts for ts in self.request_timestamps if now - ts < 60]
        self.token_timestamps = [ts for ts in self.token_timestamps if now - ts < 60]

    async def _wait_for_oldest(self, timestamps: List[float], now: float) -> float:
        """Sleep until the oldest entry of ``timestamps`` leaves the window.

        Returns the current time to use for bookkeeping: refreshed (and the
        windows re-pruned) if a sleep happened, otherwise ``now`` unchanged.
        """
        wait_time = 60 - (now - timestamps[0])
        if wait_time <= 0:
            return now
        await asyncio.sleep(wait_time)
        # The clock moved while sleeping: record the real request time and
        # forget entries that expired in the meantime.
        now = time.time()
        self._prune(now)
        return now

    async def acquire(self, tokens: Optional[int] = None):
        """Acquire permission to make a request.

        Args:
            tokens: Number of tokens the request will consume. The token limit
                is only checked and recorded when this is truthy; the check
                blocks when the window already holds ``tokens_per_minute`` or
                more entries and does not consider the size of this request.
        """
        async with self.lock:
            now = time.time()
            self._prune(now)

            # Check request rate limit
            if self.requests_per_minute and len(self.request_timestamps) >= self.requests_per_minute:
                now = await self._wait_for_oldest(self.request_timestamps, now)

            # Check token rate limit
            if self.tokens_per_minute and tokens and len(self.token_timestamps) >= self.tokens_per_minute:
                now = await self._wait_for_oldest(self.token_timestamps, now)

            # Record new timestamps
            self.request_timestamps.append(now)
            if tokens:
                self.token_timestamps.extend([now] * tokens)
|
kolega_code/llm/specs.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from typing import Dict, Tuple
|
|
2
|
+
|
|
3
|
+
# Dictionary mapping (provider, model_name) to model specifications
|
|
4
|
+
# Each entry contains context_length (maximum input tokens), max_completion_tokens, default_temperature,
|
|
5
|
+
# and optional model capability flags.
|
|
6
|
+
MODEL_SPECS: Dict[Tuple[str, str], Dict[str, int | float | bool]] = {
|
|
7
|
+
# Anthropic models
|
|
8
|
+
("anthropic", "claude-opus-4-7"): {
|
|
9
|
+
"context_length": 1000000,
|
|
10
|
+
"max_completion_tokens": 128000,
|
|
11
|
+
"default_temperature": 1.0,
|
|
12
|
+
"supports_temperature": False,
|
|
13
|
+
},
|
|
14
|
+
("anthropic", "claude-sonnet-4-6"): {"context_length": 1000000, "max_completion_tokens": 64000, "default_temperature": 1.0},
|
|
15
|
+
("anthropic", "claude-3-7-sonnet-20250219"): {"context_length": 200000, "max_completion_tokens": 16384, "default_temperature": 1.0},
|
|
16
|
+
("anthropic", "claude-3-haiku-20240307"): {"context_length": 200000, "max_completion_tokens": 4096, "default_temperature": 1.0},
|
|
17
|
+
("anthropic", "claude-3-5-sonnet-20241022"): {"context_length": 200000, "max_completion_tokens": 8192, "default_temperature": 1.0},
|
|
18
|
+
("anthropic", "claude-opus-4-20250514"): {"context_length": 200000, "max_completion_tokens": 16384, "default_temperature": 1.0},
|
|
19
|
+
("anthropic", "claude-sonnet-4-20250514"): {"context_length": 200000, "max_completion_tokens": 16384, "default_temperature": 1.0},
|
|
20
|
+
("anthropic", "claude-sonnet-4-5-20250929"): {"context_length": 200000, "max_completion_tokens": 16384, "default_temperature": 1.0},
|
|
21
|
+
("anthropic", "claude-opus-4-5-20251101"): {"context_length": 200000, "max_completion_tokens": 16384, "default_temperature": 1.0},
|
|
22
|
+
("anthropic", "claude-haiku-4-5-20251001"): {"context_length": 200000, "max_completion_tokens": 16384, "default_temperature": 1.0},
|
|
23
|
+
# Moonshot models
|
|
24
|
+
("moonshot", "kimi-k2.6"): {"context_length": 262144, "max_completion_tokens": 32768, "default_temperature": 1.0},
|
|
25
|
+
# DeepSeek models
|
|
26
|
+
("deepseek", "deepseek-v4-pro"): {"context_length": 1000000, "max_completion_tokens": 384000, "default_temperature": 1.0},
|
|
27
|
+
# OpenAI models
|
|
28
|
+
("openai", "gpt-4o"): {"context_length": 128000, "max_completion_tokens": 4096, "default_temperature": 1.0},
|
|
29
|
+
("openai", "o3-mini"): {"context_length": 200000, "max_completion_tokens": 16384, "default_temperature": 1.0},
|
|
30
|
+
("openai", "gpt-4.1-2025-04-14"): {"context_length": 1000000, "max_completion_tokens": 32768, "default_temperature": 1.0},
|
|
31
|
+
("openai", "gpt-4.1-mini"): {"context_length": 1000000, "max_completion_tokens": 32768, "default_temperature": 1.0},
|
|
32
|
+
("openai", "o3-2025-04-16"): {"context_length": 200000, "max_completion_tokens": 100000, "default_temperature": 1.0},
|
|
33
|
+
("openai", "o3"): {"context_length": 200000, "max_completion_tokens": 100000, "default_temperature": 1.0},
|
|
34
|
+
("openai", "o4-mini"): {"context_length": 200000, "max_completion_tokens": 100000, "default_temperature": 1.0},
|
|
35
|
+
# Together.ai models
|
|
36
|
+
("together", "deepseek-ai/DeepSeek-R1"): {"context_length": 64000, "max_completion_tokens": 8000, "default_temperature": 1.0},
|
|
37
|
+
# Google models
|
|
38
|
+
("google", "gemini-2.0-flash"): {"context_length": 1000000, "max_completion_tokens": 8192, "default_temperature": 1.0},
|
|
39
|
+
("google", "gemini-2.5-pro-exp-03-25"): {"context_length": 1000000, "max_completion_tokens": 65536, "default_temperature": 1.0},
|
|
40
|
+
("google", "gemini-2.5-pro"): {"context_length": 1000000, "max_completion_tokens": 65536, "default_temperature": 1.0},
|
|
41
|
+
# X.ai models
|
|
42
|
+
("xai", "grok-3-beta"): {"context_length": 128000, "max_completion_tokens": 16384, "default_temperature": 1.0},
|
|
43
|
+
# Fireworks models
|
|
44
|
+
("fireworks", "accounts/fireworks/models/glm-4p5"): {"context_length": 128000, "max_completion_tokens": 16384, "default_temperature": 0.6},
|
|
45
|
+
("dashscope", "qwen3-coder-plus"): {"context_length": 1000000, "max_completion_tokens": 16384, "default_temperature": 0.7},
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_model_specs(provider: str, model_name: str) -> Dict[str, int | float | bool]:
|
|
50
|
+
"""
|
|
51
|
+
Get the specifications for a given model.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
provider: The LLM provider (e.g., 'anthropic', 'openai') - can be string or enum
|
|
55
|
+
model_name: The name of the model
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
Dictionary containing context_length, max_completion_tokens, and default_temperature
|
|
59
|
+
"""
|
|
60
|
+
# Handle both string and enum provider types
|
|
61
|
+
provider_str = provider.value if hasattr(provider, "value") else provider
|
|
62
|
+
key = (provider_str, model_name)
|
|
63
|
+
|
|
64
|
+
if key not in MODEL_SPECS:
|
|
65
|
+
raise ValueError(f"Model {model_name} from provider {provider_str} is not supported.")
|
|
66
|
+
|
|
67
|
+
return MODEL_SPECS.get(key)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def new_tool_execution_id() -> str:
    """Return a fresh app-level identifier for a single tool execution.

    The identifier is the literal prefix ``tool_exec_`` followed by the
    hexadecimal form of a random UUID4.
    """
    random_hex = uuid.uuid4().hex
    return "tool_exec_" + random_hex
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ToolExecutionIdRegistry:
    """Response-scoped mapping from provider tool call IDs to app execution IDs.

    Each provider tool call ID is assigned an app execution ID the first
    time it is seen; later lookups for the same provider ID return the
    same execution ID.
    """

    def __init__(self) -> None:
        """Start with no known provider tool call IDs."""
        self._by_provider_tool_call_id: dict[str, str] = {}

    def get_or_create(self, provider_tool_call_id: str) -> str:
        """Return the execution ID for ``provider_tool_call_id``.

        Args:
            provider_tool_call_id: Identifier the provider assigned to the
                tool call.

        Returns:
            The execution ID already recorded for this provider ID, or a
            newly generated one (which is recorded before returning).
        """
        known = self._by_provider_tool_call_id
        execution_id = known.get(provider_tool_call_id)
        if execution_id is None:
            # First sighting of this provider ID: mint and remember an ID.
            execution_id = new_tool_execution_id()
            known[provider_tool_call_id] = execution_id
        return execution_id
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Sandbox terminal state model for persisting terminal sessions."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from typing import Any, Dict, List, Optional
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TerminalInfo(BaseModel):
    """Descriptor for a single terminal.

    ``terminal_id``, ``created_at`` and ``cwd`` are required; the
    environment mapping defaults to an empty dict and the last-command
    fields default to empty strings.
    """

    terminal_id: str = Field(
        ...,
        description="Terminal identifier",
    )
    created_at: datetime = Field(
        ...,
        description="When the terminal was created",
    )
    cwd: str = Field(
        ...,
        description="Working directory of the terminal",
    )
    # default_factory gives every instance its own dict.
    env: Dict[str, str] = Field(
        default_factory=dict,
        description="Environment variables",
    )
    last_command: str = Field(
        default="",
        description="Last command executed",
    )
    last_command_purpose: str = Field(
        default="",
        description="Purpose of last command",
    )
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TerminalOutput(BaseModel):
    """One recorded output entry belonging to a terminal.

    ``type``, ``data`` and ``timestamp`` are required; ``purpose`` and
    ``exit_code`` are optional and default to ``None``.
    """

    type: str = Field(
        ...,
        description="Type of output: command, stdout, stderr, exit",
    )
    data: str = Field(
        ...,
        description="Output data",
    )
    timestamp: datetime = Field(
        ...,
        description="When the output was generated",
    )
    purpose: Optional[str] = Field(
        default=None,
        description="Purpose for commands",
    )
    exit_code: Optional[int] = Field(
        default=None,
        description="Exit code for exit type",
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SandboxTerminalState(BaseModel):
    """Persistable snapshot of the terminals belonging to a sandbox.

    ``workspace_id`` and ``sandbox_id`` are required. ``id`` defaults to a
    random UUID4 string, and both timestamps default to the current UTC
    time at construction.
    """

    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
    )

    id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique identifier",
    )
    workspace_id: str = Field(
        ...,
        description="Associated workspace ID",
    )
    sandbox_id: str = Field(
        ...,
        description="Associated sandbox ID",
    )

    # Both mappings are keyed by string; presumably the terminal ID —
    # verify against the code that populates them.
    terminals: Dict[str, TerminalInfo] = Field(default_factory=dict)
    outputs: Dict[str, List[TerminalOutput]] = Field(default_factory=dict)
    default_terminal_id: Optional[str] = None

    # Timezone-aware UTC timestamps, evaluated per instance.
    created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))

    total_output_size: int = 0

    # NOTE(review): these are declared as ordinary annotated attributes, so
    # they are model fields rather than class-level constants — confirm this
    # is intended. Presumably size caps for stored output; the enforcement
    # logic is not in this class.
    MAX_OUTPUT_SIZE: int = 1024 * 1024  # 1048576
    MAX_OUTPUT_PER_TERMINAL: int = 256 * 1024  # 262144
|
|
47
|
+
|
kolega_code/runtime.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Runtime adapter registry for host-provided services.
|
|
2
|
+
|
|
3
|
+
The shared agent package intentionally does not own product databases,
|
|
4
|
+
background job runners, or app-specific MCP/environment services. Host
|
|
5
|
+
applications can register those objects directly.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
# Module-level table of host-provided adapters keyed by adapter name;
# written by register_runtime_adapter and read by get_runtime_adapter.
_registry: dict[str, Any] = {}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RuntimeAdapterError(RuntimeError):
    """Error for a host service that shared agent code needs but was never registered.

    Subclasses ``RuntimeError`` so existing handlers for that type still
    catch it.
    """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def register_runtime_adapter(name: str, value: Any) -> None:
    """Store a host-provided runtime dependency under ``name``.

    Args:
        name: Key under which the adapter is stored.
        value: The host object to store. Registering the same name again
            replaces the previous object.
    """
    _registry[name] = value
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_runtime_adapter(name: str) -> Any:
    """Fetch the host adapter registered under ``name``.

    Args:
        name: Key the adapter was registered with.

    Returns:
        The object previously stored for ``name``.

    Raises:
        RuntimeAdapterError: If nothing has been registered under ``name``.
    """
    if name not in _registry:
        raise RuntimeAdapterError(
            f"Host runtime adapter '{name}' is not registered. "
            "Call kolega_code.runtime.register_runtime_adapter during app startup."
        )

    return _registry[name]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class RuntimeProxy:
    """Stand-in that resolves a host-provided object on each use.

    Only the adapter name is stored. The adapter itself is looked up via
    ``get_runtime_adapter`` whenever a public attribute is read or the
    proxy is called, so the proxy may be created before the adapter is
    registered.
    """

    def __init__(self, name: str) -> None:
        """Remember the adapter name to resolve later."""
        self._name = name

    def _target(self) -> Any:
        """Resolve and return the currently registered adapter."""
        return get_runtime_adapter(self._name)

    def __getattr__(self, item: str) -> Any:
        """Forward public attribute reads to the resolved adapter.

        Names starting with an underscore are never forwarded; they raise
        ``AttributeError`` without touching the registry.
        """
        if not item.startswith("_"):
            return getattr(self._target(), item)
        raise AttributeError(item)

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Call the resolved adapter with the given arguments."""
        adapter = self._target()
        return adapter(*args, **kwargs)
|