connectonion 0.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- connectonion/__init__.py +78 -0
- connectonion/address.py +320 -0
- connectonion/agent.py +450 -0
- connectonion/announce.py +84 -0
- connectonion/asgi.py +287 -0
- connectonion/auto_debug_exception.py +181 -0
- connectonion/cli/__init__.py +3 -0
- connectonion/cli/browser_agent/__init__.py +5 -0
- connectonion/cli/browser_agent/browser.py +243 -0
- connectonion/cli/browser_agent/prompt.md +107 -0
- connectonion/cli/commands/__init__.py +1 -0
- connectonion/cli/commands/auth_commands.py +527 -0
- connectonion/cli/commands/browser_commands.py +27 -0
- connectonion/cli/commands/create.py +511 -0
- connectonion/cli/commands/deploy_commands.py +220 -0
- connectonion/cli/commands/doctor_commands.py +173 -0
- connectonion/cli/commands/init.py +469 -0
- connectonion/cli/commands/project_cmd_lib.py +828 -0
- connectonion/cli/commands/reset_commands.py +149 -0
- connectonion/cli/commands/status_commands.py +168 -0
- connectonion/cli/docs/co-vibecoding-principles-docs-contexts-all-in-one.md +2010 -0
- connectonion/cli/docs/connectonion.md +1256 -0
- connectonion/cli/docs.md +123 -0
- connectonion/cli/main.py +148 -0
- connectonion/cli/templates/meta-agent/README.md +287 -0
- connectonion/cli/templates/meta-agent/agent.py +196 -0
- connectonion/cli/templates/meta-agent/prompts/answer_prompt.md +9 -0
- connectonion/cli/templates/meta-agent/prompts/docs_retrieve_prompt.md +15 -0
- connectonion/cli/templates/meta-agent/prompts/metagent.md +71 -0
- connectonion/cli/templates/meta-agent/prompts/think_prompt.md +18 -0
- connectonion/cli/templates/minimal/README.md +56 -0
- connectonion/cli/templates/minimal/agent.py +40 -0
- connectonion/cli/templates/playwright/README.md +118 -0
- connectonion/cli/templates/playwright/agent.py +336 -0
- connectonion/cli/templates/playwright/prompt.md +102 -0
- connectonion/cli/templates/playwright/requirements.txt +3 -0
- connectonion/cli/templates/web-research/agent.py +122 -0
- connectonion/connect.py +128 -0
- connectonion/console.py +539 -0
- connectonion/debug_agent/__init__.py +13 -0
- connectonion/debug_agent/agent.py +45 -0
- connectonion/debug_agent/prompts/debug_assistant.md +72 -0
- connectonion/debug_agent/runtime_inspector.py +406 -0
- connectonion/debug_explainer/__init__.py +10 -0
- connectonion/debug_explainer/explain_agent.py +114 -0
- connectonion/debug_explainer/explain_context.py +263 -0
- connectonion/debug_explainer/explainer_prompt.md +29 -0
- connectonion/debug_explainer/root_cause_analysis_prompt.md +43 -0
- connectonion/debugger_ui.py +1039 -0
- connectonion/decorators.py +208 -0
- connectonion/events.py +248 -0
- connectonion/execution_analyzer/__init__.py +9 -0
- connectonion/execution_analyzer/execution_analysis.py +93 -0
- connectonion/execution_analyzer/execution_analysis_prompt.md +47 -0
- connectonion/host.py +579 -0
- connectonion/interactive_debugger.py +342 -0
- connectonion/llm.py +801 -0
- connectonion/llm_do.py +307 -0
- connectonion/logger.py +300 -0
- connectonion/prompt_files/__init__.py +1 -0
- connectonion/prompt_files/analyze_contact.md +62 -0
- connectonion/prompt_files/eval_expected.md +12 -0
- connectonion/prompt_files/react_evaluate.md +11 -0
- connectonion/prompt_files/react_plan.md +16 -0
- connectonion/prompt_files/reflect.md +22 -0
- connectonion/prompts.py +144 -0
- connectonion/relay.py +200 -0
- connectonion/static/docs.html +688 -0
- connectonion/tool_executor.py +279 -0
- connectonion/tool_factory.py +186 -0
- connectonion/tool_registry.py +105 -0
- connectonion/trust.py +166 -0
- connectonion/trust_agents.py +71 -0
- connectonion/trust_functions.py +88 -0
- connectonion/tui/__init__.py +57 -0
- connectonion/tui/divider.py +39 -0
- connectonion/tui/dropdown.py +251 -0
- connectonion/tui/footer.py +31 -0
- connectonion/tui/fuzzy.py +56 -0
- connectonion/tui/input.py +278 -0
- connectonion/tui/keys.py +35 -0
- connectonion/tui/pick.py +130 -0
- connectonion/tui/providers.py +155 -0
- connectonion/tui/status_bar.py +163 -0
- connectonion/usage.py +161 -0
- connectonion/useful_events_handlers/__init__.py +16 -0
- connectonion/useful_events_handlers/reflect.py +116 -0
- connectonion/useful_plugins/__init__.py +20 -0
- connectonion/useful_plugins/calendar_plugin.py +163 -0
- connectonion/useful_plugins/eval.py +139 -0
- connectonion/useful_plugins/gmail_plugin.py +162 -0
- connectonion/useful_plugins/image_result_formatter.py +127 -0
- connectonion/useful_plugins/re_act.py +78 -0
- connectonion/useful_plugins/shell_approval.py +159 -0
- connectonion/useful_tools/__init__.py +44 -0
- connectonion/useful_tools/diff_writer.py +192 -0
- connectonion/useful_tools/get_emails.py +183 -0
- connectonion/useful_tools/gmail.py +1596 -0
- connectonion/useful_tools/google_calendar.py +613 -0
- connectonion/useful_tools/memory.py +380 -0
- connectonion/useful_tools/microsoft_calendar.py +604 -0
- connectonion/useful_tools/outlook.py +488 -0
- connectonion/useful_tools/send_email.py +205 -0
- connectonion/useful_tools/shell.py +97 -0
- connectonion/useful_tools/slash_command.py +201 -0
- connectonion/useful_tools/terminal.py +285 -0
- connectonion/useful_tools/todo_list.py +241 -0
- connectonion/useful_tools/web_fetch.py +216 -0
- connectonion/xray.py +467 -0
- connectonion-0.5.8.dist-info/METADATA +741 -0
- connectonion-0.5.8.dist-info/RECORD +113 -0
- connectonion-0.5.8.dist-info/WHEEL +4 -0
- connectonion-0.5.8.dist-info/entry_points.txt +3 -0
connectonion/llm.py
ADDED
@@ -0,0 +1,801 @@

"""
Purpose: Unified LLM provider abstraction with factory pattern for OpenAI, Anthropic, Gemini, and OpenOnion
LLM-Note:
  Dependencies: imports from [abc, typing, dataclasses, json, os, openai, anthropic, google.generativeai, requests, pathlib, toml, pydantic] | imported by [agent.py, llm_do.py, conftest.py] | tested by [tests/test_llm.py, tests/test_llm_do.py, tests/test_real_*.py]
  Data flow: Agent/llm_do calls create_llm(model, api_key) → factory routes to provider class → Provider.__init__() validates API key → Agent calls complete(messages, tools) OR structured_complete(messages, output_schema) → provider converts to native format → calls API → parses response → returns LLMResponse(content, tool_calls, raw_response) OR Pydantic model instance
  State/Effects: reads environment variables (OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENONION_API_KEY) | reads ~/.connectonion/.co/config.toml for OpenOnion auth | makes HTTP requests to LLM APIs | no caching or persistence
  Integration: exposes create_llm(model, api_key), LLM abstract base class, OpenAILLM, AnthropicLLM, GeminiLLM, OpenOnionLLM, LLMResponse, ToolCall dataclasses | providers implement complete() and structured_complete() | OpenAI message format is lingua franca | tool calling uses OpenAI schema converted per-provider
  Performance: stateless (no caching) | synchronous (no streaming) | default max_tokens=8192 for Anthropic (required) | each call hits API
  Errors: raises ValueError for missing API keys, unknown models, invalid parameters | provider-specific errors bubble up (openai.APIError, anthropic.APIError, etc.) | Pydantic ValidationError for invalid structured output

Unified LLM provider abstraction layer for ConnectOnion framework.

This module provides a consistent interface for interacting with multiple LLM providers
(OpenAI, Anthropic, Google Gemini, and ConnectOnion managed keys) through a common API.

Architecture Overview
--------------------
The module follows a factory pattern with provider-specific implementations:

1. **Abstract Base Class (LLM)**:
   - Defines the contract all providers must implement
   - Two core methods: complete() for text, structured_complete() for Pydantic models
   - Ensures consistent interface across all providers

2. **Provider Implementations**:
   - OpenAILLM: Native OpenAI API with responses.parse() for structured output
   - AnthropicLLM: Claude API with tool calling workaround for structured output
   - GeminiLLM: Google Gemini with response_schema for structured output
   - OpenOnionLLM: Managed keys using OpenAI-compatible proxy endpoint

3. **Factory Function (create_llm)**:
   - Routes model names to appropriate providers
   - Handles API key initialization
   - Returns configured provider instance

Key Design Decisions
-------------------
- **Structured Output**: Each provider uses its native structured output API when available
  * OpenAI: responses.parse() with text_format parameter
  * Anthropic: Forced tool calling with schema validation
  * Gemini: response_schema with JSON MIME type
  * OpenOnion: Proxies to OpenAI with fallback

- **Tool Calling**: OpenAI format used as the common schema, converted per-provider
  * All providers return ToolCall dataclasses with (name, arguments, id)
  * Enables consistent agent behavior across providers

- **Message Format**: OpenAI's message format (role/content) is the lingua franca
  * Providers convert to their native format internally
  * Simplifies Agent integration

- **Parameter Passing**: **kwargs pattern for runtime parameters
  * temperature, max_tokens, etc. flow through to provider APIs
  * Allows provider-specific features without bloating base interface

Data Flow
---------
Agent/llm_do → create_llm(model) → Provider.__init__(api_key)
                    ↓
    Provider.complete(messages, tools, **kwargs)
                    ↓
    Convert messages → Call native API → Parse response
                    ↓
    Return LLMResponse(content, tool_calls, raw_response)

For structured output:
    Provider.structured_complete(messages, output_schema, **kwargs)
                    ↓
    Use native structured API → Validate with Pydantic
                    ↓
    Return Pydantic model instance

Dependencies
-----------
- openai: OpenAI and OpenOnion provider implementations
- anthropic: Claude provider implementation
- google.generativeai: Gemini provider implementation
- pydantic: Structured output validation
- requests: OpenOnion authentication checks
- toml: OpenOnion config file parsing

Integration Points
-----------------
Imported by:
- agent.py: Agent class uses LLM for reasoning
- llm_do.py: One-shot function uses LLM directly
- conftest.py: Test fixtures

Tested by:
- tests/test_llm.py: Unit tests with mocked APIs
- tests/test_llm_do.py: Integration tests
- tests/test_real_*.py: Real API integration tests

Environment Variables
--------------------
Required (pick one):
- OPENAI_API_KEY: For OpenAI models
- ANTHROPIC_API_KEY: For Claude models
- GEMINI_API_KEY or GOOGLE_API_KEY: For Gemini models
- OPENONION_API_KEY: For co/ managed keys (or from ~/.connectonion/.co/config.toml)

Optional:
- OPENONION_DEV: Use localhost:8000 for OpenOnion (development)
- ENVIRONMENT=development: Same as OPENONION_DEV

Error Handling
-------------
- ValueError: Missing API keys, unknown models, invalid parameters
- Provider-specific errors: Bubble up from native SDKs (openai.APIError, etc.)
- Structured output errors: Pydantic ValidationError if response doesn't match schema

Performance Considerations
-------------------------
- Default max_tokens: 8192 for Anthropic (required), configurable for others
- No caching: Each call is stateless (Agent maintains conversation history)
- No streaming: Currently synchronous only (streaming planned for future)

Example Usage
------------
Basic completion:
    >>> from connectonion.llm import create_llm
    >>> llm = create_llm(model="gpt-4o-mini")
    >>> response = llm.complete([{"role": "user", "content": "Hello"}])
    >>> print(response.content)

Structured output:
    >>> from pydantic import BaseModel
    >>> class Answer(BaseModel):
    ...     value: int
    >>> llm = create_llm(model="gpt-4o-mini")
    >>> result = llm.structured_complete(
    ...     [{"role": "user", "content": "What is 2+2?"}],
    ...     Answer
    ... )
    >>> print(result.value)  # 4

With tools:
    >>> tools = [{"name": "search", "description": "Search the web", "parameters": {...}}]
    >>> response = llm.complete(messages, tools=tools)
    >>> if response.tool_calls:
    ...     print(response.tool_calls[0].name)  # "search"
"""

from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Type
from dataclasses import dataclass
import json
import os
import openai
import anthropic
# google-genai not needed - using OpenAI-compatible endpoint instead
import requests
from pathlib import Path
import toml
from pydantic import BaseModel


@dataclass
class ToolCall:
    """Represents a tool call from the LLM.

    Attributes:
        name: The function name to call
        arguments: Dict of arguments to pass to the function
        id: Unique identifier for this tool call
        extra_content: Provider-specific metadata (e.g., Gemini 3 thought_signature).
            Must be echoed back in the assistant message for models that require it.
            See: https://ai.google.dev/gemini-api/docs/thinking#openai-sdk
    """
    name: str
    arguments: Dict[str, Any]
    id: str
    extra_content: Optional[Dict[str, Any]] = None


# Import TokenUsage from usage module
from .usage import TokenUsage, calculate_cost


@dataclass
class LLMResponse:
    """Response from LLM including content and tool calls."""
    content: Optional[str]
    tool_calls: List[ToolCall]
    raw_response: Any
    usage: Optional[TokenUsage] = None


class LLM(ABC):
    """Abstract base class for LLM providers."""

    @abstractmethod
    def complete(self, messages: List[Dict[str, str]], tools: Optional[List[Dict[str, Any]]] = None) -> LLMResponse:
        """Complete a conversation with optional tool support."""
        pass

    @abstractmethod
    def structured_complete(self, messages: List[Dict], output_schema: Type[BaseModel]) -> BaseModel:
        """Get structured Pydantic output matching the schema.

        Args:
            messages: Conversation messages in OpenAI format
            output_schema: Pydantic model class defining the expected output structure

        Returns:
            Instance of output_schema with parsed and validated data

        Raises:
            ValueError: If the LLM fails to generate valid structured output
        """
        pass

class OpenAILLM(LLM):
    """OpenAI LLM implementation."""

    def __init__(self, api_key: Optional[str] = None, model: str = "o4-mini", **kwargs):
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OpenAI API key required. Set OPENAI_API_KEY environment variable or pass api_key parameter.")

        self.client = openai.OpenAI(api_key=self.api_key)
        self.model = model

    def complete(self, messages: List[Dict[str, str]], tools: Optional[List[Dict[str, Any]]] = None, **kwargs) -> LLMResponse:
        """Complete a conversation with optional tool support."""
        api_kwargs = {
            "model": self.model,
            "messages": messages,
            **kwargs  # Pass through user kwargs (max_tokens, temperature, etc.)
        }

        if tools:
            api_kwargs["tools"] = [{"type": "function", "function": tool} for tool in tools]
            api_kwargs["tool_choice"] = "auto"

        response = self.client.chat.completions.create(**api_kwargs)
        message = response.choices[0].message

        # Parse tool calls if present
        tool_calls = []
        if hasattr(message, 'tool_calls') and message.tool_calls:
            for tc in message.tool_calls:
                tool_calls.append(ToolCall(
                    name=tc.function.name,
                    arguments=json.loads(tc.function.arguments),
                    id=tc.id
                ))

        # Extract token usage
        input_tokens = response.usage.prompt_tokens
        output_tokens = response.usage.completion_tokens
        cached_tokens = response.usage.prompt_tokens_details.cached_tokens if response.usage.prompt_tokens_details else 0
        cost = calculate_cost(self.model, input_tokens, output_tokens, cached_tokens)

        return LLMResponse(
            content=message.content,
            tool_calls=tool_calls,
            raw_response=response,
            usage=TokenUsage(
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cached_tokens=cached_tokens,
                cost=cost,
            ),
        )

    def structured_complete(self, messages: List[Dict], output_schema: Type[BaseModel], **kwargs) -> BaseModel:
        """Get structured Pydantic output using OpenAI's native responses.parse API.

        Uses the new OpenAI responses.parse() endpoint with text_format parameter
        for guaranteed schema adherence.
        """
        response = self.client.responses.parse(
            model=self.model,
            input=messages,
            text_format=output_schema,
            **kwargs  # Pass through temperature, max_tokens, etc.
        )

        # Handle edge cases
        if response.status == "incomplete":
            if response.incomplete_details.reason == "max_output_tokens":
                raise ValueError("Response incomplete: maximum output tokens reached")
            elif response.incomplete_details.reason == "content_filter":
                raise ValueError("Response incomplete: content filtered")

        # Check for refusal
        if response.output and len(response.output) > 0:
            first_content = response.output[0].content[0] if response.output[0].content else None
            if first_content and hasattr(first_content, 'type') and first_content.type == "refusal":
                raise ValueError(f"Model refused to respond: {first_content.refusal}")

        # Return the parsed Pydantic object
        return response.output_parsed

class AnthropicLLM(LLM):
    """Anthropic Claude LLM implementation."""

    def __init__(self, api_key: Optional[str] = None, model: str = "claude-3-5-sonnet-20241022", max_tokens: int = 8192, **kwargs):
        self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
        if not self.api_key:
            raise ValueError("Anthropic API key required. Set ANTHROPIC_API_KEY environment variable or pass api_key parameter.")

        self.client = anthropic.Anthropic(api_key=self.api_key)
        self.model = model
        self.max_tokens = max_tokens  # Anthropic requires max_tokens (default 8192)

    def complete(self, messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None, **kwargs) -> LLMResponse:
        """Complete a conversation with optional tool support."""
        # Convert messages to Anthropic format
        anthropic_messages = self._convert_messages(messages)

        api_kwargs = {
            "model": self.model,
            "messages": anthropic_messages,
            "max_tokens": self.max_tokens,  # Required by Anthropic
            **kwargs  # User can override max_tokens via kwargs
        }

        # Add tools if provided
        if tools:
            api_kwargs["tools"] = self._convert_tools(tools)

        response = self.client.messages.create(**api_kwargs)

        # Parse tool calls if present
        tool_calls = []
        content = ""

        for block in response.content:
            if block.type == "text":
                content += block.text
            elif block.type == "tool_use":
                tool_calls.append(ToolCall(
                    name=block.name,
                    arguments=block.input,
                    id=block.id
                ))

        # Extract token usage - Anthropic uses input_tokens/output_tokens
        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        cached_tokens = getattr(response.usage, 'cache_read_input_tokens', 0) or 0
        cache_write_tokens = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0
        cost = calculate_cost(self.model, input_tokens, output_tokens, cached_tokens, cache_write_tokens)

        return LLMResponse(
            content=content if content else None,
            tool_calls=tool_calls,
            raw_response=response,
            usage=TokenUsage(
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cached_tokens=cached_tokens,
                cache_write_tokens=cache_write_tokens,
                cost=cost,
            ),
        )

    def structured_complete(self, messages: List[Dict], output_schema: Type[BaseModel], **kwargs) -> BaseModel:
        """Get structured Pydantic output using tool calling method.

        Anthropic doesn't have native Pydantic support yet, so we use a tool calling
        workaround: create a dummy tool with the Pydantic schema and force its use.
        """
        # Convert messages to Anthropic format
        anthropic_messages = self._convert_messages(messages)

        # Create a tool with the Pydantic schema as input_schema
        tool = {
            "name": "return_structured_output",
            "description": "Returns the structured output based on the user's request",
            "input_schema": output_schema.model_json_schema()
        }

        # Set max_tokens with safe default
        api_kwargs = {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "messages": anthropic_messages,
            "tools": [tool],
            "tool_choice": {"type": "tool", "name": "return_structured_output"},
            **kwargs  # User can override max_tokens, temperature, etc.
        }

        # Force the model to use this tool
        response = self.client.messages.create(**api_kwargs)

        # Extract structured data from tool call
        for block in response.content:
            if block.type == "tool_use" and block.name == "return_structured_output":
                # Validate and return as Pydantic model
                return output_schema.model_validate(block.input)

        raise ValueError("No structured output received from Claude")

    def _convert_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert OpenAI-style messages to Anthropic format."""
        anthropic_messages = []
        i = 0

        while i < len(messages):
            msg = messages[i]

            # Skip system messages (will be handled separately)
            if msg["role"] == "system":
                i += 1
                continue

            # Handle assistant messages with tool calls
            if msg["role"] == "assistant" and msg.get("tool_calls"):
                content_blocks = []
                if msg.get("content"):
                    content_blocks.append({
                        "type": "text",
                        "text": msg["content"]
                    })

                for tc in msg["tool_calls"]:
                    content_blocks.append({
                        "type": "tool_use",
                        "id": tc["id"],
                        "name": tc["function"]["name"],
                        "input": json.loads(tc["function"]["arguments"]) if isinstance(tc["function"]["arguments"], str) else tc["function"]["arguments"]
                    })

                anthropic_messages.append({
                    "role": "assistant",
                    "content": content_blocks
                })

                # Now collect all the tool responses that follow immediately
                i += 1
                tool_results = []
                while i < len(messages) and messages[i]["role"] == "tool":
                    tool_msg = messages[i]
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": tool_msg["tool_call_id"],
                        "content": tool_msg["content"]
                    })
                    i += 1

                # Add all tool results in a single user message
                if tool_results:
                    anthropic_messages.append({
                        "role": "user",
                        "content": tool_results
                    })

            # Handle tool role messages that aren't immediately after assistant tool calls
            elif msg["role"] == "tool":
                # This shouldn't happen in normal flow, but handle it just in case
                anthropic_messages.append({
                    "role": "user",
                    "content": [{
                        "type": "tool_result",
                        "tool_use_id": msg["tool_call_id"],
                        "content": msg["content"]
                    }]
                })
                i += 1

            # Handle user messages
            elif msg["role"] == "user":
                if isinstance(msg.get("content"), list):
                    # This is already a structured message
                    anthropic_msg = {
                        "role": "user",
                        "content": []
                    }
                    for item in msg["content"]:
                        if item.get("type") == "tool_result":
                            anthropic_msg["content"].append({
                                "type": "tool_result",
                                "tool_use_id": item["tool_call_id"],
                                "content": item["content"]
                            })
                    anthropic_messages.append(anthropic_msg)
                else:
                    # Regular text message
                    anthropic_messages.append({
                        "role": "user",
                        "content": msg["content"]
                    })
                i += 1

            # Handle regular assistant messages
            elif msg["role"] == "assistant":
                anthropic_messages.append({
                    "role": "assistant",
                    "content": msg["content"]
                })
                i += 1

            else:
                i += 1

        return anthropic_messages

    def _convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert OpenAI-style tools to Anthropic format."""
        anthropic_tools = []

        for tool in tools:
            # Tools already in our internal format
            anthropic_tool = {
                "name": tool["name"],
                "description": tool.get("description", ""),
                "input_schema": tool.get("parameters", {
                    "type": "object",
                    "properties": {},
                    "required": []
                })
            }
            anthropic_tools.append(anthropic_tool)

        return anthropic_tools

class GeminiLLM(LLM):
    """Google Gemini LLM implementation using OpenAI-compatible endpoint."""

    def __init__(self, api_key: Optional[str] = None, model: str = "gemini-2.0-flash-exp", **kwargs):
        self.api_key = api_key or os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
        if not self.api_key:
            raise ValueError("Gemini API key required. Set GEMINI_API_KEY environment variable or pass api_key parameter. (GOOGLE_API_KEY is also supported for backward compatibility)")

        # Use Gemini's OpenAI-compatible endpoint
        self.client = openai.OpenAI(
            api_key=self.api_key,
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
        )
        self.model = model

    def complete(self, messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None, **kwargs) -> LLMResponse:
        """Complete a conversation using Gemini's OpenAI-compatible endpoint."""
        api_kwargs = {
            "model": self.model,
            "messages": messages,
            **kwargs
        }

        if tools:
            api_kwargs["tools"] = [{"type": "function", "function": tool} for tool in tools]
            api_kwargs["tool_choice"] = "auto"

        response = self.client.chat.completions.create(**api_kwargs)
        message = response.choices[0].message

        # Parse tool calls if present
        # Preserve extra_content for providers that need it (e.g., Gemini 3 thought_signature)
        tool_calls = []
        if hasattr(message, 'tool_calls') and message.tool_calls:
            for tc in message.tool_calls:
                extra = getattr(tc, 'extra_content', None)
                tool_calls.append(ToolCall(
                    name=tc.function.name,
                    arguments=json.loads(tc.function.arguments) if isinstance(tc.function.arguments, str) else tc.function.arguments,
                    id=tc.id,
                    extra_content=extra
                ))

        # Extract token usage (OpenAI-compatible format)
        usage = None
        if hasattr(response, 'usage') and response.usage:
            input_tokens = response.usage.prompt_tokens
            output_tokens = response.usage.completion_tokens
            cached_tokens = 0
            if hasattr(response.usage, 'prompt_tokens_details') and response.usage.prompt_tokens_details:
                cached_tokens = getattr(response.usage.prompt_tokens_details, 'cached_tokens', 0) or 0
            cost = calculate_cost(self.model, input_tokens, output_tokens, cached_tokens)
            usage = TokenUsage(
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cached_tokens=cached_tokens,
                cost=cost,
            )

        return LLMResponse(
            content=message.content,
            tool_calls=tool_calls,
            raw_response=response,
            usage=usage,
        )

    def structured_complete(self, messages: List[Dict], output_schema: Type[BaseModel], **kwargs) -> BaseModel:
        """Get structured Pydantic output using Gemini's OpenAI-compatible endpoint with beta.chat.completions.parse."""
        completion = self.client.beta.chat.completions.parse(
            model=self.model,
            messages=messages,
            response_format=output_schema,
            **kwargs
        )
        return completion.choices[0].message.parsed

# Model registry mapping model names to providers
MODEL_REGISTRY = {
    # OpenAI models
    "gpt-4o": "openai",
    "gpt-4o-mini": "openai",
    "gpt-4-turbo": "openai",
    "gpt-3.5-turbo": "openai",
    "o1": "openai",
    "o1-mini": "openai",
    "o1-preview": "openai",
    "o4-mini": "openai",  # Testing placeholder

    # Anthropic Claude models
    "claude-3-5-sonnet": "anthropic",
    "claude-3-5-sonnet-20241022": "anthropic",
    "claude-3-5-sonnet-latest": "anthropic",
    "claude-3-5-haiku": "anthropic",
    "claude-3-5-haiku-20241022": "anthropic",
    "claude-3-5-haiku-latest": "anthropic",
    "claude-3-haiku-20240307": "anthropic",
    "claude-3-opus-20240229": "anthropic",
    "claude-3-opus-latest": "anthropic",
    "claude-3-sonnet-20240229": "anthropic",

    # Claude 4 models
    "claude-opus-4.1": "anthropic",
    "claude-opus-4-1-20250805": "anthropic",
    "claude-opus-4-1": "anthropic",  # Alias
    "claude-opus-4": "anthropic",
    "claude-opus-4-20250514": "anthropic",
    "claude-opus-4-0": "anthropic",  # Alias
    "claude-sonnet-4": "anthropic",
    "claude-sonnet-4-20250514": "anthropic",
    "claude-sonnet-4-0": "anthropic",  # Alias
    "claude-3-7-sonnet-latest": "anthropic",
    "claude-3-7-sonnet-20250219": "anthropic",

    # Google Gemini models
    "gemini-3-pro-preview": "google",
    "gemini-3-pro-image-preview": "google",
    "gemini-2.5-pro": "google",
    "gemini-2.5-flash": "google",
    "gemini-2.0-flash-exp": "google",
    "gemini-2.0-flash-thinking-exp": "google",
    "gemini-1.5-pro": "google",
    "gemini-1.5-pro-002": "google",
    "gemini-1.5-pro-001": "google",
    "gemini-1.5-flash": "google",
    "gemini-1.5-flash-002": "google",
    "gemini-1.5-flash-001": "google",
    "gemini-1.5-flash-8b": "google",
    "gemini-1.0-pro": "google",
}

class OpenOnionLLM(LLM):
    """OpenOnion managed keys LLM implementation using OpenAI-compatible API."""

    def __init__(self, api_key: Optional[str] = None, model: str = "co/o4-mini", **kwargs):
        # For co/ models, api_key is actually the auth token
        # Framework auto-loads .env, so OPENONION_API_KEY will be in environment
        self.auth_token = api_key or os.getenv("OPENONION_API_KEY")
        if not self.auth_token:
            raise ValueError(
                "OPENONION_API_KEY not found in environment.\n"
                "Run 'co init' to get started or set OPENONION_API_KEY in your .env file."
            )

        # Strip co/ prefix - it's only for client-side routing
        self.model = model.removeprefix("co/")

        # Determine base URL for OpenAI-compatible endpoint
        if os.getenv("OPENONION_DEV") or os.getenv("ENVIRONMENT") == "development":
            base_url = "http://localhost:8000/v1"
        else:
            base_url = "https://oo.openonion.ai/v1"

        # Use OpenAI client with OpenOnion endpoint
        self.client = openai.OpenAI(
            base_url=base_url,
            api_key=self.auth_token
        )

    def complete(self, messages: List[Dict[str, Any]], tools: Optional[List[Dict[str, Any]]] = None, **kwargs) -> LLMResponse:
        """Complete a conversation with optional tool support using OpenAI-compatible API."""
        api_kwargs = {
            "model": self.model,
            "messages": messages,
            **kwargs  # Pass through user kwargs (temperature, max_tokens, etc.)
        }

        # Add tools if provided
        if tools:
            api_kwargs["tools"] = [{"type": "function", "function": tool} for tool in tools]
            api_kwargs["tool_choice"] = "auto"

        response = self.client.chat.completions.create(**api_kwargs)
        message = response.choices[0].message

        # Parse tool calls if present
        # Preserve extra_content for providers that need it (e.g., Gemini 3 thought_signature)
        tool_calls = []
        if hasattr(message, 'tool_calls') and message.tool_calls:
            for tc in message.tool_calls:
                extra = getattr(tc, 'extra_content', None)
                tool_calls.append(ToolCall(
                    name=tc.function.name,
                    arguments=json.loads(tc.function.arguments) if isinstance(tc.function.arguments, str) else tc.function.arguments,
                    id=tc.id,
                    extra_content=extra
                ))

        # Extract token usage (OpenAI-compatible format)
        usage = None
        if hasattr(response, 'usage') and response.usage:
            input_tokens = response.usage.prompt_tokens
            output_tokens = response.usage.completion_tokens
            cached_tokens = 0
            if hasattr(response.usage, 'prompt_tokens_details') and response.usage.prompt_tokens_details:
                cached_tokens = getattr(response.usage.prompt_tokens_details, 'cached_tokens', 0) or 0
            # Use the underlying model for pricing (without co/ prefix)
            cost = calculate_cost(self.model, input_tokens, output_tokens, cached_tokens)
            usage = TokenUsage(
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cached_tokens=cached_tokens,
                cost=cost,
            )

        return LLMResponse(
            content=message.content,
            tool_calls=tool_calls,
            raw_response=response,
            usage=usage,
        )

    def structured_complete(self, messages: List[Dict], output_schema: Type[BaseModel], **kwargs) -> BaseModel:
        """Get structured Pydantic output using OpenAI-compatible API."""
        response = self.client.responses.parse(
            model=self.model,
            input=messages,
            text_format=output_schema,
            **kwargs
        )

        # Handle edge cases
        if response.status == "incomplete":
            if response.incomplete_details.reason == "max_output_tokens":
                raise ValueError("Response incomplete: maximum output tokens reached")
            elif response.incomplete_details.reason == "content_filter":
                raise ValueError("Response incomplete: content filtered")

        # Check for refusal
        if response.output and len(response.output) > 0:
            first_content = response.output[0].content[0] if response.output[0].content else None
            if first_content and hasattr(first_content, 'type') and first_content.type == "refusal":
                raise ValueError(f"Model refused to respond: {first_content.refusal}")

        return response.output_parsed

def create_llm(model: str, api_key: Optional[str] = None, **kwargs) -> LLM:
    """Factory function to create the appropriate LLM based on model name.

    Args:
        model: The model name (e.g., "gpt-4o", "claude-3-5-sonnet", "gemini-1.5-pro")
        api_key: Optional API key to override environment variable
        **kwargs: Additional arguments to pass to the LLM constructor

    Returns:
        An LLM instance for the specified model

    Raises:
        ValueError: If the model is not recognized
    """
    # Check if it's a co/ model (OpenOnion managed keys)
    if model.startswith("co/"):
        return OpenOnionLLM(api_key=api_key, model=model, **kwargs)

    # Get provider from registry
    provider = MODEL_REGISTRY.get(model)

    if not provider:
        # Try to infer provider from model name
        if model.startswith("gpt") or model.startswith("o"):
            provider = "openai"
        elif model.startswith("claude"):
            provider = "anthropic"
        elif model.startswith("gemini"):
            provider = "google"
        else:
            raise ValueError(f"Unknown model '{model}'")

    # Create the appropriate LLM
    if provider == "openai":
        return OpenAILLM(api_key=api_key, model=model, **kwargs)
    elif provider == "anthropic":
        return AnthropicLLM(api_key=api_key, model=model, **kwargs)
    elif provider == "google":
        return GeminiLLM(api_key=api_key, model=model, **kwargs)
    else:
        raise ValueError(f"Provider '{provider}' not implemented")
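
Editor's note: the module above returns ToolCall objects but leaves execution and message bookkeeping to the caller. Below is a minimal sketch of the round trip a caller (such as agent.py) would drive, assuming connectonion 0.5.8 is installed and OPENAI_API_KEY is set; the get_weather tool, its schema, and the hard-coded result string are illustrative stand-ins, not part of the package.

    import json
    from connectonion.llm import create_llm

    # Tools are plain OpenAI function schemas; llm.py wraps or converts them per provider.
    tools = [{
        "name": "get_weather",  # hypothetical example tool
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }]

    llm = create_llm(model="gpt-4o-mini")  # or "claude-3-5-sonnet", "gemini-2.5-flash", "co/o4-mini"
    messages = [{"role": "user", "content": "What's the weather in Paris?"}]
    response = llm.complete(messages, tools=tools)

    # If the model requested a tool, echo the call and its result back in OpenAI
    # message format (the lingua franca every provider converts from internally).
    for call in response.tool_calls:
        messages.append({
            "role": "assistant",
            "content": response.content,
            "tool_calls": [{
                "id": call.id,
                "type": "function",
                "function": {"name": call.name, "arguments": json.dumps(call.arguments)},
            }],
        })
        messages.append({
            "role": "tool",
            "tool_call_id": call.id,
            "content": "Sunny, 22°C",  # stand-in for the real tool's return value
        })

    final = llm.complete(messages, tools=tools)
    print(final.content, final.usage)

Because AnthropicLLM._convert_messages rebuilds its native blocks from exactly this assistant/tool message shape, the same loop works unchanged across providers.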