connectonion 0.4.12-py3-none-any.whl → 0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- connectonion/__init__.py +11 -5
- connectonion/agent.py +44 -42
- connectonion/cli/commands/init.py +1 -1
- connectonion/cli/commands/project_cmd_lib.py +4 -4
- connectonion/cli/commands/reset_commands.py +1 -1
- connectonion/cli/docs/co-vibecoding-principles-docs-contexts-all-in-one.md +15 -11
- connectonion/cli/templates/minimal/agent.py +2 -2
- connectonion/console.py +55 -3
- connectonion/events.py +96 -17
- connectonion/llm.py +21 -3
- connectonion/logger.py +289 -0
- connectonion/prompt_files/eval_expected.md +12 -0
- connectonion/tool_executor.py +43 -32
- connectonion/usage.py +4 -0
- connectonion/useful_events_handlers/reflect.py +13 -9
- connectonion/useful_plugins/__init__.py +2 -1
- connectonion/useful_plugins/calendar_plugin.py +2 -2
- connectonion/useful_plugins/eval.py +130 -0
- connectonion/useful_plugins/gmail_plugin.py +4 -4
- connectonion/useful_plugins/image_result_formatter.py +4 -3
- connectonion/useful_plugins/re_act.py +14 -56
- connectonion/useful_plugins/shell_approval.py +2 -2
- connectonion/useful_tools/memory.py +4 -0
- {connectonion-0.4.12.dist-info → connectonion-0.5.0.dist-info}/METADATA +48 -48
- {connectonion-0.4.12.dist-info → connectonion-0.5.0.dist-info}/RECORD +27 -71
- {connectonion-0.4.12.dist-info → connectonion-0.5.0.dist-info}/WHEEL +1 -2
- connectonion/cli/templates/email-agent/.env.example +0 -23
- connectonion/cli/templates/email-agent/README.md +0 -240
- connectonion/cli/templates/email-agent/agent.py +0 -374
- connectonion/cli/templates/email-agent/demo.py +0 -71
- connectonion/cli/templates/meta-agent/.env.example +0 -11
- connectonion/cli/templates/minimal/.env.example +0 -5
- connectonion/cli/templates/playwright/.env.example +0 -5
- connectonion-0.4.12.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/cli/__init__.py +0 -1
- tests/cli/argparse_runner.py +0 -85
- tests/cli/conftest.py +0 -5
- tests/cli/test_browser_cli.py +0 -61
- tests/cli/test_cli.py +0 -143
- tests/cli/test_cli_auth_google.py +0 -344
- tests/cli/test_cli_auth_microsoft.py +0 -256
- tests/cli/test_cli_create.py +0 -283
- tests/cli/test_cli_help.py +0 -200
- tests/cli/test_cli_init.py +0 -318
- tests/conftest.py +0 -283
- tests/debug_gemini_models.py +0 -23
- tests/fixtures/__init__.py +0 -1
- tests/fixtures/test_tools.py +0 -112
- tests/fixtures/trust_fixtures.py +0 -257
- tests/real_api/__init__.py +0 -0
- tests/real_api/conftest.py +0 -9
- tests/real_api/test_llm_do.py +0 -174
- tests/real_api/test_llm_do_comprehensive.py +0 -527
- tests/real_api/test_production_client.py +0 -94
- tests/real_api/test_real_anthropic.py +0 -100
- tests/real_api/test_real_api.py +0 -113
- tests/real_api/test_real_auth.py +0 -130
- tests/real_api/test_real_email.py +0 -95
- tests/real_api/test_real_gemini.py +0 -96
- tests/real_api/test_real_llm_do.py +0 -81
- tests/real_api/test_real_managed.py +0 -208
- tests/real_api/test_real_multi_llm.py +0 -454
- tests/real_api/test_real_openai.py +0 -100
- tests/real_api/test_responses_parse.py +0 -88
- tests/test_diff_writer.py +0 -126
- tests/test_events.py +0 -677
- tests/test_gemini_co.py +0 -70
- tests/test_image_result_formatter.py +0 -88
- tests/test_plugin_system.py +0 -110
- tests/utils/__init__.py +0 -1
- tests/utils/config_helpers.py +0 -188
- tests/utils/mock_helpers.py +0 -237
- {connectonion-0.4.12.dist-info → connectonion-0.5.0.dist-info}/entry_points.txt +0 -0
--- a/tests/real_api/test_real_multi_llm.py
+++ /dev/null
@@ -1,454 +0,0 @@
-"""Pytest tests for multi-LLM model support across OpenAI, Google, and Anthropic."""
-
-import os
-import time
-from unittest.mock import Mock, patch, MagicMock
-from dotenv import load_dotenv
-import pytest
-from connectonion import Agent
-from connectonion.llm import LLMResponse, ToolCall
-
-# Load environment variables from .env file
-load_dotenv()
-
-
-# Test tools that will work across all models
-def simple_calculator(a: int, b: int) -> int:
-    """Add two numbers together."""
-    return a + b
-
-
-def get_greeting(name: str) -> str:
-    """Generate a greeting for a person."""
-    return f"Hello, {name}!"
-
-
-def process_data(data: str, uppercase: bool = False) -> str:
-    """Process text data with optional uppercase conversion."""
-    if uppercase:
-        return data.upper()
-    return data.lower()
-
-
-def _available_providers():
-    providers = []
-    if os.getenv("OPENAI_API_KEY"):
-        providers.append("openai")
-    if os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY"):
-        providers.append("google")
-    if os.getenv("ANTHROPIC_API_KEY"):
-        providers.append("anthropic")
-    return providers
-
-
-def _tools():
-    return [simple_calculator, get_greeting, process_data]
-
-# -------------------------------------------------------------------------
-# Model Detection Tests
-# -------------------------------------------------------------------------
-
-def test_model_detection_openai():
-    models = ["o4-mini", "gpt-4o", "gpt-4o-mini", "gpt-3.5-turbo", "o1", "o1-mini"]
-    for model in models:
-        assert model.startswith("gpt") or model.startswith("o")
-
-def test_model_detection_google():
-    models = [
-        "gemini-2.5-pro",
-        "gemini-2.0-flash-exp",
-        "gemini-2.0-flash-thinking-exp",
-        "gemini-2.5-flash",
-        "gemini-1.5-flash-8b",  # Note: 2.5-flash-8b doesn't exist, use 1.5-flash-8b
-    ]
-    for model in models:
-        assert model.startswith("gemini")
-
-def test_model_detection_anthropic():
-    models = [
-        "claude-opus-4.1",
-        "claude-opus-4",
-        "claude-sonnet-4",
-        "claude-3-5-sonnet-latest",
-        "claude-3-5-haiku-latest",
-    ]
-    for model in models:
-        assert model.startswith("claude")
-
-# -------------------------------------------------------------------------
-# Agent Creation Tests (Using Mock)
-# -------------------------------------------------------------------------
-
-@patch('connectonion.llm.OpenAILLM')
-def test_create_agent_with_openai_flagship(mock_openai):
-    """Test creating an agent with OpenAI flagship model."""
-    mock_instance = Mock()
-    # mock_instance.model = "gpt-5"  # GPT-5 requires passport verification
-    mock_instance.model = "o4-mini"  # Using o4-mini for testing
-    mock_openai.return_value = mock_instance
-
-    # agent = Agent("test_gpt5", model="gpt-5")  # Original GPT-5 test
-    agent = Agent("test_o4", model="o4-mini")  # Using o4-mini
-    assert agent.name == "test_o4"
-    # Will work once multi-LLM is implemented
-    # self.assertEqual(agent.llm.model, "o4-mini")
-
-@patch('connectonion.llm.GeminiLLM')
-def test_create_agent_with_gemini25(mock_gemini):
-    """Test creating an agent with Gemini 2.5 Pro model."""
-    mock_instance = Mock()
-    mock_instance.model = "gemini-2.5-pro"
-    mock_gemini.return_value = mock_instance
-
-    # Will work once GeminiLLM is implemented
-    # agent = Agent("test_gemini", model="gemini-2.5-pro")
-    # self.assertEqual(agent.llm.model, "gemini-2.5-pro")
-    pytest.skip("GeminiLLM not yet implemented")
-
-@patch('connectonion.llm.AnthropicLLM')
-def test_create_agent_with_claude_opus4(mock_anthropic):
-    """Test creating an agent with Claude Opus 4.1 model."""
-    mock_instance = Mock()
-    mock_instance.model = "claude-opus-4.1"
-    mock_anthropic.return_value = mock_instance
-
-    # Will work once AnthropicLLM is implemented
-    # agent = Agent("test_claude", model="claude-opus-4.1")
-    # self.assertEqual(agent.llm.model, "claude-opus-4.1")
-    pytest.skip("AnthropicLLM not yet implemented")
-
-# -------------------------------------------------------------------------
-# Tool Compatibility Tests
-# -------------------------------------------------------------------------
-
-def test_tools_work_across_all_models():
-    """Test that the same tools work with all model providers."""
-    test_cases = []
-
-    # Use actual models from our docs
-    available_providers = _available_providers()
-    tools = _tools()
-    if "openai" in available_providers:
-        # test_cases.append(("gpt-5-nano", "openai"))  # GPT-5 requires passport
-        test_cases.append(("gpt-4o-mini", "openai"))  # Use available model for testing
-    if "google" in available_providers:
-        test_cases.append(("gemini-2.5-flash", "google"))
-    if "anthropic" in available_providers:
-        test_cases.append(("claude-3-5-haiku-latest", "anthropic"))
-
-    if not test_cases:
-        pytest.skip("No API keys available for testing")
-
-    for model, provider in test_cases:
-        try:
-            agent = Agent(f"test_{provider}", model=model, tools=tools)
-            assert len(agent.tools) == 3
-            assert "simple_calculator" in agent.tools
-            assert "get_greeting" in agent.tools
-            assert "process_data" in agent.tools
-            for tool in agent.tools:
-                schema = tool.to_function_schema()
-                assert "name" in schema
-                assert "description" in schema
-                assert "parameters" in schema
-        except Exception as e:
-            if "Unknown model" in str(e) or "not yet implemented" in str(e):
-                pytest.skip(f"Model {model} not yet implemented")
-            raise
-
-# -------------------------------------------------------------------------
-# Model Registry Tests
-# -------------------------------------------------------------------------
-
-def test_model_registry_mapping():
-    """Test that models map to correct providers."""
-    # This will be the expected mapping when implemented
-    expected_mapping = {
-        # OpenAI models
-        "o4-mini": "openai",  # Testing model
-        "gpt-4o": "openai",
-        "gpt-4o-mini": "openai",
-        "gpt-3.5-turbo": "openai",
-        "o1": "openai",
-        "o1-mini": "openai",
-
-        # Google Gemini series
-        "gemini-2.5-pro": "google",
-        "gemini-2.0-flash-exp": "google",
-        "gemini-2.0-flash-thinking-exp": "google",
-        "gemini-2.5-flash": "google",
-        "gemini-1.5-flash-8b": "google",  # Fixed: 2.5-flash-8b doesn't exist
-
-        # Anthropic Claude series
-        "claude-opus-4.1": "anthropic",
-        "claude-opus-4": "anthropic",
-        "claude-sonnet-4": "anthropic",
-        "claude-3-5-sonnet-latest": "anthropic",
-        "claude-3-5-haiku-latest": "anthropic"
-    }
-
-    # When MODEL_REGISTRY is implemented, test it
-    try:
-        from connectonion.llm import MODEL_REGISTRY
-        for model, expected_provider in expected_mapping.items():
-            assert MODEL_REGISTRY.get(model) == expected_provider, (
-                f"Model {model} should map to {expected_provider}"
-            )
-    except ImportError:
-        pytest.skip("MODEL_REGISTRY not yet implemented")
-
-# -------------------------------------------------------------------------
-# Error Handling Tests
-# -------------------------------------------------------------------------
-
-def test_missing_api_key_error():
-    """Test appropriate error when API key is missing."""
-    # Temporarily remove API key
-    original_key = os.environ.get("OPENAI_API_KEY")
-    if original_key:
-        del os.environ["OPENAI_API_KEY"]
-
-    try:
-        with pytest.raises(ValueError) as context:
-            Agent("test", model="o4-mini")
-        assert "API key" in str(context.value)
-    finally:
-        # Restore API key
-        if original_key:
-            os.environ["OPENAI_API_KEY"] = original_key
-
-def test_unknown_model_error():
-    """Test error handling for unknown model names."""
-    # This should raise an error once model validation is implemented
-    try:
-        with pytest.raises(ValueError) as context:
-            Agent("test", model="unknown-model-xyz")
-        assert "Unknown model" in str(context.value)
-    except Exception:
-        pytest.skip("Model validation not yet implemented")
-
-# -------------------------------------------------------------------------
-# Integration Tests (require actual API keys)
-# -------------------------------------------------------------------------
-
-def test_openai_flagship_real_call():
-    """Test actual API call with OpenAI flagship model."""
-    # Testing with o4-mini (GPT-5 requires passport verification)
-    tools = _tools()
-    try:
-        agent = Agent("test", model="o4-mini", tools=tools)
-        response = agent.input("Use the simple_calculator tool to add 5 and 3")
-        assert "8" in response
-    except Exception as e:
-        if "model not found" in str(e).lower() or "o4-mini" in str(e).lower():
-            # o4-mini not available yet, try with current model
-            agent = Agent("test", model="gpt-4o-mini", tools=tools)
-            response = agent.input("Use the simple_calculator tool to add 5 and 3")
-            assert "8" in response
-        else:
-            raise
-
-def test_google_gemini_real_call():
-    """Test actual API call with Gemini model."""
-    # Try Gemini 2.5 Pro first, fallback to 1.5 if not available
-    models_to_try = ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-pro"]
-
-    tools = _tools()
-    for model in models_to_try:
-        try:
-            agent = Agent("test", model=model, tools=tools)
-            response = agent.input("Use the get_greeting tool to greet 'Alice'")
-            assert "Alice" in response
-            break
-        except Exception as e:
-            if model == models_to_try[-1]:
-                pytest.skip(f"No Gemini models available: {e}")
-            continue
-
-def test_anthropic_claude_real_call():
-    """Test actual API call with Claude model."""
-    # Try Claude Opus 4.1 first, fallback to available models
-    models_to_try = ["claude-opus-4.1", "claude-3-5-sonnet-latest", "claude-3-5-haiku-latest"]
-
-    tools = _tools()
-    for model in models_to_try:
-        try:
-            agent = Agent("test", model=model, tools=tools)
-            response = agent.input("Use the process_data tool to convert 'Hello' to uppercase")
-            assert "HELLO" in response
-            break
-        except Exception as e:
-            if model == models_to_try[-1]:
-                pytest.skip(f"No Claude models available: {e}")
-            continue
-
-# -------------------------------------------------------------------------
-# Model Comparison Tests
-# -------------------------------------------------------------------------
-
-def test_flagship_model_comparison():
-    """Test that flagship models from each provider can handle the same prompt."""
-    prompt = "What is 2 + 2?"
-    results = {}
-
-    flagship_models = []
-    available_providers = _available_providers()
-    if "openai" in available_providers:
-        # Use gpt-4o-mini as fallback since GPT-5 isn't available yet
-        flagship_models.append(("gpt-4o-mini", "openai"))
-    if "google" in available_providers:
-        flagship_models.append(("gemini-2.5-flash", "google"))
-    if "anthropic" in available_providers:
-        flagship_models.append(("claude-3-5-haiku-latest", "anthropic"))
-
-    if len(flagship_models) < 2:
-        pytest.skip("Need at least 2 providers for comparison test")
-
-    for model, provider in flagship_models:
-        try:
-            agent = Agent(f"compare_{provider}", model=model)
-            response = agent.input(prompt)
-            results[model] = response
-
-            # All should mention "4" in their response
-            assert "4" in response.lower()
-        except Exception as e:
-            print(f"Failed with {model}: {e}")
-            continue
-
-# -------------------------------------------------------------------------
-# Fallback Chain Tests
-# -------------------------------------------------------------------------
-
-def test_fallback_chain_with_new_models():
-    """Test fallback chain with new model hierarchy."""
-    # Priority order from docs/models.md
-    fallback_models = [
-        # "gpt-5",  # Best overall (requires passport verification)
-        "o4-mini",  # Testing flagship model
-        "claude-opus-4.1",  # Strong alternative (might not be available)
-        "gemini-2.5-pro",  # Multimodal option (might not be available)
-        # "gpt-5-mini",  # Faster fallback (requires passport)
-        "gpt-4o",  # Current best available
-        "gpt-4o-mini"  # Fallback (should work)
-    ]
-
-    agent = None
-    successful_model = None
-
-    for model in fallback_models:
-        try:
-            agent = Agent("fallback_test", model=model)
-            successful_model = model
-            break
-        except Exception:
-            continue
-
-    if agent is None:
-        pytest.skip("No models available for fallback test")
-
-    assert agent is not None
-    assert successful_model is not None
-
-# -------------------------------------------------------------------------
-# Model Feature Tests
-# -------------------------------------------------------------------------
-
-def test_context_window_sizes():
-    """Test that models have correct context window sizes."""
-    # Based on docs/models.md specifications
-    context_windows = {
-        # "gpt-5": 200000,  # Requires passport
-        # "gpt-5-mini": 200000,  # Requires passport
-        # "gpt-5-nano": 128000,  # Requires passport
-        # "gpt-4.1": 128000,  # Not yet available
-        "o4-mini": 128000,  # Testing model
-        "gpt-4o": 128000,
-        "gpt-4o-mini": 128000,
-        "gpt-3.5-turbo": 16385,
-        "gemini-2.5-pro": 2000000,  # 2M tokens
-        "gemini-2.5-pro": 2000000,  # 2M tokens
-        "gemini-2.5-flash": 1000000,  # 1M tokens
-        "claude-opus-4.1": 200000,
-        "claude-opus-4": 200000,
-        "claude-sonnet-4": 200000
-    }
-
-    # This will be tested once model metadata is implemented
-    pytest.skip("Model metadata not yet implemented")
-
-def test_multimodal_capabilities():
-    """Test which models support multimodal input."""
-    # Based on docs/models.md
-    multimodal_models = [
-        # "gpt-5",  # Requires passport verification
-        "o4-mini",  # Testing model
-        "gpt-4o",
-        "gpt-4o-mini",
-        "gemini-2.5-pro",  # Supports audio, video, images, PDF
-        "gemini-2.0-flash-exp",
-        "gemini-2.5-pro",
-        "gemini-2.5-flash",
-        "claude-opus-4.1",
-        "claude-opus-4",
-        "claude-sonnet-4"
-    ]
-
-    # Will be tested once multimodal support is implemented
-    pytest.skip("Multimodal support not yet implemented")
-
-# -------------------------------------------------------------------------
-# Performance Tests
-# -------------------------------------------------------------------------
-
-@pytest.mark.benchmark
-def test_fast_model_performance():
-    """Test that fast models initialize quickly."""
-    available_providers = _available_providers()
-    if not available_providers:
-        pytest.skip("No API keys available")
-
-    # Use the fastest model from each provider
-    fast_models = []
-    if "openai" in available_providers:
-        fast_models.append("gpt-4o-mini")  # Fastest available
-    if "google" in available_providers:
-        fast_models.append("gemini-2.5-flash")
-    if "anthropic" in available_providers:
-        fast_models.append("claude-3-5-haiku-latest")
-
-    for model in fast_models:
-        try:
-            start_time = time.time()
-            agent = Agent("perf_test", model=model)
-            end_time = time.time()
-
-            initialization_time = end_time - start_time
-
-            # Should initialize in less than 2 seconds
-            assert initialization_time < 2.0, f"Model {model} initialization took {initialization_time:.2f}s"
-        except Exception as e:
-            # Model might not be available yet
-            if "not found" in str(e).lower() or "not available" in str(e).lower():
-                continue
-            raise
-
-
-def test_select_model_for_code_generation():
-    """Test smart model selection based on use case."""
-    def select_model_for_task(task_type):
-        if task_type == "code":
-            return "o4-mini"
-        elif task_type == "reasoning":
-            return "gemini-2.5-pro"
-        elif task_type == "fast":
-            return "gpt-4o-mini"
-        elif task_type == "long_context":
-            return "gemini-2.5-pro"
-        else:
-            return "o4-mini"
-
-    assert select_model_for_task("code") == "o4-mini"
-    assert select_model_for_task("reasoning") == "gemini-2.5-pro"
-    assert select_model_for_task("fast") == "gpt-4o-mini"
-    assert select_model_for_task("long_context") == "gemini-2.5-pro"
--- a/tests/real_api/test_real_openai.py
+++ /dev/null
@@ -1,100 +0,0 @@
-"""
-Real OpenAI API tests.
-
-These tests make actual API calls to OpenAI and cost real money.
-Run with: pytest test_real_openai.py -v
-
-Requires: OPENAI_API_KEY environment variable
-"""
-
-import os
-import pytest
-from pathlib import Path
-from dotenv import load_dotenv
-from connectonion import Agent
-from connectonion.llm import OpenAILLM
-
-# Load environment variables from tests/.env
-env_path = Path(__file__).parent / ".env"
-if env_path.exists():
-    load_dotenv(env_path)
-
-
-def calculator(expression: str) -> str:
-    """Simple calculator for testing."""
-    try:
-        result = eval(expression)
-        return f"Result: {result}"
-    except:
-        return "Error in calculation"
-
-
-class TestRealOpenAI:
-    """Test real OpenAI API integration."""
-
-    def test_openai_basic_completion(self):
-        """Test basic completion with OpenAI."""
-        llm = OpenAILLM(model="gpt-4o-mini")
-        agent = Agent(name="openai_test", llm=llm)
-
-        response = agent.input("Say 'Hello from OpenAI' exactly")
-        assert response is not None
-        assert "Hello from OpenAI" in response
-
-    def test_openai_with_tools(self):
-        """Test OpenAI with tool calling."""
-        agent = Agent(
-            name="openai_tools",
-            model="gpt-4o-mini",
-            tools=[calculator]
-        )
-
-        response = agent.input("Calculate 42 * 2")
-        assert response is not None
-        assert "84" in response
-
-    def test_openai_multi_turn(self):
-        """Test multi-turn conversation with OpenAI."""
-        agent = Agent(
-            name="openai_conversation",
-            model="gpt-4o-mini"
-        )
-
-        # First turn
-        response = agent.input("My name is Alice. Remember this.")
-        assert response is not None
-
-        # Second turn - should remember context
-        response = agent.input("What's my name?")
-        assert response is not None
-        assert "Alice" in response
-
-    def test_openai_streaming(self):
-        """Test streaming responses from OpenAI."""
-        agent = Agent(
-            name="openai_streaming",
-            model="gpt-4o-mini"
-        )
-
-        response = agent.input("Count from 1 to 5")
-        assert response is not None
-        # Should contain numbers 1 through 5
-        for num in ["1", "2", "3", "4", "5"]:
-            assert num in response
-
-    def test_openai_different_models(self):
-        """Test different OpenAI models."""
-        models = ["gpt-4o-mini", "gpt-4o"]
-
-        for model in models:
-            if model == "gpt-4o" and not os.getenv("TEST_EXPENSIVE_MODELS"):
-                continue  # Skip expensive models unless explicitly enabled
-
-            agent = Agent(
-                name=f"openai_{model.replace('-', '_')}",
-                model=model
-            )
-
-            response = agent.input("Reply with OK")
-            assert response is not None
-            assert len(response) > 0
--- a/tests/real_api/test_responses_parse.py
+++ /dev/null
@@ -1,88 +0,0 @@
-"""Test for issue #9 - structured output with managed keys using responses.parse endpoint"""
-import os
-import pytest
-import time
-import requests
-from pydantic import BaseModel
-from nacl.signing import SigningKey
-from nacl.encoding import HexEncoder
-from connectonion import llm_do
-
-
-class EmailDraft(BaseModel):
-    """Email draft with subject and body"""
-    subject: str
-    body: str
-
-
-def _is_localhost_available():
-    """Check if localhost:8000 is reachable."""
-    try:
-        requests.get("http://localhost:8000/health", timeout=2)
-        return True
-    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
-        return False
-
-
-def get_test_token():
-    """Generate test auth token for local backend"""
-    signing_key = SigningKey.generate()
-    public_key = "0x" + signing_key.verify_key.encode(encoder=HexEncoder).decode()
-    timestamp = int(time.time())
-    message = f"ConnectOnion-Auth-{public_key}-{timestamp}"
-    signature = signing_key.sign(message.encode()).signature.hex()
-
-    # Authenticate with local backend
-    response = requests.post(
-        "http://localhost:8000/api/v1/auth",
-        json={
-            "public_key": public_key,
-            "message": message,
-            "signature": signature
-        }
-    )
-
-    if response.status_code != 200:
-        raise Exception(f"Auth failed: {response.status_code} - {response.text}")
-
-    return response.json()["token"]
-
-
-@pytest.mark.skipif(
-    os.getenv("CI") or os.getenv("GITHUB_ACTIONS") or not _is_localhost_available(),
-    reason="Skipped in CI or when localhost:8000 is not available"
-)
-def test_structured_output_with_managed_keys():
-    """Test that llm_do() with Pydantic output works with co/ models.
-
-    This tests the /v1/responses/parse endpoint on the backend.
-    """
-    # Use local backend for testing
-    os.environ["OPENONION_DEV"] = "1"
-
-    # Get auth token
-    token = get_test_token()
-    os.environ["OPENONION_API_KEY"] = token
-
-    draft = llm_do(
-        "Write a friendly hello email to a new colleague",
-        output=EmailDraft,
-        temperature=0.7,
-        model="co/gpt-4o-mini"
-    )
-
-    # Verify we got a valid Pydantic model
-    assert isinstance(draft, EmailDraft)
-    assert isinstance(draft.subject, str)
-    assert isinstance(draft.body, str)
-    assert len(draft.subject) > 0
-    assert len(draft.body) > 0
-
-    print(f"\n✓ Structured output test passed!")
-    print(f"Subject: {draft.subject}")
-    print(f"Body: {draft.body[:100]}...")
-
-
-if __name__ == "__main__":
-    # Allow running directly for quick testing
-    test_structured_output_with_managed_keys()