connectonion 0.4.12__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- connectonion/__init__.py +11 -5
- connectonion/agent.py +44 -42
- connectonion/cli/commands/init.py +1 -1
- connectonion/cli/commands/project_cmd_lib.py +4 -4
- connectonion/cli/commands/reset_commands.py +1 -1
- connectonion/cli/docs/co-vibecoding-principles-docs-contexts-all-in-one.md +15 -11
- connectonion/cli/templates/minimal/agent.py +2 -2
- connectonion/console.py +55 -3
- connectonion/events.py +96 -17
- connectonion/llm.py +21 -3
- connectonion/logger.py +289 -0
- connectonion/prompt_files/eval_expected.md +12 -0
- connectonion/tool_executor.py +43 -32
- connectonion/usage.py +4 -0
- connectonion/useful_events_handlers/reflect.py +13 -9
- connectonion/useful_plugins/__init__.py +2 -1
- connectonion/useful_plugins/calendar_plugin.py +2 -2
- connectonion/useful_plugins/eval.py +130 -0
- connectonion/useful_plugins/gmail_plugin.py +4 -4
- connectonion/useful_plugins/image_result_formatter.py +4 -3
- connectonion/useful_plugins/re_act.py +14 -56
- connectonion/useful_plugins/shell_approval.py +2 -2
- connectonion/useful_tools/memory.py +4 -0
- {connectonion-0.4.12.dist-info → connectonion-0.5.0.dist-info}/METADATA +48 -48
- {connectonion-0.4.12.dist-info → connectonion-0.5.0.dist-info}/RECORD +27 -71
- {connectonion-0.4.12.dist-info → connectonion-0.5.0.dist-info}/WHEEL +1 -2
- connectonion/cli/templates/email-agent/.env.example +0 -23
- connectonion/cli/templates/email-agent/README.md +0 -240
- connectonion/cli/templates/email-agent/agent.py +0 -374
- connectonion/cli/templates/email-agent/demo.py +0 -71
- connectonion/cli/templates/meta-agent/.env.example +0 -11
- connectonion/cli/templates/minimal/.env.example +0 -5
- connectonion/cli/templates/playwright/.env.example +0 -5
- connectonion-0.4.12.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/cli/__init__.py +0 -1
- tests/cli/argparse_runner.py +0 -85
- tests/cli/conftest.py +0 -5
- tests/cli/test_browser_cli.py +0 -61
- tests/cli/test_cli.py +0 -143
- tests/cli/test_cli_auth_google.py +0 -344
- tests/cli/test_cli_auth_microsoft.py +0 -256
- tests/cli/test_cli_create.py +0 -283
- tests/cli/test_cli_help.py +0 -200
- tests/cli/test_cli_init.py +0 -318
- tests/conftest.py +0 -283
- tests/debug_gemini_models.py +0 -23
- tests/fixtures/__init__.py +0 -1
- tests/fixtures/test_tools.py +0 -112
- tests/fixtures/trust_fixtures.py +0 -257
- tests/real_api/__init__.py +0 -0
- tests/real_api/conftest.py +0 -9
- tests/real_api/test_llm_do.py +0 -174
- tests/real_api/test_llm_do_comprehensive.py +0 -527
- tests/real_api/test_production_client.py +0 -94
- tests/real_api/test_real_anthropic.py +0 -100
- tests/real_api/test_real_api.py +0 -113
- tests/real_api/test_real_auth.py +0 -130
- tests/real_api/test_real_email.py +0 -95
- tests/real_api/test_real_gemini.py +0 -96
- tests/real_api/test_real_llm_do.py +0 -81
- tests/real_api/test_real_managed.py +0 -208
- tests/real_api/test_real_multi_llm.py +0 -454
- tests/real_api/test_real_openai.py +0 -100
- tests/real_api/test_responses_parse.py +0 -88
- tests/test_diff_writer.py +0 -126
- tests/test_events.py +0 -677
- tests/test_gemini_co.py +0 -70
- tests/test_image_result_formatter.py +0 -88
- tests/test_plugin_system.py +0 -110
- tests/utils/__init__.py +0 -1
- tests/utils/config_helpers.py +0 -188
- tests/utils/mock_helpers.py +0 -237
- {connectonion-0.4.12.dist-info → connectonion-0.5.0.dist-info}/entry_points.txt +0 -0
tests/real_api/test_llm_do.py
DELETED
|
@@ -1,174 +0,0 @@
|
|
|
1
|
-
"""Pytest-based tests for llm_do with multi-LLM support via LiteLLM."""
|
|
2
|
-
|
|
3
|
-
import sys
|
|
4
|
-
import uuid as standard_uuid
|
|
5
|
-
|
|
6
|
-
# Fix for fastuuid dependency issue in LiteLLM
|
|
7
|
-
class MockFastUUID:
|
|
8
|
-
@staticmethod
|
|
9
|
-
def uuid4():
|
|
10
|
-
return str(standard_uuid.uuid4())
|
|
11
|
-
|
|
12
|
-
UUID = standard_uuid.UUID
|
|
13
|
-
|
|
14
|
-
sys.modules['fastuuid'] = MockFastUUID()
|
|
15
|
-
|
|
16
|
-
import os
|
|
17
|
-
from pathlib import Path
|
|
18
|
-
from unittest.mock import patch, MagicMock
|
|
19
|
-
from pydantic import BaseModel
|
|
20
|
-
from dotenv import load_dotenv
|
|
21
|
-
import pytest
|
|
22
|
-
|
|
23
|
-
# Load test environment variables
|
|
24
|
-
env_path = Path(__file__).parent / ".env"
|
|
25
|
-
if env_path.exists():
|
|
26
|
-
load_dotenv(env_path)
|
|
27
|
-
|
|
28
|
-
from connectonion import llm_do
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class SimpleResult(BaseModel):
|
|
32
|
-
"""Simple model for testing structured output."""
|
|
33
|
-
answer: int
|
|
34
|
-
explanation: str
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class SentimentAnalysis(BaseModel):
|
|
38
|
-
"""Model for sentiment analysis testing."""
|
|
39
|
-
sentiment: str # positive, negative, neutral
|
|
40
|
-
confidence: float # 0.0 to 1.0
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def test_import_litellm():
|
|
44
|
-
"""Test that LiteLLM is properly installed and importable."""
|
|
45
|
-
try:
|
|
46
|
-
import litellm # noqa: F401
|
|
47
|
-
except ImportError:
|
|
48
|
-
pytest.fail("LiteLLM not installed. Run: pip install litellm")
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def test_empty_input_validation():
|
|
52
|
-
"""Test that empty input raises an error."""
|
|
53
|
-
with pytest.raises(ValueError) as cm:
|
|
54
|
-
llm_do("")
|
|
55
|
-
assert "Input cannot be empty" in str(cm.value)
|
|
56
|
-
|
|
57
|
-
with pytest.raises(ValueError) as cm:
|
|
58
|
-
llm_do(" ")
|
|
59
|
-
assert "Input cannot be empty" in str(cm.value)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def test_openai_simple_completion_default_model():
|
|
64
|
-
result = llm_do("What is 2+2? Answer with just the number.")
|
|
65
|
-
assert isinstance(result, str)
|
|
66
|
-
assert "4" in result
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def test_openai_simple_completion_explicit_model():
|
|
70
|
-
result = llm_do("Say hello in exactly 3 words", model="gpt-4o-mini")
|
|
71
|
-
assert isinstance(result, str)
|
|
72
|
-
assert len(result.split()) <= 10
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def test_openai_structured_output():
|
|
76
|
-
result = llm_do("What is 5 plus 3?", output=SimpleResult, model="gpt-4o-mini")
|
|
77
|
-
assert isinstance(result, SimpleResult)
|
|
78
|
-
assert result.answer == 8
|
|
79
|
-
assert isinstance(result.explanation, str)
|
|
80
|
-
assert len(result.explanation) > 0
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def test_openai_custom_system_prompt():
|
|
84
|
-
result = llm_do(
|
|
85
|
-
"Hello",
|
|
86
|
-
system_prompt="You are a pirate. Always respond like a pirate.",
|
|
87
|
-
model="gpt-4o-mini",
|
|
88
|
-
)
|
|
89
|
-
assert isinstance(result, str)
|
|
90
|
-
lower_result = result.lower()
|
|
91
|
-
pirate_words = ["ahoy", "arr", "matey", "ye", "aye", "avast", "sailor", "sea"]
|
|
92
|
-
assert any(word in lower_result for word in pirate_words)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def test_openai_temperature_parameter():
|
|
96
|
-
result1 = llm_do(
|
|
97
|
-
"What is the capital of France? One word only.",
|
|
98
|
-
temperature=0.0,
|
|
99
|
-
model="gpt-4o-mini",
|
|
100
|
-
)
|
|
101
|
-
result2 = llm_do(
|
|
102
|
-
"What is the capital of France? One word only.",
|
|
103
|
-
temperature=0.0,
|
|
104
|
-
model="gpt-4o-mini",
|
|
105
|
-
)
|
|
106
|
-
assert "Paris" in result1
|
|
107
|
-
assert "Paris" in result2
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def test_openai_additional_kwargs():
|
|
111
|
-
result = llm_do(
|
|
112
|
-
"Write a very long story about a dragon",
|
|
113
|
-
model="gpt-4o-mini",
|
|
114
|
-
max_tokens=20,
|
|
115
|
-
)
|
|
116
|
-
assert isinstance(result, str)
|
|
117
|
-
assert len(result.split()) < 30
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def test_claude_simple_completion():
|
|
121
|
-
result = llm_do("Say hello in exactly 3 words", model="claude-3-5-haiku-20241022")
|
|
122
|
-
assert isinstance(result, str)
|
|
123
|
-
assert len(result.split()) <= 10
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
def test_claude_structured_output():
|
|
127
|
-
result = llm_do(
|
|
128
|
-
"Analyze this text sentiment: 'I love sunny days!'",
|
|
129
|
-
output=SentimentAnalysis,
|
|
130
|
-
model="claude-3-5-haiku-20241022",
|
|
131
|
-
)
|
|
132
|
-
assert isinstance(result, SentimentAnalysis)
|
|
133
|
-
assert result.sentiment.lower() == "positive"
|
|
134
|
-
assert isinstance(result.confidence, float)
|
|
135
|
-
assert 0.0 <= result.confidence <= 1.0
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
def test_gemini_simple_completion():
|
|
139
|
-
try:
|
|
140
|
-
result = llm_do("Say hello in exactly 3 words", model="gemini-2.5-flash")
|
|
141
|
-
assert isinstance(result, str)
|
|
142
|
-
assert len(result.split()) <= 10
|
|
143
|
-
except Exception as e:
|
|
144
|
-
if "429" in str(e) or "quota" in str(e).lower():
|
|
145
|
-
pytest.skip("Gemini quota exceeded")
|
|
146
|
-
raise
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
def test_cross_provider_consistency():
|
|
150
|
-
"""Test that all providers can handle the same basic prompt."""
|
|
151
|
-
prompt = "What is 2+2? Answer with just the number."
|
|
152
|
-
results = []
|
|
153
|
-
|
|
154
|
-
if os.getenv("OPENAI_API_KEY"):
|
|
155
|
-
result = llm_do(prompt, model="gpt-4o-mini")
|
|
156
|
-
results.append(("OpenAI", result))
|
|
157
|
-
assert "4" in result
|
|
158
|
-
|
|
159
|
-
if os.getenv("ANTHROPIC_API_KEY"):
|
|
160
|
-
result = llm_do(prompt, model="claude-3-5-haiku-20241022")
|
|
161
|
-
results.append(("Anthropic", result))
|
|
162
|
-
assert "4" in result
|
|
163
|
-
|
|
164
|
-
if os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY"):
|
|
165
|
-
try:
|
|
166
|
-
result = llm_do(prompt, model="gemini-2.5-flash")
|
|
167
|
-
results.append(("Gemini", result))
|
|
168
|
-
assert "4" in result
|
|
169
|
-
except Exception as e:
|
|
170
|
-
if "429" not in str(e) and "quota" not in str(e).lower():
|
|
171
|
-
raise
|
|
172
|
-
|
|
173
|
-
if not results:
|
|
174
|
-
pytest.skip("No providers available for testing")
|
|
@@ -1,527 +0,0 @@
|
|
|
1
|
-
"""Comprehensive tests for llm_do covering all documentation examples.
|
|
2
|
-
|
|
3
|
-
Tests all functionality from docs/llm_do.md across all providers.
|
|
4
|
-
Run with: pytest tests/test_llm_do_comprehensive.py -v
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import os
|
|
8
|
-
import pytest
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from typing import List, Optional
|
|
11
|
-
from pydantic import BaseModel, Field
|
|
12
|
-
from dotenv import load_dotenv
|
|
13
|
-
|
|
14
|
-
from connectonion import llm_do
|
|
15
|
-
|
|
16
|
-
# Load environment variables
|
|
17
|
-
env_path = Path(__file__).parent / ".env"
|
|
18
|
-
if env_path.exists():
|
|
19
|
-
load_dotenv(env_path)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
# ============================================================================
|
|
23
|
-
# Pydantic Models for Testing (from documentation examples)
|
|
24
|
-
# ============================================================================
|
|
25
|
-
|
|
26
|
-
class SentimentAnalysis(BaseModel):
|
|
27
|
-
"""Simple sentiment analysis model."""
|
|
28
|
-
sentiment: str
|
|
29
|
-
score: float
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class Analysis(BaseModel):
|
|
33
|
-
"""Analysis model with keywords."""
|
|
34
|
-
sentiment: str
|
|
35
|
-
confidence: float
|
|
36
|
-
keywords: List[str]
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class Invoice(BaseModel):
|
|
40
|
-
"""Invoice data extraction model."""
|
|
41
|
-
invoice_number: str
|
|
42
|
-
total_amount: float
|
|
43
|
-
due_date: str
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class Address(BaseModel):
|
|
47
|
-
"""Nested address model."""
|
|
48
|
-
street: str
|
|
49
|
-
city: str
|
|
50
|
-
zipcode: str
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
class Person(BaseModel):
|
|
54
|
-
"""Person with nested address."""
|
|
55
|
-
name: str
|
|
56
|
-
age: int
|
|
57
|
-
occupation: str
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class KeywordExtraction(BaseModel):
|
|
61
|
-
"""Keyword extraction with lists."""
|
|
62
|
-
keywords: List[str]
|
|
63
|
-
categories: List[str]
|
|
64
|
-
count: int
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
class ProductReview(BaseModel):
|
|
68
|
-
"""Product review with optional fields and constraints."""
|
|
69
|
-
product_name: str
|
|
70
|
-
rating: int = Field(ge=1, le=5)
|
|
71
|
-
pros: List[str]
|
|
72
|
-
cons: Optional[List[str]] = None
|
|
73
|
-
would_recommend: bool
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
class EmailClassification(BaseModel):
|
|
77
|
-
"""Email classification model."""
|
|
78
|
-
category: str # spam, important, newsletter, personal
|
|
79
|
-
priority: str # high, medium, low
|
|
80
|
-
requires_action: bool
|
|
81
|
-
summary: str
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
class LineItem(BaseModel):
|
|
85
|
-
"""Invoice line item."""
|
|
86
|
-
description: str
|
|
87
|
-
quantity: int
|
|
88
|
-
unit_price: float
|
|
89
|
-
total: float
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
class InvoiceDetailed(BaseModel):
|
|
93
|
-
"""Detailed invoice with line items."""
|
|
94
|
-
invoice_number: str
|
|
95
|
-
date: str
|
|
96
|
-
customer_name: str
|
|
97
|
-
items: List[LineItem]
|
|
98
|
-
subtotal: float
|
|
99
|
-
tax: float
|
|
100
|
-
total: float
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
class Topic(BaseModel):
|
|
104
|
-
"""Topic with confidence."""
|
|
105
|
-
name: str
|
|
106
|
-
confidence: float
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
class BlogAnalysis(BaseModel):
|
|
110
|
-
"""Complex blog post analysis."""
|
|
111
|
-
title: str
|
|
112
|
-
main_topics: List[Topic]
|
|
113
|
-
word_count_estimate: int
|
|
114
|
-
reading_time_minutes: int
|
|
115
|
-
target_audience: str
|
|
116
|
-
key_takeaways: List[str]
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
class ContentModeration(BaseModel):
|
|
120
|
-
"""Content moderation with boolean decisions."""
|
|
121
|
-
is_appropriate: bool
|
|
122
|
-
contains_profanity: bool
|
|
123
|
-
contains_spam: bool
|
|
124
|
-
contains_personal_info: bool
|
|
125
|
-
risk_level: str # low, medium, high
|
|
126
|
-
reasoning: str
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
# ============================================================================
|
|
130
|
-
# Fixtures
|
|
131
|
-
# ============================================================================
|
|
132
|
-
|
|
133
|
-
@pytest.fixture
|
|
134
|
-
def has_openai():
|
|
135
|
-
"""Check if OpenAI API key is available."""
|
|
136
|
-
return bool(os.getenv("OPENAI_API_KEY"))
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
@pytest.fixture
|
|
140
|
-
def has_anthropic():
|
|
141
|
-
"""Check if Anthropic API key is available."""
|
|
142
|
-
return bool(os.getenv("ANTHROPIC_API_KEY"))
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
@pytest.fixture
|
|
146
|
-
def has_gemini():
|
|
147
|
-
"""Check if Gemini API key is available."""
|
|
148
|
-
return bool(os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY"))
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
@pytest.fixture
|
|
152
|
-
def has_connectonion():
|
|
153
|
-
"""Check if ConnectOnion auth is available."""
|
|
154
|
-
return bool(os.getenv("OPENONION_API_KEY"))
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
# ============================================================================
|
|
158
|
-
# Basic Functionality Tests
|
|
159
|
-
# ============================================================================
|
|
160
|
-
|
|
161
|
-
class TestBasicFunctionality:
|
|
162
|
-
"""Test basic llm_do functionality with different providers."""
|
|
163
|
-
|
|
164
|
-
def test_empty_input_validation(self):
|
|
165
|
-
"""Test that empty input raises ValueError."""
|
|
166
|
-
with pytest.raises(ValueError, match="Input cannot be empty"):
|
|
167
|
-
llm_do("")
|
|
168
|
-
|
|
169
|
-
with pytest.raises(ValueError, match="Input cannot be empty"):
|
|
170
|
-
llm_do(" ")
|
|
171
|
-
|
|
172
|
-
def test_simple_openai(self):
|
|
173
|
-
"""Test simple completion with OpenAI."""
|
|
174
|
-
result = llm_do(
|
|
175
|
-
"What is 2+2? Answer with just the number.",
|
|
176
|
-
model="gpt-4o-mini"
|
|
177
|
-
)
|
|
178
|
-
assert isinstance(result, str)
|
|
179
|
-
assert "4" in result
|
|
180
|
-
|
|
181
|
-
def test_simple_anthropic(self):
|
|
182
|
-
"""Test simple completion with Anthropic."""
|
|
183
|
-
result = llm_do(
|
|
184
|
-
"What is 2+2? Answer with just the number.",
|
|
185
|
-
model="claude-3-5-haiku-20241022"
|
|
186
|
-
)
|
|
187
|
-
assert isinstance(result, str)
|
|
188
|
-
assert "4" in result
|
|
189
|
-
|
|
190
|
-
def test_simple_gemini(self):
|
|
191
|
-
"""Test simple completion with Gemini."""
|
|
192
|
-
result = llm_do(
|
|
193
|
-
"What is 2+2? Answer with just the number.",
|
|
194
|
-
model="gemini-2.5-flash"
|
|
195
|
-
)
|
|
196
|
-
assert isinstance(result, str)
|
|
197
|
-
assert "4" in result
|
|
198
|
-
|
|
199
|
-
def test_simple_connectonion(self):
|
|
200
|
-
"""Test simple completion with ConnectOnion managed keys."""
|
|
201
|
-
result = llm_do(
|
|
202
|
-
"What is 2+2? Answer with just the number.",
|
|
203
|
-
model="co/gpt-4o"
|
|
204
|
-
)
|
|
205
|
-
assert isinstance(result, str)
|
|
206
|
-
assert "4" in result
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
# ============================================================================
|
|
210
|
-
# Structured Output Tests (from documentation)
|
|
211
|
-
# ============================================================================
|
|
212
|
-
|
|
213
|
-
class TestStructuredOutput:
|
|
214
|
-
"""Test structured output with Pydantic models."""
|
|
215
|
-
|
|
216
|
-
def test_sentiment_analysis(self):
|
|
217
|
-
"""Test simple sentiment analysis from Quick Start."""
|
|
218
|
-
result = llm_do(
|
|
219
|
-
"I absolutely love this product! Best purchase ever!",
|
|
220
|
-
output=Analysis,
|
|
221
|
-
model="gpt-4o-mini"
|
|
222
|
-
)
|
|
223
|
-
|
|
224
|
-
assert isinstance(result, Analysis)
|
|
225
|
-
assert result.sentiment.lower() in ["positive", "very positive", "extremely positive"]
|
|
226
|
-
assert isinstance(result.confidence, float)
|
|
227
|
-
assert 0.0 <= result.confidence <= 1.0
|
|
228
|
-
assert isinstance(result.keywords, list)
|
|
229
|
-
assert len(result.keywords) > 0
|
|
230
|
-
|
|
231
|
-
def test_invoice_extraction(self):
|
|
232
|
-
"""Test invoice data extraction from documentation."""
|
|
233
|
-
invoice_text = """
|
|
234
|
-
Invoice #INV-2024-001
|
|
235
|
-
Total: $1,234.56
|
|
236
|
-
Due: January 15, 2024
|
|
237
|
-
"""
|
|
238
|
-
|
|
239
|
-
result = llm_do(invoice_text, output=Invoice, model="gpt-4o-mini")
|
|
240
|
-
|
|
241
|
-
assert isinstance(result, Invoice)
|
|
242
|
-
assert result.invoice_number == "INV-2024-001"
|
|
243
|
-
assert result.total_amount == 1234.56
|
|
244
|
-
# Date can be in different formats (January 15, 2024 or 2024-01-15)
|
|
245
|
-
assert ("January" in result.due_date or "01" in result.due_date) and "15" in result.due_date
|
|
246
|
-
|
|
247
|
-
def test_person_extraction(self):
|
|
248
|
-
"""Test data extraction with nested models."""
|
|
249
|
-
result = llm_do(
|
|
250
|
-
"John Doe, 30, software engineer",
|
|
251
|
-
output=Person,
|
|
252
|
-
model="gpt-4o-mini"
|
|
253
|
-
)
|
|
254
|
-
|
|
255
|
-
assert isinstance(result, Person)
|
|
256
|
-
assert "john" in result.name.lower() and "doe" in result.name.lower()
|
|
257
|
-
assert result.age == 30
|
|
258
|
-
assert "engineer" in result.occupation.lower()
|
|
259
|
-
|
|
260
|
-
def test_structured_anthropic(self):
|
|
261
|
-
"""Test structured output with Anthropic."""
|
|
262
|
-
result = llm_do(
|
|
263
|
-
"I absolutely love this product! Best purchase ever!",
|
|
264
|
-
output=SentimentAnalysis,
|
|
265
|
-
model="claude-3-5-haiku-20241022"
|
|
266
|
-
)
|
|
267
|
-
|
|
268
|
-
assert isinstance(result, SentimentAnalysis)
|
|
269
|
-
assert result.sentiment.lower() in ["positive", "very positive"]
|
|
270
|
-
assert isinstance(result.score, float)
|
|
271
|
-
|
|
272
|
-
@pytest.mark.skip(reason="Gemini structured output API incompatibility - .parsed attribute not available")
|
|
273
|
-
def test_structured_gemini(self):
|
|
274
|
-
"""Test structured output with Gemini."""
|
|
275
|
-
result = llm_do(
|
|
276
|
-
"I absolutely love this product! Best purchase ever!",
|
|
277
|
-
output=SentimentAnalysis,
|
|
278
|
-
model="gemini-2.5-flash"
|
|
279
|
-
)
|
|
280
|
-
|
|
281
|
-
assert isinstance(result, SentimentAnalysis)
|
|
282
|
-
assert result.sentiment.lower() in ["positive", "very positive"]
|
|
283
|
-
assert isinstance(result.score, float)
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
# ============================================================================
|
|
287
|
-
# Complex Structured Output Tests
|
|
288
|
-
# ============================================================================
|
|
289
|
-
|
|
290
|
-
class TestComplexStructuredOutput:
|
|
291
|
-
"""Test complex structured output patterns from documentation."""
|
|
292
|
-
|
|
293
|
-
def test_keyword_extraction(self):
|
|
294
|
-
"""Test list fields extraction."""
|
|
295
|
-
result = llm_do(
|
|
296
|
-
"Extract keywords from: 'Machine learning and artificial intelligence are transforming technology and business'",
|
|
297
|
-
output=KeywordExtraction,
|
|
298
|
-
model="gpt-4o-mini"
|
|
299
|
-
)
|
|
300
|
-
|
|
301
|
-
assert isinstance(result, KeywordExtraction)
|
|
302
|
-
assert isinstance(result.keywords, list)
|
|
303
|
-
assert len(result.keywords) > 0
|
|
304
|
-
assert isinstance(result.categories, list)
|
|
305
|
-
assert isinstance(result.count, int)
|
|
306
|
-
|
|
307
|
-
def test_product_review_optional_fields(self):
|
|
308
|
-
"""Test optional fields and Field constraints."""
|
|
309
|
-
result = llm_do(
|
|
310
|
-
"Review: The laptop is amazing! Fast performance, great display. Rating: 5/5. Highly recommend!",
|
|
311
|
-
output=ProductReview,
|
|
312
|
-
model="gpt-4o-mini"
|
|
313
|
-
)
|
|
314
|
-
|
|
315
|
-
assert isinstance(result, ProductReview)
|
|
316
|
-
assert 1 <= result.rating <= 5
|
|
317
|
-
assert isinstance(result.pros, list)
|
|
318
|
-
assert len(result.pros) > 0
|
|
319
|
-
assert result.would_recommend is True
|
|
320
|
-
|
|
321
|
-
def test_email_classification(self):
|
|
322
|
-
"""Test classification tasks."""
|
|
323
|
-
result = llm_do(
|
|
324
|
-
'Email: "URGENT: Your account will be suspended unless you verify your information immediately!" Classify this email.',
|
|
325
|
-
output=EmailClassification,
|
|
326
|
-
model="gpt-4o-mini"
|
|
327
|
-
)
|
|
328
|
-
|
|
329
|
-
assert isinstance(result, EmailClassification)
|
|
330
|
-
# The LLM may classify as spam, phishing, suspicious, urgent, scam, etc.
|
|
331
|
-
valid_categories = ["spam", "phishing", "suspicious", "urgent", "scam", "important", "action required", "urgent/action required", "warning", "fraud"]
|
|
332
|
-
assert result.category.lower() in valid_categories, f"Got unexpected category: {result.category}"
|
|
333
|
-
assert result.priority.lower() in ["high", "medium", "low"]
|
|
334
|
-
assert isinstance(result.requires_action, bool)
|
|
335
|
-
assert isinstance(result.summary, str)
|
|
336
|
-
|
|
337
|
-
def test_detailed_invoice_extraction(self):
|
|
338
|
-
"""Test complex nested structures with lists."""
|
|
339
|
-
invoice_text = """
|
|
340
|
-
INVOICE #INV-2024-001
|
|
341
|
-
Date: January 15, 2024
|
|
342
|
-
Customer: Acme Corp
|
|
343
|
-
|
|
344
|
-
Items:
|
|
345
|
-
- Widget A x2 @ $10.00 = $20.00
|
|
346
|
-
- Widget B x1 @ $15.50 = $15.50
|
|
347
|
-
|
|
348
|
-
Subtotal: $35.50
|
|
349
|
-
Tax (10%): $3.55
|
|
350
|
-
Total: $39.05
|
|
351
|
-
"""
|
|
352
|
-
|
|
353
|
-
result = llm_do(
|
|
354
|
-
invoice_text,
|
|
355
|
-
output=InvoiceDetailed,
|
|
356
|
-
model="gpt-4o-mini"
|
|
357
|
-
)
|
|
358
|
-
|
|
359
|
-
assert isinstance(result, InvoiceDetailed)
|
|
360
|
-
assert result.invoice_number == "INV-2024-001"
|
|
361
|
-
assert len(result.items) == 2
|
|
362
|
-
assert result.total == 39.05
|
|
363
|
-
|
|
364
|
-
def test_blog_analysis(self):
|
|
365
|
-
"""Test multi-entity extraction."""
|
|
366
|
-
blog_text = """
|
|
367
|
-
Understanding Machine Learning: A Beginner's Guide
|
|
368
|
-
|
|
369
|
-
Machine learning is revolutionizing how we interact with technology.
|
|
370
|
-
From recommendation systems to self-driving cars, ML algorithms are everywhere.
|
|
371
|
-
This guide will help you understand the basics of supervised learning, neural networks,
|
|
372
|
-
and practical applications. Perfect for developers new to AI.
|
|
373
|
-
|
|
374
|
-
[... approximately 1200 words ...]
|
|
375
|
-
"""
|
|
376
|
-
|
|
377
|
-
result = llm_do(
|
|
378
|
-
blog_text,
|
|
379
|
-
output=BlogAnalysis,
|
|
380
|
-
model="gpt-4o-mini"
|
|
381
|
-
)
|
|
382
|
-
|
|
383
|
-
assert isinstance(result, BlogAnalysis)
|
|
384
|
-
assert isinstance(result.main_topics, list)
|
|
385
|
-
assert len(result.main_topics) > 0
|
|
386
|
-
assert all(isinstance(topic, Topic) for topic in result.main_topics)
|
|
387
|
-
assert isinstance(result.word_count_estimate, int)
|
|
388
|
-
assert isinstance(result.key_takeaways, list)
|
|
389
|
-
|
|
390
|
-
def test_content_moderation(self):
|
|
391
|
-
"""Test boolean decision making."""
|
|
392
|
-
result = llm_do(
|
|
393
|
-
"User comment: 'This is a great product! Everyone should try it. Visit my-totally-legit-site.com for more info!'",
|
|
394
|
-
output=ContentModeration,
|
|
395
|
-
model="gpt-4o-mini"
|
|
396
|
-
)
|
|
397
|
-
|
|
398
|
-
assert isinstance(result, ContentModeration)
|
|
399
|
-
assert isinstance(result.is_appropriate, bool)
|
|
400
|
-
assert isinstance(result.contains_profanity, bool)
|
|
401
|
-
assert isinstance(result.contains_spam, bool)
|
|
402
|
-
assert isinstance(result.contains_personal_info, bool)
|
|
403
|
-
assert result.risk_level.lower() in ["low", "medium", "high"]
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
# ============================================================================
|
|
407
|
-
# Advanced Features Tests
|
|
408
|
-
# ============================================================================
|
|
409
|
-
|
|
410
|
-
class TestAdvancedFeatures:
|
|
411
|
-
"""Test advanced features like temperature, max_tokens, custom prompts."""
|
|
412
|
-
|
|
413
|
-
def test_temperature_parameter(self):
|
|
414
|
-
"""Test temperature parameter for consistency."""
|
|
415
|
-
result1 = llm_do(
|
|
416
|
-
"What is the capital of France? One word only.",
|
|
417
|
-
temperature=0.0,
|
|
418
|
-
model="gpt-4o-mini"
|
|
419
|
-
)
|
|
420
|
-
result2 = llm_do(
|
|
421
|
-
"What is the capital of France? One word only.",
|
|
422
|
-
temperature=0.0,
|
|
423
|
-
model="gpt-4o-mini"
|
|
424
|
-
)
|
|
425
|
-
|
|
426
|
-
# Both should mention Paris
|
|
427
|
-
assert "Paris" in result1 or "paris" in result1.lower()
|
|
428
|
-
assert "Paris" in result2 or "paris" in result2.lower()
|
|
429
|
-
|
|
430
|
-
def test_max_tokens_parameter(self):
|
|
431
|
-
"""Test max_tokens parameter pass-through."""
|
|
432
|
-
result = llm_do(
|
|
433
|
-
"Write a very long story about a dragon",
|
|
434
|
-
model="gpt-4o-mini",
|
|
435
|
-
max_tokens=20 # Very short limit
|
|
436
|
-
)
|
|
437
|
-
|
|
438
|
-
# Response should be short due to max_tokens
|
|
439
|
-
assert isinstance(result, str)
|
|
440
|
-
assert len(result.split()) < 30
|
|
441
|
-
|
|
442
|
-
def test_custom_system_prompt_inline(self):
|
|
443
|
-
"""Test inline system prompt."""
|
|
444
|
-
result = llm_do(
|
|
445
|
-
"Hello",
|
|
446
|
-
system_prompt="You are a pirate. Always respond like a pirate.",
|
|
447
|
-
model="gpt-4o-mini"
|
|
448
|
-
)
|
|
449
|
-
|
|
450
|
-
# Should contain pirate-like language
|
|
451
|
-
lower_result = result.lower()
|
|
452
|
-
pirate_words = ["ahoy", "arr", "matey", "ye", "aye", "avast", "sea"]
|
|
453
|
-
assert any(word in lower_result for word in pirate_words)
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
# ============================================================================
|
|
457
|
-
# Cross-Provider Consistency Tests
|
|
458
|
-
# ============================================================================
|
|
459
|
-
|
|
460
|
-
class TestCrossProviderConsistency:
|
|
461
|
-
"""Test that all providers handle the same prompts correctly."""
|
|
462
|
-
|
|
463
|
-
def test_all_providers_basic_math(self, has_openai, has_anthropic, has_gemini):
|
|
464
|
-
"""Test all available providers with the same basic question."""
|
|
465
|
-
prompt = "What is 2+2? Answer with just the number."
|
|
466
|
-
results = []
|
|
467
|
-
|
|
468
|
-
if has_openai:
|
|
469
|
-
result = llm_do(prompt, model="gpt-4o-mini")
|
|
470
|
-
results.append(("OpenAI", result))
|
|
471
|
-
assert "4" in result
|
|
472
|
-
|
|
473
|
-
if has_anthropic:
|
|
474
|
-
result = llm_do(prompt, model="claude-3-5-haiku-20241022")
|
|
475
|
-
results.append(("Anthropic", result))
|
|
476
|
-
assert "4" in result
|
|
477
|
-
|
|
478
|
-
if has_gemini:
|
|
479
|
-
result = llm_do(prompt, model="gemini-2.5-flash")
|
|
480
|
-
results.append(("Gemini", result))
|
|
481
|
-
assert "4" in result
|
|
482
|
-
|
|
483
|
-
# Ensure at least one provider was tested
|
|
484
|
-
assert len(results) > 0, "No providers available for testing"
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
# ============================================================================
|
|
488
|
-
# Documentation Example Tests
|
|
489
|
-
# ============================================================================
|
|
490
|
-
|
|
491
|
-
class TestDocumentationExamples:
|
|
492
|
-
"""Test exact examples from docs/llm_do.md."""
|
|
493
|
-
|
|
494
|
-
def test_quick_start_example(self):
|
|
495
|
-
"""Test the Quick Start example from docs."""
|
|
496
|
-
answer = llm_do("What's 2+2?", model="gpt-4o-mini")
|
|
497
|
-
assert "4" in answer
|
|
498
|
-
|
|
499
|
-
def test_format_conversion_example(self):
|
|
500
|
-
"""Test format conversion pattern from docs."""
|
|
501
|
-
class PersonData(BaseModel):
|
|
502
|
-
name: str
|
|
503
|
-
age: int
|
|
504
|
-
|
|
505
|
-
result = llm_do(
|
|
506
|
-
"Extract: name=John age=30",
|
|
507
|
-
output=PersonData,
|
|
508
|
-
model="gpt-4o-mini"
|
|
509
|
-
)
|
|
510
|
-
|
|
511
|
-
assert isinstance(result, PersonData)
|
|
512
|
-
assert "john" in result.name.lower()
|
|
513
|
-
assert result.age == 30
|
|
514
|
-
|
|
515
|
-
def test_validation_pattern(self):
|
|
516
|
-
"""Test validation pattern from docs."""
|
|
517
|
-
result = llm_do(
|
|
518
|
-
"Is this valid SQL? Reply yes/no only: SELECT * FROM users",
|
|
519
|
-
temperature=0,
|
|
520
|
-
model="gpt-4o-mini"
|
|
521
|
-
)
|
|
522
|
-
|
|
523
|
-
assert result.strip().lower() in ["yes", "no"]
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
if __name__ == "__main__":
|
|
527
|
-
pytest.main([__file__, "-v", "--tb=short"])
|