connectonion 0.4.11__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. connectonion/__init__.py +11 -5
  2. connectonion/agent.py +44 -42
  3. connectonion/cli/commands/init.py +1 -1
  4. connectonion/cli/commands/project_cmd_lib.py +4 -4
  5. connectonion/cli/commands/reset_commands.py +1 -1
  6. connectonion/cli/docs/co-vibecoding-principles-docs-contexts-all-in-one.md +15 -11
  7. connectonion/cli/templates/minimal/agent.py +2 -2
  8. connectonion/console.py +55 -3
  9. connectonion/events.py +96 -17
  10. connectonion/llm.py +21 -3
  11. connectonion/logger.py +289 -0
  12. connectonion/prompt_files/eval_expected.md +12 -0
  13. connectonion/tool_executor.py +43 -32
  14. connectonion/usage.py +4 -0
  15. connectonion/useful_events_handlers/reflect.py +14 -10
  16. connectonion/useful_plugins/__init__.py +2 -1
  17. connectonion/useful_plugins/calendar_plugin.py +2 -2
  18. connectonion/useful_plugins/eval.py +130 -0
  19. connectonion/useful_plugins/gmail_plugin.py +4 -4
  20. connectonion/useful_plugins/image_result_formatter.py +4 -3
  21. connectonion/useful_plugins/re_act.py +15 -57
  22. connectonion/useful_plugins/shell_approval.py +2 -2
  23. connectonion/useful_tools/gmail.py +2 -2
  24. connectonion/useful_tools/memory.py +4 -0
  25. {connectonion-0.4.11.dist-info → connectonion-0.5.0.dist-info}/METADATA +48 -48
  26. {connectonion-0.4.11.dist-info → connectonion-0.5.0.dist-info}/RECORD +33 -77
  27. {connectonion-0.4.11.dist-info → connectonion-0.5.0.dist-info}/WHEEL +1 -2
  28. connectonion/cli/templates/email-agent/.env.example +0 -23
  29. connectonion/cli/templates/email-agent/README.md +0 -240
  30. connectonion/cli/templates/email-agent/agent.py +0 -374
  31. connectonion/cli/templates/email-agent/demo.py +0 -71
  32. connectonion/cli/templates/meta-agent/.env.example +0 -11
  33. connectonion/cli/templates/minimal/.env.example +0 -5
  34. connectonion/cli/templates/playwright/.env.example +0 -5
  35. connectonion-0.4.11.dist-info/top_level.txt +0 -2
  36. tests/__init__.py +0 -0
  37. tests/cli/__init__.py +0 -1
  38. tests/cli/argparse_runner.py +0 -85
  39. tests/cli/conftest.py +0 -5
  40. tests/cli/test_browser_cli.py +0 -61
  41. tests/cli/test_cli.py +0 -143
  42. tests/cli/test_cli_auth_google.py +0 -344
  43. tests/cli/test_cli_auth_microsoft.py +0 -256
  44. tests/cli/test_cli_create.py +0 -283
  45. tests/cli/test_cli_help.py +0 -200
  46. tests/cli/test_cli_init.py +0 -318
  47. tests/conftest.py +0 -283
  48. tests/debug_gemini_models.py +0 -23
  49. tests/fixtures/__init__.py +0 -1
  50. tests/fixtures/test_tools.py +0 -112
  51. tests/fixtures/trust_fixtures.py +0 -257
  52. tests/real_api/__init__.py +0 -0
  53. tests/real_api/conftest.py +0 -9
  54. tests/real_api/test_llm_do.py +0 -174
  55. tests/real_api/test_llm_do_comprehensive.py +0 -527
  56. tests/real_api/test_production_client.py +0 -94
  57. tests/real_api/test_real_anthropic.py +0 -100
  58. tests/real_api/test_real_api.py +0 -113
  59. tests/real_api/test_real_auth.py +0 -130
  60. tests/real_api/test_real_email.py +0 -95
  61. tests/real_api/test_real_gemini.py +0 -96
  62. tests/real_api/test_real_llm_do.py +0 -81
  63. tests/real_api/test_real_managed.py +0 -208
  64. tests/real_api/test_real_multi_llm.py +0 -454
  65. tests/real_api/test_real_openai.py +0 -100
  66. tests/real_api/test_responses_parse.py +0 -88
  67. tests/test_diff_writer.py +0 -126
  68. tests/test_events.py +0 -677
  69. tests/test_gemini_co.py +0 -70
  70. tests/test_image_result_formatter.py +0 -88
  71. tests/test_plugin_system.py +0 -110
  72. tests/utils/__init__.py +0 -1
  73. tests/utils/config_helpers.py +0 -188
  74. tests/utils/mock_helpers.py +0 -237
  75. /connectonion/{prompts → prompt_files}/__init__.py +0 -0
  76. /connectonion/{prompts → prompt_files}/analyze_contact.md +0 -0
  77. /connectonion/{prompts → prompt_files}/react_evaluate.md +0 -0
  78. /connectonion/{prompts → prompt_files}/react_plan.md +0 -0
  79. /connectonion/{prompts → prompt_files}/reflect.md +0 -0
  80. {connectonion-0.4.11.dist-info → connectonion-0.5.0.dist-info}/entry_points.txt +0 -0
tests/real_api/test_llm_do.py
@@ -1,174 +0,0 @@
- """Pytest-based tests for llm_do with multi-LLM support via LiteLLM."""
-
- import sys
- import uuid as standard_uuid
-
- # Fix for fastuuid dependency issue in LiteLLM
- class MockFastUUID:
-     @staticmethod
-     def uuid4():
-         return str(standard_uuid.uuid4())
-
-     UUID = standard_uuid.UUID
-
- sys.modules['fastuuid'] = MockFastUUID()
-
- import os
- from pathlib import Path
- from unittest.mock import patch, MagicMock
- from pydantic import BaseModel
- from dotenv import load_dotenv
- import pytest
-
- # Load test environment variables
- env_path = Path(__file__).parent / ".env"
- if env_path.exists():
-     load_dotenv(env_path)
-
- from connectonion import llm_do
-
-
- class SimpleResult(BaseModel):
-     """Simple model for testing structured output."""
-     answer: int
-     explanation: str
-
-
- class SentimentAnalysis(BaseModel):
-     """Model for sentiment analysis testing."""
-     sentiment: str  # positive, negative, neutral
-     confidence: float  # 0.0 to 1.0
-
-
- def test_import_litellm():
-     """Test that LiteLLM is properly installed and importable."""
-     try:
-         import litellm  # noqa: F401
-     except ImportError:
-         pytest.fail("LiteLLM not installed. Run: pip install litellm")
-
-
- def test_empty_input_validation():
-     """Test that empty input raises an error."""
-     with pytest.raises(ValueError) as cm:
-         llm_do("")
-     assert "Input cannot be empty" in str(cm.value)
-
-     with pytest.raises(ValueError) as cm:
-         llm_do(" ")
-     assert "Input cannot be empty" in str(cm.value)
-
-
-
- def test_openai_simple_completion_default_model():
-     result = llm_do("What is 2+2? Answer with just the number.")
-     assert isinstance(result, str)
-     assert "4" in result
-
-
- def test_openai_simple_completion_explicit_model():
-     result = llm_do("Say hello in exactly 3 words", model="gpt-4o-mini")
-     assert isinstance(result, str)
-     assert len(result.split()) <= 10
-
-
- def test_openai_structured_output():
-     result = llm_do("What is 5 plus 3?", output=SimpleResult, model="gpt-4o-mini")
-     assert isinstance(result, SimpleResult)
-     assert result.answer == 8
-     assert isinstance(result.explanation, str)
-     assert len(result.explanation) > 0
-
-
- def test_openai_custom_system_prompt():
-     result = llm_do(
-         "Hello",
-         system_prompt="You are a pirate. Always respond like a pirate.",
-         model="gpt-4o-mini",
-     )
-     assert isinstance(result, str)
-     lower_result = result.lower()
-     pirate_words = ["ahoy", "arr", "matey", "ye", "aye", "avast", "sailor", "sea"]
-     assert any(word in lower_result for word in pirate_words)
-
-
- def test_openai_temperature_parameter():
-     result1 = llm_do(
-         "What is the capital of France? One word only.",
-         temperature=0.0,
-         model="gpt-4o-mini",
-     )
-     result2 = llm_do(
-         "What is the capital of France? One word only.",
-         temperature=0.0,
-         model="gpt-4o-mini",
-     )
-     assert "Paris" in result1
-     assert "Paris" in result2
-
-
- def test_openai_additional_kwargs():
-     result = llm_do(
-         "Write a very long story about a dragon",
-         model="gpt-4o-mini",
-         max_tokens=20,
-     )
-     assert isinstance(result, str)
-     assert len(result.split()) < 30
-
-
- def test_claude_simple_completion():
-     result = llm_do("Say hello in exactly 3 words", model="claude-3-5-haiku-20241022")
-     assert isinstance(result, str)
-     assert len(result.split()) <= 10
-
-
- def test_claude_structured_output():
-     result = llm_do(
-         "Analyze this text sentiment: 'I love sunny days!'",
-         output=SentimentAnalysis,
-         model="claude-3-5-haiku-20241022",
-     )
-     assert isinstance(result, SentimentAnalysis)
-     assert result.sentiment.lower() == "positive"
-     assert isinstance(result.confidence, float)
-     assert 0.0 <= result.confidence <= 1.0
-
-
- def test_gemini_simple_completion():
-     try:
-         result = llm_do("Say hello in exactly 3 words", model="gemini-2.5-flash")
-         assert isinstance(result, str)
-         assert len(result.split()) <= 10
-     except Exception as e:
-         if "429" in str(e) or "quota" in str(e).lower():
-             pytest.skip("Gemini quota exceeded")
-         raise
-
-
- def test_cross_provider_consistency():
-     """Test that all providers can handle the same basic prompt."""
-     prompt = "What is 2+2? Answer with just the number."
-     results = []
-
-     if os.getenv("OPENAI_API_KEY"):
-         result = llm_do(prompt, model="gpt-4o-mini")
-         results.append(("OpenAI", result))
-         assert "4" in result
-
-     if os.getenv("ANTHROPIC_API_KEY"):
-         result = llm_do(prompt, model="claude-3-5-haiku-20241022")
-         results.append(("Anthropic", result))
-         assert "4" in result
-
-     if os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY"):
-         try:
-             result = llm_do(prompt, model="gemini-2.5-flash")
-             results.append(("Gemini", result))
-             assert "4" in result
-         except Exception as e:
-             if "429" not in str(e) and "quota" not in str(e).lower():
-                 raise
-
-     if not results:
-         pytest.skip("No providers available for testing")
tests/real_api/test_llm_do_comprehensive.py
@@ -1,527 +0,0 @@
- """Comprehensive tests for llm_do covering all documentation examples.
-
- Tests all functionality from docs/llm_do.md across all providers.
- Run with: pytest tests/test_llm_do_comprehensive.py -v
- """
-
- import os
- import pytest
- from pathlib import Path
- from typing import List, Optional
- from pydantic import BaseModel, Field
- from dotenv import load_dotenv
-
- from connectonion import llm_do
-
- # Load environment variables
- env_path = Path(__file__).parent / ".env"
- if env_path.exists():
-     load_dotenv(env_path)
-
-
- # ============================================================================
- # Pydantic Models for Testing (from documentation examples)
- # ============================================================================
-
- class SentimentAnalysis(BaseModel):
-     """Simple sentiment analysis model."""
-     sentiment: str
-     score: float
-
-
- class Analysis(BaseModel):
-     """Analysis model with keywords."""
-     sentiment: str
-     confidence: float
-     keywords: List[str]
-
-
- class Invoice(BaseModel):
-     """Invoice data extraction model."""
-     invoice_number: str
-     total_amount: float
-     due_date: str
-
-
- class Address(BaseModel):
-     """Nested address model."""
-     street: str
-     city: str
-     zipcode: str
-
-
- class Person(BaseModel):
-     """Person with nested address."""
-     name: str
-     age: int
-     occupation: str
-
-
- class KeywordExtraction(BaseModel):
-     """Keyword extraction with lists."""
-     keywords: List[str]
-     categories: List[str]
-     count: int
-
-
- class ProductReview(BaseModel):
-     """Product review with optional fields and constraints."""
-     product_name: str
-     rating: int = Field(ge=1, le=5)
-     pros: List[str]
-     cons: Optional[List[str]] = None
-     would_recommend: bool
-
-
- class EmailClassification(BaseModel):
-     """Email classification model."""
-     category: str  # spam, important, newsletter, personal
-     priority: str  # high, medium, low
-     requires_action: bool
-     summary: str
-
-
- class LineItem(BaseModel):
-     """Invoice line item."""
-     description: str
-     quantity: int
-     unit_price: float
-     total: float
-
-
- class InvoiceDetailed(BaseModel):
-     """Detailed invoice with line items."""
-     invoice_number: str
-     date: str
-     customer_name: str
-     items: List[LineItem]
-     subtotal: float
-     tax: float
-     total: float
-
-
- class Topic(BaseModel):
-     """Topic with confidence."""
-     name: str
-     confidence: float
-
-
- class BlogAnalysis(BaseModel):
-     """Complex blog post analysis."""
-     title: str
-     main_topics: List[Topic]
-     word_count_estimate: int
-     reading_time_minutes: int
-     target_audience: str
-     key_takeaways: List[str]
-
-
- class ContentModeration(BaseModel):
-     """Content moderation with boolean decisions."""
-     is_appropriate: bool
-     contains_profanity: bool
-     contains_spam: bool
-     contains_personal_info: bool
-     risk_level: str  # low, medium, high
-     reasoning: str
-
-
- # ============================================================================
- # Fixtures
- # ============================================================================
-
- @pytest.fixture
- def has_openai():
-     """Check if OpenAI API key is available."""
-     return bool(os.getenv("OPENAI_API_KEY"))
-
-
- @pytest.fixture
- def has_anthropic():
-     """Check if Anthropic API key is available."""
-     return bool(os.getenv("ANTHROPIC_API_KEY"))
-
-
- @pytest.fixture
- def has_gemini():
-     """Check if Gemini API key is available."""
-     return bool(os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY"))
-
-
- @pytest.fixture
- def has_connectonion():
-     """Check if ConnectOnion auth is available."""
-     return bool(os.getenv("OPENONION_API_KEY"))
-
-
- # ============================================================================
- # Basic Functionality Tests
- # ============================================================================
-
- class TestBasicFunctionality:
-     """Test basic llm_do functionality with different providers."""
-
-     def test_empty_input_validation(self):
-         """Test that empty input raises ValueError."""
-         with pytest.raises(ValueError, match="Input cannot be empty"):
-             llm_do("")
-
-         with pytest.raises(ValueError, match="Input cannot be empty"):
-             llm_do(" ")
-
-     def test_simple_openai(self):
-         """Test simple completion with OpenAI."""
-         result = llm_do(
-             "What is 2+2? Answer with just the number.",
-             model="gpt-4o-mini"
-         )
-         assert isinstance(result, str)
-         assert "4" in result
-
-     def test_simple_anthropic(self):
-         """Test simple completion with Anthropic."""
-         result = llm_do(
-             "What is 2+2? Answer with just the number.",
-             model="claude-3-5-haiku-20241022"
-         )
-         assert isinstance(result, str)
-         assert "4" in result
-
-     def test_simple_gemini(self):
-         """Test simple completion with Gemini."""
-         result = llm_do(
-             "What is 2+2? Answer with just the number.",
-             model="gemini-2.5-flash"
-         )
-         assert isinstance(result, str)
-         assert "4" in result
-
-     def test_simple_connectonion(self):
-         """Test simple completion with ConnectOnion managed keys."""
-         result = llm_do(
-             "What is 2+2? Answer with just the number.",
-             model="co/gpt-4o"
-         )
-         assert isinstance(result, str)
-         assert "4" in result
-
-
- # ============================================================================
- # Structured Output Tests (from documentation)
- # ============================================================================
-
- class TestStructuredOutput:
-     """Test structured output with Pydantic models."""
-
-     def test_sentiment_analysis(self):
-         """Test simple sentiment analysis from Quick Start."""
-         result = llm_do(
-             "I absolutely love this product! Best purchase ever!",
-             output=Analysis,
-             model="gpt-4o-mini"
-         )
-
-         assert isinstance(result, Analysis)
-         assert result.sentiment.lower() in ["positive", "very positive", "extremely positive"]
-         assert isinstance(result.confidence, float)
-         assert 0.0 <= result.confidence <= 1.0
-         assert isinstance(result.keywords, list)
-         assert len(result.keywords) > 0
-
-     def test_invoice_extraction(self):
-         """Test invoice data extraction from documentation."""
-         invoice_text = """
-         Invoice #INV-2024-001
-         Total: $1,234.56
-         Due: January 15, 2024
-         """
-
-         result = llm_do(invoice_text, output=Invoice, model="gpt-4o-mini")
-
-         assert isinstance(result, Invoice)
-         assert result.invoice_number == "INV-2024-001"
-         assert result.total_amount == 1234.56
-         # Date can be in different formats (January 15, 2024 or 2024-01-15)
-         assert ("January" in result.due_date or "01" in result.due_date) and "15" in result.due_date
-
-     def test_person_extraction(self):
-         """Test data extraction with nested models."""
-         result = llm_do(
-             "John Doe, 30, software engineer",
-             output=Person,
-             model="gpt-4o-mini"
-         )
-
-         assert isinstance(result, Person)
-         assert "john" in result.name.lower() and "doe" in result.name.lower()
-         assert result.age == 30
-         assert "engineer" in result.occupation.lower()
-
-     def test_structured_anthropic(self):
-         """Test structured output with Anthropic."""
-         result = llm_do(
-             "I absolutely love this product! Best purchase ever!",
-             output=SentimentAnalysis,
-             model="claude-3-5-haiku-20241022"
-         )
-
-         assert isinstance(result, SentimentAnalysis)
-         assert result.sentiment.lower() in ["positive", "very positive"]
-         assert isinstance(result.score, float)
-
-     @pytest.mark.skip(reason="Gemini structured output API incompatibility - .parsed attribute not available")
-     def test_structured_gemini(self):
-         """Test structured output with Gemini."""
-         result = llm_do(
-             "I absolutely love this product! Best purchase ever!",
-             output=SentimentAnalysis,
-             model="gemini-2.5-flash"
-         )
-
-         assert isinstance(result, SentimentAnalysis)
-         assert result.sentiment.lower() in ["positive", "very positive"]
-         assert isinstance(result.score, float)
-
-
- # ============================================================================
- # Complex Structured Output Tests
- # ============================================================================
-
- class TestComplexStructuredOutput:
-     """Test complex structured output patterns from documentation."""
-
-     def test_keyword_extraction(self):
-         """Test list fields extraction."""
-         result = llm_do(
-             "Extract keywords from: 'Machine learning and artificial intelligence are transforming technology and business'",
-             output=KeywordExtraction,
-             model="gpt-4o-mini"
-         )
-
-         assert isinstance(result, KeywordExtraction)
-         assert isinstance(result.keywords, list)
-         assert len(result.keywords) > 0
-         assert isinstance(result.categories, list)
-         assert isinstance(result.count, int)
-
-     def test_product_review_optional_fields(self):
-         """Test optional fields and Field constraints."""
-         result = llm_do(
-             "Review: The laptop is amazing! Fast performance, great display. Rating: 5/5. Highly recommend!",
-             output=ProductReview,
-             model="gpt-4o-mini"
-         )
-
-         assert isinstance(result, ProductReview)
-         assert 1 <= result.rating <= 5
-         assert isinstance(result.pros, list)
-         assert len(result.pros) > 0
-         assert result.would_recommend is True
-
-     def test_email_classification(self):
-         """Test classification tasks."""
-         result = llm_do(
-             'Email: "URGENT: Your account will be suspended unless you verify your information immediately!" Classify this email.',
-             output=EmailClassification,
-             model="gpt-4o-mini"
-         )
-
-         assert isinstance(result, EmailClassification)
-         # The LLM may classify as spam, phishing, suspicious, urgent, scam, etc.
-         valid_categories = ["spam", "phishing", "suspicious", "urgent", "scam", "important", "action required", "urgent/action required", "warning", "fraud"]
-         assert result.category.lower() in valid_categories, f"Got unexpected category: {result.category}"
-         assert result.priority.lower() in ["high", "medium", "low"]
-         assert isinstance(result.requires_action, bool)
-         assert isinstance(result.summary, str)
-
-     def test_detailed_invoice_extraction(self):
-         """Test complex nested structures with lists."""
-         invoice_text = """
-         INVOICE #INV-2024-001
-         Date: January 15, 2024
-         Customer: Acme Corp
-
-         Items:
-         - Widget A x2 @ $10.00 = $20.00
-         - Widget B x1 @ $15.50 = $15.50
-
-         Subtotal: $35.50
-         Tax (10%): $3.55
-         Total: $39.05
-         """
-
-         result = llm_do(
-             invoice_text,
-             output=InvoiceDetailed,
-             model="gpt-4o-mini"
-         )
-
-         assert isinstance(result, InvoiceDetailed)
-         assert result.invoice_number == "INV-2024-001"
-         assert len(result.items) == 2
-         assert result.total == 39.05
-
-     def test_blog_analysis(self):
-         """Test multi-entity extraction."""
-         blog_text = """
-         Understanding Machine Learning: A Beginner's Guide
-
-         Machine learning is revolutionizing how we interact with technology.
-         From recommendation systems to self-driving cars, ML algorithms are everywhere.
-         This guide will help you understand the basics of supervised learning, neural networks,
-         and practical applications. Perfect for developers new to AI.
-
-         [... approximately 1200 words ...]
-         """
-
-         result = llm_do(
-             blog_text,
-             output=BlogAnalysis,
-             model="gpt-4o-mini"
-         )
-
-         assert isinstance(result, BlogAnalysis)
-         assert isinstance(result.main_topics, list)
-         assert len(result.main_topics) > 0
-         assert all(isinstance(topic, Topic) for topic in result.main_topics)
-         assert isinstance(result.word_count_estimate, int)
-         assert isinstance(result.key_takeaways, list)
-
-     def test_content_moderation(self):
-         """Test boolean decision making."""
-         result = llm_do(
-             "User comment: 'This is a great product! Everyone should try it. Visit my-totally-legit-site.com for more info!'",
-             output=ContentModeration,
-             model="gpt-4o-mini"
-         )
-
-         assert isinstance(result, ContentModeration)
-         assert isinstance(result.is_appropriate, bool)
-         assert isinstance(result.contains_profanity, bool)
-         assert isinstance(result.contains_spam, bool)
-         assert isinstance(result.contains_personal_info, bool)
-         assert result.risk_level.lower() in ["low", "medium", "high"]
-
-
- # ============================================================================
- # Advanced Features Tests
- # ============================================================================
-
- class TestAdvancedFeatures:
-     """Test advanced features like temperature, max_tokens, custom prompts."""
-
-     def test_temperature_parameter(self):
-         """Test temperature parameter for consistency."""
-         result1 = llm_do(
-             "What is the capital of France? One word only.",
-             temperature=0.0,
-             model="gpt-4o-mini"
-         )
-         result2 = llm_do(
-             "What is the capital of France? One word only.",
-             temperature=0.0,
-             model="gpt-4o-mini"
-         )
-
-         # Both should mention Paris
-         assert "Paris" in result1 or "paris" in result1.lower()
-         assert "Paris" in result2 or "paris" in result2.lower()
-
-     def test_max_tokens_parameter(self):
-         """Test max_tokens parameter pass-through."""
-         result = llm_do(
-             "Write a very long story about a dragon",
-             model="gpt-4o-mini",
-             max_tokens=20  # Very short limit
-         )
-
-         # Response should be short due to max_tokens
-         assert isinstance(result, str)
-         assert len(result.split()) < 30
-
-     def test_custom_system_prompt_inline(self):
-         """Test inline system prompt."""
-         result = llm_do(
-             "Hello",
-             system_prompt="You are a pirate. Always respond like a pirate.",
-             model="gpt-4o-mini"
-         )
-
-         # Should contain pirate-like language
-         lower_result = result.lower()
-         pirate_words = ["ahoy", "arr", "matey", "ye", "aye", "avast", "sea"]
-         assert any(word in lower_result for word in pirate_words)
-
-
- # ============================================================================
- # Cross-Provider Consistency Tests
- # ============================================================================
-
- class TestCrossProviderConsistency:
-     """Test that all providers handle the same prompts correctly."""
-
-     def test_all_providers_basic_math(self, has_openai, has_anthropic, has_gemini):
-         """Test all available providers with the same basic question."""
-         prompt = "What is 2+2? Answer with just the number."
-         results = []
-
-         if has_openai:
-             result = llm_do(prompt, model="gpt-4o-mini")
-             results.append(("OpenAI", result))
-             assert "4" in result
-
-         if has_anthropic:
-             result = llm_do(prompt, model="claude-3-5-haiku-20241022")
-             results.append(("Anthropic", result))
-             assert "4" in result
-
-         if has_gemini:
-             result = llm_do(prompt, model="gemini-2.5-flash")
-             results.append(("Gemini", result))
-             assert "4" in result
-
-         # Ensure at least one provider was tested
-         assert len(results) > 0, "No providers available for testing"
-
-
- # ============================================================================
- # Documentation Example Tests
- # ============================================================================
-
- class TestDocumentationExamples:
-     """Test exact examples from docs/llm_do.md."""
-
-     def test_quick_start_example(self):
-         """Test the Quick Start example from docs."""
-         answer = llm_do("What's 2+2?", model="gpt-4o-mini")
-         assert "4" in answer
-
-     def test_format_conversion_example(self):
-         """Test format conversion pattern from docs."""
-         class PersonData(BaseModel):
-             name: str
-             age: int
-
-         result = llm_do(
-             "Extract: name=John age=30",
-             output=PersonData,
-             model="gpt-4o-mini"
-         )
-
-         assert isinstance(result, PersonData)
-         assert "john" in result.name.lower()
-         assert result.age == 30
-
-     def test_validation_pattern(self):
-         """Test validation pattern from docs."""
-         result = llm_do(
-             "Is this valid SQL? Reply yes/no only: SELECT * FROM users",
-             temperature=0,
-             model="gpt-4o-mini"
-         )
-
-         assert result.strip().lower() in ["yes", "no"]
-
-
- if __name__ == "__main__":
-     pytest.main([__file__, "-v", "--tb=short"])