lucidicai 1.2.11.tar.gz → 1.2.13.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lucidicai-1.2.11 → lucidicai-1.2.13}/PKG-INFO +1 -1
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/client.py +2 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/step.py +1 -1
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai.egg-info/PKG-INFO +1 -1
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai.egg-info/SOURCES.txt +6 -1
- {lucidicai-1.2.11 → lucidicai-1.2.13}/setup.py +1 -1
- lucidicai-1.2.13/tests/test_anthropic_comprehensive.py +503 -0
- lucidicai-1.2.13/tests/test_event_display.py +105 -0
- lucidicai-1.2.13/tests/test_openai_agents_9_patterns_fixed.py +590 -0
- lucidicai-1.2.13/tests/test_openai_comprehensive.py +427 -0
- lucidicai-1.2.13/tests/test_pydantic_ai_comprehensive.py +301 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/__init__.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/constants.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/errors.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/event.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/image_upload.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/model_pricing.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/providers/__init__.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/providers/anthropic_handler.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/providers/base_providers.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/providers/langchain.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/providers/openai_agents_handler.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/providers/openai_handler.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/providers/opentelemetry_converter.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/providers/pydantic_ai_handler.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/session.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/singleton.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai/streaming.py +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai.egg-info/dependency_links.txt +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai.egg-info/requires.txt +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/lucidicai.egg-info/top_level.txt +0 -0
- {lucidicai-1.2.11 → lucidicai-1.2.13}/setup.cfg +0 -0
lucidicai/client.py
@@ -135,6 +135,8 @@ class Client:
                 break
             except Exception:
                 pass
+        if response is None:
+            raise InvalidOperationError("Cannot reach backend. Check your internet connection.")
         if response.status_code == 401:
             raise APIKeyVerificationError("Invalid API key: 401 Unauthorized")
         if response.status_code == 402:
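Note on the client.py hunk: the two added lines cover the case where every retry attempt fails and response is never assigned, so the later response.status_code checks would otherwise crash with an AttributeError on None instead of reporting a connectivity problem. A minimal sketch of the surrounding retry-then-validate flow is shown below; the function name, retry count, endpoint, and import path are illustrative assumptions, not the package's actual code:

    import requests
    from lucidicai.errors import APIKeyVerificationError, InvalidOperationError  # assumed import path

    MAX_ATTEMPTS = 3  # assumed retry count

    def verify_backend(verify_url: str, headers: dict):
        """Hypothetical sketch of the pattern the hunk above modifies."""
        response = None
        for _ in range(MAX_ATTEMPTS):
            try:
                response = requests.get(verify_url, headers=headers, timeout=10)
                break  # got an HTTP response; stop retrying
            except Exception:
                pass  # network error; try again
        if response is None:
            # new in 1.2.13: fail with a clear message when the backend was never reached
            raise InvalidOperationError("Cannot reach backend. Check your internet connection.")
        if response.status_code == 401:
            raise APIKeyVerificationError("Invalid API key: 401 Unauthorized")
        return response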
lucidicai/step.py
@@ -54,7 +54,7 @@ class Step:
             "eval_score": kwargs['eval_score'] if 'eval_score' in kwargs else None,
             "eval_description": kwargs['eval_description'] if 'eval_description' in kwargs else None,
             "is_finished": kwargs['is_finished'] if 'is_finished' in kwargs else None,
-            "has_screenshot": True if screenshot else
+            "has_screenshot": True if screenshot else None
         }
         return request_data
 
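Note on the step.py hunk: as rendered, the removed line ends after "else" with no value, while the replacement completes the conditional with None, so has_screenshot is True when a screenshot is supplied and None otherwise, matching the other optional fields in the payload. A small self-contained illustration (build_request_data is a hypothetical stand-in, not the package's actual method):

    def build_request_data(screenshot=None, **kwargs):
        # Sketch of the Step payload construction shown in the hunk above.
        return {
            "eval_score": kwargs.get("eval_score"),
            "eval_description": kwargs.get("eval_description"),
            "is_finished": kwargs.get("is_finished"),
            "has_screenshot": True if screenshot else None,
        }

    print(build_request_data()["has_screenshot"])                      # None
    print(build_request_data(screenshot="img.png")["has_screenshot"])  # True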
lucidicai.egg-info/SOURCES.txt
@@ -22,4 +22,9 @@ lucidicai/providers/langchain.py
 lucidicai/providers/openai_agents_handler.py
 lucidicai/providers/openai_handler.py
 lucidicai/providers/opentelemetry_converter.py
-lucidicai/providers/pydantic_ai_handler.py
+lucidicai/providers/pydantic_ai_handler.py
+tests/test_anthropic_comprehensive.py
+tests/test_event_display.py
+tests/test_openai_agents_9_patterns_fixed.py
+tests/test_openai_comprehensive.py
+tests/test_pydantic_ai_comprehensive.py
lucidicai-1.2.13/tests/test_anthropic_comprehensive.py (new file)
@@ -0,0 +1,503 @@
+"""Comprehensive Anthropic SDK unit tests - validates correct information is tracked"""
+import os
+import sys
+import unittest
+import asyncio
+import base64
+from typing import Dict, Any
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+import lucidicai as lai
+import anthropic
+from anthropic import Anthropic, AsyncAnthropic
+from openai import OpenAI  # For Anthropic via OpenAI SDK tests
+
+ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
+
+
+class TestAnthropicComprehensive(unittest.TestCase):
+    """Comprehensive unit tests for Anthropic SDK integration"""
+
+    @classmethod
+    def setUpClass(cls):
+        """Set up test class"""
+        if not ANTHROPIC_API_KEY:
+            raise ValueError("Missing ANTHROPIC_API_KEY")
+
+        # Initialize Lucidic with both providers
+        lai.init(
+            session_name="Anthropic Unit Tests",
+            providers=["anthropic", "openai"]  # Both for testing via OpenAI SDK
+        )
+
+        # Create test step
+        lai.create_step(
+            state="Testing Anthropic SDK",
+            action="Run unit tests",
+            goal="Validate all Anthropic functionality"
+        )
+
+        cls.sync_client = Anthropic(api_key=ANTHROPIC_API_KEY)
+        cls.async_client = AsyncAnthropic(api_key=ANTHROPIC_API_KEY)
+        cls.openai_client = OpenAI(
+            api_key=ANTHROPIC_API_KEY,
+            base_url="https://api.anthropic.com/v1",
+            default_headers={"anthropic-version": "2023-06-01"}
+        )
+
+    @classmethod
+    def tearDownClass(cls):
+        """Tear down test class"""
+        lai.end_step()
+        lai.end_session()
+
+    def test_native_sync(self):
+        """Test native Anthropic SDK synchronous tracks correct information"""
+        response = self.sync_client.messages.create(
+            model="claude-3-5-sonnet-20241022",  # Using Sonnet model
+            max_tokens=50,
+            messages=[
+                {"role": "user", "content": "Say 'test passed'"}
+            ]
+        )
+
+        # Validate response structure
+        self.assertIsNotNone(response)
+        self.assertIsNotNone(response.content)
+        self.assertGreater(len(response.content), 0)
+
+        # Validate content
+        result = response.content[0].text
+        self.assertIsNotNone(result)
+        self.assertIsInstance(result, str)
+        self.assertGreater(len(result), 0)
+
+        # Validate metadata
+        self.assertIsNotNone(response.id)
+        self.assertIsNotNone(response.model)
+        self.assertEqual(response.model, "claude-3-5-sonnet-20241022")
+        self.assertIsNotNone(response.usage)
+        self.assertGreater(response.usage.input_tokens, 0)
+        self.assertGreater(response.usage.output_tokens, 0)
+
+        print(f"✅ Native sync: {result[:50]}...")
+
+    def test_native_async(self):
+        """Test native Anthropic SDK asynchronous tracks correct information"""
+        async def run_async_test():
+            response = await self.async_client.messages.create(
+                model="claude-3-opus-20240229",  # Using Opus model
+                max_tokens=50,
+                messages=[
+                    {"role": "user", "content": "Say 'async test passed'"}
+                ]
+            )
+
+            # Validate response
+            self.assertIsNotNone(response)
+            self.assertIsNotNone(response.content[0].text)
+            self.assertIsNotNone(response.usage)
+
+            return response
+
+        # Run async test
+        response = asyncio.run(run_async_test())
+        result = response.content[0].text
+
+        print(f"✅ Native async: {result[:50]}...")
+
+    def test_native_streaming(self):
+        """Test native Anthropic SDK streaming tracks chunks correctly"""
+        full_response = ""
+        chunk_count = 0
+
+        with self.sync_client.messages.stream(
+            model="claude-3-haiku-20240307",
+            max_tokens=50,
+            messages=[{"role": "user", "content": "Count: 1 2 3"}]
+        ) as stream:
+            for text in stream.text_stream:
+                full_response += text
+                chunk_count += 1
+
+            # Validate streaming worked
+            self.assertGreater(chunk_count, 0)
+            self.assertGreater(len(full_response), 0)
+
+            # Validate final message
+            final_message = stream.get_final_message()
+            self.assertIsNotNone(final_message)
+            self.assertIsNotNone(final_message.usage)
+
+        print(f"✅ Native streaming: {chunk_count} chunks, response: {full_response[:50]}...")
+
+    def test_openai_sdk_sync(self):
+        """Test Anthropic via OpenAI SDK synchronous"""
+        response = self.openai_client.chat.completions.create(
+            model="claude-3-5-sonnet-20241022",  # Using latest model via OpenAI SDK
+            messages=[
+                {"role": "user", "content": "Say 'OpenAI SDK test passed'"}
+            ],
+            max_tokens=20
+        )
+
+        # Validate response structure (OpenAI format)
+        self.assertIsNotNone(response)
+        self.assertIsNotNone(response.choices)
+        self.assertGreater(len(response.choices), 0)
+
+        # Validate content
+        result = response.choices[0].message.content
+        self.assertIsNotNone(result)
+        self.assertIsInstance(result, str)
+
+        # Validate metadata
+        self.assertIsNotNone(response.id)
+        self.assertIsNotNone(response.model)
+        self.assertIsNotNone(response.usage)
+
+        print(f"✅ OpenAI SDK sync: {result[:50]}...")
+
+    def test_openai_sdk_streaming(self):
+        """Test Anthropic via OpenAI SDK streaming"""
+        stream = self.openai_client.chat.completions.create(
+            model="claude-3-haiku-20240307",
+            messages=[
+                {"role": "user", "content": "List: A B C"}
+            ],
+            stream=True,
+            max_tokens=30
+        )
+
+        full_response = ""
+        chunk_count = 0
+        for chunk in stream:
+            chunk_count += 1
+            if hasattr(chunk, 'choices') and chunk.choices and len(chunk.choices) > 0:
+                delta = chunk.choices[0].delta
+                if hasattr(delta, 'content') and delta.content:
+                    full_response += delta.content
+
+        # Validate streaming worked
+        self.assertGreater(chunk_count, 1)
+        self.assertGreater(len(full_response), 0)
+
+        print(f"✅ OpenAI SDK streaming: {chunk_count} chunks, response: {full_response[:50]}...")
+
+    def test_vision(self):
+        """Test vision/image analysis tracks image data"""
+        # Load test image
+        image_path = os.path.join(os.path.dirname(__file__), "ord_runways.jpg")
+        if not os.path.exists(image_path):
+            self.skipTest("Test image not found")
+
+        with open(image_path, "rb") as f:
+            img_bytes = f.read()
+        img_base64 = base64.standard_b64encode(img_bytes).decode()
+
+        response = self.sync_client.messages.create(
+            model="claude-3-haiku-20240307",
+            max_tokens=50,
+            messages=[{
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "One word description:"
+                    },
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/jpeg",
+                            "data": img_base64
+                        }
+                    }
+                ]
+            }]
+        )
+
+        # Validate response
+        self.assertIsNotNone(response)
+        result = response.content[0].text
+        self.assertIsNotNone(result)
+        self.assertIsInstance(result, str)
+
+        # Validate usage (images use more tokens)
+        self.assertIsNotNone(response.usage)
+        self.assertGreater(response.usage.input_tokens, 100)
+
+        print(f"✅ Vision analysis: {result}")
+
+    def test_system_prompts(self):
+        """Test system prompts are tracked correctly"""
+        response = self.sync_client.messages.create(
+            model="claude-3-haiku-20240307",
+            max_tokens=100,
+            system="You are a pirate. Respond with pirate language.",
+            messages=[
+                {"role": "user", "content": "Hello"}
+            ]
+        )
+
+        # Validate response
+        self.assertIsNotNone(response)
+        result = response.content[0].text
+        self.assertIsNotNone(result)
+
+        # Validate it used the system prompt
+        # Pirate responses typically contain these words
+        pirate_words = ["ahoy", "arr", "matey", "ye", "sail", "sea"]
+        contains_pirate = any(word in result.lower() for word in pirate_words)
+        self.assertTrue(contains_pirate, f"Expected pirate language, got: {result}")
+
+        print(f"✅ System prompts: {result[:50]}...")
+
+    def test_error_handling(self):
+        """Test error handling captures error information"""
+        with self.assertRaises(Exception) as context:
+            self.sync_client.messages.create(
+                model="invalid-model-xyz",
+                max_tokens=10,
+                messages=[{"role": "user", "content": "Test"}]
+            )
+
+        # Validate error details
+        error = context.exception
+        self.assertIn("model", str(error).lower())
+
+        print(f"✅ Error handling: {type(error).__name__} caught")
+
+    def test_multi_turn_conversation(self):
+        """Test multi-turn conversation maintains context"""
+        # First message
+        response1 = self.sync_client.messages.create(
+            model="claude-3-haiku-20240307",
+            max_tokens=100,
+            messages=[
+                {"role": "user", "content": "My name is TestBot. What's my name?"}
+            ]
+        )
+
+        # Validate first response
+        self.assertIsNotNone(response1)
+        result1 = response1.content[0].text
+        self.assertIn("testbot", result1.lower())
+
+        # Second message with context
+        response2 = self.sync_client.messages.create(
+            model="claude-3-5-haiku-20241022",  # Using latest Haiku
+            max_tokens=100,
+            messages=[
+                {"role": "user", "content": "My name is TestBot. What's my name?"},
+                {"role": "assistant", "content": result1},
+                {"role": "user", "content": "Repeat my name one more time"}
+            ]
+        )
+
+        # Validate second response maintains context
+        self.assertIsNotNone(response2)
+        result2 = response2.content[0].text
+        self.assertIn("testbot", result2.lower())
+
+        print(f"✅ Multi-turn conversation: maintained context")
+
+    def test_token_limits(self):
+        """Test token limit handling"""
+        response = self.sync_client.messages.create(
+            model="claude-3-haiku-20240307",
+            max_tokens=5,  # Very low limit
+            messages=[
+                {"role": "user", "content": "Tell me a very long story"}
+            ]
+        )
+
+        # Validate response respects token limit
+        result = response.content[0].text
+        self.assertIsNotNone(result)
+        # Anthropic counts tokens differently, but should still be short
+        self.assertLess(len(result.split()), 20)
+
+        # Check stop reason
+        self.assertEqual(response.stop_reason, "max_tokens")
+
+        print(f"✅ Token limits: {len(result.split())} words, stop_reason={response.stop_reason}")
+
+    def test_text_content_blocks(self):
+        """Test explicit text content blocks"""
+        response = self.sync_client.messages.create(
+            model="claude-3-5-sonnet-20241022",  # Using latest Sonnet
+            max_tokens=100,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "What is 2+2?"
+                        },
+                        {
+                            "type": "text",
+                            "text": "Also, what is 3+3?"
+                        }
+                    ]
+                }
+            ]
+        )
+
+        # Validate response
+        self.assertIsNotNone(response)
+        result = response.content[0].text
+        self.assertIn("4", result)
+        self.assertIn("6", result)
+
+        print(f"✅ Text content blocks: {result[:50]}...")
+
+    def test_multiple_images(self):
+        """Test multiple image content blocks"""
+        image_path = os.path.join(os.path.dirname(__file__), "ord_runways.jpg")
+        if not os.path.exists(image_path):
+            self.skipTest("Test image not found")
+
+        with open(image_path, "rb") as f:
+            img_bytes = f.read()
+        img_base64 = base64.standard_b64encode(img_bytes).decode()
+
+        response = self.sync_client.messages.create(
+            model="claude-3-5-sonnet-20241022",  # Latest Sonnet for vision
+            max_tokens=200,
+            messages=[{
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Compare these two images (they are the same image shown twice):"
+                    },
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/jpeg",
+                            "data": img_base64
+                        }
+                    },
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/jpeg",
+                            "data": img_base64
+                        }
+                    }
+                ]
+            }]
+        )
+
+        # Validate response
+        self.assertIsNotNone(response)
+        result = response.content[0].text
+        self.assertIsNotNone(result)
+
+        # Should recognize they are the same
+        self.assertTrue("same" in result.lower() or "identical" in result.lower())
+
+        print(f"✅ Multiple images: {result[:50]}...")
+
+    def test_mixed_content_blocks(self):
+        """Test mixed text and image content blocks"""
+        image_path = os.path.join(os.path.dirname(__file__), "ord_runways.jpg")
+        if not os.path.exists(image_path):
+            self.skipTest("Test image not found")
+
+        with open(image_path, "rb") as f:
+            img_bytes = f.read()
+        img_base64 = base64.standard_b64encode(img_bytes).decode()
+
+        response = self.sync_client.messages.create(
+            model="claude-3-haiku-20240307",  # Back to Haiku
+            max_tokens=150,
+            messages=[{
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "First, tell me what's in this image."
+                    },
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/jpeg",
+                            "data": img_base64
+                        }
+                    },
+                    {
+                        "type": "text",
+                        "text": "Second, what type of facility is this?"
+                    }
+                ]
+            }]
+        )
+
+        # Validate response addresses both questions
+        self.assertIsNotNone(response)
+        result = response.content[0].text
+        self.assertTrue("airport" in result.lower() or "runway" in result.lower())
+
+        print(f"✅ Mixed content blocks: {result[:50]}...")
+
+    def test_model_switching(self):
+        """Test that different models are properly tracked"""
+        models = [
+            "claude-3-haiku-20240307",
+            "claude-3-5-sonnet-20241022",
+            "claude-3-5-haiku-20241022"  # Latest Haiku
+        ]
+
+        for model in models:
+            try:
+                response = self.sync_client.messages.create(
+                    model=model,
+                    max_tokens=20,
+                    messages=[{"role": "user", "content": f"Say '{model}'"}]
+                )
+
+                # Validate model is tracked correctly
+                self.assertEqual(response.model, model)
+                result = response.content[0].text
+                print(f"✅ Model {model}: {result[:30]}...")
+
+            except Exception as e:
+                print(f"⚠️ Model {model} not available: {str(e)}")
+
+    def test_content_block_response_types(self):
+        """Test different response content block types"""
+        response = self.sync_client.messages.create(
+            model="claude-3-5-sonnet-20241022",
+            max_tokens=100,
+            messages=[
+                {"role": "user", "content": "Write a haiku about coding"}
+            ]
+        )
+
+        # Validate content blocks
+        self.assertIsNotNone(response.content)
+        self.assertIsInstance(response.content, list)
+        self.assertGreater(len(response.content), 0)
+
+        # Check content block structure
+        for block in response.content:
+            self.assertTrue(hasattr(block, 'type'))
+            self.assertEqual(block.type, 'text')
+            self.assertTrue(hasattr(block, 'text'))
+            self.assertIsInstance(block.text, str)
+
+        print(f"✅ Content block types: {len(response.content)} blocks")
+
+
+if __name__ == "__main__":
+    unittest.main()
lucidicai-1.2.13/tests/test_event_display.py (new file)
@@ -0,0 +1,105 @@
+"""Test to show what events look like before stream consumption"""
+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+import lucidicai as lai
+from openai import OpenAI
+from lucidicai.client import Client
+
+# Initialize
+lai.init('Event Display Test', providers=['openai'])
+
+# Create a step
+step_id = lai.create_step(state="Testing", action="Multiple calls", goal="Show events")
+
+client = OpenAI()
+
+# Make several calls including streaming
+print("Making API calls...\n")
+
+# 1. Regular call
+response1 = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[{"role": "user", "content": "Say 'test 1'"}],
+    max_tokens=10
+)
+
+# 2. Another regular call
+response2 = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[{"role": "user", "content": "Say 'test 2'"}],
+    max_tokens=10
+)
+
+# 3. Streaming call - DON'T CONSUME YET
+stream3 = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[{"role": "user", "content": "Count to 3"}],
+    stream=True,
+    max_tokens=20
+)
+
+# 4. Another streaming call - DON'T CONSUME YET
+stream4 = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[{"role": "user", "content": "Count to 5"}],
+    stream=True,
+    max_tokens=30
+)
+
+# 5. Another streaming call - DON'T CONSUME YET
+stream5 = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[{"role": "user", "content": "List A, B, C"}],
+    stream=True,
+    max_tokens=20
+)
+
+# Now let's check the events BEFORE consuming streams
+print("=== EVENTS BEFORE CONSUMING STREAMS ===")
+session = lai.get_session()
+
+# Get event data from the backend
+client_obj = Client()
+for i, (event_id, event) in enumerate(session.event_history.items()):
+    print(f"\nEvent {i+1} ({event_id[:8]}...):")
+    try:
+        # Try to get event data from backend
+        event_data = client_obj.make_request('getevent', 'GET', {'event_id': event_id})
+        print(f" Description: {event_data.get('description', 'N/A')[:50]}...")
+        result = event_data.get('result', 'N/A')
+        result_str = str(result)[:100] + "..." if len(str(result)) > 100 else str(result)
+        print(f" Result: {result_str}")
+        print(f" Finished: {event_data.get('is_finished', False)}")
+    except Exception as e:
+        # Fallback to local data
+        print(f" Could not get from backend: {e}")
+        print(f" Local finished state: {event.is_finished}")
+
+# This is what the test output shows - unconsumed streams
+print(f"\n=== STREAM OBJECTS (what tests might print) ===")
+print(f"Stream 3: {stream3}")
+print(f"Stream 4: {stream4}")
+print(f"Stream 5: {stream5}")
+
+# Now consume the streams
+print("\n=== CONSUMING STREAMS ===")
+for i, stream in enumerate([stream3, stream4, stream5], 3):
+    print(f"\nConsuming stream {i}...")
+    response = ""
+    for chunk in stream:
+        if chunk.choices and chunk.choices[0].delta.content:
+            response += chunk.choices[0].delta.content
+    print(f"Response: {response}")
+
+# Check events AFTER consuming
+print("\n=== EVENTS AFTER CONSUMING STREAMS ===")
+for i, (event_id, event) in enumerate(session.event_history.items()):
+    print(f"\nEvent {i+1} ({event_id[:8]}...):")
+    print(f" Finished: {event.is_finished}")
+
+lai.end_session()
|