mojentic 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff shows the content changes between publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- _examples/audit_openai_capabilities.py +561 -0
- _examples/streaming.py +0 -1
- mojentic/llm/chat_session.py +24 -1
- mojentic/llm/chat_session_spec.py +40 -0
- mojentic/llm/gateways/openai_model_registry.py +141 -40
- mojentic/llm/gateways/openai_model_registry_spec.py +173 -6
- mojentic/llm/gateways/openai_temperature_handling_spec.py +22 -20
- {mojentic-1.0.0.dist-info → mojentic-1.1.0.dist-info}/METADATA +15 -15
- {mojentic-1.0.0.dist-info → mojentic-1.1.0.dist-info}/RECORD +12 -11
- {mojentic-1.0.0.dist-info → mojentic-1.1.0.dist-info}/WHEEL +1 -1
- {mojentic-1.0.0.dist-info → mojentic-1.1.0.dist-info}/licenses/LICENSE.md +0 -0
- {mojentic-1.0.0.dist-info → mojentic-1.1.0.dist-info}/top_level.txt +0 -0
_examples/audit_openai_capabilities.py
ADDED

@@ -0,0 +1,561 @@
+"""
+Audit script that probes OpenAI models for their actual capabilities
+and compares against our hardcoded model registry.
+
+Usage:
+    OPENAI_API_KEY=sk-... python src/_examples/audit_openai_capabilities.py
+    OPENAI_API_KEY=sk-... python src/_examples/audit_openai_capabilities.py --cheap
+
+The --cheap flag skips expensive model families and infers capabilities
+from their -mini variants instead.
+"""
+
+import base64
+import json
+import os
+import sys
+import time
+from datetime import datetime, timezone
+from typing import Optional
+
+from openai import OpenAI, BadRequestError, APIError, RateLimitError
+
+from mojentic.llm.gateways.openai_model_registry import (
+    OpenAIModelRegistry, ModelType
+)
+
+# Models that use different API endpoints (not chat-compatible)
+SKIP_PREFIXES = [
+    "tts-", "whisper-", "dall-e-", "text-moderation-",
+    "davinci-", "babbage-", "canary-",
+    "codex-", "computer-",
+]
+SKIP_CONTAINS = [
+    "-realtime-", "-transcribe", "-tts",
+]
+
+# Expensive model families to skip in --cheap mode
+EXPENSIVE_FAMILIES = [
+    "o1-pro", "o3-pro", "o3-deep-research", "o4-mini-deep-research",
+    "gpt-5-codex",
+]
+
+# 1x1 white PNG for vision testing
+TINY_PNG_B64 = (
+    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4"
+    "nGP4z8BQDwAEgAF/pooBPQAAAABJRU5ErkJggg=="
+)
+
+MINIMAL_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "get_weather",
+        "description": "Get weather for a city",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": {"type": "string", "description": "City name"}
+            },
+            "required": ["city"]
+        }
+    }
+}
+
+
+def should_skip_model(model_id: str) -> bool:
+    """Check if a model should be skipped (non-chat endpoint)."""
+    model_lower = model_id.lower()
+    for prefix in SKIP_PREFIXES:
+        if model_lower.startswith(prefix):
+            return True
+    for pattern in SKIP_CONTAINS:
+        if pattern in model_lower:
+            return True
+    return False
+
+
+def is_chat_model_candidate(model_id: str) -> bool:
+    """Check if a model is a candidate for chat API probing."""
+    model_lower = model_id.lower()
+    chat_patterns = [
+        "gpt-3.5", "gpt-4", "gpt-5",
+        "o1", "o3", "o4",
+        "chatgpt",
+    ]
+    return any(p in model_lower for p in chat_patterns)
+
+
+def is_embedding_model(model_id: str) -> bool:
+    """Check if a model is an embedding model."""
+    return "embedding" in model_id.lower()
+
+
+def is_expensive(model_id: str) -> bool:
+    """Check if a model is in an expensive family."""
+    model_lower = model_id.lower()
+    return any(family in model_lower for family in EXPENSIVE_FAMILIES)
+
+
+def rate_limited_call(func, *args, **kwargs):
+    """Call a function with rate limit handling and backoff."""
+    max_retries = 3
+    delay = 1.0
+    for attempt in range(max_retries):
+        try:
+            return func(*args, **kwargs)
+        except RateLimitError:
+            if attempt < max_retries - 1:
+                print(f"    Rate limited, waiting {delay}s...")
+                time.sleep(delay)
+                delay *= 2
+            else:
+                raise
+
+
+def probe_basic_chat(client: OpenAI, model_id: str) -> dict:
+    """Test basic chat completion and determine token parameter name."""
+    result = {"works": False, "uses_max_tokens": None, "error": None}
+
+    # Try with max_tokens first (standard chat models)
+    try:
+        response = rate_limited_call(
+            client.chat.completions.create,
+            model=model_id,
+            messages=[{"role": "user", "content": "Say hi"}],
+            max_tokens=10,
+        )
+        result["works"] = True
+        result["uses_max_tokens"] = True
+        return result
+    except BadRequestError as e:
+        error_msg = str(e).lower()
+        if "max_completion_tokens" in error_msg:
+            # Reasoning model - retry with max_completion_tokens
+            try:
+                response = rate_limited_call(
+                    client.chat.completions.create,
+                    model=model_id,
+                    messages=[{"role": "user", "content": "Say hi"}],
+                    max_completion_tokens=10,
+                )
+                result["works"] = True
+                result["uses_max_tokens"] = False
+                return result
+            except (BadRequestError, APIError) as e2:
+                result["error"] = str(e2)
+                return result
+        else:
+            result["error"] = str(e)
+            return result
+    except APIError as e:
+        result["error"] = str(e)
+        return result
+
+
+def probe_tool_calling(client: OpenAI, model_id: str, uses_max_tokens: bool) -> dict:
+    """Test if a model supports tool calling."""
+    result = {"supports_tools": False, "error": None}
+
+    token_kwargs = {}
+    if uses_max_tokens:
+        token_kwargs["max_tokens"] = 10
+    else:
+        token_kwargs["max_completion_tokens"] = 100
+
+    try:
+        response = rate_limited_call(
+            client.chat.completions.create,
+            model=model_id,
+            messages=[{"role": "user", "content": "What is the weather in London?"}],
+            tools=[MINIMAL_TOOL],
+            **token_kwargs,
+        )
+        result["supports_tools"] = True
+        return result
+    except BadRequestError as e:
+        error_msg = str(e).lower()
+        if "tool" in error_msg or "function" in error_msg:
+            result["supports_tools"] = False
+        else:
+            result["error"] = str(e)
+            result["supports_tools"] = False
+        return result
+    except APIError as e:
+        result["error"] = str(e)
+        return result
+
+
+def probe_streaming(client: OpenAI, model_id: str, uses_max_tokens: bool) -> dict:
+    """Test if a model supports streaming."""
+    result = {"supports_streaming": False, "error": None}
+
+    token_kwargs = {}
+    if uses_max_tokens:
+        token_kwargs["max_tokens"] = 10
+    else:
+        token_kwargs["max_completion_tokens"] = 50
+
+    try:
+        stream = rate_limited_call(
+            client.chat.completions.create,
+            model=model_id,
+            messages=[{"role": "user", "content": "Say hi"}],
+            stream=True,
+            **token_kwargs,
+        )
+        # Consume the stream to verify it works
+        for chunk in stream:
+            pass
+        result["supports_streaming"] = True
+        return result
+    except BadRequestError as e:
+        error_msg = str(e).lower()
+        if "stream" in error_msg:
+            result["supports_streaming"] = False
+        else:
+            result["error"] = str(e)
+            result["supports_streaming"] = False
+        return result
+    except APIError as e:
+        result["error"] = str(e)
+        return result
+
+
+def probe_vision(client: OpenAI, model_id: str, uses_max_tokens: bool) -> dict:
+    """Test if a model supports vision (image input)."""
+    result = {"supports_vision": False, "error": None}
+
+    token_kwargs = {}
+    if uses_max_tokens:
+        token_kwargs["max_tokens"] = 10
+    else:
+        token_kwargs["max_completion_tokens"] = 50
+
+    try:
+        response = rate_limited_call(
+            client.chat.completions.create,
+            model=model_id,
+            messages=[{
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Describe this image in one word."},
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/png;base64,{TINY_PNG_B64}",
+                            "detail": "low"
+                        }
+                    }
+                ]
+            }],
+            **token_kwargs,
+        )
+        result["supports_vision"] = True
+        return result
+    except BadRequestError as e:
+        error_msg = str(e).lower()
+        if "image" in error_msg or "vision" in error_msg or "content" in error_msg:
+            result["supports_vision"] = False
+        else:
+            result["error"] = str(e)
+            result["supports_vision"] = False
+        return result
+    except APIError as e:
+        result["error"] = str(e)
+        return result
+
+
+def probe_temperature(client: OpenAI, model_id: str, uses_max_tokens: bool) -> dict:
+    """Test which temperature values a model supports."""
+    result = {"supported_temperatures": None, "error": None}
+    test_temps = [0.0, 0.5, 1.0]
+    supported = []
+
+    token_kwargs = {}
+    if uses_max_tokens:
+        token_kwargs["max_tokens"] = 5
+    else:
+        token_kwargs["max_completion_tokens"] = 20
+
+    for temp in test_temps:
+        try:
+            response = rate_limited_call(
+                client.chat.completions.create,
+                model=model_id,
+                messages=[{"role": "user", "content": "Say ok"}],
+                temperature=temp,
+                **token_kwargs,
+            )
+            supported.append(temp)
+        except BadRequestError as e:
+            error_msg = str(e).lower()
+            if "temperature" in error_msg:
+                pass  # This temperature not supported
+            else:
+                result["error"] = str(e)
+                break
+        except APIError as e:
+            result["error"] = str(e)
+            break
+        time.sleep(0.3)
+
+    if len(supported) == len(test_temps):
+        result["supported_temperatures"] = None  # All supported
+    elif len(supported) == 0:
+        result["supported_temperatures"] = []  # None supported
+    else:
+        result["supported_temperatures"] = supported
+
+    return result
+
+
+def probe_embedding(client: OpenAI, model_id: str) -> dict:
+    """Test if a model works as an embedding model."""
+    result = {"is_embedding": False, "error": None}
+    try:
+        response = rate_limited_call(
+            client.embeddings.create,
+            model=model_id,
+            input="test",
+        )
+        result["is_embedding"] = True
+        return result
+    except (BadRequestError, APIError) as e:
+        result["error"] = str(e)
+        return result
+
+
+def probe_model(client: OpenAI, model_id: str, cheap_mode: bool = False) -> Optional[dict]:
+    """Run all capability probes against a single model."""
+    if should_skip_model(model_id):
+        return None
+
+    if cheap_mode and is_expensive(model_id):
+        print(f"  Skipping {model_id} (expensive, --cheap mode)")
+        return None
+
+    # Handle embedding models separately
+    if is_embedding_model(model_id):
+        print(f"  Probing {model_id} (embedding)...")
+        embed_result = probe_embedding(client, model_id)
+        return {
+            "model_type": "embedding" if embed_result["is_embedding"] else "unknown",
+            "supports_tools": False,
+            "supports_streaming": False,
+            "supports_vision": False,
+            "supported_temperatures": None,
+            "uses_max_tokens": None,
+            "errors": [embed_result["error"]] if embed_result["error"] else []
+        }
+
+    if not is_chat_model_candidate(model_id):
+        return None
+
+    print(f"  Probing {model_id}...")
+
+    # Test 1: Basic chat
+    basic = probe_basic_chat(client, model_id)
+    if not basic["works"]:
+        print(f"    Basic chat failed: {basic['error']}")
+        return {
+            "model_type": "unknown",
+            "supports_tools": False,
+            "supports_streaming": False,
+            "supports_vision": False,
+            "supported_temperatures": None,
+            "uses_max_tokens": None,
+            "errors": [basic["error"]]
+        }
+
+    uses_max_tokens = basic["uses_max_tokens"]
+    model_type = "chat" if uses_max_tokens else "reasoning"
+    time.sleep(0.5)
+
+    # Test 2: Tool calling
+    tools_result = probe_tool_calling(client, model_id, uses_max_tokens)
+    time.sleep(0.5)
+
+    # Test 3: Streaming
+    stream_result = probe_streaming(client, model_id, uses_max_tokens)
+    time.sleep(0.5)
+
+    # Test 4: Vision
+    vision_result = probe_vision(client, model_id, uses_max_tokens)
+    time.sleep(0.5)
+
+    # Test 5: Temperature
+    temp_result = probe_temperature(client, model_id, uses_max_tokens)
+
+    errors = [r["error"] for r in [tools_result, stream_result, vision_result, temp_result]
+              if r.get("error")]
+
+    return {
+        "model_type": model_type,
+        "supports_tools": tools_result["supports_tools"],
+        "supports_streaming": stream_result["supports_streaming"],
+        "supports_vision": vision_result["supports_vision"],
+        "supported_temperatures": temp_result["supported_temperatures"],
+        "uses_max_tokens": uses_max_tokens,
+        "errors": errors if errors else []
+    }
+
+
+def compare_with_registry(probed_models: dict, registry: OpenAIModelRegistry) -> dict:
+    """Compare probed results with current registry."""
+    registered_models = set(registry.get_registered_models())
+    probed_model_names = set(probed_models.keys())
+
+    # Find new models (in API but not registry)
+    new_models = sorted(probed_model_names - registered_models)
+
+    # Find removed models (in registry but not in any API model)
+    removed_models = sorted(registered_models - probed_model_names)
+
+    # Find capability changes for models in both sets
+    capability_changes = {}
+    for model_name in sorted(probed_model_names & registered_models):
+        probed = probed_models[model_name]
+        registered_caps = registry.get_model_capabilities(model_name)
+
+        changes = {}
+
+        # Compare model type
+        reg_type = registered_caps.model_type.value
+        if probed["model_type"] != reg_type and probed["model_type"] != "unknown":
+            changes["model_type"] = {"was": reg_type, "now": probed["model_type"]}
+
+        # Compare tools support
+        if probed["supports_tools"] != registered_caps.supports_tools:
+            changes["supports_tools"] = {
+                "was": registered_caps.supports_tools,
+                "now": probed["supports_tools"]
+            }
+
+        # Compare streaming support
+        if probed["supports_streaming"] != registered_caps.supports_streaming:
+            changes["supports_streaming"] = {
+                "was": registered_caps.supports_streaming,
+                "now": probed["supports_streaming"]
+            }
+
+        # Compare vision support
+        if probed["supports_vision"] != registered_caps.supports_vision:
+            changes["supports_vision"] = {
+                "was": registered_caps.supports_vision,
+                "now": probed["supports_vision"]
+            }
+
+        # Compare temperature support
+        reg_temps = registered_caps.supported_temperatures
+        probed_temps = probed["supported_temperatures"]
+        if reg_temps != probed_temps:
+            changes["supported_temperatures"] = {
+                "was": reg_temps,
+                "now": probed_temps
+            }
+
+        if changes:
+            capability_changes[model_name] = changes
+
+    return {
+        "new_models": new_models,
+        "removed_models": removed_models,
+        "capability_changes": capability_changes
+    }
+
+
+def main():
+    cheap_mode = "--cheap" in sys.argv
+
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        print("ERROR: OPENAI_API_KEY environment variable not set")
+        sys.exit(1)
+
+    client = OpenAI(api_key=api_key)
+
+    print("Fetching available OpenAI models...")
+    all_models = sorted([m.id for m in client.models.list()])
+    print(f"Found {len(all_models)} models total")
+
+    # Separate into categories
+    models_to_probe = []
+    models_skipped = []
+
+    for model_id in all_models:
+        if should_skip_model(model_id):
+            models_skipped.append(model_id)
+        elif is_chat_model_candidate(model_id) or is_embedding_model(model_id):
+            models_to_probe.append(model_id)
+        else:
+            models_skipped.append(model_id)
+
+    print(f"\nWill probe {len(models_to_probe)} models, skipping {len(models_skipped)}")
+    if cheap_mode:
+        print("Running in --cheap mode (skipping expensive model families)")
+
+    # Probe each model
+    probed_results = {}
+    for model_id in models_to_probe:
+        result = probe_model(client, model_id, cheap_mode)
+        if result is not None:
+            probed_results[model_id] = result
+        time.sleep(0.5)  # Rate limit between models
+
+    # Compare with registry
+    print("\nComparing with current registry...")
+    registry = OpenAIModelRegistry()
+    comparison = compare_with_registry(probed_results, registry)
+
+    # Build report
+    report = {
+        "audit_date": datetime.now(timezone.utc).isoformat(),
+        "cheap_mode": cheap_mode,
+        "api_models_available": all_models,
+        "models_skipped": models_skipped,
+        "models_probed": probed_results,
+        "comparison": comparison
+    }
+
+    # Write report
+    report_path = os.path.join(
+        os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
+        "openai_model_audit_report.json"
+    )
+    with open(report_path, "w") as f:
+        json.dump(report, f, indent=2, default=str)
+
+    print(f"\nReport written to: {report_path}")
+
+    # Print summary
+    print("\n=== AUDIT SUMMARY ===")
+    print(f"Models in API: {len(all_models)}")
+    print(f"Models probed: {len(probed_results)}")
+    print(f"Models skipped: {len(models_skipped)}")
+
+    if comparison["new_models"]:
+        print(f"\nNew models (not in registry): {len(comparison['new_models'])}")
+        for m in comparison["new_models"]:
+            caps = probed_results.get(m, {})
+            print(f"  + {m} (type={caps.get('model_type', '?')}, "
+                  f"tools={caps.get('supports_tools', '?')}, "
+                  f"stream={caps.get('supports_streaming', '?')})")
+
+    if comparison["removed_models"]:
+        print(f"\nRemoved models (in registry, not in API): {len(comparison['removed_models'])}")
+        for m in comparison["removed_models"]:
+            print(f"  - {m}")
+
+    if comparison["capability_changes"]:
+        print(f"\nCapability changes: {len(comparison['capability_changes'])}")
+        for model, changes in comparison["capability_changes"].items():
+            print(f"  ~ {model}:")
+            for field, diff in changes.items():
+                print(f"      {field}: {diff['was']} -> {diff['now']}")
+
+    if not comparison["new_models"] and not comparison["removed_models"] and not comparison["capability_changes"]:
+        print("\nNo discrepancies found - registry is up to date!")
+
+
+if __name__ == "__main__":
+    main()
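The audit writes its findings to openai_model_audit_report.json at the repository root. A minimal sketch of reading that report back and listing the discrepancies it records; the keys mirror the report dict assembled in main() above, and the file path assumes the script has already been run from the repository root:

import json

# Load the report produced by audit_openai_capabilities.py
with open("openai_model_audit_report.json") as f:
    report = json.load(f)

comparison = report["comparison"]
print("New models:", comparison["new_models"])
print("Removed models:", comparison["removed_models"])
for model, changes in comparison["capability_changes"].items():
    print(f"{model}: {changes}")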
_examples/streaming.py
CHANGED
mojentic/llm/chat_session.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Iterator, List, Optional
 
 from mojentic.llm import LLMBroker
 from mojentic.llm.gateways.models import LLMMessage, MessageRole

@@ -78,6 +78,29 @@ class ChatSession:
         self.insert_message(LLMMessage(role=MessageRole.Assistant, content=response))
         return response
 
+    def send_stream(self, query) -> Iterator[str]:
+        """
+        Send a query to the LLM and yield response chunks as they arrive. Records the query and
+        the full assembled response in the ongoing chat session after the stream is consumed.
+
+        Parameters
+        ----------
+        query : str
+            The query to send to the LLM.
+
+        Yields
+        ------
+        str
+            Content chunks from the LLM response as they arrive.
+        """
+        self.insert_message(LLMMessage(role=MessageRole.User, content=query))
+        accumulated = []
+        for chunk in self.llm.generate_stream(self.messages, tools=self.tools, temperature=self.temperature):
+            accumulated.append(chunk)
+            yield chunk
+        self._ensure_all_messages_are_sized()
+        self.insert_message(LLMMessage(role=MessageRole.Assistant, content="".join(accumulated)))
+
     def insert_message(self, message: LLMMessage):
         """
         Add a message onto the end of the chat session. If the total token count exceeds the max context, the oldest
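For orientation, here is a minimal usage sketch of the new send_stream method. The LLMBroker and ChatSession constructor arguments below are assumptions for illustration (they are not part of this diff); the streaming behaviour itself follows the method above: chunks are yielded as they arrive, and the full assembled response is recorded in the session history once the stream has been consumed.

from mojentic.llm import LLMBroker
from mojentic.llm.chat_session import ChatSession

# Hypothetical setup; check the package docs for the exact constructor parameters.
llm = LLMBroker(model="gpt-4o")
session = ChatSession(llm)

for chunk in session.send_stream("Tell me a short joke."):
    print(chunk, end="", flush=True)
print()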
mojentic/llm/chat_session_spec.py
CHANGED

@@ -94,6 +94,46 @@ class DescribeChatSession:
         assert chat_session.messages[1].content == "Query message 2"
         assert chat_session.messages[2].content == INTENDED_RESPONSE_MESSAGE
 
+    class DescribeStreamingSend:
+
+        def should_yield_content_chunks(self, chat_session):
+            chat_session.llm.generate_stream.return_value = iter(["Hello", " world"])
+
+            chunks = list(chat_session.send_stream("Query message"))
+
+            assert chunks == ["Hello", " world"]
+
+        def should_grow_message_history_after_stream_consumed(self, chat_session):
+            chat_session.llm.generate_stream.return_value = iter(["Response"])
+
+            list(chat_session.send_stream("Query message"))
+
+            assert len(chat_session.messages) == 3
+
+        def should_record_full_assembled_response_in_history(self, chat_session):
+            chat_session.llm.generate_stream.return_value = iter(["Hello", " world"])
+
+            list(chat_session.send_stream("Query message"))
+
+            assert chat_session.messages[2].content == "Hello world"
+
+        def should_record_user_message_in_history(self, chat_session):
+            chat_session.llm.generate_stream.return_value = iter(["Response"])
+
+            list(chat_session.send_stream("Query message"))
+
+            assert chat_session.messages[1].role == MessageRole.User
+            assert chat_session.messages[1].content == "Query message"
+
+        def should_respect_context_capacity(self, chat_session):
+            chat_session.llm.generate_stream.return_value = iter(["Response 1"])
+            list(chat_session.send_stream("Query 1"))
+
+            chat_session.llm.generate_stream.return_value = iter(["Response 2"])
+            list(chat_session.send_stream("Query 2"))
+
+            assert len(chat_session.messages) == 3
+
     class DescribeMessageRoles:
         """
         Specifications for message role handling
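The specs above rely on an existing chat_session fixture whose LLM broker is a mock, so generate_stream can be stubbed with an iterator. That fixture is defined earlier in chat_session_spec.py and is not shown in this diff; a hypothetical sketch of the pattern, purely for illustration:

from unittest.mock import MagicMock

import pytest

from mojentic.llm import LLMBroker
from mojentic.llm.chat_session import ChatSession


# Hypothetical fixture; the real one in chat_session_spec.py may differ.
@pytest.fixture
def chat_session():
    broker = MagicMock(spec=LLMBroker)
    return ChatSession(broker)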