llms-py 2.0.15__py3-none-any.whl → 2.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llms/__pycache__/__init__.cpython-313.pyc +0 -0
- llms/__pycache__/__init__.cpython-314.pyc +0 -0
- llms/__pycache__/__main__.cpython-314.pyc +0 -0
- llms/__pycache__/main.cpython-312.pyc +0 -0
- llms/__pycache__/main.cpython-313.pyc +0 -0
- llms/__pycache__/main.cpython-314.pyc +0 -0
- llms/index.html +5 -1
- llms/llms.json +721 -66
- llms/main.py +203 -11
- llms/ui/Analytics.mjs +1483 -0
- llms/ui/Brand.mjs +19 -8
- llms/ui/ChatPrompt.mjs +58 -36
- llms/ui/Main.mjs +205 -5
- llms/ui/ModelSelector.mjs +35 -4
- llms/ui/ProviderIcon.mjs +29 -0
- llms/ui/Sidebar.mjs +20 -4
- llms/ui/ai.mjs +1 -1
- llms/ui/app.css +211 -64
- llms/ui/lib/chart.js +14 -0
- llms/ui/lib/charts.mjs +20 -0
- llms/ui/lib/color.js +14 -0
- llms/ui/tailwind.input.css +1 -1
- llms/ui/threadStore.mjs +270 -19
- llms/ui/utils.mjs +36 -0
- {llms_py-2.0.15.dist-info → llms_py-2.0.16.dist-info}/METADATA +8 -35
- llms_py-2.0.16.dist-info/RECORD +56 -0
- llms_py-2.0.15.dist-info/RECORD +0 -46
- {llms_py-2.0.15.dist-info → llms_py-2.0.16.dist-info}/WHEEL +0 -0
- {llms_py-2.0.15.dist-info → llms_py-2.0.16.dist-info}/entry_points.txt +0 -0
- {llms_py-2.0.15.dist-info → llms_py-2.0.16.dist-info}/licenses/LICENSE +0 -0
- {llms_py-2.0.15.dist-info → llms_py-2.0.16.dist-info}/top_level.txt +0 -0
llms/main.py
CHANGED
```diff
@@ -22,7 +22,7 @@ from aiohttp import web
 from pathlib import Path
 from importlib import resources # Py≥3.9 (pip install importlib_resources for 3.7/3.8)
 
-VERSION = "2.0.15"
+VERSION = "2.0.16"
 _ROOT = None
 g_config_path = None
 g_ui_path = None
@@ -156,6 +156,47 @@ def get_file_mime_type(filename):
     mime_type, _ = mimetypes.guess_type(filename)
     return mime_type or "application/octet-stream"
 
+
+def price_to_string(price: float | int | str | None) -> str | None:
+    """Convert numeric price to string without scientific notation.
+
+    Detects and rounds up numbers with recurring 9s (e.g., 0.00014999999999999999)
+    to avoid floating-point precision artifacts.
+    """
+    if price is None or price == 0 or price == "0":
+        return "0"
+    try:
+        price_float = float(price)
+        # Format with enough decimal places to avoid scientific notation
+        formatted = format(price_float, '.20f')
+
+        # Detect recurring 9s pattern (e.g., "...9999999")
+        # If we have 4 or more consecutive 9s, round up
+        if '9999' in formatted:
+            # Round up by adding a small amount and reformatting
+            # Find the position of the 9s to determine precision
+            import decimal
+            decimal.getcontext().prec = 28
+            d = decimal.Decimal(str(price_float))
+            # Round to one less decimal place than where the 9s start
+            nines_pos = formatted.find('9999')
+            if nines_pos > 0:
+                # Round up at the position before the 9s
+                decimal_places = nines_pos - formatted.find('.') - 1
+                if decimal_places > 0:
+                    quantize_str = '0.' + '0' * (decimal_places - 1) + '1'
+                    d = d.quantize(decimal.Decimal(quantize_str), rounding=decimal.ROUND_UP)
+            result = str(d)
+            # Remove trailing zeros
+            if '.' in result:
+                result = result.rstrip('0').rstrip('.')
+            return result
+
+        # Normal case: strip trailing zeros
+        return formatted.rstrip('0').rstrip('.')
+    except (ValueError, TypeError):
+        return None
+
 async def process_chat(chat):
     if not chat:
         raise Exception("No chat provided")
```
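A quick sketch of what the new helper produces, traced from the branches above (the import path is an assumption; outputs are shown as comments):

```python
# Hypothetical usage of price_to_string (import path assumed;
# expected outputs traced from the function body above).
from llms.main import price_to_string

price_to_string(None)                    # -> "0"   (missing/zero prices normalize to "0")
price_to_string(2.5)                     # -> "2.5" (plain decimal, trailing zeros stripped)
price_to_string(0.00014999999999999999)  # -> "0.00015" (recurring 9s rounded up)
price_to_string("not a price")           # -> None  (unparseable input)
```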
```diff
@@ -307,6 +348,9 @@ class OpenAiProvider:
         self.verbosity = kwargs['verbosity'] if 'verbosity' in kwargs else None
         self.stream = bool(kwargs['stream']) if 'stream' in kwargs else None
         self.enable_thinking = bool(kwargs['enable_thinking']) if 'enable_thinking' in kwargs else None
+        self.pricing = kwargs['pricing'] if 'pricing' in kwargs else None
+        self.default_pricing = kwargs['default_pricing'] if 'default_pricing' in kwargs else None
+        self.check = kwargs['check'] if 'check' in kwargs else None
 
     @classmethod
     def test(cls, base_url=None, api_key=None, models={}, **kwargs):
@@ -315,10 +359,30 @@ class OpenAiProvider:
     async def load(self):
         pass
 
-    async def chat(self, chat):
-        model = chat['model']
+    def model_pricing(self, model):
+        provider_model = self.provider_model(model) or model
+        if self.pricing and provider_model in self.pricing:
+            return self.pricing[provider_model]
+        return self.default_pricing or None
+
+    def provider_model(self, model):
         if model in self.models:
-            chat['model'] = self.models[model]
+            return self.models[model]
+        return None
+
+    def to_response(self, response, chat, started_at):
+        if 'metadata' not in response:
+            response['metadata'] = {}
+        response['metadata']['duration'] = int((time.time() - started_at) * 1000)
+        if chat is not None and 'model' in chat:
+            pricing = self.model_pricing(chat['model'])
+            if pricing and 'input' in pricing and 'output' in pricing:
+                response['metadata']['pricing'] = f"{pricing['input']}/{pricing['output']}"
+        _log(json.dumps(response, indent=2))
+        return response
+
+    async def chat(self, chat):
+        chat['model'] = self.provider_model(chat['model']) or chat['model']
 
         # with open(os.path.join(os.path.dirname(__file__), 'chat.wip.json'), "w") as f:
         #     f.write(json.dumps(chat, indent=2))
@@ -361,9 +425,11 @@
         chat = await process_chat(chat)
         _log(f"POST {self.chat_url}")
         _log(chat_summary(chat))
+
         async with aiohttp.ClientSession() as session:
+            started_at = time.time()
             async with session.post(self.chat_url, headers=self.headers, data=json.dumps(chat), timeout=aiohttp.ClientTimeout(total=120)) as response:
-                return await response_json(response)
+                return self.to_response(await response_json(response), chat, started_at)
 
 class OllamaProvider(OpenAiProvider):
     def __init__(self, base_url, models, all_models=False, **kwargs):
```
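With these hooks in place, every chat completion now carries a `metadata` block. A hypothetical response shape, inferred from `to_response()` above (all values illustrative):

```python
# Illustrative only: the metadata that to_response() attaches to a
# chat completion. Field values here are made up.
response = {
    "choices": [{"message": {"role": "assistant", "content": "..."}}],
    "metadata": {
        "duration": 1234,       # wall-clock ms: int((time.time() - started_at) * 1000)
        "pricing": "0.15/0.6",  # f"{pricing['input']}/{pricing['output']}" when configured
    },
}
```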
```diff
@@ -430,9 +496,7 @@ class GoogleProvider(OpenAiProvider):
         return api_key is not None and len(models) > 0
 
     async def chat(self, chat):
-        model = chat['model']
-        if model in self.models:
-            chat['model'] = self.models[model]
+        chat['model'] = self.provider_model(chat['model']) or chat['model']
 
         chat = await process_chat(chat)
         generationConfig = {}
@@ -530,6 +594,8 @@
                 "parts": [{"text": system_prompt}]
             }
 
+        if 'max_completion_tokens' in chat:
+            generationConfig['maxOutputTokens'] = chat['max_completion_tokens']
         if 'stop' in chat:
             generationConfig['stopSequences'] = [chat['stop']]
         if 'temperature' in chat:
```
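The Gemini path translates OpenAI-style request fields into `generationConfig`; the new branch adds output-token limits to that mapping. A small sketch of the translation for a hypothetical request, following the branches above:

```python
# Hypothetical input: OpenAI-style fields map onto Gemini generationConfig
# keys as in the branches above.
chat = {"max_completion_tokens": 1024, "stop": "###", "temperature": 0.7}

generationConfig = {}
if 'max_completion_tokens' in chat:
    generationConfig['maxOutputTokens'] = chat['max_completion_tokens']  # new in 2.0.16
if 'stop' in chat:
    generationConfig['stopSequences'] = [chat['stop']]
if 'temperature' in chat:
    generationConfig['temperature'] = chat['temperature']  # assumed mapping, not shown in this hunk

print(generationConfig)
# {'maxOutputTokens': 1024, 'stopSequences': ['###'], 'temperature': 0.7}
```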
```diff
@@ -552,6 +618,7 @@ class GoogleProvider(OpenAiProvider):
 
         _log(f"POST {gemini_chat_url}")
         _log(gemini_chat_summary(gemini_chat))
+        started_at = time.time()
 
         if self.curl:
             curl_args = [
@@ -621,7 +688,7 @@ class GoogleProvider(OpenAiProvider):
                 "total_tokens": usage['totalTokenCount'],
                 "prompt_tokens": usage['promptTokenCount'],
             }
-        return response
+        return self.to_response(response, chat, started_at)
 
 def get_models():
     ret = []
@@ -632,6 +699,24 @@ def get_models():
     ret.sort()
     return ret
 
+def get_active_models():
+    ret = []
+    existing_models = set()
+    for id, provider in g_handlers.items():
+        for model in provider.models.keys():
+            if model not in existing_models:
+                existing_models.add(model)
+                provider_model = provider.models[model]
+                pricing = provider.model_pricing(model)
+                ret.append({
+                    "id": model,
+                    "provider": id,
+                    "provider_model": provider_model,
+                    "pricing": pricing
+                })
+    ret.sort(key=lambda x: x["id"])
+    return ret
+
 async def chat_completion(chat):
     model = chat['model']
     # get first provider that has the model
```
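`get_active_models()` backs the reworked `/models` endpoint (see the routing change at the end of this diff): one entry per unique model alias, with its provider and pricing. A hypothetical entry, keys exactly as appended in the loop above, values made up:

```python
# Illustrative get_active_models() entry (values hypothetical).
{
    "id": "gemini-flash-latest",              # alias exposed to clients
    "provider": "google",                     # handler key that serves it
    "provider_model": "gemini-flash-latest",  # provider's native model id
    "pricing": {"input": "0.3", "output": "2.5"},  # from provider/default pricing config
}
```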
```diff
@@ -834,7 +919,7 @@ def save_config(config):
     _log(f"Saved config to {g_config_path}")
 
 def github_url(filename):
-    return f"https://raw.githubusercontent.com/ServiceStack/llms/refs/heads/main/{filename}"
+    return f"https://raw.githubusercontent.com/ServiceStack/llms/refs/heads/main/llms/{filename}"
 
 async def save_text(url, save_path):
     async with aiohttp.ClientSession() as session:
@@ -1082,6 +1167,101 @@ def read_resource_file_bytes(resource_file):
     except (OSError, PermissionError, AttributeError) as e:
         _log(f"Error reading resource bytes: {e}")
 
+async def check_models(provider_name, model_names=None):
+    """
+    Check validity of models for a specific provider by sending a ping message.
+
+    Args:
+        provider_name: Name of the provider to check
+        model_names: List of specific model names to check, or None to check all models
+    """
+    if provider_name not in g_handlers:
+        print(f"Provider '{provider_name}' not found or not enabled")
+        print(f"Available providers: {', '.join(g_handlers.keys())}")
+        return
+
+    provider = g_handlers[provider_name]
+    models_to_check = []
+
+    # Determine which models to check
+    if model_names is None or (len(model_names) == 1 and model_names[0] == 'all'):
+        # Check all models for this provider
+        models_to_check = list(provider.models.keys())
+    else:
+        # Check only specified models
+        for model_name in model_names:
+            if model_name in provider.models:
+                models_to_check.append(model_name)
+            else:
+                print(f"Model '{model_name}' not found in provider '{provider_name}'")
+
+    if not models_to_check:
+        print(f"No models to check for provider '{provider_name}'")
+        return
+
+    print(f"\nChecking {len(models_to_check)} model{'' if len(models_to_check) == 1 else 's'} for provider '{provider_name}':\n")
+
+    # Test each model
+    for model in models_to_check:
+        # Create a simple ping chat request
+        chat = (provider.check or g_config['defaults']['check']).copy()
+        chat["model"] = model
+
+        started_at = time.time()
+        try:
+            # Try to get a response from the model
+            response = await provider.chat(chat)
+            duration_ms = int((time.time() - started_at) * 1000)
+
+            # Check if we got a valid response
+            if response and 'choices' in response and len(response['choices']) > 0:
+                print(f" ✓ {model:<40} ({duration_ms}ms)")
+            else:
+                print(f" ✗ {model:<40} Invalid response format")
+        except HTTPError as e:
+            duration_ms = int((time.time() - started_at) * 1000)
+            error_msg = f"HTTP {e.status}"
+            try:
+                # Try to parse error body for more details
+                error_body = json.loads(e.body) if e.body else {}
+                if 'error' in error_body:
+                    error = error_body['error']
+                    if isinstance(error, dict):
+                        if 'message' in error:
+                            # OpenRouter
+                            if isinstance(error['message'], str):
+                                error_msg = error['message']
+                            if 'code' in error:
+                                error_msg = f"{error['code']} {error_msg}"
+                            if 'metadata' in error and 'raw' in error['metadata']:
+                                error_msg += f" - {error['metadata']['raw']}"
+                            if 'provider' in error:
+                                error_msg += f" ({error['provider']})"
+                    elif isinstance(error, str):
+                        error_msg = error
+                elif 'message' in error_body:
+                    if isinstance(error_body['message'], str):
+                        error_msg = error_body['message']
+                    elif isinstance(error_body['message'], dict):
+                        # codestral error format
+                        if 'detail' in error_body['message'] and isinstance(error_body['message']['detail'], list):
+                            error_msg = error_body['message']['detail'][0]['msg']
+                            if 'loc' in error_body['message']['detail'][0] and len(error_body['message']['detail'][0]['loc']) > 0:
+                                error_msg += f" (in {' '.join(error_body['message']['detail'][0]['loc'])})"
+            except Exception as parse_error:
+                _log(f"Error parsing error body: {parse_error}")
+                error_msg = e.body[:100] if e.body else f"HTTP {e.status}"
+            print(f" ✗ {model:<40} {error_msg}")
+        except asyncio.TimeoutError:
+            duration_ms = int((time.time() - started_at) * 1000)
+            print(f" ✗ {model:<40} Timeout after {duration_ms}ms")
+        except Exception as e:
+            duration_ms = int((time.time() - started_at) * 1000)
+            error_msg = str(e)[:100]
+            print(f" ✗ {model:<40} {error_msg}")
+
+    print()
+
 def main():
     global _ROOT, g_verbose, g_default_model, g_logprefix, g_config_path, g_ui_path
 
```
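`check_models()` drives the new `--check` flag wired up below, e.g. `llms --check openrouter` to ping every model of a provider, or `llms --check openrouter gpt-4o` for specific ones (the `llms` command name and model ids here are assumptions, not taken from this diff). The coroutine can also be driven directly:

```python
# Hypothetical direct use (provider/model names are placeholders;
# assumes handlers are initialized the same way main() does it).
import asyncio

asyncio.run(check_models("openrouter", None))        # ping every model of the provider
asyncio.run(check_models("openrouter", ["gpt-4o"]))  # ping only the listed models
# Prints one line per model, e.g.:
#  ✓ gpt-4o                                   (842ms)
#  ✗ some-model                               404 Not Found
```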
```diff
@@ -1098,6 +1278,7 @@ def main():
     parser.add_argument('--raw', action='store_true', help='Return raw AI JSON response')
 
     parser.add_argument('--list', action='store_true', help='Show list of enabled providers and their models (alias ls provider?)')
+    parser.add_argument('--check', default=None, help='Check validity of models for a provider', metavar='PROVIDER')
 
     parser.add_argument('--serve', default=None, help='Port to start an OpenAI Chat compatible server on', metavar='PORT')
 
@@ -1244,6 +1425,13 @@
         print_status()
         exit(0)
 
+    if cli_args.check is not None:
+        # Check validity of models for a provider
+        provider_name = cli_args.check
+        model_names = extra_args if len(extra_args) > 0 else None
+        asyncio.run(check_models(provider_name, model_names))
+        exit(0)
+
     if cli_args.serve is not None:
         port = int(cli_args.serve)
 
@@ -1264,7 +1452,11 @@ def main():
 
     async def models_handler(request):
         return web.json_response(get_models())
-    app.router.add_get('/models', models_handler)
+    app.router.add_get('/models/list', models_handler)
+
+    async def active_models_handler(request):
+        return web.json_response(get_active_models())
+    app.router.add_get('/models', active_models_handler)
 
     async def status_handler(request):
         enabled, disabled = provider_status()
```