llms-py 2.0.15__py3-none-any.whl → 2.0.16__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registry.
llms/main.py CHANGED
@@ -22,7 +22,7 @@ from aiohttp import web
  from pathlib import Path
  from importlib import resources # Py≥3.9 (pip install importlib_resources for 3.7/3.8)

- VERSION = "2.0.15"
+ VERSION = "2.0.16"
  _ROOT = None
  g_config_path = None
  g_ui_path = None
@@ -156,6 +156,47 @@ def get_file_mime_type(filename):
      mime_type, _ = mimetypes.guess_type(filename)
      return mime_type or "application/octet-stream"

+
+ def price_to_string(price: float | int | str | None) -> str | None:
+     """Convert numeric price to string without scientific notation.
+
+     Detects and rounds up numbers with recurring 9s (e.g., 0.00014999999999999999)
+     to avoid floating-point precision artifacts.
+     """
+     if price is None or price == 0 or price == "0":
+         return "0"
+     try:
+         price_float = float(price)
+         # Format with enough decimal places to avoid scientific notation
+         formatted = format(price_float, '.20f')
+
+         # Detect recurring 9s pattern (e.g., "...9999999")
+         # If we have 4 or more consecutive 9s, round up
+         if '9999' in formatted:
+             # Round up using Decimal quantization
+             # Find the position of the 9s to determine precision
+             import decimal
+             decimal.getcontext().prec = 28
+             d = decimal.Decimal(str(price_float))
+             # Round to one less decimal place than where the 9s start
+             nines_pos = formatted.find('9999')
+             if nines_pos > 0:
+                 # Round up at the position before the 9s
+                 decimal_places = nines_pos - formatted.find('.') - 1
+                 if decimal_places > 0:
+                     quantize_str = '0.' + '0' * (decimal_places - 1) + '1'
+                     d = d.quantize(decimal.Decimal(quantize_str), rounding=decimal.ROUND_UP)
+             result = str(d)
+             # Remove trailing zeros
+             if '.' in result:
+                 result = result.rstrip('0').rstrip('.')
+             return result
+
+         # Normal case: strip trailing zeros
+         return formatted.rstrip('0').rstrip('.')
+     except (ValueError, TypeError):
+         return None
+
  async def process_chat(chat):
      if not chat:
          raise Exception("No chat provided")
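
The new `price_to_string` helper matters for per-token prices, where binary floats routinely render with recurring 9s. A few illustrative calls and their expected results, based on the docstring example and the trailing-zero stripping above (not taken from the package's test suite):

    # Illustrative usage of price_to_string (expected outputs per the logic above)
    print(price_to_string(None))                    # "0"
    print(price_to_string("2.50"))                  # "2.5"  (trailing zeros stripped)
    print(price_to_string(3))                       # "3"
    print(price_to_string(0.00014999999999999999))  # "0.00015" (recurring 9s rounded up)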
@@ -307,6 +348,9 @@ class OpenAiProvider:
          self.verbosity = kwargs['verbosity'] if 'verbosity' in kwargs else None
          self.stream = bool(kwargs['stream']) if 'stream' in kwargs else None
          self.enable_thinking = bool(kwargs['enable_thinking']) if 'enable_thinking' in kwargs else None
+         self.pricing = kwargs['pricing'] if 'pricing' in kwargs else None
+         self.default_pricing = kwargs['default_pricing'] if 'default_pricing' in kwargs else None
+         self.check = kwargs['check'] if 'check' in kwargs else None

      @classmethod
      def test(cls, base_url=None, api_key=None, models={}, **kwargs):
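
The three new attributes (`pricing`, `default_pricing`, `check`) are read straight from the provider's kwargs, i.e. its entry in the config file. A hypothetical provider entry exercising all three keys (key names follow the kwargs above; every value is invented for illustration):

    # Hypothetical kwargs for an OpenAiProvider (values invented)
    provider_kwargs = {
        "pricing": {
            # per provider-model prices, consumed by model_pricing()/to_response()
            "gpt-4o-mini": {"input": 0.00000015, "output": 0.0000006},
        },
        "default_pricing": {"input": 0, "output": 0},  # fallback when a model has no entry
        "check": {
            # template chat used by --check (see check_models below); 'model' is filled in per model
            "messages": [{"role": "user", "content": "ping"}],
        },
    }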
@@ -315,10 +359,30 @@ class OpenAiProvider:
      async def load(self):
          pass

-     async def chat(self, chat):
-         model = chat['model']
+     def model_pricing(self, model):
+         provider_model = self.provider_model(model) or model
+         if self.pricing and provider_model in self.pricing:
+             return self.pricing[provider_model]
+         return self.default_pricing or None
+
+     def provider_model(self, model):
          if model in self.models:
-             chat['model'] = self.models[model]
+             return self.models[model]
+         return None
+
+     def to_response(self, response, chat, started_at):
+         if 'metadata' not in response:
+             response['metadata'] = {}
+         response['metadata']['duration'] = int((time.time() - started_at) * 1000)
+         if chat is not None and 'model' in chat:
+             pricing = self.model_pricing(chat['model'])
+             if pricing and 'input' in pricing and 'output' in pricing:
+                 response['metadata']['pricing'] = f"{pricing['input']}/{pricing['output']}"
+         _log(json.dumps(response, indent=2))
+         return response
+
+     async def chat(self, chat):
+         chat['model'] = self.provider_model(chat['model']) or chat['model']

          # with open(os.path.join(os.path.dirname(__file__), 'chat.wip.json'), "w") as f:
          #     f.write(json.dumps(chat, indent=2))
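
Every successful chat now flows through `to_response`, which stamps the elapsed time and, when pricing is configured, an `input/output` price pair onto the response. A hypothetical response shape after this step (all values invented for illustration):

    # Hypothetical response after to_response() (values invented)
    response = {
        "choices": [{"message": {"role": "assistant", "content": "pong"}}],
        "metadata": {
            "duration": 842,                    # wall-clock ms since started_at
            "pricing": "0.00000015/0.0000006",  # f"{input}/{output}" from model_pricing()
        },
    }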
@@ -361,9 +425,11 @@ class OpenAiProvider:
          chat = await process_chat(chat)
          _log(f"POST {self.chat_url}")
          _log(chat_summary(chat))
+
          async with aiohttp.ClientSession() as session:
+             started_at = time.time()
              async with session.post(self.chat_url, headers=self.headers, data=json.dumps(chat), timeout=aiohttp.ClientTimeout(total=120)) as response:
-                 return await response_json(response)
+                 return self.to_response(await response_json(response), chat, started_at)

  class OllamaProvider(OpenAiProvider):
      def __init__(self, base_url, models, all_models=False, **kwargs):
@@ -430,9 +496,7 @@ class GoogleProvider(OpenAiProvider):
          return api_key is not None and len(models) > 0

      async def chat(self, chat):
-         model = chat['model']
-         if model in self.models:
-             chat['model'] = self.models[model]
+         chat['model'] = self.provider_model(chat['model']) or chat['model']

          chat = await process_chat(chat)
          generationConfig = {}
@@ -530,6 +594,8 @@ class GoogleProvider(OpenAiProvider):
              "parts": [{"text": system_prompt}]
          }

+         if 'max_completion_tokens' in chat:
+             generationConfig['maxOutputTokens'] = chat['max_completion_tokens']
          if 'stop' in chat:
              generationConfig['stopSequences'] = [chat['stop']]
          if 'temperature' in chat:
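
This maps the OpenAI-style `max_completion_tokens` field onto Gemini's `generationConfig.maxOutputTokens`. A minimal sketch of the translation (request fragment invented):

    # Incoming OpenAI-compatible request fragment...
    chat = {"max_completion_tokens": 1024}
    # ...now produces this Gemini generationConfig fragment
    generationConfig = {"maxOutputTokens": 1024}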
@@ -552,6 +618,7 @@ class GoogleProvider(OpenAiProvider):

          _log(f"POST {gemini_chat_url}")
          _log(gemini_chat_summary(gemini_chat))
+         started_at = time.time()

          if self.curl:
              curl_args = [
@@ -621,7 +688,7 @@ class GoogleProvider(OpenAiProvider):
              "total_tokens": usage['totalTokenCount'],
              "prompt_tokens": usage['promptTokenCount'],
          }
-         return response
+         return self.to_response(response, chat, started_at)

  def get_models():
      ret = []
@@ -632,6 +699,24 @@ def get_models():
      ret.sort()
      return ret

+ def get_active_models():
+     ret = []
+     existing_models = set()
+     for id, provider in g_handlers.items():
+         for model in provider.models.keys():
+             if model not in existing_models:
+                 existing_models.add(model)
+                 provider_model = provider.models[model]
+                 pricing = provider.model_pricing(model)
+                 ret.append({
+                     "id": model,
+                     "provider": id,
+                     "provider_model": provider_model,
+                     "pricing": pricing
+                 })
+     ret.sort(key=lambda x: x["id"])
+     return ret
+
  async def chat_completion(chat):
      model = chat['model']
      # get first provider that has the model
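
`get_active_models` returns one entry per unique model id, keeping the first enabled provider that serves it. A hypothetical entry (values invented for illustration):

    # Hypothetical get_active_models() entry (values invented)
    entry = {
        "id": "gpt-4o-mini",                     # public model id
        "provider": "openrouter",                # first enabled provider exposing this model
        "provider_model": "openai/gpt-4o-mini",  # provider-specific model name
        "pricing": {"input": 0.00000015, "output": 0.0000006},  # from model_pricing(), may be None
    }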
@@ -834,7 +919,7 @@ def save_config(config):
      _log(f"Saved config to {g_config_path}")

  def github_url(filename):
-     return f"https://raw.githubusercontent.com/ServiceStack/llms/refs/heads/main/{filename}"
+     return f"https://raw.githubusercontent.com/ServiceStack/llms/refs/heads/main/llms/{filename}"

  async def save_text(url, save_path):
      async with aiohttp.ClientSession() as session:
@@ -1082,6 +1167,101 @@ def read_resource_file_bytes(resource_file):
      except (OSError, PermissionError, AttributeError) as e:
          _log(f"Error reading resource bytes: {e}")

+ async def check_models(provider_name, model_names=None):
+     """
+     Check validity of models for a specific provider by sending a ping message.
+
+     Args:
+         provider_name: Name of the provider to check
+         model_names: List of specific model names to check, or None to check all models
+     """
+     if provider_name not in g_handlers:
+         print(f"Provider '{provider_name}' not found or not enabled")
+         print(f"Available providers: {', '.join(g_handlers.keys())}")
+         return
+
+     provider = g_handlers[provider_name]
+     models_to_check = []
+
+     # Determine which models to check
+     if model_names is None or (len(model_names) == 1 and model_names[0] == 'all'):
+         # Check all models for this provider
+         models_to_check = list(provider.models.keys())
+     else:
+         # Check only specified models
+         for model_name in model_names:
+             if model_name in provider.models:
+                 models_to_check.append(model_name)
+             else:
+                 print(f"Model '{model_name}' not found in provider '{provider_name}'")
+
+     if not models_to_check:
+         print(f"No models to check for provider '{provider_name}'")
+         return
+
+     print(f"\nChecking {len(models_to_check)} model{'' if len(models_to_check) == 1 else 's'} for provider '{provider_name}':\n")
+
+     # Test each model
+     for model in models_to_check:
+         # Create a simple ping chat request
+         chat = (provider.check or g_config['defaults']['check']).copy()
+         chat["model"] = model
+
+         started_at = time.time()
+         try:
+             # Try to get a response from the model
+             response = await provider.chat(chat)
+             duration_ms = int((time.time() - started_at) * 1000)
+
+             # Check if we got a valid response
+             if response and 'choices' in response and len(response['choices']) > 0:
+                 print(f" ✓ {model:<40} ({duration_ms}ms)")
+             else:
+                 print(f" ✗ {model:<40} Invalid response format")
+         except HTTPError as e:
+             duration_ms = int((time.time() - started_at) * 1000)
+             error_msg = f"HTTP {e.status}"
+             try:
+                 # Try to parse error body for more details
+                 error_body = json.loads(e.body) if e.body else {}
+                 if 'error' in error_body:
+                     error = error_body['error']
+                     if isinstance(error, dict):
+                         if 'message' in error:
+                             # OpenRouter
+                             if isinstance(error['message'], str):
+                                 error_msg = error['message']
+                             if 'code' in error:
+                                 error_msg = f"{error['code']} {error_msg}"
+                             if 'metadata' in error and 'raw' in error['metadata']:
+                                 error_msg += f" - {error['metadata']['raw']}"
+                             if 'provider' in error:
+                                 error_msg += f" ({error['provider']})"
+                     elif isinstance(error, str):
+                         error_msg = error
+                 elif 'message' in error_body:
+                     if isinstance(error_body['message'], str):
+                         error_msg = error_body['message']
+                     elif isinstance(error_body['message'], dict):
+                         # codestral error format
+                         if 'detail' in error_body['message'] and isinstance(error_body['message']['detail'], list):
+                             error_msg = error_body['message']['detail'][0]['msg']
+                             if 'loc' in error_body['message']['detail'][0] and len(error_body['message']['detail'][0]['loc']) > 0:
+                                 error_msg += f" (in {' '.join(error_body['message']['detail'][0]['loc'])})"
+             except Exception as parse_error:
+                 _log(f"Error parsing error body: {parse_error}")
+                 error_msg = e.body[:100] if e.body else f"HTTP {e.status}"
+             print(f" ✗ {model:<40} {error_msg}")
+         except asyncio.TimeoutError:
+             duration_ms = int((time.time() - started_at) * 1000)
+             print(f" ✗ {model:<40} Timeout after {duration_ms}ms")
+         except Exception as e:
+             duration_ms = int((time.time() - started_at) * 1000)
+             error_msg = str(e)[:100]
+             print(f" ✗ {model:<40} {error_msg}")
+
+     print()
+
  def main():
      global _ROOT, g_verbose, g_default_model, g_logprefix, g_config_path, g_ui_path

@@ -1098,6 +1278,7 @@ def main():
      parser.add_argument('--raw', action='store_true', help='Return raw AI JSON response')

      parser.add_argument('--list', action='store_true', help='Show list of enabled providers and their models (alias ls provider?)')
+     parser.add_argument('--check', default=None, help='Check validity of models for a provider', metavar='PROVIDER')

      parser.add_argument('--serve', default=None, help='Port to start an OpenAI Chat compatible server on', metavar='PORT')

@@ -1244,6 +1425,13 @@ def main():
          print_status()
          exit(0)

+     if cli_args.check is not None:
+         # Check validity of models for a provider
+         provider_name = cli_args.check
+         model_names = extra_args if len(extra_args) > 0 else None
+         asyncio.run(check_models(provider_name, model_names))
+         exit(0)
+
      if cli_args.serve is not None:
          port = int(cli_args.serve)

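
The flag takes the provider name, and any remaining positional args name the models to check (`all`, or no args, means every model). Assumed invocations, with example provider/model names and assuming the console script is installed as `llms`:

    llms --check openrouter              # ping every model configured for the provider
    llms --check openrouter gemma2-9b    # ping only the listed model(s)
    llms --check openrouter all          # same as passing no model names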
@@ -1264,7 +1452,11 @@ def main():

          async def models_handler(request):
              return web.json_response(get_models())
-         app.router.add_get('/models', models_handler)
+         app.router.add_get('/models/list', models_handler)
+
+         async def active_models_handler(request):
+             return web.json_response(get_active_models())
+         app.router.add_get('/models', active_models_handler)

          async def status_handler(request):
              enabled, disabled = provider_status()
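
The old flat model list moves to `/models/list`, while `/models` now serves the richer `get_active_models` payload. A quick way to compare the two, assuming a server started with `--serve` (port is an example):

    curl http://localhost:8000/models        # objects with provider, provider_model, pricing
    curl http://localhost:8000/models/list   # flat, sorted list of model ids (the old /models payload)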