llms-py 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llms.py ADDED
@@ -0,0 +1,994 @@
1
+ #!/usr/bin/env python
2
+
3
+ # A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers.
4
+ # Docs: https://github.com/ServiceStack/llms
5
+
6
+ import os
7
+ import time
8
+ import json
9
+ import argparse
10
+ import asyncio
11
+ import subprocess
12
+ import base64
13
+ import mimetypes
14
+ import traceback
15
+
16
+ import aiohttp
17
+ from aiohttp import web
18
+
19
VERSION = "1.0.3"
g_config_path = None   # resolved path of the active llms.json config file
g_config = None        # parsed llms.json config dict
g_handlers = {}        # provider name -> provider instance, for enabled providers only
g_verbose = False      # set by --verbose; gates all _log() output
g_logprefix=""         # prefix prepended to every _log() message (--logprefix)
g_default_model=""     # model override from -m/--model; applied in cli_chat()
26
+
27
def _log(message):
    """Print *message* with the configured g_logprefix when verbose mode is on."""
    if not g_verbose:
        return
    print(g_logprefix + str(message), flush=True)
31
+
32
def printdump(obj):
    """Pretty-print *obj* as indented JSON; objects are dumped via their __dict__."""
    payload = getattr(obj, '__dict__', obj)
    print(json.dumps(payload, indent=2))
35
+
36
def print_chat(chat):
    """Log a redacted summary of a chat completion request (media payloads elided)."""
    summary = chat_summary(chat)
    _log(f"Chat: {summary}")
38
+
39
def chat_summary(chat):
    """Return an indented-JSON summary of *chat* safe for logging.

    Bulky media payloads in message content parts are replaced by short
    placeholders: image data-URIs keep only their prefix plus a byte count,
    audio data and file data become "(<length>)". The input is not mutated.
    """
    # Deep copy via a JSON round-trip so redaction never touches the original.
    clone = json.loads(json.dumps(chat))
    for msg in clone['messages']:
        content = msg.get('content')
        if not isinstance(content, list):
            continue
        for part in content:
            if 'image_url' in part:
                image_url = part['image_url']
                if 'url' in image_url:
                    full = image_url['url']
                    head = full.split(',', 1)[0]
                    image_url['url'] = f"{head},({len(full) - len(head)})"
            elif 'input_audio' in part:
                audio = part['input_audio']
                if 'data' in audio:
                    audio['data'] = f"({len(audio['data'])})"
            elif 'file' in part:
                file_part = part['file']
                if 'file_data' in file_part:
                    file_part['file_data'] = f"({len(file_part['file_data'])})"
    return json.dumps(clone, indent=2)
61
+
62
# Known image / audio file extensions.
# NOTE(review): neither list is referenced by code visible in this file —
# possibly used by external callers, or dead; confirm before removing.
image_exts = 'png,webp,jpg,jpeg,gif,bmp,svg,tiff,ico'.split(',')
audio_exts = 'mp3,wav,ogg,flac,m4a,opus,webm'.split(',')
64
+
65
def is_file_path(path):
    """Truthy when *path* looks like an existing local file-system path.

    Strings of 1024+ characters are rejected up front (macOS caps paths at
    1023), which also avoids probing the filesystem with huge data strings.
    """
    if not path:
        return path
    if len(path) >= 1024:
        return False
    return os.path.exists(path)
68
+
69
def is_url(url):
    """Truthy when *url* is an absolute http or https URL."""
    return url and url.startswith(('http://', 'https://'))
71
+
72
def get_filename(file):
    """Return the last '/'-separated segment of *file*, or 'file' when there is no '/'."""
    if '/' not in file:
        return 'file'
    return file.rsplit('/', 1)[1]
74
+
75
def is_base_64(data):
    """Return True when *data* is valid base64 text, else False.

    Uses validate=True so strings containing characters outside the base64
    alphabet are rejected. The previous default (validate=False) silently
    discarded such characters first, which made almost any short string —
    e.g. a mistyped file path like "hello.mp3" — look like valid base64 and
    get passed through to the provider unmodified.
    """
    try:
        base64.b64decode(data, validate=True)
        return True
    except Exception:
        return False
81
+
82
def get_file_mime_type(filename):
    """Guess a MIME type from *filename*'s extension; default to application/octet-stream."""
    guessed, _encoding = mimetypes.guess_type(filename)
    return guessed if guessed else "application/octet-stream"
85
+
86
async def process_chat(chat):
    """Normalize a chat completion request in place before sending it upstream.

    Ensures 'stream' defaults to False, then walks every message's content
    parts and inlines external media references:
      - image_url parts: http(s) URLs are downloaded and local file paths are
        read, then replaced with a data: URI; existing data: URIs pass through.
      - input_audio parts: URLs/files are fetched and replaced with raw base64
        plus a 'format' derived from the MIME type; base64 data passes through.
      - file parts: URLs/files become data: URIs with a 'filename' attached;
        bare base64 data is kept as-is and given the filename 'file'.

    Raises when *chat* is falsy or a media reference is unrecognizable.
    Returns the (mutated) chat dict.
    """
    if not chat:
        raise Exception("No chat provided")
    if 'stream' not in chat:
        chat['stream'] = False
    if 'messages' not in chat:
        return chat

    async with aiohttp.ClientSession() as session:
        for message in chat['messages']:
            if 'content' not in message:
                continue

            # Only structured (list) content can carry media parts;
            # plain string content needs no processing.
            if isinstance(message['content'], list):
                for item in message['content']:
                    if 'type' not in item:
                        continue
                    if item['type'] == 'image_url' and 'image_url' in item:
                        image_url = item['image_url']
                        if 'url' in image_url:
                            url = image_url['url']
                            if is_url(url):
                                _log(f"Downloading image: {url}")
                                async with session.get(url, timeout=aiohttp.ClientTimeout(total=120)) as response:
                                    response.raise_for_status()
                                    content = await response.read()
                                    # get mimetype from response headers,
                                    # falling back to a guess from the filename
                                    mimetype = get_file_mime_type(get_filename(url))
                                    if 'Content-Type' in response.headers:
                                        mimetype = response.headers['Content-Type']
                                    # convert to data uri
                                    image_url['url'] = f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
                            elif is_file_path(url):
                                _log(f"Reading image: {url}")
                                with open(url, "rb") as f:
                                    content = f.read()
                                # NOTE(review): 'ext' is computed but never used below.
                                ext = os.path.splitext(url)[1].lower().lstrip('.') if '.' in url else 'png'
                                # get mimetype from file extension
                                mimetype = get_file_mime_type(get_filename(url))
                                # convert to data uri
                                image_url['url'] = f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
                            elif url.startswith('data:'):
                                # already an inline data URI, use as-is
                                pass
                            else:
                                raise Exception(f"Invalid image: {url}")
                    elif item['type'] == 'input_audio' and 'input_audio' in item:
                        input_audio = item['input_audio']
                        if 'data' in input_audio:
                            url = input_audio['data']
                            mimetype = get_file_mime_type(get_filename(url))
                            if is_url(url):
                                _log(f"Downloading audio: {url}")
                                async with session.get(url, timeout=aiohttp.ClientTimeout(total=120)) as response:
                                    response.raise_for_status()
                                    content = await response.read()
                                    # get mimetype from response headers
                                    if 'Content-Type' in response.headers:
                                        mimetype = response.headers['Content-Type']
                                    # convert to base64
                                    input_audio['data'] = base64.b64encode(content).decode('utf-8')
                                    # e.g. "audio/mp3" -> "mp3"
                                    input_audio['format'] = mimetype.rsplit('/',1)[1]
                            elif is_file_path(url):
                                _log(f"Reading audio: {url}")
                                with open(url, "rb") as f:
                                    content = f.read()
                                # convert to base64
                                input_audio['data'] = base64.b64encode(content).decode('utf-8')
                                input_audio['format'] = mimetype.rsplit('/',1)[1]
                            elif is_base_64(url):
                                pass # use base64 data as-is
                            else:
                                raise Exception(f"Invalid audio: {url}")
                    elif item['type'] == 'file' and 'file' in item:
                        file = item['file']
                        if 'file_data' in file:
                            url = file['file_data']
                            mimetype = get_file_mime_type(get_filename(url))
                            if is_url(url):
                                _log(f"Downloading file: {url}")
                                async with session.get(url, timeout=aiohttp.ClientTimeout(total=120)) as response:
                                    response.raise_for_status()
                                    content = await response.read()
                                    file['filename'] = get_filename(url)
                                    file['file_data'] = f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
                            elif is_file_path(url):
                                _log(f"Reading file: {url}")
                                with open(url, "rb") as f:
                                    content = f.read()
                                file['filename'] = get_filename(url)
                                file['file_data'] = f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
                            elif is_base_64(url):
                                file['filename'] = 'file'
                                pass # use base64 data as-is
                            else:
                                raise Exception(f"Invalid file: {url}")
    return chat
182
+
183
class HTTPError(Exception):
    """HTTP 4xx/5xx response raised as an exception.

    Keeps the status code, reason phrase, response body text and (optionally)
    the response headers so callers can report the full server error.
    """

    def __init__(self, status, reason, body, headers=None):
        super().__init__(f"HTTP {status} {reason}")
        self.status = status
        self.reason = reason
        self.body = body
        self.headers = headers
190
+
191
async def response_json(response):
    """Read an aiohttp response and parse its body as JSON.

    Raises HTTPError carrying the body text for 4xx/5xx statuses, so the
    server's error payload is preserved for the caller.
    """
    text = await response.text()
    if response.status >= 400:
        raise HTTPError(response.status, reason=response.reason,
                        body=text, headers=dict(response.headers))
    response.raise_for_status()
    return json.loads(text)
198
+
199
class OpenAiProvider:
    """Chat provider for any OpenAI-compatible /v1/chat/completions endpoint."""

    def __init__(self, base_url, api_key=None, models=None, **kwargs):
        """
        base_url: provider root URL (a trailing '/' is tolerated and stripped)
        api_key:  optional bearer token sent as an Authorization header
        models:   maps public model names -> provider-specific model ids
        kwargs:   may supply 'headers' to fully replace the default headers
        """
        self.base_url = base_url.strip("/")
        self.api_key = api_key
        # Avoid the shared mutable-default pitfall of `models={}`.
        self.models = models if models is not None else {}

        # Build from the normalized base_url: the original used the raw
        # parameter, so a configured trailing '/' produced
        # 'http://host//v1/chat/completions'.
        self.chat_url = f"{self.base_url}/v1/chat/completions"
        self.headers = kwargs['headers'] if 'headers' in kwargs else {
            "Content-Type": "application/json",
        }
        if api_key is not None:
            self.headers["Authorization"] = f"Bearer {api_key}"

    @classmethod
    def test(cls, base_url=None, api_key=None, models=None, **kwargs):
        """Config check: usable only with a base_url, an api_key and >= 1 model."""
        return base_url is not None and api_key is not None and bool(models)

    async def load(self):
        """Async post-construction hook; the base provider needs none."""
        pass

    async def chat(self, chat):
        """POST *chat* to the provider and return the parsed JSON response.

        The public model name is mapped through self.models first, and any
        media references are inlined (downloaded / base64-encoded) by
        process_chat before sending.
        """
        model = chat['model']
        if model in self.models:
            chat['model'] = self.models[model]

        chat = await process_chat(chat)
        print_chat(chat)
        async with aiohttp.ClientSession() as session:
            async with session.post(self.chat_url, headers=self.headers,
                                    data=json.dumps(chat),
                                    timeout=aiohttp.ClientTimeout(total=120)) as response:
                return await response_json(response)
232
+
233
class OllamaProvider(OpenAiProvider):
    """OpenAI-compatible provider for a local Ollama server.

    When all_models is set, the installed models are auto-discovered from
    Ollama's /api/tags endpoint during load().
    """

    def __init__(self, base_url, models, all_models=False, **kwargs):
        super().__init__(base_url=base_url, models=models, **kwargs)
        self.all_models = all_models

    @classmethod
    def test(cls, base_url=None, models={}, all_models=False, **kwargs):
        # Usable with a base_url plus either explicit models or auto-discovery.
        return base_url is not None and (all_models or len(models) > 0)

    async def load(self):
        if not self.all_models:
            return
        await self.load_models(default_models=self.models)

    async def get_models(self):
        """Query /api/tags and return {name: name} for every installed model
        (':latest' suffixes stripped); empty dict when Ollama is unreachable."""
        found = {}
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(f"{self.base_url}/api/tags", headers=self.headers,
                                       timeout=aiohttp.ClientTimeout(total=120)) as response:
                    data = await response_json(response)
                for entry in data.get('models', []):
                    name = entry['model']
                    if name.endswith(":latest"):
                        name = name[:-len(":latest")]
                    found[name] = name
            _log(f"Loaded Ollama models: {found}")
        except Exception as e:
            _log(f"Error getting Ollama models: {e}")
            # fall through: empty dict when ollama is not available
        return found

    async def load_models(self, default_models):
        """Replace self.models with discovered ones; *default_models* entries
        are kept, with discovered names winning on conflict."""
        if not self.all_models:
            return
        discovered = await self.get_models()
        self.models = {**default_models, **discovered} if default_models else discovered
269
+
270
class GoogleOpenAiProvider(OpenAiProvider):
    """Google Gemini accessed through its OpenAI-compatibility endpoint."""

    def __init__(self, api_key, models, **kwargs):
        super().__init__(base_url="https://generativelanguage.googleapis.com",
                         api_key=api_key, models=models, **kwargs)
        # Gemini's OpenAI-compatible endpoint lives under /v1beta, not /v1.
        self.chat_url = "https://generativelanguage.googleapis.com/v1beta/chat/completions"

    @classmethod
    def test(cls, api_key=None, models={}, **kwargs):
        # Needs an API key and at least one configured model.
        return api_key is not None and len(models) > 0
278
+
279
class GoogleProvider(OpenAiProvider):
    """Native Gemini generateContent provider.

    Translates OpenAI-style chat requests to Gemini's generateContent schema,
    sends them (via aiohttp, or by shelling out to curl when curl=True), and
    maps the Gemini response back into an OpenAI chat-completion shape.
    """

    def __init__(self, models, api_key, safety_settings=None, curl=False, **kwargs):
        super().__init__(base_url="https://generativelanguage.googleapis.com", api_key=api_key, models=models, **kwargs)
        self.safety_settings = safety_settings
        self.curl = curl  # when True, POST via the curl binary instead of aiohttp
        self.headers = kwargs['headers'] if 'headers' in kwargs else {
            "Content-Type": "application/json",
        }
        # Google fails when using Authorization header, use query string param instead
        if 'Authorization' in self.headers:
            del self.headers['Authorization']

    @classmethod
    def test(cls, api_key=None, models={}, **kwargs):
        """Config check: needs an API key and at least one configured model."""
        return api_key is not None and len(models) > 0

    async def chat(self, chat):
        """Send *chat* to Gemini generateContent and return an OpenAI-shaped response."""
        model = chat['model']
        if model in self.models:
            chat['model'] = self.models[model]

        chat = await process_chat(chat)
        generationConfig = {}

        # Filter out system messages and convert to proper Gemini format
        contents = []
        system_prompt = None

        for message in chat['messages']:
            if message['role'] == 'system':
                system_prompt = message
            elif 'content' in message:
                if isinstance(message['content'], list):
                    parts = []
                    for item in message['content']:
                        if 'type' in item:
                            if item['type'] == 'image_url' and 'image_url' in item:
                                image_url = item['image_url']
                                if 'url' not in image_url:
                                    continue
                                url = image_url['url']
                                # process_chat must already have inlined the image
                                if not url.startswith('data:'):
                                    raise Exception("Image was not downloaded: " + url)
                                # Extract mime type from data uri
                                mimetype = url.split(';',1)[0].split(':',1)[1] if ';' in url else "image/png"
                                base64Data = url.split(',',1)[1]
                                parts.append({
                                    "inline_data": {
                                        "mime_type": mimetype,
                                        "data": base64Data
                                    }
                                })
                            elif item['type'] == 'input_audio' and 'input_audio' in item:
                                input_audio = item['input_audio']
                                if 'data' not in input_audio:
                                    continue
                                data = input_audio['data']
                                format = input_audio['format']
                                mimetype = f"audio/{format}"
                                parts.append({
                                    "inline_data": {
                                        "mime_type": mimetype,
                                        "data": data
                                    }
                                })
                            elif item['type'] == 'file' and 'file' in item:
                                file = item['file']
                                if 'file_data' not in file:
                                    continue
                                data = file['file_data']
                                if not data.startswith('data:'):
                                    raise Exception("File was not downloaded: " + data)
                                # Extract mime type from data uri
                                mimetype = data.split(';',1)[0].split(':',1)[1] if ';' in data else "application/octet-stream"
                                base64Data = data.split(',',1)[1]
                                parts.append({
                                    "inline_data": {
                                        "mime_type": mimetype,
                                        "data": base64Data
                                    }
                                })
                        if 'text' in item:
                            text = item['text']
                            parts.append({"text": text})
                    if len(parts) > 0:
                        contents.append({
                            "parts": parts
                        })
                else:
                    content = message['content']
                    contents.append({
                        "parts": [{"text": content}]
                    })

        gemini_chat = {
            "contents": contents,
        }

        if self.safety_settings:
            gemini_chat['safetySettings'] = self.safety_settings

        # Add system instruction if present
        if system_prompt is not None:
            gemini_chat['systemInstruction'] = {
                "parts": [{"text": system_prompt['content']}]
            }

        # Map the supported OpenAI sampling params onto generationConfig.
        if 'stop' in chat:
            generationConfig['stopSequences'] = [chat['stop']]
        if 'temperature' in chat:
            generationConfig['temperature'] = chat['temperature']
        if 'top_p' in chat:
            generationConfig['topP'] = chat['top_p']
        if 'top_logprobs' in chat:
            generationConfig['topK'] = chat['top_logprobs']
        if len(generationConfig) > 0:
            gemini_chat['generationConfig'] = generationConfig

        started_at = int(time.time() * 1000)
        # API key goes in the query string (see header note in __init__).
        gemini_chat_url = f"https://generativelanguage.googleapis.com/v1beta/models/{chat['model']}:generateContent?key={self.api_key}"

        _log(f"gemini_chat: {gemini_chat_url}")
        if g_verbose:
            print(json.dumps(gemini_chat))

        if self.curl:
            curl_args = [
                'curl',
                '-X', 'POST',
                '-H', 'Content-Type: application/json',
                '-d', json.dumps(gemini_chat),
                gemini_chat_url
            ]
            try:
                o = subprocess.run(curl_args, check=True, capture_output=True, text=True, timeout=120)
                obj = json.loads(o.stdout)
            except Exception as e:
                raise Exception(f"Error executing curl: {e}")
        else:
            # BUG FIX: previously a ClientSession was opened around the message
            # conversion loop above and its context had already exited by the
            # time the POST ran, so the request used a closed session. Create
            # the session where it is actually used instead.
            async with aiohttp.ClientSession() as session:
                async with session.post(gemini_chat_url, headers=self.headers, data=json.dumps(gemini_chat), timeout=aiohttp.ClientTimeout(total=120)) as res:
                    obj = await response_json(res)

        # Map the Gemini response back to the OpenAI chat-completion shape.
        response = {
            "id": f"chatcmpl-{started_at}",
            "created": started_at,
            "model": obj.get('modelVersion', chat['model']),
        }
        choices = []
        i = 0
        _log(json.dumps(obj))
        if 'error' in obj:
            _log(f"Error: {obj['error']}")
            raise Exception(obj['error']['message'])
        for candidate in obj['candidates']:
            role = "assistant"
            if 'content' in candidate and 'role' in candidate['content']:
                # Gemini calls the assistant role 'model'
                role = "assistant" if candidate['content']['role'] == 'model' else candidate['content']['role']

            # Safely extract content from all text parts
            content = ""
            if 'content' in candidate and 'parts' in candidate['content']:
                text_parts = []
                for part in candidate['content']['parts']:
                    if 'text' in part:
                        text_parts.append(part['text'])
                content = ' '.join(text_parts)

            choices.append({
                "index": i,
                "finish_reason": candidate.get('finishReason', 'stop'),
                "message": {
                    "role": role,
                    "content": content
                },
            })
            i += 1
        response['choices'] = choices
        if 'usageMetadata' in obj:
            usage = obj['usageMetadata']
            response['usage'] = {
                "completion_tokens": usage['candidatesTokenCount'],
                "total_tokens": usage['totalTokenCount'],
                "prompt_tokens": usage['promptTokenCount'],
            }
        return response
465
+
466
def get_models():
    """Return the sorted, de-duplicated model names across all enabled providers."""
    names = set()
    for provider in g_handlers.values():
        names.update(provider.models.keys())
    return sorted(names)
474
+
475
async def chat_completion(chat):
    """Route *chat* to the first enabled provider that supports chat['model'].

    Candidate providers are tried in registration order; each gets a copy of
    the request (providers mutate it). If one fails the next is tried, and
    when all fail the FIRST provider's exception is re-raised.
    """
    model = chat['model']
    candidates = [name for name, provider in g_handlers.items()
                  if model in provider.models]
    if not candidates:
        raise Exception(f"Model {model} not found")

    first_error = None
    for name in candidates:
        provider = g_handlers[name]
        _log(f"provider: {name} {type(provider).__name__}")
        try:
            return await provider.chat(chat.copy())
        except Exception as e:
            if first_error is None:
                first_error = e
            _log(f"Provider {name} failed: {e}")

    # Every candidate provider failed
    raise first_error
497
+
498
def _first_user_message(chat):
    """Return the first message with role 'user' in *chat*, or None."""
    for message in chat['messages']:
        if message['role'] == 'user':
            return message
    return None


def _attach_part(chat, part_key, new_part, updates):
    """Attach a media part to the first user message of *chat* in place.

    part_key: 'image_url', 'input_audio' or 'file'
    new_part: full content-part dict to insert when no part of this kind exists
    updates:  fields to overwrite on an existing part's payload (only these
              fields are replaced, preserving e.g. an existing audio 'format')

    String content is promoted to the [media, text] list form of the OpenAI
    multimodal message schema. Messages without a user role or without
    'content' are left untouched.
    """
    message = _first_user_message(chat)
    if message is None or 'content' not in message:
        return
    content = message['content']
    if isinstance(content, list):
        existing = None
        for item in content:
            if part_key in item:
                existing = item[part_key]
        if existing is None:
            content.insert(0, new_part)
        else:
            existing.update(updates)
    else:
        message['content'] = [new_part, {"type": "text", "text": content}]


async def cli_chat(chat, image=None, audio=None, file=None, raw=False):
    """Send *chat* (with optional media references) and print the result.

    Prints the assistant's answer, or the raw JSON response when raw=True.
    Exits the process with status 1 on HTTP, connection or timeout errors.
    The actual downloading/base64-encoding of media happens later in
    process_chat; here we only add the references to the request.
    """
    if g_default_model:
        chat['model'] = g_default_model

    if image is not None:
        _attach_part(chat, 'image_url',
                     {"type": "image_url", "image_url": {"url": image}},
                     {"url": image})
    if audio is not None:
        _attach_part(chat, 'input_audio',
                     {"type": "input_audio", "input_audio": {"data": audio, "format": "mp3"}},
                     {"data": audio})
    if file is not None:
        _attach_part(chat, 'file',
                     {"type": "file", "file": {"filename": get_filename(file), "file_data": file}},
                     {"filename": get_filename(file), "file_data": file})

    if g_verbose:
        printdump(chat)

    try:
        response = await chat_completion(chat)
        if raw:
            print(json.dumps(response, indent=2))
            exit(0)
        else:
            answer = response['choices'][0]['message']['content']
            print(answer)
    except HTTPError as e:
        # HTTP error (4xx, 5xx)
        print(f"{e}:\n{e.body}")
        exit(1)
    except aiohttp.ClientConnectionError as e:
        # Connection issues
        print(f"Connection error: {e}")
        exit(1)
    except asyncio.TimeoutError as e:
        # BUG FIX: the original caught aiohttp.ClientTimeout, which is a
        # timeout *configuration* object, not an exception type; aiohttp
        # request timeouts surface as asyncio.TimeoutError.
        print(f"Timeout error: {e}")
        exit(1)
614
+
615
def config_str(key):
    """Return g_config[key] when present and truthy; missing or falsy values yield None."""
    value = g_config.get(key) if g_config else None
    return value if value else None
617
+
618
def init_llms(config):
    """Install *config* as the global config and build the provider handlers.

    Resolves "$ENV_VAR" placeholders (top-level string values and each
    provider's api_key) from the environment, skips providers marked
    enabled=False, and instantiates each remaining provider whose classmethod
    test() accepts its constructor kwargs. Populates and returns g_handlers.
    """
    global g_config

    g_config = config
    # iterate over config and replace $ENV with env value
    for key, value in g_config.items():
        if isinstance(value, str) and value.startswith("$"):
            g_config[key] = os.environ.get(value[1:], "")

    providers = g_config['providers']

    for name, orig in providers.items():
        # shallow copy so the stored config definition is not mutated
        definition = orig.copy()
        provider_type = definition['type']
        if 'enabled' in definition and not definition['enabled']:
            continue

        # Replace API keys with environment variables if they start with $
        if 'api_key' in definition:
            value = definition['api_key']
            if isinstance(value, str) and value.startswith("$"):
                definition['api_key'] = os.environ.get(value[1:], "")

        # Create a copy of definition without the 'type' key for constructor kwargs
        constructor_kwargs = {k: v for k, v in definition.items() if k != 'type' and k != 'enabled'}
        constructor_kwargs['headers'] = g_config['defaults']['headers'].copy()

        # Each provider's test() validates that its required kwargs are present.
        if provider_type == 'OpenAiProvider' and OpenAiProvider.test(**constructor_kwargs):
            g_handlers[name] = OpenAiProvider(**constructor_kwargs)
        elif provider_type == 'OllamaProvider' and OllamaProvider.test(**constructor_kwargs):
            g_handlers[name] = OllamaProvider(**constructor_kwargs)
        elif provider_type == 'GoogleProvider' and GoogleProvider.test(**constructor_kwargs):
            g_handlers[name] = GoogleProvider(**constructor_kwargs)
        elif provider_type == 'GoogleOpenAiProvider' and GoogleOpenAiProvider.test(**constructor_kwargs):
            g_handlers[name] = GoogleOpenAiProvider(**constructor_kwargs)

    return g_handlers
657
+
658
async def load_llms():
    """Run each enabled provider's async load() hook (e.g. Ollama model discovery).

    (Removed the needless `global g_handlers` — the dict is only read here —
    and the unused loop variable from iterating .items().)
    """
    _log("Loading providers...")
    for provider in g_handlers.values():
        await provider.load()
663
+
664
def save_config(config):
    """Persist *config* to g_config_path and make it the active global config."""
    global g_config
    g_config = config
    with open(g_config_path, "w") as f:
        f.write(json.dumps(g_config, indent=4))
669
+
670
async def save_default_config(config_path):
    """Download the default llms.json from the ServiceStack/llms repo, write it
    to *config_path*, and install it as the active global config."""
    global g_config
    url = "https://raw.githubusercontent.com/ServiceStack/llms/refs/heads/main/llms.json"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            resp.raise_for_status()
            config_json = await resp.text()
    # BUG FIX: os.makedirs('') raises FileNotFoundError, so only create the
    # parent directory when config_path actually has one (a bare filename
    # like "llms.json" has an empty dirname).
    parent_dir = os.path.dirname(config_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(config_path, "w") as f:
        f.write(config_json)
    g_config = json.loads(config_json)
684
+
685
async def update_llms():
    """Self-update: overwrite this script with the latest llms.py from GitHub."""
    url = "https://raw.githubusercontent.com/ServiceStack/llms/refs/heads/main/llms.py"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            resp.raise_for_status()
            latest_source = await resp.text()
    with open(__file__, "w") as f:
        f.write(latest_source)
696
+
697
def provider_status():
    """Return (enabled, disabled) provider-name lists, each sorted alphabetically."""
    enabled = sorted(g_handlers.keys())
    disabled = sorted(name for name in g_config['providers'] if name not in g_handlers)
    return enabled, disabled
703
+
704
def print_status():
    """Print the enabled/disabled provider summary ('None' when a list is empty)."""
    enabled, disabled = provider_status()
    print(f"\nEnabled: {', '.join(enabled) if enabled else 'None'}")
    print(f"Disabled: {', '.join(disabled) if disabled else 'None'}")
714
+
715
def main():
    """CLI entry point: parse args, locate and load llms.json, then dispatch to
    the requested action (--list/ls, --serve, --enable/--disable, --default,
    --init, --update, or a chat completion built from the remaining args)."""
    global g_verbose, g_default_model, g_logprefix, g_config_path

    parser = argparse.ArgumentParser(description=f"llms v{VERSION}")
    parser.add_argument('--config', default=None, help='Path to config file', metavar='FILE')
    parser.add_argument('-m', '--model', default=None, help='Model to use')

    parser.add_argument('--chat', default=None, help='OpenAI Chat Completion Request to send', metavar='REQUEST')
    parser.add_argument('-s', '--system', default=None, help='System prompt to use for chat completion', metavar='PROMPT')
    parser.add_argument('--image', default=None, help='Image input to use in chat completion')
    parser.add_argument('--audio', default=None, help='Audio input to use in chat completion')
    parser.add_argument('--file', default=None, help='File input to use in chat completion')
    parser.add_argument('--raw', action='store_true', help='Return raw AI JSON response')

    parser.add_argument('--list', action='store_true', help='Show list of enabled providers and their models (alias ls provider?)')

    parser.add_argument('--serve', default=None, help='Port to start an OpenAI Chat compatible server on', metavar='PORT')

    parser.add_argument('--enable', default=None, help='Enable a provider', metavar='PROVIDER')
    parser.add_argument('--disable', default=None, help='Disable a provider', metavar='PROVIDER')
    parser.add_argument('--default', default=None, help='Configure the default model to use', metavar='MODEL')

    parser.add_argument('--init', action='store_true', help='Create a default llms.json')

    parser.add_argument('--logprefix', default="", help='Prefix used in log messages', metavar='PREFIX')
    parser.add_argument('--verbose', action='store_true', help='Verbose output')
    parser.add_argument('--update', action='store_true', help='Update to latest version')

    # parse_known_args: unknown positionals become the prompt / subcommand args
    cli_args, extra_args = parser.parse_known_args()
    if cli_args.verbose:
        g_verbose = True
    if cli_args.model:
        g_default_model = cli_args.model
    if cli_args.logprefix:
        g_logprefix = cli_args.logprefix

    # NOTE(review): this assignment is immediately repeated by the
    # `if config_path:` branch just below — one of the two looks redundant.
    if cli_args.config is not None:
        g_config_path = os.path.join(os.path.dirname(__file__), cli_args.config)

    # Resolve the config path: explicit --config wins, otherwise probe
    # $LLMS_CONFIG_PATH, ./llms.json and ~/.llms/llms.json in that order.
    config_path = cli_args.config
    if config_path:
        g_config_path = os.path.join(os.path.dirname(__file__), config_path)
    else:
        home_config_path = f"{os.environ.get('HOME')}/.llms/llms.json"
        check_paths = [
            "./llms.json",
            home_config_path,
        ]
        if os.environ.get("LLMS_CONFIG_PATH"):
            check_paths.insert(0, os.environ.get("LLMS_CONFIG_PATH"))

        for check_path in check_paths:
            g_config_path = os.path.join(os.path.dirname(__file__), check_path)
            if os.path.exists(g_config_path):
                break

    if cli_args.init:
        if os.path.exists(g_config_path):
            print(f"llms.json already exists at {g_config_path}")
            exit(1)
        # NOTE(review): home_config_path is only bound in the else-branch
        # above, so `--config <missing-file> --init` would hit a NameError
        # here — verify intended behavior.
        save_config_path = g_config_path or home_config_path
        asyncio.run(save_default_config(save_config_path))
        print(f"Created default config at {save_config_path}")
        exit(0)

    if not os.path.exists(g_config_path):
        print("Config file not found. Create one with --init or use --config <path>")
        exit(1)

    # read contents
    with open(g_config_path, "r") as f:
        config_json = f.read()
    init_llms(json.loads(config_json))
    asyncio.run(load_llms())

    # print names
    _log(f"enabled providers: {', '.join(g_handlers.keys())}")

    # `ls [provider...]` acts as an alias for --list with an optional filter
    filter_list = []
    if len(extra_args) > 0:
        arg = extra_args[0]
        if arg == 'ls':
            cli_args.list = True
            if len(extra_args) > 1:
                filter_list = extra_args[1:]

    if cli_args.list:
        # Show list of enabled providers and their models
        enabled = []  # NOTE(review): collected but never used afterwards
        for name, provider in g_handlers.items():
            if len(filter_list) > 0 and name not in filter_list:
                continue
            print(f"{name}:")
            enabled.append(name)
            for model in provider.models:
                print(f"  {model}")

        print_status()
        exit(0)

    if cli_args.serve is not None:
        port = int(cli_args.serve)

        app = web.Application()

        # OpenAI-compatible chat completions endpoint
        async def chat_handler(request):
            try:
                chat = await request.json()
                response = await chat_completion(chat)
                return web.json_response(response)
            except Exception as e:
                return web.json_response({"error": str(e)}, status=500)
        app.router.add_post('/v1/chat/completions', chat_handler)

        async def models_handler(request):
            return web.json_response(get_models())
        app.router.add_get('/models', models_handler)

        # Serve static files from ui/ directory
        script_dir = os.path.dirname(os.path.abspath(__file__))
        ui_path = os.path.join(script_dir, 'ui')
        if os.path.exists(ui_path):
            app.router.add_static('/ui/', ui_path, name='ui')

        async def not_found_handler(request):
            return web.Response(text="404: Not Found", status=404)
        app.router.add_get('/favicon.ico', not_found_handler)

        # Serve index.html from root
        index_path = os.path.join(script_dir, 'index.html')
        if os.path.exists(index_path):
            async def index_handler(request):
                return web.FileResponse(index_path)
            app.router.add_get('/', index_handler)

            # Serve index.html as fallback route (SPA routing)
            # NOTE(review): this catch-all is registered before the /ui.json
            # route below; confirm /ui.json still resolves as intended.
            async def fallback_route_handler(request):
                return web.FileResponse(index_path)
            app.router.add_route('*', '/{tail:.*}', fallback_route_handler)

        # Locate ui.json (~/.llms/ui.json preferred over ./ui.json).
        # NOTE(review): `ui_path` is reused here, clobbering the static-dir
        # path assigned above — harmless now, but fragile.
        ui_paths = [
            f"{os.environ.get('HOME')}/.llms/ui.json",
            "ui.json"
        ]
        for ui_path in ui_paths:
            if os.path.exists(ui_path):
                break
        if os.path.exists(ui_path):
            async def ui_json_handler(request):
                # Serve ui.json merged with config defaults + provider status
                with open(ui_path, "r") as f:
                    ui = json.load(f)
                if 'defaults' not in ui:
                    ui['defaults'] = g_config['defaults']
                enabled, disabled = provider_status()
                ui['status'] = {
                    "enabled": enabled,
                    "disabled": disabled
                }
                return web.json_response(ui)
            app.router.add_get('/ui.json', ui_json_handler)

        print(f"Starting server on port {port}...")
        web.run_app(app, host='0.0.0.0', port=port)
        exit(0)

    if cli_args.enable is not None:
        # Provider names may arrive comma-separated across several args.
        if cli_args.enable.endswith(','):
            cli_args.enable = cli_args.enable[:-1].strip()
        enable_providers = [cli_args.enable]
        all_providers = g_config['providers'].keys()
        if len(extra_args) > 0:
            for arg in extra_args:
                if arg.endswith(','):
                    arg = arg[:-1].strip()
                if arg in all_providers:
                    enable_providers.append(arg)
        for provider in enable_providers:
            if provider not in g_config['providers']:
                print(f"Provider {provider} not found")
                print(f"Available providers: {', '.join(g_config['providers'].keys())}")
                exit(1)
            if provider in g_config['providers']:
                g_config['providers'][provider]['enabled'] = True
                save_config(g_config)
                init_llms(g_config)
                print(f"\nEnabled provider {provider}:")
                printdump(g_config['providers'][provider])
        print_status()
        exit(0)

    if cli_args.disable is not None:
        # Mirrors the --enable handling above, flipping enabled to False.
        if cli_args.disable.endswith(','):
            cli_args.disable = cli_args.disable[:-1].strip()
        disable_providers = [cli_args.disable]
        all_providers = g_config['providers'].keys()
        if len(extra_args) > 0:
            for arg in extra_args:
                if arg.endswith(','):
                    arg = arg[:-1].strip()
                if arg in all_providers:
                    disable_providers.append(arg)
        for provider in disable_providers:
            if provider not in g_config['providers']:
                print(f"Provider {provider} not found")
                print(f"Available providers: {', '.join(g_config['providers'].keys())}")
                exit(1)
            if provider in g_config['providers']:
                g_config['providers'][provider]['enabled'] = False
                save_config(g_config)
                init_llms(g_config)
                print(f"\nDisabled provider {provider}")
                printdump(g_config['providers'][provider])
        print_status()
        exit(0)

    if cli_args.default is not None:
        # Persist the default text model into the config's defaults section.
        default_model = cli_args.default
        all_models = get_models()
        if default_model not in all_models:
            print(f"Model {default_model} not found")
            print(f"Available models: {', '.join(all_models)}")
            exit(1)
        default_text = g_config['defaults']['text']
        default_text['model'] = default_model
        save_config(g_config)
        print(f"\nDefault model set to: {default_model}")
        exit(0)

    if cli_args.update:
        asyncio.run(update_llms())
        print(f"{__file__} updated")
        exit(0)

    if cli_args.chat is not None or cli_args.image is not None or cli_args.audio is not None or cli_args.file is not None or len(extra_args) > 0:
        try:
            # Pick the default request template matching the media type sent;
            # an explicit --chat template file overrides all of them.
            chat = g_config['defaults']['text']
            if cli_args.image is not None:
                chat = g_config['defaults']['image']
            elif cli_args.audio is not None:
                chat = g_config['defaults']['audio']
            elif cli_args.file is not None:
                chat = g_config['defaults']['file']
            if cli_args.chat is not None:
                chat_path = os.path.join(os.path.dirname(__file__), cli_args.chat)
                if not os.path.exists(chat_path):
                    print(f"Chat request template not found: {chat_path}")
                    exit(1)
                _log(f"Using chat: {chat_path}")

                with open (chat_path, "r") as f:
                    chat_json = f.read()
                chat = json.loads(chat_json)

            if cli_args.system is not None:
                chat['messages'].insert(0, {'role': 'system', 'content': cli_args.system})

            if len(extra_args) > 0:
                prompt = ' '.join(extra_args)
                # replace content of last message if exists, else add
                last_msg = chat['messages'][-1]
                if last_msg['role'] == 'user':
                    last_msg['content'] = prompt
                else:
                    chat['messages'].append({'role': 'user', 'content': prompt})

            asyncio.run(cli_chat(chat, image=cli_args.image, audio=cli_args.audio, file=cli_args.file, raw=cli_args.raw))
            exit(0)
        except Exception as e:
            print(f"{cli_args.logprefix}Error: {e}")
            if cli_args.verbose:
                traceback.print_exc()
            exit(1)

    # show usage from ArgumentParser
    parser.print_help()
991
+
992
+
993
# Script entry point when run directly (not imported).
if __name__ == "__main__":
    main()