llms-py 2.0.34__py3-none-any.whl → 2.0.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llms/__init__.py +3 -1
- llms/main.py +743 -653
- llms/ui/ai.mjs +1 -1
- {llms_py-2.0.34.dist-info → llms_py-2.0.35.dist-info}/METADATA +1 -1
- {llms_py-2.0.34.dist-info → llms_py-2.0.35.dist-info}/RECORD +9 -9
- {llms_py-2.0.34.dist-info → llms_py-2.0.35.dist-info}/WHEEL +0 -0
- {llms_py-2.0.34.dist-info → llms_py-2.0.35.dist-info}/entry_points.txt +0 -0
- {llms_py-2.0.34.dist-info → llms_py-2.0.35.dist-info}/licenses/LICENSE +0 -0
- {llms_py-2.0.34.dist-info → llms_py-2.0.35.dist-info}/top_level.txt +0 -0
llms/main.py
CHANGED
|
@@ -6,104 +6,112 @@
|
|
|
6
6
|
# A lightweight CLI tool and OpenAI-compatible server for querying multiple Large Language Model (LLM) providers.
|
|
7
7
|
# Docs: https://github.com/ServiceStack/llms
|
|
8
8
|
|
|
9
|
-
import os
|
|
10
|
-
import time
|
|
11
|
-
import json
|
|
12
9
|
import argparse
|
|
13
10
|
import asyncio
|
|
14
|
-
import subprocess
|
|
15
11
|
import base64
|
|
12
|
+
import json
|
|
16
13
|
import mimetypes
|
|
17
|
-
import traceback
|
|
18
|
-
import sys
|
|
19
|
-
import site
|
|
20
|
-
import secrets
|
|
14
|
+
import os
|
|
21
15
|
import re
|
|
16
|
+
import secrets
|
|
17
|
+
import site
|
|
18
|
+
import subprocess
|
|
19
|
+
import sys
|
|
20
|
+
import time
|
|
21
|
+
import traceback
|
|
22
|
+
from importlib import resources # Py≥3.9 (pip install importlib_resources for 3.7/3.8)
|
|
22
23
|
from io import BytesIO
|
|
24
|
+
from pathlib import Path
|
|
23
25
|
from urllib.parse import parse_qs, urlencode
|
|
24
26
|
|
|
25
27
|
import aiohttp
|
|
26
28
|
from aiohttp import web
|
|
27
29
|
|
|
28
|
-
from pathlib import Path
|
|
29
|
-
from importlib import resources # Py≥3.9 (pip install importlib_resources for 3.7/3.8)
|
|
30
|
-
|
|
31
30
|
try:
|
|
32
31
|
from PIL import Image
|
|
32
|
+
|
|
33
33
|
HAS_PIL = True
|
|
34
34
|
except ImportError:
|
|
35
35
|
HAS_PIL = False
|
|
36
36
|
|
|
37
|
-
VERSION = "2.0.34"
|
|
37
|
+
VERSION = "2.0.35"
|
|
38
38
|
_ROOT = None
|
|
39
39
|
g_config_path = None
|
|
40
40
|
g_ui_path = None
|
|
41
41
|
g_config = None
|
|
42
42
|
g_handlers = {}
|
|
43
43
|
g_verbose = False
|
|
44
|
-
g_logprefix=""
|
|
45
|
-
g_default_model=""
|
|
44
|
+
g_logprefix = ""
|
|
45
|
+
g_default_model = ""
|
|
46
46
|
g_sessions = {} # OAuth session storage: {session_token: {userId, userName, displayName, profileUrl, email, created}}
|
|
47
47
|
g_oauth_states = {} # CSRF protection: {state: {created, redirect_uri}}
|
|
48
48
|
|
|
49
|
+
|
|
49
50
|
def _log(message):
|
|
50
51
|
"""Helper method for logging from the global polling task."""
|
|
51
52
|
if g_verbose:
|
|
52
53
|
print(f"{g_logprefix}{message}", flush=True)
|
|
53
54
|
|
|
55
|
+
|
|
54
56
|
def printdump(obj):
|
|
55
|
-
args = obj.__dict__ if hasattr(obj, '__dict__') else obj
|
|
57
|
+
args = obj.__dict__ if hasattr(obj, "__dict__") else obj
|
|
56
58
|
print(json.dumps(args, indent=2))
|
|
57
59
|
|
|
60
|
+
|
|
58
61
|
def print_chat(chat):
|
|
59
62
|
_log(f"Chat: {chat_summary(chat)}")
|
|
60
63
|
|
|
64
|
+
|
|
61
65
|
def chat_summary(chat):
|
|
62
66
|
"""Summarize chat completion request for logging."""
|
|
63
67
|
# replace image_url.url with <image>
|
|
64
68
|
clone = json.loads(json.dumps(chat))
|
|
65
|
-
for message in clone['messages']:
|
|
66
|
-
if
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
if
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
prefix = data.split(',', 1)[0]
|
|
82
|
-
item['file']['file_data'] = prefix + f",({len(data) - len(prefix)})"
|
|
69
|
+
for message in clone["messages"]:
|
|
70
|
+
if "content" in message and isinstance(message["content"], list):
|
|
71
|
+
for item in message["content"]:
|
|
72
|
+
if "image_url" in item:
|
|
73
|
+
if "url" in item["image_url"]:
|
|
74
|
+
url = item["image_url"]["url"]
|
|
75
|
+
prefix = url.split(",", 1)[0]
|
|
76
|
+
item["image_url"]["url"] = prefix + f",({len(url) - len(prefix)})"
|
|
77
|
+
elif "input_audio" in item:
|
|
78
|
+
if "data" in item["input_audio"]:
|
|
79
|
+
data = item["input_audio"]["data"]
|
|
80
|
+
item["input_audio"]["data"] = f"({len(data)})"
|
|
81
|
+
elif "file" in item and "file_data" in item["file"]:
|
|
82
|
+
data = item["file"]["file_data"]
|
|
83
|
+
prefix = data.split(",", 1)[0]
|
|
84
|
+
item["file"]["file_data"] = prefix + f",({len(data) - len(prefix)})"
|
|
83
85
|
return json.dumps(clone, indent=2)
|
|
84
86
|
|
|
87
|
+
|
|
85
88
|
def gemini_chat_summary(gemini_chat):
|
|
86
89
|
"""Summarize Gemini chat completion request for logging. Replace inline_data with size of content only"""
|
|
87
90
|
clone = json.loads(json.dumps(gemini_chat))
|
|
88
|
-
for content in clone['contents']:
|
|
89
|
-
for part in content['parts']:
|
|
90
|
-
if
|
|
91
|
-
data = part['inline_data']['data']
|
|
92
|
-
part['inline_data']['data'] = f"({len(data)})"
|
|
91
|
+
for content in clone["contents"]:
|
|
92
|
+
for part in content["parts"]:
|
|
93
|
+
if "inline_data" in part:
|
|
94
|
+
data = part["inline_data"]["data"]
|
|
95
|
+
part["inline_data"]["data"] = f"({len(data)})"
|
|
93
96
|
return json.dumps(clone, indent=2)
|
|
94
97
|
|
|
95
|
-
|
|
96
|
-
|
|
98
|
+
|
|
99
|
+
image_exts = ["png", "webp", "jpg", "jpeg", "gif", "bmp", "svg", "tiff", "ico"]
|
|
100
|
+
audio_exts = ["mp3", "wav", "ogg", "flac", "m4a", "opus", "webm"]
|
|
101
|
+
|
|
97
102
|
|
|
98
103
|
def is_file_path(path):
|
|
99
104
|
# macOs max path is 1023
|
|
100
105
|
return path and len(path) < 1024 and os.path.exists(path)
|
|
101
106
|
|
|
107
|
+
|
|
102
108
|
def is_url(url):
|
|
103
|
-
return url and (url.startswith('http://') or url.startswith('https://'))
|
|
109
|
+
return url and (url.startswith("http://") or url.startswith("https://"))
|
|
110
|
+
|
|
104
111
|
|
|
105
112
|
def get_filename(file):
|
|
106
|
-
return file.rsplit('/', 1)[1] if '/' in file else 'file'
|
|
113
|
+
return file.rsplit("/", 1)[1] if "/" in file else "file"
|
|
114
|
+
|
|
107
115
|
|
|
108
116
|
def parse_args_params(args_str):
|
|
109
117
|
"""Parse URL-encoded parameters and return a dictionary."""
|
|
@@ -119,9 +127,9 @@ def parse_args_params(args_str):
|
|
|
119
127
|
if len(values) == 1:
|
|
120
128
|
value = values[0]
|
|
121
129
|
# Try to convert to appropriate types
|
|
122
|
-
if value.lower() == 'true':
|
|
130
|
+
if value.lower() == "true":
|
|
123
131
|
result[key] = True
|
|
124
|
-
elif value.lower() == 'false':
|
|
132
|
+
elif value.lower() == "false":
|
|
125
133
|
result[key] = False
|
|
126
134
|
elif value.isdigit():
|
|
127
135
|
result[key] = int(value)
|
|
@@ -138,6 +146,7 @@ def parse_args_params(args_str):
|
|
|
138
146
|
|
|
139
147
|
return result
|
|
140
148
|
|
|
149
|
+
|
|
141
150
|
def apply_args_to_chat(chat, args_params):
|
|
142
151
|
"""Apply parsed arguments to the chat request."""
|
|
143
152
|
if not args_params:
|
|
@@ -146,19 +155,32 @@ def apply_args_to_chat(chat, args_params):
|
|
|
146
155
|
# Apply each parameter to the chat request
|
|
147
156
|
for key, value in args_params.items():
|
|
148
157
|
if isinstance(value, str):
|
|
149
|
-
if key ==
|
|
150
|
-
if
|
|
151
|
-
value = value.split(
|
|
152
|
-
elif
|
|
158
|
+
if key == "stop":
|
|
159
|
+
if "," in value:
|
|
160
|
+
value = value.split(",")
|
|
161
|
+
elif (
|
|
162
|
+
key == "max_completion_tokens"
|
|
163
|
+
or key == "max_tokens"
|
|
164
|
+
or key == "n"
|
|
165
|
+
or key == "seed"
|
|
166
|
+
or key == "top_logprobs"
|
|
167
|
+
):
|
|
153
168
|
value = int(value)
|
|
154
|
-
elif key ==
|
|
169
|
+
elif key == "temperature" or key == "top_p" or key == "frequency_penalty" or key == "presence_penalty":
|
|
155
170
|
value = float(value)
|
|
156
|
-
elif
|
|
171
|
+
elif (
|
|
172
|
+
key == "store"
|
|
173
|
+
or key == "logprobs"
|
|
174
|
+
or key == "enable_thinking"
|
|
175
|
+
or key == "parallel_tool_calls"
|
|
176
|
+
or key == "stream"
|
|
177
|
+
):
|
|
157
178
|
value = bool(value)
|
|
158
179
|
chat[key] = value
|
|
159
180
|
|
|
160
181
|
return chat
|
|
161
182
|
|
|
183
|
+
|
|
162
184
|
def is_base_64(data):
|
|
163
185
|
try:
|
|
164
186
|
base64.b64decode(data)
|
|
@@ -166,6 +188,7 @@ def is_base_64(data):
|
|
|
166
188
|
except Exception:
|
|
167
189
|
return False
|
|
168
190
|
|
|
191
|
+
|
|
169
192
|
def get_file_mime_type(filename):
|
|
170
193
|
mime_type, _ = mimetypes.guess_type(filename)
|
|
171
194
|
return mime_type or "application/octet-stream"
|
|
@@ -182,36 +205,38 @@ def price_to_string(price: float | int | str | None) -> str | None:
|
|
|
182
205
|
try:
|
|
183
206
|
price_float = float(price)
|
|
184
207
|
# Format with enough decimal places to avoid scientific notation
|
|
185
|
-
formatted = format(price_float, '.20f')
|
|
208
|
+
formatted = format(price_float, ".20f")
|
|
186
209
|
|
|
187
210
|
# Detect recurring 9s pattern (e.g., "...9999999")
|
|
188
211
|
# If we have 4 or more consecutive 9s, round up
|
|
189
|
-
if
|
|
212
|
+
if "9999" in formatted:
|
|
190
213
|
# Round up by adding a small amount and reformatting
|
|
191
214
|
# Find the position of the 9s to determine precision
|
|
192
215
|
import decimal
|
|
216
|
+
|
|
193
217
|
decimal.getcontext().prec = 28
|
|
194
218
|
d = decimal.Decimal(str(price_float))
|
|
195
219
|
# Round to one less decimal place than where the 9s start
|
|
196
|
-
nines_pos = formatted.find('9999')
|
|
220
|
+
nines_pos = formatted.find("9999")
|
|
197
221
|
if nines_pos > 0:
|
|
198
222
|
# Round up at the position before the 9s
|
|
199
|
-
decimal_places = nines_pos - formatted.find(
|
|
223
|
+
decimal_places = nines_pos - formatted.find(".") - 1
|
|
200
224
|
if decimal_places > 0:
|
|
201
|
-
quantize_str =
|
|
225
|
+
quantize_str = "0." + "0" * (decimal_places - 1) + "1"
|
|
202
226
|
d = d.quantize(decimal.Decimal(quantize_str), rounding=decimal.ROUND_UP)
|
|
203
227
|
result = str(d)
|
|
204
228
|
# Remove trailing zeros
|
|
205
|
-
if
|
|
206
|
-
result = result.rstrip(
|
|
229
|
+
if "." in result:
|
|
230
|
+
result = result.rstrip("0").rstrip(".")
|
|
207
231
|
return result
|
|
208
232
|
|
|
209
233
|
# Normal case: strip trailing zeros
|
|
210
|
-
return formatted.rstrip('0').rstrip('.')
|
|
234
|
+
return formatted.rstrip("0").rstrip(".")
|
|
211
235
|
except (ValueError, TypeError):
|
|
212
236
|
return None
|
|
213
237
|
|
|
214
|
-
|
|
238
|
+
|
|
239
|
+
def convert_image_if_needed(image_bytes, mimetype="image/png"):
|
|
215
240
|
"""
|
|
216
241
|
Convert and resize image to WebP if it exceeds configured limits.
|
|
217
242
|
|
|
@@ -226,16 +251,16 @@ def convert_image_if_needed(image_bytes, mimetype='image/png'):
|
|
|
226
251
|
return image_bytes, mimetype
|
|
227
252
|
|
|
228
253
|
# Get conversion config
|
|
229
|
-
convert_config = g_config.get(
|
|
254
|
+
convert_config = g_config.get("convert", {}).get("image", {}) if g_config else {}
|
|
230
255
|
if not convert_config:
|
|
231
256
|
return image_bytes, mimetype
|
|
232
257
|
|
|
233
|
-
max_size_str = convert_config.get(
|
|
234
|
-
max_length = convert_config.get(
|
|
258
|
+
max_size_str = convert_config.get("max_size", "1536x1024")
|
|
259
|
+
max_length = convert_config.get("max_length", 1.5 * 1024 * 1024) # 1.5MB
|
|
235
260
|
|
|
236
261
|
try:
|
|
237
262
|
# Parse max_size (e.g., "1536x1024")
|
|
238
|
-
max_width, max_height = map(int, max_size_str.split(
|
|
263
|
+
max_width, max_height = map(int, max_size_str.split("x"))
|
|
239
264
|
|
|
240
265
|
# Open image
|
|
241
266
|
with Image.open(BytesIO(image_bytes)) as img:
|
|
@@ -253,15 +278,15 @@ def convert_image_if_needed(image_bytes, mimetype='image/png'):
|
|
|
253
278
|
return image_bytes, mimetype
|
|
254
279
|
|
|
255
280
|
# Convert RGBA to RGB if necessary (WebP doesn't support transparency in RGB mode)
|
|
256
|
-
if img.mode in (
|
|
281
|
+
if img.mode in ("RGBA", "LA", "P"):
|
|
257
282
|
# Create a white background
|
|
258
|
-
background = Image.new(
|
|
259
|
-
if img.mode ==
|
|
260
|
-
img = img.convert(
|
|
261
|
-
background.paste(img, mask=img.split()[-1] if img.mode in (
|
|
283
|
+
background = Image.new("RGB", img.size, (255, 255, 255))
|
|
284
|
+
if img.mode == "P":
|
|
285
|
+
img = img.convert("RGBA")
|
|
286
|
+
background.paste(img, mask=img.split()[-1] if img.mode in ("RGBA", "LA") else None)
|
|
262
287
|
img = background
|
|
263
|
-
elif img.mode !=
|
|
264
|
-
img = img.convert(
|
|
288
|
+
elif img.mode != "RGB":
|
|
289
|
+
img = img.convert("RGB")
|
|
265
290
|
|
|
266
291
|
# Resize if needed (preserve aspect ratio)
|
|
267
292
|
if needs_resize:
|
|
@@ -270,39 +295,42 @@ def convert_image_if_needed(image_bytes, mimetype='image/png'):
|
|
|
270
295
|
|
|
271
296
|
# Convert to WebP
|
|
272
297
|
output = BytesIO()
|
|
273
|
-
img.save(output, format=
|
|
298
|
+
img.save(output, format="WEBP", quality=85, method=6)
|
|
274
299
|
converted_bytes = output.getvalue()
|
|
275
300
|
|
|
276
|
-
_log(
|
|
301
|
+
_log(
|
|
302
|
+
f"Converted image to WebP: {len(image_bytes)} bytes -> {len(converted_bytes)} bytes ({len(converted_bytes) * 100 // len(image_bytes)}%)"
|
|
303
|
+
)
|
|
277
304
|
|
|
278
|
-
return converted_bytes,
|
|
305
|
+
return converted_bytes, "image/webp"
|
|
279
306
|
|
|
280
307
|
except Exception as e:
|
|
281
308
|
_log(f"Error converting image: {e}")
|
|
282
309
|
# Return original if conversion fails
|
|
283
310
|
return image_bytes, mimetype
|
|
284
311
|
|
|
312
|
+
|
|
285
313
|
async def process_chat(chat):
|
|
286
314
|
if not chat:
|
|
287
315
|
raise Exception("No chat provided")
|
|
288
|
-
if
|
|
289
|
-
chat[
|
|
290
|
-
if
|
|
316
|
+
if "stream" not in chat:
|
|
317
|
+
chat["stream"] = False
|
|
318
|
+
if "messages" not in chat:
|
|
291
319
|
return chat
|
|
292
320
|
|
|
293
321
|
async with aiohttp.ClientSession() as session:
|
|
294
|
-
for message in chat[
|
|
295
|
-
if
|
|
322
|
+
for message in chat["messages"]:
|
|
323
|
+
if "content" not in message:
|
|
296
324
|
continue
|
|
297
325
|
|
|
298
|
-
if isinstance(message[
|
|
299
|
-
for item in message[
|
|
300
|
-
if
|
|
326
|
+
if isinstance(message["content"], list):
|
|
327
|
+
for item in message["content"]:
|
|
328
|
+
if "type" not in item:
|
|
301
329
|
continue
|
|
302
|
-
if item[
|
|
303
|
-
image_url = item[
|
|
304
|
-
if
|
|
305
|
-
url = image_url[
|
|
330
|
+
if item["type"] == "image_url" and "image_url" in item:
|
|
331
|
+
image_url = item["image_url"]
|
|
332
|
+
if "url" in image_url:
|
|
333
|
+
url = image_url["url"]
|
|
306
334
|
if is_url(url):
|
|
307
335
|
_log(f"Downloading image: {url}")
|
|
308
336
|
async with session.get(url, timeout=aiohttp.ClientTimeout(total=120)) as response:
|
|
@@ -310,12 +338,14 @@ async def process_chat(chat):
|
|
|
310
338
|
content = await response.read()
|
|
311
339
|
# get mimetype from response headers
|
|
312
340
|
mimetype = get_file_mime_type(get_filename(url))
|
|
313
|
-
if
|
|
314
|
-
mimetype = response.headers[
|
|
341
|
+
if "Content-Type" in response.headers:
|
|
342
|
+
mimetype = response.headers["Content-Type"]
|
|
315
343
|
# convert/resize image if needed
|
|
316
344
|
content, mimetype = convert_image_if_needed(content, mimetype)
|
|
317
345
|
# convert to data uri
|
|
318
|
-
image_url[
|
|
346
|
+
image_url["url"] = (
|
|
347
|
+
f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
|
|
348
|
+
)
|
|
319
349
|
elif is_file_path(url):
|
|
320
350
|
_log(f"Reading image: {url}")
|
|
321
351
|
with open(url, "rb") as f:
|
|
@@ -325,24 +355,28 @@ async def process_chat(chat):
|
|
|
325
355
|
# convert/resize image if needed
|
|
326
356
|
content, mimetype = convert_image_if_needed(content, mimetype)
|
|
327
357
|
# convert to data uri
|
|
328
|
-
image_url[
|
|
329
|
-
|
|
358
|
+
image_url["url"] = (
|
|
359
|
+
f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
|
|
360
|
+
)
|
|
361
|
+
elif url.startswith("data:"):
|
|
330
362
|
# Extract existing data URI and process it
|
|
331
|
-
if
|
|
332
|
-
prefix = url.split(
|
|
333
|
-
mimetype = prefix.split(
|
|
334
|
-
base64_data = url.split(
|
|
363
|
+
if ";base64," in url:
|
|
364
|
+
prefix = url.split(";base64,")[0]
|
|
365
|
+
mimetype = prefix.split(":")[1] if ":" in prefix else "image/png"
|
|
366
|
+
base64_data = url.split(";base64,")[1]
|
|
335
367
|
content = base64.b64decode(base64_data)
|
|
336
368
|
# convert/resize image if needed
|
|
337
369
|
content, mimetype = convert_image_if_needed(content, mimetype)
|
|
338
370
|
# update data uri with potentially converted image
|
|
339
|
-
image_url[
|
|
371
|
+
image_url["url"] = (
|
|
372
|
+
f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
|
|
373
|
+
)
|
|
340
374
|
else:
|
|
341
375
|
raise Exception(f"Invalid image: {url}")
|
|
342
|
-
elif item[
|
|
343
|
-
input_audio = item[
|
|
344
|
-
if
|
|
345
|
-
url = input_audio[
|
|
376
|
+
elif item["type"] == "input_audio" and "input_audio" in item:
|
|
377
|
+
input_audio = item["input_audio"]
|
|
378
|
+
if "data" in input_audio:
|
|
379
|
+
url = input_audio["data"]
|
|
346
380
|
mimetype = get_file_mime_type(get_filename(url))
|
|
347
381
|
if is_url(url):
|
|
348
382
|
_log(f"Downloading audio: {url}")
|
|
@@ -350,48 +384,53 @@ async def process_chat(chat):
|
|
|
350
384
|
response.raise_for_status()
|
|
351
385
|
content = await response.read()
|
|
352
386
|
# get mimetype from response headers
|
|
353
|
-
if
|
|
354
|
-
mimetype = response.headers[
|
|
387
|
+
if "Content-Type" in response.headers:
|
|
388
|
+
mimetype = response.headers["Content-Type"]
|
|
355
389
|
# convert to base64
|
|
356
|
-
input_audio[
|
|
357
|
-
input_audio[
|
|
390
|
+
input_audio["data"] = base64.b64encode(content).decode("utf-8")
|
|
391
|
+
input_audio["format"] = mimetype.rsplit("/", 1)[1]
|
|
358
392
|
elif is_file_path(url):
|
|
359
393
|
_log(f"Reading audio: {url}")
|
|
360
394
|
with open(url, "rb") as f:
|
|
361
395
|
content = f.read()
|
|
362
396
|
# convert to base64
|
|
363
|
-
input_audio[
|
|
364
|
-
input_audio[
|
|
397
|
+
input_audio["data"] = base64.b64encode(content).decode("utf-8")
|
|
398
|
+
input_audio["format"] = mimetype.rsplit("/", 1)[1]
|
|
365
399
|
elif is_base_64(url):
|
|
366
|
-
pass
|
|
400
|
+
pass # use base64 data as-is
|
|
367
401
|
else:
|
|
368
402
|
raise Exception(f"Invalid audio: {url}")
|
|
369
|
-
elif item[
|
|
370
|
-
file = item[
|
|
371
|
-
if
|
|
372
|
-
url = file[
|
|
403
|
+
elif item["type"] == "file" and "file" in item:
|
|
404
|
+
file = item["file"]
|
|
405
|
+
if "file_data" in file:
|
|
406
|
+
url = file["file_data"]
|
|
373
407
|
mimetype = get_file_mime_type(get_filename(url))
|
|
374
408
|
if is_url(url):
|
|
375
409
|
_log(f"Downloading file: {url}")
|
|
376
410
|
async with session.get(url, timeout=aiohttp.ClientTimeout(total=120)) as response:
|
|
377
411
|
response.raise_for_status()
|
|
378
412
|
content = await response.read()
|
|
379
|
-
file[
|
|
380
|
-
file[
|
|
413
|
+
file["filename"] = get_filename(url)
|
|
414
|
+
file["file_data"] = (
|
|
415
|
+
f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
|
|
416
|
+
)
|
|
381
417
|
elif is_file_path(url):
|
|
382
418
|
_log(f"Reading file: {url}")
|
|
383
419
|
with open(url, "rb") as f:
|
|
384
420
|
content = f.read()
|
|
385
|
-
file[
|
|
386
|
-
file[
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
421
|
+
file["filename"] = get_filename(url)
|
|
422
|
+
file["file_data"] = (
|
|
423
|
+
f"data:{mimetype};base64,{base64.b64encode(content).decode('utf-8')}"
|
|
424
|
+
)
|
|
425
|
+
elif url.startswith("data:"):
|
|
426
|
+
if "filename" not in file:
|
|
427
|
+
file["filename"] = "file"
|
|
428
|
+
pass # use base64 data as-is
|
|
391
429
|
else:
|
|
392
430
|
raise Exception(f"Invalid file: {url}")
|
|
393
431
|
return chat
|
|
394
432
|
|
|
433
|
+
|
|
395
434
|
class HTTPError(Exception):
|
|
396
435
|
def __init__(self, status, reason, body, headers=None):
|
|
397
436
|
self.status = status
|
|
@@ -400,6 +439,7 @@ class HTTPError(Exception):
|
|
|
400
439
|
self.headers = headers
|
|
401
440
|
super().__init__(f"HTTP {status} {reason}")
|
|
402
441
|
|
|
442
|
+
|
|
403
443
|
async def response_json(response):
|
|
404
444
|
text = await response.text()
|
|
405
445
|
if response.status >= 400:
|
|
@@ -408,49 +448,52 @@ async def response_json(response):
|
|
|
408
448
|
body = json.loads(text)
|
|
409
449
|
return body
|
|
410
450
|
|
|
451
|
+
|
|
411
452
|
class OpenAiProvider:
|
|
412
|
-
def __init__(self, base_url, api_key=None, models=
|
|
453
|
+
def __init__(self, base_url, api_key=None, models=None, **kwargs):
|
|
454
|
+
if models is None:
|
|
455
|
+
models = {}
|
|
413
456
|
self.base_url = base_url.strip("/")
|
|
414
457
|
self.api_key = api_key
|
|
415
458
|
self.models = models
|
|
416
459
|
|
|
417
460
|
# check if base_url ends with /v{\d} to handle providers with different versions (e.g. z.ai uses /v4)
|
|
418
|
-
last_segment = base_url.rsplit(
|
|
419
|
-
if last_segment.startswith(
|
|
461
|
+
last_segment = base_url.rsplit("/", 1)[1]
|
|
462
|
+
if last_segment.startswith("v") and last_segment[1:].isdigit():
|
|
420
463
|
self.chat_url = f"{base_url}/chat/completions"
|
|
421
464
|
else:
|
|
422
465
|
self.chat_url = f"{base_url}/v1/chat/completions"
|
|
423
466
|
|
|
424
|
-
self.headers = kwargs
|
|
425
|
-
"Content-Type": "application/json",
|
|
426
|
-
}
|
|
467
|
+
self.headers = kwargs.get("headers", {"Content-Type": "application/json"})
|
|
427
468
|
if api_key is not None:
|
|
428
469
|
self.headers["Authorization"] = f"Bearer {api_key}"
|
|
429
470
|
|
|
430
|
-
self.frequency_penalty = float(kwargs[
|
|
431
|
-
self.max_completion_tokens = int(kwargs[
|
|
432
|
-
self.n = int(kwargs[
|
|
433
|
-
self.parallel_tool_calls = bool(kwargs[
|
|
434
|
-
self.presence_penalty = float(kwargs[
|
|
435
|
-
self.prompt_cache_key = kwargs
|
|
436
|
-
self.reasoning_effort = kwargs
|
|
437
|
-
self.safety_identifier = kwargs
|
|
438
|
-
self.seed = int(kwargs[
|
|
439
|
-
self.service_tier = kwargs
|
|
440
|
-
self.stop = kwargs
|
|
441
|
-
self.store = bool(kwargs[
|
|
442
|
-
self.temperature = float(kwargs[
|
|
443
|
-
self.top_logprobs = int(kwargs[
|
|
444
|
-
self.top_p = float(kwargs[
|
|
445
|
-
self.verbosity = kwargs
|
|
446
|
-
self.stream = bool(kwargs[
|
|
447
|
-
self.enable_thinking = bool(kwargs[
|
|
448
|
-
self.pricing = kwargs
|
|
449
|
-
self.default_pricing = kwargs
|
|
450
|
-
self.check = kwargs
|
|
471
|
+
self.frequency_penalty = float(kwargs["frequency_penalty"]) if "frequency_penalty" in kwargs else None
|
|
472
|
+
self.max_completion_tokens = int(kwargs["max_completion_tokens"]) if "max_completion_tokens" in kwargs else None
|
|
473
|
+
self.n = int(kwargs["n"]) if "n" in kwargs else None
|
|
474
|
+
self.parallel_tool_calls = bool(kwargs["parallel_tool_calls"]) if "parallel_tool_calls" in kwargs else None
|
|
475
|
+
self.presence_penalty = float(kwargs["presence_penalty"]) if "presence_penalty" in kwargs else None
|
|
476
|
+
self.prompt_cache_key = kwargs.get("prompt_cache_key")
|
|
477
|
+
self.reasoning_effort = kwargs.get("reasoning_effort")
|
|
478
|
+
self.safety_identifier = kwargs.get("safety_identifier")
|
|
479
|
+
self.seed = int(kwargs["seed"]) if "seed" in kwargs else None
|
|
480
|
+
self.service_tier = kwargs.get("service_tier")
|
|
481
|
+
self.stop = kwargs.get("stop")
|
|
482
|
+
self.store = bool(kwargs["store"]) if "store" in kwargs else None
|
|
483
|
+
self.temperature = float(kwargs["temperature"]) if "temperature" in kwargs else None
|
|
484
|
+
self.top_logprobs = int(kwargs["top_logprobs"]) if "top_logprobs" in kwargs else None
|
|
485
|
+
self.top_p = float(kwargs["top_p"]) if "top_p" in kwargs else None
|
|
486
|
+
self.verbosity = kwargs.get("verbosity")
|
|
487
|
+
self.stream = bool(kwargs["stream"]) if "stream" in kwargs else None
|
|
488
|
+
self.enable_thinking = bool(kwargs["enable_thinking"]) if "enable_thinking" in kwargs else None
|
|
489
|
+
self.pricing = kwargs.get("pricing")
|
|
490
|
+
self.default_pricing = kwargs.get("default_pricing")
|
|
491
|
+
self.check = kwargs.get("check")
|
|
451
492
|
|
|
452
493
|
@classmethod
|
|
453
|
-
def test(cls, base_url=None, api_key=None, models=
|
|
494
|
+
def test(cls, base_url=None, api_key=None, models=None, **kwargs):
|
|
495
|
+
if models is None:
|
|
496
|
+
models = {}
|
|
454
497
|
return base_url and api_key and len(models) > 0
|
|
455
498
|
|
|
456
499
|
async def load(self):
|
|
@@ -468,68 +511,71 @@ class OpenAiProvider:
|
|
|
468
511
|
return None
|
|
469
512
|
|
|
470
513
|
def to_response(self, response, chat, started_at):
|
|
471
|
-
if
|
|
472
|
-
response[
|
|
473
|
-
response[
|
|
474
|
-
if chat is not None and
|
|
475
|
-
pricing = self.model_pricing(chat[
|
|
476
|
-
if pricing and
|
|
477
|
-
response[
|
|
514
|
+
if "metadata" not in response:
|
|
515
|
+
response["metadata"] = {}
|
|
516
|
+
response["metadata"]["duration"] = int((time.time() - started_at) * 1000)
|
|
517
|
+
if chat is not None and "model" in chat:
|
|
518
|
+
pricing = self.model_pricing(chat["model"])
|
|
519
|
+
if pricing and "input" in pricing and "output" in pricing:
|
|
520
|
+
response["metadata"]["pricing"] = f"{pricing['input']}/{pricing['output']}"
|
|
478
521
|
_log(json.dumps(response, indent=2))
|
|
479
522
|
return response
|
|
480
523
|
|
|
481
524
|
async def chat(self, chat):
|
|
482
|
-
chat[
|
|
525
|
+
chat["model"] = self.provider_model(chat["model"]) or chat["model"]
|
|
483
526
|
|
|
484
527
|
# with open(os.path.join(os.path.dirname(__file__), 'chat.wip.json'), "w") as f:
|
|
485
528
|
# f.write(json.dumps(chat, indent=2))
|
|
486
529
|
|
|
487
530
|
if self.frequency_penalty is not None:
|
|
488
|
-
chat[
|
|
531
|
+
chat["frequency_penalty"] = self.frequency_penalty
|
|
489
532
|
if self.max_completion_tokens is not None:
|
|
490
|
-
chat[
|
|
533
|
+
chat["max_completion_tokens"] = self.max_completion_tokens
|
|
491
534
|
if self.n is not None:
|
|
492
|
-
chat[
|
|
535
|
+
chat["n"] = self.n
|
|
493
536
|
if self.parallel_tool_calls is not None:
|
|
494
|
-
chat[
|
|
537
|
+
chat["parallel_tool_calls"] = self.parallel_tool_calls
|
|
495
538
|
if self.presence_penalty is not None:
|
|
496
|
-
chat[
|
|
539
|
+
chat["presence_penalty"] = self.presence_penalty
|
|
497
540
|
if self.prompt_cache_key is not None:
|
|
498
|
-
chat[
|
|
541
|
+
chat["prompt_cache_key"] = self.prompt_cache_key
|
|
499
542
|
if self.reasoning_effort is not None:
|
|
500
|
-
chat[
|
|
543
|
+
chat["reasoning_effort"] = self.reasoning_effort
|
|
501
544
|
if self.safety_identifier is not None:
|
|
502
|
-
chat[
|
|
545
|
+
chat["safety_identifier"] = self.safety_identifier
|
|
503
546
|
if self.seed is not None:
|
|
504
|
-
chat[
|
|
547
|
+
chat["seed"] = self.seed
|
|
505
548
|
if self.service_tier is not None:
|
|
506
|
-
chat[
|
|
549
|
+
chat["service_tier"] = self.service_tier
|
|
507
550
|
if self.stop is not None:
|
|
508
|
-
chat[
|
|
551
|
+
chat["stop"] = self.stop
|
|
509
552
|
if self.store is not None:
|
|
510
|
-
chat[
|
|
553
|
+
chat["store"] = self.store
|
|
511
554
|
if self.temperature is not None:
|
|
512
|
-
chat[
|
|
555
|
+
chat["temperature"] = self.temperature
|
|
513
556
|
if self.top_logprobs is not None:
|
|
514
|
-
chat[
|
|
557
|
+
chat["top_logprobs"] = self.top_logprobs
|
|
515
558
|
if self.top_p is not None:
|
|
516
|
-
chat[
|
|
559
|
+
chat["top_p"] = self.top_p
|
|
517
560
|
if self.verbosity is not None:
|
|
518
|
-
chat[
|
|
561
|
+
chat["verbosity"] = self.verbosity
|
|
519
562
|
if self.enable_thinking is not None:
|
|
520
|
-
chat[
|
|
563
|
+
chat["enable_thinking"] = self.enable_thinking
|
|
521
564
|
|
|
522
565
|
chat = await process_chat(chat)
|
|
523
566
|
_log(f"POST {self.chat_url}")
|
|
524
567
|
_log(chat_summary(chat))
|
|
525
568
|
# remove metadata if any (conflicts with some providers, e.g. Z.ai)
|
|
526
|
-
chat.pop(
|
|
569
|
+
chat.pop("metadata", None)
|
|
527
570
|
|
|
528
571
|
async with aiohttp.ClientSession() as session:
|
|
529
572
|
started_at = time.time()
|
|
530
|
-
async with session.post(
|
|
573
|
+
async with session.post(
|
|
574
|
+
self.chat_url, headers=self.headers, data=json.dumps(chat), timeout=aiohttp.ClientTimeout(total=120)
|
|
575
|
+
) as response:
|
|
531
576
|
return self.to_response(await response_json(response), chat, started_at)
|
|
532
577
|
|
|
578
|
+
|
|
533
579
|
class OllamaProvider(OpenAiProvider):
|
|
534
580
|
def __init__(self, base_url, models, all_models=False, **kwargs):
|
|
535
581
|
super().__init__(base_url=base_url, models=models, **kwargs)
|
|
@@ -544,10 +590,12 @@ class OllamaProvider(OpenAiProvider):
|
|
|
544
590
|
try:
|
|
545
591
|
async with aiohttp.ClientSession() as session:
|
|
546
592
|
_log(f"GET {self.base_url}/api/tags")
|
|
547
|
-
async with session.get(
|
|
593
|
+
async with session.get(
|
|
594
|
+
f"{self.base_url}/api/tags", headers=self.headers, timeout=aiohttp.ClientTimeout(total=120)
|
|
595
|
+
) as response:
|
|
548
596
|
data = await response_json(response)
|
|
549
|
-
for model in data.get(
|
|
550
|
-
name = model[
|
|
597
|
+
for model in data.get("models", []):
|
|
598
|
+
name = model["model"]
|
|
551
599
|
if name.endswith(":latest"):
|
|
552
600
|
name = name[:-7]
|
|
553
601
|
ret[name] = name
|
|
@@ -565,152 +613,151 @@ class OllamaProvider(OpenAiProvider):
|
|
|
565
613
|
self.models = {**default_models, **self.models}
|
|
566
614
|
|
|
567
615
|
@classmethod
|
|
568
|
-
def test(cls, base_url=None, models=
|
|
616
|
+
def test(cls, base_url=None, models=None, all_models=False, **kwargs):
|
|
617
|
+
if models is None:
|
|
618
|
+
models = {}
|
|
569
619
|
return base_url and (len(models) > 0 or all_models)
|
|
570
620
|
|
|
621
|
+
|
|
571
622
|
class GoogleOpenAiProvider(OpenAiProvider):
|
|
572
623
|
def __init__(self, api_key, models, **kwargs):
|
|
573
624
|
super().__init__(base_url="https://generativelanguage.googleapis.com", api_key=api_key, models=models, **kwargs)
|
|
574
625
|
self.chat_url = "https://generativelanguage.googleapis.com/v1beta/chat/completions"
|
|
575
626
|
|
|
576
627
|
@classmethod
|
|
577
|
-
def test(cls, api_key=None, models=
|
|
628
|
+
def test(cls, api_key=None, models=None, **kwargs):
|
|
629
|
+
if models is None:
|
|
630
|
+
models = {}
|
|
578
631
|
return api_key and len(models) > 0
|
|
579
632
|
|
|
633
|
+
|
|
580
634
|
class GoogleProvider(OpenAiProvider):
|
|
581
635
|
def __init__(self, models, api_key, safety_settings=None, thinking_config=None, curl=False, **kwargs):
|
|
582
636
|
super().__init__(base_url="https://generativelanguage.googleapis.com", api_key=api_key, models=models, **kwargs)
|
|
583
637
|
self.safety_settings = safety_settings
|
|
584
638
|
self.thinking_config = thinking_config
|
|
585
639
|
self.curl = curl
|
|
586
|
-
self.headers = kwargs
|
|
587
|
-
"Content-Type": "application/json",
|
|
588
|
-
}
|
|
640
|
+
self.headers = kwargs.get("headers", {"Content-Type": "application/json"})
|
|
589
641
|
# Google fails when using Authorization header, use query string param instead
|
|
590
|
-
if
|
|
591
|
-
del self.headers[
|
|
642
|
+
if "Authorization" in self.headers:
|
|
643
|
+
del self.headers["Authorization"]
|
|
592
644
|
|
|
593
645
|
@classmethod
|
|
594
|
-
def test(cls, api_key=None, models=
|
|
646
|
+
def test(cls, api_key=None, models=None, **kwargs):
|
|
647
|
+
if models is None:
|
|
648
|
+
models = {}
|
|
595
649
|
return api_key is not None and len(models) > 0
|
|
596
650
|
|
|
597
651
|
async def chat(self, chat):
|
|
598
|
-
chat[
|
|
652
|
+
chat["model"] = self.provider_model(chat["model"]) or chat["model"]
|
|
599
653
|
|
|
600
654
|
chat = await process_chat(chat)
|
|
601
|
-
|
|
655
|
+
generation_config = {}
|
|
602
656
|
|
|
603
657
|
# Filter out system messages and convert to proper Gemini format
|
|
604
658
|
contents = []
|
|
605
659
|
system_prompt = None
|
|
606
660
|
|
|
607
661
|
async with aiohttp.ClientSession() as session:
|
|
608
|
-
for message in chat[
|
|
609
|
-
if message[
|
|
610
|
-
content = message[
|
|
662
|
+
for message in chat["messages"]:
|
|
663
|
+
if message["role"] == "system":
|
|
664
|
+
content = message["content"]
|
|
611
665
|
if isinstance(content, list):
|
|
612
666
|
for item in content:
|
|
613
|
-
if
|
|
614
|
-
system_prompt = item[
|
|
667
|
+
if "text" in item:
|
|
668
|
+
system_prompt = item["text"]
|
|
615
669
|
break
|
|
616
670
|
elif isinstance(content, str):
|
|
617
671
|
system_prompt = content
|
|
618
|
-
elif
|
|
619
|
-
if isinstance(message[
|
|
672
|
+
elif "content" in message:
|
|
673
|
+
if isinstance(message["content"], list):
|
|
620
674
|
parts = []
|
|
621
|
-
for item in message[
|
|
622
|
-
if
|
|
623
|
-
if item[
|
|
624
|
-
image_url = item[
|
|
625
|
-
if
|
|
675
|
+
for item in message["content"]:
|
|
676
|
+
if "type" in item:
|
|
677
|
+
if item["type"] == "image_url" and "image_url" in item:
|
|
678
|
+
image_url = item["image_url"]
|
|
679
|
+
if "url" not in image_url:
|
|
626
680
|
continue
|
|
627
|
-
url = image_url[
|
|
628
|
-
if not url.startswith(
|
|
629
|
-
raise(Exception("Image was not downloaded: " + url))
|
|
681
|
+
url = image_url["url"]
|
|
682
|
+
if not url.startswith("data:"):
|
|
683
|
+
raise (Exception("Image was not downloaded: " + url))
|
|
630
684
|
# Extract mime type from data uri
|
|
631
|
-
mimetype = url.split(
|
|
632
|
-
|
|
633
|
-
parts.append({
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
}
|
|
638
|
-
})
|
|
639
|
-
elif item['type'] == 'input_audio' and 'input_audio' in item:
|
|
640
|
-
input_audio = item['input_audio']
|
|
641
|
-
if 'data' not in input_audio:
|
|
685
|
+
mimetype = url.split(";", 1)[0].split(":", 1)[1] if ";" in url else "image/png"
|
|
686
|
+
base64_data = url.split(",", 1)[1]
|
|
687
|
+
parts.append({"inline_data": {"mime_type": mimetype, "data": base64_data}})
|
|
688
|
+
elif item["type"] == "input_audio" and "input_audio" in item:
|
|
689
|
+
input_audio = item["input_audio"]
|
|
690
|
+
if "data" not in input_audio:
|
|
642
691
|
continue
|
|
643
|
-
data = input_audio[
|
|
644
|
-
format = input_audio[
|
|
692
|
+
data = input_audio["data"]
|
|
693
|
+
format = input_audio["format"]
|
|
645
694
|
mimetype = f"audio/{format}"
|
|
646
|
-
parts.append({
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
}
|
|
651
|
-
})
|
|
652
|
-
elif item['type'] == 'file' and 'file' in item:
|
|
653
|
-
file = item['file']
|
|
654
|
-
if 'file_data' not in file:
|
|
695
|
+
parts.append({"inline_data": {"mime_type": mimetype, "data": data}})
|
|
696
|
+
elif item["type"] == "file" and "file" in item:
|
|
697
|
+
file = item["file"]
|
|
698
|
+
if "file_data" not in file:
|
|
655
699
|
continue
|
|
656
|
-
data = file[
|
|
657
|
-
if not data.startswith(
|
|
658
|
-
raise(Exception("File was not downloaded: " + data))
|
|
700
|
+
data = file["file_data"]
|
|
701
|
+
if not data.startswith("data:"):
|
|
702
|
+
raise (Exception("File was not downloaded: " + data))
|
|
659
703
|
# Extract mime type from data uri
|
|
660
|
-
mimetype =
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
"
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
text = item['text']
|
|
704
|
+
mimetype = (
|
|
705
|
+
data.split(";", 1)[0].split(":", 1)[1]
|
|
706
|
+
if ";" in data
|
|
707
|
+
else "application/octet-stream"
|
|
708
|
+
)
|
|
709
|
+
base64_data = data.split(",", 1)[1]
|
|
710
|
+
parts.append({"inline_data": {"mime_type": mimetype, "data": base64_data}})
|
|
711
|
+
if "text" in item:
|
|
712
|
+
text = item["text"]
|
|
670
713
|
parts.append({"text": text})
|
|
671
714
|
if len(parts) > 0:
|
|
672
|
-
contents.append(
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
715
|
+
contents.append(
|
|
716
|
+
{
|
|
717
|
+
"role": message["role"]
|
|
718
|
+
if "role" in message and message["role"] == "user"
|
|
719
|
+
else "model",
|
|
720
|
+
"parts": parts,
|
|
721
|
+
}
|
|
722
|
+
)
|
|
676
723
|
else:
|
|
677
|
-
content = message[
|
|
678
|
-
contents.append(
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
724
|
+
content = message["content"]
|
|
725
|
+
contents.append(
|
|
726
|
+
{
|
|
727
|
+
"role": message["role"] if "role" in message and message["role"] == "user" else "model",
|
|
728
|
+
"parts": [{"text": content}],
|
|
729
|
+
}
|
|
730
|
+
)
|
|
682
731
|
|
|
683
732
|
gemini_chat = {
|
|
684
733
|
"contents": contents,
|
|
685
734
|
}
|
|
686
735
|
|
|
687
736
|
if self.safety_settings:
|
|
688
|
-
gemini_chat[
|
|
737
|
+
gemini_chat["safetySettings"] = self.safety_settings
|
|
689
738
|
|
|
690
739
|
# Add system instruction if present
|
|
691
740
|
if system_prompt is not None:
|
|
692
|
-
gemini_chat[
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
if
|
|
697
|
-
|
|
698
|
-
if
|
|
699
|
-
|
|
700
|
-
if
|
|
701
|
-
|
|
702
|
-
if
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
if 'thinkingConfig' in chat:
|
|
708
|
-
generationConfig['thinkingConfig'] = chat['thinkingConfig']
|
|
741
|
+
gemini_chat["systemInstruction"] = {"parts": [{"text": system_prompt}]}
|
|
742
|
+
|
|
743
|
+
if "max_completion_tokens" in chat:
|
|
744
|
+
generation_config["maxOutputTokens"] = chat["max_completion_tokens"]
|
|
745
|
+
if "stop" in chat:
|
|
746
|
+
generation_config["stopSequences"] = [chat["stop"]]
|
|
747
|
+
if "temperature" in chat:
|
|
748
|
+
generation_config["temperature"] = chat["temperature"]
|
|
749
|
+
if "top_p" in chat:
|
|
750
|
+
generation_config["topP"] = chat["top_p"]
|
|
751
|
+
if "top_logprobs" in chat:
|
|
752
|
+
generation_config["topK"] = chat["top_logprobs"]
|
|
753
|
+
|
|
754
|
+
if "thinkingConfig" in chat:
|
|
755
|
+
generation_config["thinkingConfig"] = chat["thinkingConfig"]
|
|
709
756
|
elif self.thinking_config:
|
|
710
|
-
|
|
757
|
+
generation_config["thinkingConfig"] = self.thinking_config
|
|
711
758
|
|
|
712
|
-
if len(
|
|
713
|
-
gemini_chat[
|
|
759
|
+
if len(generation_config) > 0:
|
|
760
|
+
gemini_chat["generationConfig"] = generation_config
|
|
714
761
|
|
|
715
762
|
started_at = int(time.time() * 1000)
|
|
716
763
|
gemini_chat_url = f"https://generativelanguage.googleapis.com/v1beta/models/{chat['model']}:generateContent?key={self.api_key}"
|
|
@@ -721,107 +768,111 @@ class GoogleProvider(OpenAiProvider):
|
|
|
721
768
|
|
|
722
769
|
if self.curl:
|
|
723
770
|
curl_args = [
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
771
|
+
"curl",
|
|
772
|
+
"-X",
|
|
773
|
+
"POST",
|
|
774
|
+
"-H",
|
|
775
|
+
"Content-Type: application/json",
|
|
776
|
+
"-d",
|
|
777
|
+
json.dumps(gemini_chat),
|
|
778
|
+
gemini_chat_url,
|
|
729
779
|
]
|
|
730
780
|
try:
|
|
731
781
|
o = subprocess.run(curl_args, check=True, capture_output=True, text=True, timeout=120)
|
|
732
782
|
obj = json.loads(o.stdout)
|
|
733
783
|
except Exception as e:
|
|
734
|
-
raise Exception(f"Error executing curl: {e}")
|
|
784
|
+
raise Exception(f"Error executing curl: {e}") from e
|
|
735
785
|
else:
|
|
736
|
-
async with session.post(
|
|
786
|
+
async with session.post(
|
|
787
|
+
gemini_chat_url,
|
|
788
|
+
headers=self.headers,
|
|
789
|
+
data=json.dumps(gemini_chat),
|
|
790
|
+
timeout=aiohttp.ClientTimeout(total=120),
|
|
791
|
+
) as res:
|
|
737
792
|
obj = await response_json(res)
|
|
738
793
|
_log(f"google response:\n{json.dumps(obj, indent=2)}")
|
|
739
794
|
|
|
740
795
|
response = {
|
|
741
796
|
"id": f"chatcmpl-{started_at}",
|
|
742
797
|
"created": started_at,
|
|
743
|
-
"model": obj.get(
|
|
798
|
+
"model": obj.get("modelVersion", chat["model"]),
|
|
744
799
|
}
|
|
745
800
|
choices = []
|
|
746
|
-
|
|
747
|
-
if 'error' in obj:
|
|
801
|
+
if "error" in obj:
|
|
748
802
|
_log(f"Error: {obj['error']}")
|
|
749
|
-
raise Exception(obj[
|
|
750
|
-
for candidate in obj[
|
|
803
|
+
raise Exception(obj["error"]["message"])
|
|
804
|
+
for i, candidate in enumerate(obj["candidates"]):
|
|
751
805
|
role = "assistant"
|
|
752
|
-
if
|
|
753
|
-
role = "assistant" if candidate[
|
|
806
|
+
if "content" in candidate and "role" in candidate["content"]:
|
|
807
|
+
role = "assistant" if candidate["content"]["role"] == "model" else candidate["content"]["role"]
|
|
754
808
|
|
|
755
809
|
# Safely extract content from all text parts
|
|
756
810
|
content = ""
|
|
757
811
|
reasoning = ""
|
|
758
|
-
if
|
|
812
|
+
if "content" in candidate and "parts" in candidate["content"]:
|
|
759
813
|
text_parts = []
|
|
760
814
|
reasoning_parts = []
|
|
761
|
-
for part in candidate[
|
|
762
|
-
if
|
|
763
|
-
if
|
|
764
|
-
reasoning_parts.append(part[
|
|
815
|
+
for part in candidate["content"]["parts"]:
|
|
816
|
+
if "text" in part:
|
|
817
|
+
if "thought" in part and part["thought"]:
|
|
818
|
+
reasoning_parts.append(part["text"])
|
|
765
819
|
else:
|
|
766
|
-
text_parts.append(part[
|
|
767
|
-
content =
|
|
768
|
-
reasoning =
|
|
820
|
+
text_parts.append(part["text"])
|
|
821
|
+
content = " ".join(text_parts)
|
|
822
|
+
reasoning = " ".join(reasoning_parts)
|
|
769
823
|
|
|
770
824
|
choice = {
|
|
771
825
|
"index": i,
|
|
772
|
-
"finish_reason": candidate.get(
|
|
826
|
+
"finish_reason": candidate.get("finishReason", "stop"),
|
|
773
827
|
"message": {
|
|
774
828
|
"role": role,
|
|
775
829
|
"content": content,
|
|
776
830
|
},
|
|
777
831
|
}
|
|
778
832
|
if reasoning:
|
|
779
|
-
choice[
|
|
833
|
+
choice["message"]["reasoning"] = reasoning
|
|
780
834
|
choices.append(choice)
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
usage =
|
|
785
|
-
|
|
786
|
-
"
|
|
787
|
-
"
|
|
788
|
-
"prompt_tokens": usage['promptTokenCount'],
|
|
835
|
+
response["choices"] = choices
|
|
836
|
+
if "usageMetadata" in obj:
|
|
837
|
+
usage = obj["usageMetadata"]
|
|
838
|
+
response["usage"] = {
|
|
839
|
+
"completion_tokens": usage["candidatesTokenCount"],
|
|
840
|
+
"total_tokens": usage["totalTokenCount"],
|
|
841
|
+
"prompt_tokens": usage["promptTokenCount"],
|
|
789
842
|
}
|
|
790
843
|
return self.to_response(response, chat, started_at)
|
|
791
844
|
|
|
845
|
+
|
|
792
846
|
def get_models():
    """Return the sorted, de-duplicated model ids of all registered providers.

    Returns:
        list: Every model id found in any ``g_handlers`` provider's
        ``models``, each listed once, in ascending order.
    """
    # A set keeps de-duplication linear instead of scanning a list per model.
    unique_models = set()
    for provider in g_handlers.values():
        unique_models.update(provider.models)
    return sorted(unique_models)
|
|
800
854
|
|
|
855
|
+
|
|
801
856
|
def get_active_models():
    """Describe each distinct model together with the provider that serves it.

    When several providers offer the same model id, the first provider in
    ``g_handlers`` iteration order wins.

    Returns:
        list[dict]: One entry per model id with the keys ``id``, ``provider``,
        ``provider_model`` and ``pricing``, sorted by ``id``.
    """
    seen = set()
    active = []
    # provider_id (not ``id``) avoids shadowing the builtin.
    for provider_id, provider in g_handlers.items():
        for model, provider_model in provider.models.items():
            if model in seen:
                continue
            seen.add(model)
            active.append(
                {
                    "id": model,
                    "provider": provider_id,
                    "provider_model": provider_model,
                    "pricing": provider.model_pricing(model),
                }
            )
    active.sort(key=lambda entry: entry["id"])
    return active
|
|
818
868
|
|
|
869
|
+
|
|
819
870
|
async def chat_completion(chat):
|
|
820
|
-
model = chat[
|
|
871
|
+
model = chat["model"]
|
|
821
872
|
# get first provider that has the model
|
|
822
873
|
candidate_providers = [name for name, provider in g_handlers.items() if model in provider.models]
|
|
823
874
|
if len(candidate_providers) == 0:
|
|
824
|
-
raise(Exception(f"Model {model} not found"))
|
|
875
|
+
raise (Exception(f"Model {model} not found"))
|
|
825
876
|
|
|
826
877
|
first_exception = None
|
|
827
878
|
for name in candidate_providers:
|
|
@@ -839,9 +890,10 @@ async def chat_completion(chat):
|
|
|
839
890
|
# If we get here, all providers failed
|
|
840
891
|
raise first_exception
|
|
841
892
|
|
|
893
|
+
|
|
842
894
|
async def cli_chat(chat, image=None, audio=None, file=None, args=None, raw=False):
|
|
843
895
|
if g_default_model:
|
|
844
|
-
chat[
|
|
896
|
+
chat["model"] = g_default_model
|
|
845
897
|
|
|
846
898
|
# Apply args parameters to chat request
|
|
847
899
|
if args:
|
|
@@ -850,91 +902,65 @@ async def cli_chat(chat, image=None, audio=None, file=None, args=None, raw=False
|
|
|
850
902
|
# process_chat downloads the image, just adding the reference here
|
|
851
903
|
if image is not None:
|
|
852
904
|
first_message = None
|
|
853
|
-
for message in chat[
|
|
854
|
-
if message[
|
|
905
|
+
for message in chat["messages"]:
|
|
906
|
+
if message["role"] == "user":
|
|
855
907
|
first_message = message
|
|
856
908
|
break
|
|
857
|
-
image_content = {
|
|
858
|
-
|
|
859
|
-
"
|
|
860
|
-
"url": image
|
|
861
|
-
}
|
|
862
|
-
}
|
|
863
|
-
if 'content' in first_message:
|
|
864
|
-
if isinstance(first_message['content'], list):
|
|
909
|
+
image_content = {"type": "image_url", "image_url": {"url": image}}
|
|
910
|
+
if "content" in first_message:
|
|
911
|
+
if isinstance(first_message["content"], list):
|
|
865
912
|
image_url = None
|
|
866
|
-
for item in first_message[
|
|
867
|
-
if
|
|
868
|
-
image_url = item[
|
|
913
|
+
for item in first_message["content"]:
|
|
914
|
+
if "image_url" in item:
|
|
915
|
+
image_url = item["image_url"]
|
|
869
916
|
# If no image_url, add one
|
|
870
917
|
if image_url is None:
|
|
871
|
-
first_message[
|
|
918
|
+
first_message["content"].insert(0, image_content)
|
|
872
919
|
else:
|
|
873
|
-
image_url[
|
|
920
|
+
image_url["url"] = image
|
|
874
921
|
else:
|
|
875
|
-
first_message[
|
|
876
|
-
image_content,
|
|
877
|
-
{ "type": "text", "text": first_message['content'] }
|
|
878
|
-
]
|
|
922
|
+
first_message["content"] = [image_content, {"type": "text", "text": first_message["content"]}]
|
|
879
923
|
if audio is not None:
|
|
880
924
|
first_message = None
|
|
881
|
-
for message in chat[
|
|
882
|
-
if message[
|
|
925
|
+
for message in chat["messages"]:
|
|
926
|
+
if message["role"] == "user":
|
|
883
927
|
first_message = message
|
|
884
928
|
break
|
|
885
|
-
audio_content = {
|
|
886
|
-
|
|
887
|
-
"
|
|
888
|
-
"data": audio,
|
|
889
|
-
"format": "mp3"
|
|
890
|
-
}
|
|
891
|
-
}
|
|
892
|
-
if 'content' in first_message:
|
|
893
|
-
if isinstance(first_message['content'], list):
|
|
929
|
+
audio_content = {"type": "input_audio", "input_audio": {"data": audio, "format": "mp3"}}
|
|
930
|
+
if "content" in first_message:
|
|
931
|
+
if isinstance(first_message["content"], list):
|
|
894
932
|
input_audio = None
|
|
895
|
-
for item in first_message[
|
|
896
|
-
if
|
|
897
|
-
input_audio = item[
|
|
933
|
+
for item in first_message["content"]:
|
|
934
|
+
if "input_audio" in item:
|
|
935
|
+
input_audio = item["input_audio"]
|
|
898
936
|
# If no input_audio, add one
|
|
899
937
|
if input_audio is None:
|
|
900
|
-
first_message[
|
|
938
|
+
first_message["content"].insert(0, audio_content)
|
|
901
939
|
else:
|
|
902
|
-
input_audio[
|
|
940
|
+
input_audio["data"] = audio
|
|
903
941
|
else:
|
|
904
|
-
first_message[
|
|
905
|
-
audio_content,
|
|
906
|
-
{ "type": "text", "text": first_message['content'] }
|
|
907
|
-
]
|
|
942
|
+
first_message["content"] = [audio_content, {"type": "text", "text": first_message["content"]}]
|
|
908
943
|
if file is not None:
|
|
909
944
|
first_message = None
|
|
910
|
-
for message in chat[
|
|
911
|
-
if message[
|
|
945
|
+
for message in chat["messages"]:
|
|
946
|
+
if message["role"] == "user":
|
|
912
947
|
first_message = message
|
|
913
948
|
break
|
|
914
|
-
file_content = {
|
|
915
|
-
|
|
916
|
-
"
|
|
917
|
-
"filename": get_filename(file),
|
|
918
|
-
"file_data": file
|
|
919
|
-
}
|
|
920
|
-
}
|
|
921
|
-
if 'content' in first_message:
|
|
922
|
-
if isinstance(first_message['content'], list):
|
|
949
|
+
file_content = {"type": "file", "file": {"filename": get_filename(file), "file_data": file}}
|
|
950
|
+
if "content" in first_message:
|
|
951
|
+
if isinstance(first_message["content"], list):
|
|
923
952
|
file_data = None
|
|
924
|
-
for item in first_message[
|
|
925
|
-
if
|
|
926
|
-
file_data = item[
|
|
953
|
+
for item in first_message["content"]:
|
|
954
|
+
if "file" in item:
|
|
955
|
+
file_data = item["file"]
|
|
927
956
|
# If no file_data, add one
|
|
928
957
|
if file_data is None:
|
|
929
|
-
first_message[
|
|
958
|
+
first_message["content"].insert(0, file_content)
|
|
930
959
|
else:
|
|
931
|
-
file_data[
|
|
932
|
-
file_data[
|
|
960
|
+
file_data["filename"] = get_filename(file)
|
|
961
|
+
file_data["file_data"] = file
|
|
933
962
|
else:
|
|
934
|
-
first_message[
|
|
935
|
-
file_content,
|
|
936
|
-
{ "type": "text", "text": first_message['content'] }
|
|
937
|
-
]
|
|
963
|
+
first_message["content"] = [file_content, {"type": "text", "text": first_message["content"]}]
|
|
938
964
|
|
|
939
965
|
if g_verbose:
|
|
940
966
|
printdump(chat)
|
|
@@ -945,7 +971,7 @@ async def cli_chat(chat, image=None, audio=None, file=None, args=None, raw=False
|
|
|
945
971
|
print(json.dumps(response, indent=2))
|
|
946
972
|
exit(0)
|
|
947
973
|
else:
|
|
948
|
-
answer = response[
|
|
974
|
+
answer = response["choices"][0]["message"]["content"]
|
|
949
975
|
print(answer)
|
|
950
976
|
except HTTPError as e:
|
|
951
977
|
# HTTP error (4xx, 5xx)
|
|
@@ -960,9 +986,11 @@ async def cli_chat(chat, image=None, audio=None, file=None, args=None, raw=False
|
|
|
960
986
|
print(f"Timeout error: {e}")
|
|
961
987
|
exit(1)
|
|
962
988
|
|
|
989
|
+
|
|
963
990
|
def config_str(key):
    """Return the top-level config value stored under ``key``.

    Returns:
        The value from ``g_config`` when it is present and truthy, otherwise
        ``None`` (missing keys and falsy values are both reported as ``None``).
    """
    return g_config.get(key) or None
|
|
965
992
|
|
|
993
|
+
|
|
966
994
|
def init_llms(config):
|
|
967
995
|
global g_config, g_handlers
|
|
968
996
|
|
|
@@ -975,51 +1003,55 @@ def init_llms(config):
|
|
|
975
1003
|
|
|
976
1004
|
# if g_verbose:
|
|
977
1005
|
# printdump(g_config)
|
|
978
|
-
providers = g_config[
|
|
1006
|
+
providers = g_config["providers"]
|
|
979
1007
|
|
|
980
1008
|
for name, orig in providers.items():
|
|
981
1009
|
definition = orig.copy()
|
|
982
|
-
provider_type = definition[
|
|
983
|
-
if
|
|
1010
|
+
provider_type = definition["type"]
|
|
1011
|
+
if "enabled" in definition and not definition["enabled"]:
|
|
984
1012
|
continue
|
|
985
1013
|
|
|
986
1014
|
# Replace API keys with environment variables if they start with $
|
|
987
|
-
if
|
|
988
|
-
value = definition[
|
|
1015
|
+
if "api_key" in definition:
|
|
1016
|
+
value = definition["api_key"]
|
|
989
1017
|
if isinstance(value, str) and value.startswith("$"):
|
|
990
|
-
definition[
|
|
1018
|
+
definition["api_key"] = os.environ.get(value[1:], "")
|
|
991
1019
|
|
|
992
1020
|
# Create a copy of definition without the 'type' key for constructor kwargs
|
|
993
|
-
constructor_kwargs = {k: v for k, v in definition.items() if k !=
|
|
994
|
-
constructor_kwargs[
|
|
1021
|
+
constructor_kwargs = {k: v for k, v in definition.items() if k != "type" and k != "enabled"}
|
|
1022
|
+
constructor_kwargs["headers"] = g_config["defaults"]["headers"].copy()
|
|
995
1023
|
|
|
996
|
-
if provider_type ==
|
|
1024
|
+
if provider_type == "OpenAiProvider" and OpenAiProvider.test(**constructor_kwargs):
|
|
997
1025
|
g_handlers[name] = OpenAiProvider(**constructor_kwargs)
|
|
998
|
-
elif provider_type ==
|
|
1026
|
+
elif provider_type == "OllamaProvider" and OllamaProvider.test(**constructor_kwargs):
|
|
999
1027
|
g_handlers[name] = OllamaProvider(**constructor_kwargs)
|
|
1000
|
-
elif provider_type ==
|
|
1028
|
+
elif provider_type == "GoogleProvider" and GoogleProvider.test(**constructor_kwargs):
|
|
1001
1029
|
g_handlers[name] = GoogleProvider(**constructor_kwargs)
|
|
1002
|
-
elif provider_type ==
|
|
1030
|
+
elif provider_type == "GoogleOpenAiProvider" and GoogleOpenAiProvider.test(**constructor_kwargs):
|
|
1003
1031
|
g_handlers[name] = GoogleOpenAiProvider(**constructor_kwargs)
|
|
1004
1032
|
|
|
1005
1033
|
return g_handlers
|
|
1006
1034
|
|
|
1035
|
+
|
|
1007
1036
|
async def load_llms():
    """Await ``load()`` on every registered provider, one after another."""
    _log("Loading providers...")
    for handler in g_handlers.values():
        await handler.load()
|
|
1012
1041
|
|
|
1042
|
+
|
|
1013
1043
|
def save_config(config):
    """Make ``config`` the active configuration and persist it as JSON.

    Args:
        config: JSON-serialisable configuration; becomes the new ``g_config``.

    Raises:
        TypeError, ValueError: If ``config`` cannot be serialised. The file at
            ``g_config_path`` is left untouched in that case.
        OSError: If the config file cannot be written.
    """
    global g_config
    g_config = config
    # Serialise before opening: open(..., "w") truncates immediately, so a
    # failure inside json.dump would otherwise wipe the existing config file.
    serialized = json.dumps(g_config, indent=4)
    with open(g_config_path, "w", encoding="utf-8") as f:
        f.write(serialized)
    _log(f"Saved config to {g_config_path}")
|
|
1019
1049
|
|
|
1050
|
+
|
|
1020
1051
|
def github_url(filename):
    """Build the raw GitHub URL of ``filename`` inside the repo's llms/ folder on main."""
    base = "https://raw.githubusercontent.com/ServiceStack/llms/refs/heads/main/llms"
    return f"{base}/{filename}"
|
|
1022
1053
|
|
|
1054
|
+
|
|
1023
1055
|
async def get_text(url):
|
|
1024
1056
|
async with aiohttp.ClientSession() as session:
|
|
1025
1057
|
_log(f"GET {url}")
|
|
@@ -1029,25 +1061,29 @@ async def get_text(url):
|
|
|
1029
1061
|
raise HTTPError(resp.status, reason=resp.reason, body=text, headers=dict(resp.headers))
|
|
1030
1062
|
return text
|
|
1031
1063
|
|
|
1064
|
+
|
|
1032
1065
|
async def save_text_url(url, save_path):
    """Download ``url`` as text, write it to ``save_path`` and return the text.

    Args:
        url: Address passed to ``get_text``.
        save_path: Destination file; missing parent directories are created.

    Returns:
        The downloaded text.

    Raises:
        Whatever ``get_text`` raises for a failed download, and ``OSError``
        if the destination cannot be written.
    """
    text = await get_text(url)
    parent_dir = os.path.dirname(save_path)
    # A bare filename has no directory part, and os.makedirs("") would raise.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(save_path, "w", encoding="utf-8") as f:
        f.write(text)
    return text
|
|
1038
1071
|
|
|
1072
|
+
|
|
1039
1073
|
async def save_default_config(config_path):
    """Fetch the default llms.json, store it at ``config_path`` and activate it.

    The downloaded JSON is parsed and becomes the new ``g_config``.
    """
    global g_config
    default_config_url = github_url("llms.json")
    downloaded = await save_text_url(default_config_url, config_path)
    g_config = json.loads(downloaded)
|
|
1043
1077
|
|
|
1078
|
+
|
|
1044
1079
|
def provider_status():
    """Split the configured providers into enabled and disabled names.

    Returns:
        tuple[list, list]: ``(enabled, disabled)``, both sorted. ``enabled``
        holds the keys of ``g_handlers``; ``disabled`` holds every provider in
        ``g_config["providers"]`` that has no handler.
    """
    enabled = sorted(g_handlers)
    # Test membership against the dict itself rather than a list copy.
    disabled = sorted(name for name in g_config["providers"] if name not in g_handlers)
    return enabled, disabled
|
|
1050
1085
|
|
|
1086
|
+
|
|
1051
1087
|
def print_status():
|
|
1052
1088
|
enabled, disabled = provider_status()
|
|
1053
1089
|
if len(enabled) > 0:
|
|
@@ -1059,9 +1095,11 @@ def print_status():
|
|
|
1059
1095
|
else:
|
|
1060
1096
|
print("Disabled: None")
|
|
1061
1097
|
|
|
1098
|
+
|
|
1062
1099
|
def home_llms_path(filename):
    """Return the path of ``filename`` inside the user's ``~/.llms`` directory.

    ``$HOME`` is used when it is set, so existing setups resolve exactly as
    before. Otherwise the home directory comes from ``os.path.expanduser``,
    instead of producing a literal ``None/.llms/...`` path.
    """
    home = os.environ.get("HOME") or os.path.expanduser("~")
    return f"{home}/.llms/{filename}"
|
|
1064
1101
|
|
|
1102
|
+
|
|
1065
1103
|
def get_config_path():
|
|
1066
1104
|
home_config_path = home_llms_path("llms.json")
|
|
1067
1105
|
check_paths = [
|
|
@@ -1077,22 +1115,21 @@ def get_config_path():
|
|
|
1077
1115
|
return g_config_path
|
|
1078
1116
|
return None
|
|
1079
1117
|
|
|
1118
|
+
|
|
1080
1119
|
def get_ui_path():
    """Locate ui.json, preferring the copy under ~/.llms over the working directory.

    Returns:
        The first candidate path that exists, or ``None`` when neither does.
    """
    candidates = (home_llms_path("ui.json"), "ui.json")
    return next((candidate for candidate in candidates if os.path.exists(candidate)), None)
|
|
1089
1125
|
|
|
1126
|
+
|
|
1090
1127
|
def enable_provider(provider):
|
|
1091
1128
|
msg = None
|
|
1092
|
-
provider_config = g_config[
|
|
1093
|
-
provider_config[
|
|
1094
|
-
if
|
|
1095
|
-
api_key = provider_config[
|
|
1129
|
+
provider_config = g_config["providers"][provider]
|
|
1130
|
+
provider_config["enabled"] = True
|
|
1131
|
+
if "api_key" in provider_config:
|
|
1132
|
+
api_key = provider_config["api_key"]
|
|
1096
1133
|
if isinstance(api_key, str):
|
|
1097
1134
|
if api_key.startswith("$"):
|
|
1098
1135
|
if not os.environ.get(api_key[1:], ""):
|
|
@@ -1103,12 +1140,14 @@ def enable_provider(provider):
|
|
|
1103
1140
|
init_llms(g_config)
|
|
1104
1141
|
return provider_config, msg
|
|
1105
1142
|
|
|
1143
|
+
|
|
1106
1144
|
def disable_provider(provider):
    """Flag ``provider`` as disabled, persist the config and rebuild the handlers."""
    providers = g_config["providers"]
    providers[provider]["enabled"] = False
    save_config(g_config)
    init_llms(g_config)
|
|
1111
1149
|
|
|
1150
|
+
|
|
1112
1151
|
def resolve_root():
|
|
1113
1152
|
# Try to find the resource root directory
|
|
1114
1153
|
# When installed as a package, static files may be in different locations
|
|
@@ -1119,7 +1158,7 @@ def resolve_root():
|
|
|
1119
1158
|
# Try to access the package resources
|
|
1120
1159
|
pkg_files = resources.files("llms")
|
|
1121
1160
|
# Check if ui directory exists in package resources
|
|
1122
|
-
if hasattr(pkg_files,
|
|
1161
|
+
if hasattr(pkg_files, "is_dir") and (pkg_files / "ui").is_dir():
|
|
1123
1162
|
_log(f"RESOURCE ROOT (package): {pkg_files}")
|
|
1124
1163
|
return pkg_files
|
|
1125
1164
|
except (FileNotFoundError, AttributeError, TypeError):
|
|
@@ -1132,8 +1171,9 @@ def resolve_root():
|
|
|
1132
1171
|
# Method 1b: Look for the installed package and check for UI files
|
|
1133
1172
|
try:
|
|
1134
1173
|
import llms
|
|
1174
|
+
|
|
1135
1175
|
# If llms is a package, check its directory
|
|
1136
|
-
if hasattr(llms,
|
|
1176
|
+
if hasattr(llms, "__path__"):
|
|
1137
1177
|
# It's a package
|
|
1138
1178
|
package_path = Path(llms.__path__[0])
|
|
1139
1179
|
|
|
@@ -1170,21 +1210,25 @@ def resolve_root():
|
|
|
1170
1210
|
|
|
1171
1211
|
# Add site-packages directories
|
|
1172
1212
|
for site_dir in site.getsitepackages():
|
|
1173
|
-
possible_roots.extend(
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1213
|
+
possible_roots.extend(
|
|
1214
|
+
[
|
|
1215
|
+
Path(site_dir),
|
|
1216
|
+
Path(site_dir).parent,
|
|
1217
|
+
Path(site_dir).parent / "share",
|
|
1218
|
+
]
|
|
1219
|
+
)
|
|
1178
1220
|
|
|
1179
1221
|
# Add user site directory
|
|
1180
1222
|
try:
|
|
1181
1223
|
user_site = site.getusersitepackages()
|
|
1182
1224
|
if user_site:
|
|
1183
|
-
possible_roots.extend(
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1225
|
+
possible_roots.extend(
|
|
1226
|
+
[
|
|
1227
|
+
Path(user_site),
|
|
1228
|
+
Path(user_site).parent,
|
|
1229
|
+
Path(user_site).parent / "share",
|
|
1230
|
+
]
|
|
1231
|
+
)
|
|
1188
1232
|
except AttributeError:
|
|
1189
1233
|
pass
|
|
1190
1234
|
|
|
@@ -1195,12 +1239,17 @@ def resolve_root():
|
|
|
1195
1239
|
homebrew_prefixes = ["/opt/homebrew", "/usr/local"] # Apple Silicon and Intel
|
|
1196
1240
|
for prefix in homebrew_prefixes:
|
|
1197
1241
|
if Path(prefix).exists():
|
|
1198
|
-
homebrew_roots.extend(
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1242
|
+
homebrew_roots.extend(
|
|
1243
|
+
[
|
|
1244
|
+
Path(prefix),
|
|
1245
|
+
Path(prefix) / "share",
|
|
1246
|
+
Path(prefix) / "lib" / "python3.11" / "site-packages",
|
|
1247
|
+
Path(prefix)
|
|
1248
|
+
/ "lib"
|
|
1249
|
+
/ f"python{sys.version_info.major}.{sys.version_info.minor}"
|
|
1250
|
+
/ "site-packages",
|
|
1251
|
+
]
|
|
1252
|
+
)
|
|
1204
1253
|
|
|
1205
1254
|
possible_roots.extend(homebrew_roots)
|
|
1206
1255
|
|
|
@@ -1232,26 +1281,29 @@ def resolve_root():
|
|
|
1232
1281
|
_log(f"RESOURCE ROOT (fallback): {from_file}")
|
|
1233
1282
|
return from_file
|
|
1234
1283
|
|
|
1284
|
+
|
|
1235
1285
|
def resource_exists(resource_path):
    """Check whether a resource exists, for both Path-like and plain paths.

    Args:
        resource_path: An object exposing ``is_file()`` (``pathlib.Path`` or an
            importlib.resources Traversable) or a plain filesystem path.

    Returns:
        bool: The result of ``is_file()`` when available, otherwise of
        ``os.path.exists``; ``False`` if the check itself fails.
    """
    try:
        if hasattr(resource_path, "is_file"):
            return resource_path.is_file()
        return os.path.exists(resource_path)
    except (OSError, AttributeError):
        # A failed check means "not available"; always answer with a bool.
        return False
|
|
1244
1294
|
|
|
1295
|
+
|
|
1245
1296
|
def read_resource_text(resource_path):
    """Read a resource as UTF-8 text.

    Args:
        resource_path: An object exposing ``read_text()`` (``pathlib.Path`` or
            an importlib.resources Traversable) or a plain filesystem path.

    Returns:
        str: The decoded file contents.
    """
    if hasattr(resource_path, "read_text"):
        # Pin the encoding so both branches decode identically instead of
        # this one depending on the platform's locale default.
        return resource_path.read_text(encoding="utf-8")
    with open(resource_path, encoding="utf-8") as f:
        return f.read()
|
|
1251
1302
|
|
|
1303
|
+
|
|
1252
1304
|
def read_resource_file_bytes(resource_file):
|
|
1253
1305
|
try:
|
|
1254
|
-
if hasattr(_ROOT,
|
|
1306
|
+
if hasattr(_ROOT, "joinpath"):
|
|
1255
1307
|
# importlib.resources Traversable
|
|
1256
1308
|
index_resource = _ROOT.joinpath(resource_file)
|
|
1257
1309
|
if index_resource.is_file():
|
|
@@ -1264,6 +1316,7 @@ def read_resource_file_bytes(resource_file):
|
|
|
1264
1316
|
except (OSError, PermissionError, AttributeError) as e:
|
|
1265
1317
|
_log(f"Error reading resource bytes: {e}")
|
|
1266
1318
|
|
|
1319
|
+
|
|
1267
1320
|
async def check_models(provider_name, model_names=None):
|
|
1268
1321
|
"""
|
|
1269
1322
|
Check validity of models for a specific provider by sending a ping message.
|
|
@@ -1281,7 +1334,7 @@ async def check_models(provider_name, model_names=None):
|
|
|
1281
1334
|
models_to_check = []
|
|
1282
1335
|
|
|
1283
1336
|
# Determine which models to check
|
|
1284
|
-
if model_names is None or (len(model_names) == 1 and model_names[0] ==
|
|
1337
|
+
if model_names is None or (len(model_names) == 1 and model_names[0] == "all"):
|
|
1285
1338
|
# Check all models for this provider
|
|
1286
1339
|
models_to_check = list(provider.models.keys())
|
|
1287
1340
|
else:
|
|
@@ -1296,12 +1349,14 @@ async def check_models(provider_name, model_names=None):
|
|
|
1296
1349
|
print(f"No models to check for provider '{provider_name}'")
|
|
1297
1350
|
return
|
|
1298
1351
|
|
|
1299
|
-
print(
|
|
1352
|
+
print(
|
|
1353
|
+
f"\nChecking {len(models_to_check)} model{'' if len(models_to_check) == 1 else 's'} for provider '{provider_name}':\n"
|
|
1354
|
+
)
|
|
1300
1355
|
|
|
1301
1356
|
# Test each model
|
|
1302
1357
|
for model in models_to_check:
|
|
1303
1358
|
# Create a simple ping chat request
|
|
1304
|
-
chat = (provider.check or g_config[
|
|
1359
|
+
chat = (provider.check or g_config["defaults"]["check"]).copy()
|
|
1305
1360
|
chat["model"] = model
|
|
1306
1361
|
|
|
1307
1362
|
started_at = time.time()
|
|
@@ -1311,7 +1366,7 @@ async def check_models(provider_name, model_names=None):
|
|
|
1311
1366
|
duration_ms = int((time.time() - started_at) * 1000)
|
|
1312
1367
|
|
|
1313
1368
|
# Check if we got a valid response
|
|
1314
|
-
if response and
|
|
1369
|
+
if response and "choices" in response and len(response["choices"]) > 0:
|
|
1315
1370
|
print(f" ✓ {model:<40} ({duration_ms}ms)")
|
|
1316
1371
|
else:
|
|
1317
1372
|
print(f" ✗ {model:<40} Invalid response format")
|
|
@@ -1321,30 +1376,35 @@ async def check_models(provider_name, model_names=None):
|
|
|
1321
1376
|
try:
|
|
1322
1377
|
# Try to parse error body for more details
|
|
1323
1378
|
error_body = json.loads(e.body) if e.body else {}
|
|
1324
|
-
if
|
|
1325
|
-
error = error_body[
|
|
1379
|
+
if "error" in error_body:
|
|
1380
|
+
error = error_body["error"]
|
|
1326
1381
|
if isinstance(error, dict):
|
|
1327
|
-
if
|
|
1382
|
+
if "message" in error and isinstance(error["message"], str):
|
|
1328
1383
|
# OpenRouter
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
error_msg += f" ({error['provider']})"
|
|
1384
|
+
error_msg = error["message"]
|
|
1385
|
+
if "code" in error:
|
|
1386
|
+
error_msg = f"{error['code']} {error_msg}"
|
|
1387
|
+
if "metadata" in error and "raw" in error["metadata"]:
|
|
1388
|
+
error_msg += f" - {error['metadata']['raw']}"
|
|
1389
|
+
if "provider" in error:
|
|
1390
|
+
error_msg += f" ({error['provider']})"
|
|
1337
1391
|
elif isinstance(error, str):
|
|
1338
1392
|
error_msg = error
|
|
1339
|
-
elif
|
|
1340
|
-
if isinstance(error_body[
|
|
1341
|
-
error_msg = error_body[
|
|
1342
|
-
elif
|
|
1393
|
+
elif "message" in error_body:
|
|
1394
|
+
if isinstance(error_body["message"], str):
|
|
1395
|
+
error_msg = error_body["message"]
|
|
1396
|
+
elif (
|
|
1397
|
+
isinstance(error_body["message"], dict)
|
|
1398
|
+
and "detail" in error_body["message"]
|
|
1399
|
+
and isinstance(error_body["message"]["detail"], list)
|
|
1400
|
+
):
|
|
1343
1401
|
# codestral error format
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1402
|
+
error_msg = error_body["message"]["detail"][0]["msg"]
|
|
1403
|
+
if (
|
|
1404
|
+
"loc" in error_body["message"]["detail"][0]
|
|
1405
|
+
and len(error_body["message"]["detail"][0]["loc"]) > 0
|
|
1406
|
+
):
|
|
1407
|
+
error_msg += f" (in {' '.join(error_body['message']['detail'][0]['loc'])})"
|
|
1348
1408
|
except Exception as parse_error:
|
|
1349
1409
|
_log(f"Error parsing error body: {parse_error}")
|
|
1350
1410
|
error_msg = e.body[:100] if e.body else f"HTTP {e.status}"
|
|
@@ -1359,6 +1419,7 @@ async def check_models(provider_name, model_names=None):
|
|
|
1359
1419
|
|
|
1360
1420
|
print()
|
|
1361
1421
|
|
|
1422
|
+
|
|
1362
1423
|
def text_from_resource(filename):
|
|
1363
1424
|
global _ROOT
|
|
1364
1425
|
resource_path = _ROOT / filename
|
|
@@ -1369,12 +1430,14 @@ def text_from_resource(filename):
|
|
|
1369
1430
|
_log(f"Error reading resource config {filename}: {e}")
|
|
1370
1431
|
return None
|
|
1371
1432
|
|
|
1433
|
+
|
|
1372
1434
|
def text_from_file(filename):
|
|
1373
1435
|
if os.path.exists(filename):
|
|
1374
|
-
with open(filename, "
|
|
1436
|
+
with open(filename, encoding="utf-8") as f:
|
|
1375
1437
|
return f.read()
|
|
1376
1438
|
return None
|
|
1377
1439
|
|
|
1440
|
+
|
|
1378
1441
|
async def text_from_resource_or_url(filename):
|
|
1379
1442
|
text = text_from_resource(filename)
|
|
1380
1443
|
if not text:
|
|
@@ -1386,6 +1449,7 @@ async def text_from_resource_or_url(filename):
|
|
|
1386
1449
|
raise e
|
|
1387
1450
|
return text
|
|
1388
1451
|
|
|
1452
|
+
|
|
1389
1453
|
async def save_home_configs():
|
|
1390
1454
|
home_config_path = home_llms_path("llms.json")
|
|
1391
1455
|
home_ui_path = home_llms_path("ui.json")
|
|
@@ -1397,19 +1461,20 @@ async def save_home_configs():
|
|
|
1397
1461
|
try:
|
|
1398
1462
|
if not os.path.exists(home_config_path):
|
|
1399
1463
|
config_json = await text_from_resource_or_url("llms.json")
|
|
1400
|
-
with open(home_config_path, "w") as f:
|
|
1464
|
+
with open(home_config_path, "w", encoding="utf-8") as f:
|
|
1401
1465
|
f.write(config_json)
|
|
1402
1466
|
_log(f"Created default config at {home_config_path}")
|
|
1403
1467
|
|
|
1404
1468
|
if not os.path.exists(home_ui_path):
|
|
1405
1469
|
ui_json = await text_from_resource_or_url("ui.json")
|
|
1406
|
-
with open(home_ui_path, "w") as f:
|
|
1470
|
+
with open(home_ui_path, "w", encoding="utf-8") as f:
|
|
1407
1471
|
f.write(ui_json)
|
|
1408
1472
|
_log(f"Created default ui config at {home_ui_path}")
|
|
1409
|
-
except Exception
|
|
1473
|
+
except Exception:
|
|
1410
1474
|
print("Could not create llms.json. Create one with --init or use --config <path>")
|
|
1411
1475
|
exit(1)
|
|
1412
1476
|
|
|
1477
|
+
|
|
1413
1478
|
async def reload_providers():
|
|
1414
1479
|
global g_config, g_handlers
|
|
1415
1480
|
g_handlers = init_llms(g_config)
|
|
@@ -1417,6 +1482,7 @@ async def reload_providers():
|
|
|
1417
1482
|
_log(f"{len(g_handlers)} providers loaded")
|
|
1418
1483
|
return g_handlers
|
|
1419
1484
|
|
|
1485
|
+
|
|
1420
1486
|
async def watch_config_files(config_path, ui_path, interval=1):
|
|
1421
1487
|
"""Watch config files and reload providers when they change"""
|
|
1422
1488
|
global g_config
|
|
@@ -1444,7 +1510,7 @@ async def watch_config_files(config_path, ui_path, interval=1):
|
|
|
1444
1510
|
|
|
1445
1511
|
try:
|
|
1446
1512
|
# Reload llms.json
|
|
1447
|
-
with open(config_path
|
|
1513
|
+
with open(config_path) as f:
|
|
1448
1514
|
g_config = json.load(f)
|
|
1449
1515
|
|
|
1450
1516
|
# Reload providers
|
|
@@ -1470,41 +1536,53 @@ async def watch_config_files(config_path, ui_path, interval=1):
|
|
|
1470
1536
|
except FileNotFoundError:
|
|
1471
1537
|
pass
|
|
1472
1538
|
|
|
1539
|
+
|
|
1473
1540
|
def main():
|
|
1474
1541
|
global _ROOT, g_verbose, g_default_model, g_logprefix, g_config, g_config_path, g_ui_path
|
|
1475
1542
|
|
|
1476
1543
|
parser = argparse.ArgumentParser(description=f"llms v{VERSION}")
|
|
1477
|
-
parser.add_argument(
|
|
1478
|
-
parser.add_argument(
|
|
1479
|
-
|
|
1480
|
-
parser.add_argument(
|
|
1481
|
-
parser.add_argument(
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
parser.add_argument(
|
|
1485
|
-
parser.add_argument(
|
|
1486
|
-
parser.add_argument(
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
parser.add_argument(
|
|
1494
|
-
|
|
1495
|
-
parser.add_argument(
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
parser.add_argument(
|
|
1501
|
-
|
|
1544
|
+
parser.add_argument("--config", default=None, help="Path to config file", metavar="FILE")
|
|
1545
|
+
parser.add_argument("-m", "--model", default=None, help="Model to use")
|
|
1546
|
+
|
|
1547
|
+
parser.add_argument("--chat", default=None, help="OpenAI Chat Completion Request to send", metavar="REQUEST")
|
|
1548
|
+
parser.add_argument(
|
|
1549
|
+
"-s", "--system", default=None, help="System prompt to use for chat completion", metavar="PROMPT"
|
|
1550
|
+
)
|
|
1551
|
+
parser.add_argument("--image", default=None, help="Image input to use in chat completion")
|
|
1552
|
+
parser.add_argument("--audio", default=None, help="Audio input to use in chat completion")
|
|
1553
|
+
parser.add_argument("--file", default=None, help="File input to use in chat completion")
|
|
1554
|
+
parser.add_argument(
|
|
1555
|
+
"--args",
|
|
1556
|
+
default=None,
|
|
1557
|
+
help='URL-encoded parameters to add to chat request (e.g. "temperature=0.7&seed=111")',
|
|
1558
|
+
metavar="PARAMS",
|
|
1559
|
+
)
|
|
1560
|
+
parser.add_argument("--raw", action="store_true", help="Return raw AI JSON response")
|
|
1561
|
+
|
|
1562
|
+
parser.add_argument(
|
|
1563
|
+
"--list", action="store_true", help="Show list of enabled providers and their models (alias ls provider?)"
|
|
1564
|
+
)
|
|
1565
|
+
parser.add_argument("--check", default=None, help="Check validity of models for a provider", metavar="PROVIDER")
|
|
1566
|
+
|
|
1567
|
+
parser.add_argument(
|
|
1568
|
+
"--serve", default=None, help="Port to start an OpenAI Chat compatible server on", metavar="PORT"
|
|
1569
|
+
)
|
|
1570
|
+
|
|
1571
|
+
parser.add_argument("--enable", default=None, help="Enable a provider", metavar="PROVIDER")
|
|
1572
|
+
parser.add_argument("--disable", default=None, help="Disable a provider", metavar="PROVIDER")
|
|
1573
|
+
parser.add_argument("--default", default=None, help="Configure the default model to use", metavar="MODEL")
|
|
1574
|
+
|
|
1575
|
+
parser.add_argument("--init", action="store_true", help="Create a default llms.json")
|
|
1576
|
+
|
|
1577
|
+
parser.add_argument("--root", default=None, help="Change root directory for UI files", metavar="PATH")
|
|
1578
|
+
parser.add_argument("--logprefix", default="", help="Prefix used in log messages", metavar="PREFIX")
|
|
1579
|
+
parser.add_argument("--verbose", action="store_true", help="Verbose output")
|
|
1502
1580
|
|
|
1503
1581
|
cli_args, extra_args = parser.parse_known_args()
|
|
1504
1582
|
|
|
1505
1583
|
# Check for verbose mode from CLI argument or environment variables
|
|
1506
|
-
verbose_env = os.environ.get(
|
|
1507
|
-
if cli_args.verbose or verbose_env in (
|
|
1584
|
+
verbose_env = os.environ.get("VERBOSE", "").lower()
|
|
1585
|
+
if cli_args.verbose or verbose_env in ("1", "true"):
|
|
1508
1586
|
g_verbose = True
|
|
1509
1587
|
# printdump(cli_args)
|
|
1510
1588
|
if cli_args.model:
|
|
@@ -1537,7 +1615,7 @@ def main():
|
|
|
1537
1615
|
if cli_args.config:
|
|
1538
1616
|
# read contents
|
|
1539
1617
|
g_config_path = cli_args.config
|
|
1540
|
-
with open(g_config_path, "
|
|
1618
|
+
with open(g_config_path, encoding="utf-8") as f:
|
|
1541
1619
|
config_json = f.read()
|
|
1542
1620
|
g_config = json.loads(config_json)
|
|
1543
1621
|
|
|
@@ -1549,7 +1627,7 @@ def main():
|
|
|
1549
1627
|
else:
|
|
1550
1628
|
if not os.path.exists(home_ui_path):
|
|
1551
1629
|
ui_json = text_from_resource("ui.json")
|
|
1552
|
-
with open(home_ui_path, "w") as f:
|
|
1630
|
+
with open(home_ui_path, "w", encoding="utf-8") as f:
|
|
1553
1631
|
f.write(ui_json)
|
|
1554
1632
|
_log(f"Created default ui config at {home_ui_path}")
|
|
1555
1633
|
g_ui_path = home_ui_path
|
|
@@ -1568,7 +1646,7 @@ def main():
|
|
|
1568
1646
|
filter_list = []
|
|
1569
1647
|
if len(extra_args) > 0:
|
|
1570
1648
|
arg = extra_args[0]
|
|
1571
|
-
if arg ==
|
|
1649
|
+
if arg == "ls":
|
|
1572
1650
|
cli_args.list = True
|
|
1573
1651
|
if len(extra_args) > 1:
|
|
1574
1652
|
filter_list = extra_args[1:]
|
|
@@ -1596,16 +1674,15 @@ def main():
|
|
|
1596
1674
|
|
|
1597
1675
|
if cli_args.serve is not None:
|
|
1598
1676
|
# Disable inactive providers and save to config before starting server
|
|
1599
|
-
all_providers = g_config[
|
|
1677
|
+
all_providers = g_config["providers"].keys()
|
|
1600
1678
|
enabled_providers = list(g_handlers.keys())
|
|
1601
1679
|
disable_providers = []
|
|
1602
1680
|
for provider in all_providers:
|
|
1603
|
-
provider_config = g_config[
|
|
1604
|
-
if provider not in enabled_providers:
|
|
1605
|
-
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1681
|
+
provider_config = g_config["providers"][provider]
|
|
1682
|
+
if provider not in enabled_providers and "enabled" in provider_config and provider_config["enabled"]:
|
|
1683
|
+
provider_config["enabled"] = False
|
|
1684
|
+
disable_providers.append(provider)
|
|
1685
|
+
|
|
1609
1686
|
if len(disable_providers) > 0:
|
|
1610
1687
|
_log(f"Disabled unavailable providers: {', '.join(disable_providers)}")
|
|
1611
1688
|
save_config(g_config)
|
|
@@ -1618,17 +1695,17 @@ def main():
|
|
|
1618
1695
|
exit(1)
|
|
1619
1696
|
|
|
1620
1697
|
# Validate auth configuration if enabled
|
|
1621
|
-
auth_enabled = g_config.get(
|
|
1698
|
+
auth_enabled = g_config.get("auth", {}).get("enabled", False)
|
|
1622
1699
|
if auth_enabled:
|
|
1623
|
-
github_config = g_config.get(
|
|
1624
|
-
client_id = github_config.get(
|
|
1625
|
-
client_secret = github_config.get(
|
|
1700
|
+
github_config = g_config.get("auth", {}).get("github", {})
|
|
1701
|
+
client_id = github_config.get("client_id", "")
|
|
1702
|
+
client_secret = github_config.get("client_secret", "")
|
|
1626
1703
|
|
|
1627
1704
|
# Expand environment variables
|
|
1628
|
-
if client_id.startswith(
|
|
1629
|
-
client_id = os.environ.get(client_id[1:],
|
|
1630
|
-
if client_secret.startswith(
|
|
1631
|
-
client_secret = os.environ.get(client_secret[1:],
|
|
1705
|
+
if client_id.startswith("$"):
|
|
1706
|
+
client_id = os.environ.get(client_id[1:], "")
|
|
1707
|
+
if client_secret.startswith("$"):
|
|
1708
|
+
client_secret = os.environ.get(client_secret[1:], "")
|
|
1632
1709
|
|
|
1633
1710
|
if not client_id or not client_secret:
|
|
1634
1711
|
print("ERROR: Authentication is enabled but GitHub OAuth is not properly configured.")
|
|
@@ -1638,8 +1715,10 @@ def main():
|
|
|
1638
1715
|
|
|
1639
1716
|
_log("Authentication enabled - GitHub OAuth configured")
|
|
1640
1717
|
|
|
1641
|
-
client_max_size = g_config.get(
|
|
1642
|
-
|
|
1718
|
+
client_max_size = g_config.get("limits", {}).get(
|
|
1719
|
+
"client_max_size", 20 * 1024 * 1024
|
|
1720
|
+
) # 20MB max request size (to handle base64 encoding overhead)
|
|
1721
|
+
_log(f"client_max_size set to {client_max_size} bytes ({client_max_size / 1024 / 1024:.1f}MB)")
|
|
1643
1722
|
app = web.Application(client_max_size=client_max_size)
|
|
1644
1723
|
|
|
1645
1724
|
# Authentication middleware helper
|
|
@@ -1649,13 +1728,13 @@ def main():
|
|
|
1649
1728
|
return True, None
|
|
1650
1729
|
|
|
1651
1730
|
# Check for OAuth session token
|
|
1652
|
-
session_token = request.query.get(
|
|
1731
|
+
session_token = request.query.get("session") or request.headers.get("X-Session-Token")
|
|
1653
1732
|
if session_token and session_token in g_sessions:
|
|
1654
1733
|
return True, g_sessions[session_token]
|
|
1655
1734
|
|
|
1656
1735
|
# Check for API key
|
|
1657
|
-
auth_header = request.headers.get(
|
|
1658
|
-
if auth_header.startswith(
|
|
1736
|
+
auth_header = request.headers.get("Authorization", "")
|
|
1737
|
+
if auth_header.startswith("Bearer "):
|
|
1659
1738
|
api_key = auth_header[7:]
|
|
1660
1739
|
if api_key:
|
|
1661
1740
|
return True, {"authProvider": "apikey"}
|
|
@@ -1666,13 +1745,16 @@ def main():
|
|
|
1666
1745
|
# Check authentication if enabled
|
|
1667
1746
|
is_authenticated, user_data = check_auth(request)
|
|
1668
1747
|
if not is_authenticated:
|
|
1669
|
-
return web.json_response(
|
|
1670
|
-
|
|
1671
|
-
"
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1748
|
+
return web.json_response(
|
|
1749
|
+
{
|
|
1750
|
+
"error": {
|
|
1751
|
+
"message": "Authentication required",
|
|
1752
|
+
"type": "authentication_error",
|
|
1753
|
+
"code": "unauthorized",
|
|
1754
|
+
}
|
|
1755
|
+
},
|
|
1756
|
+
status=401,
|
|
1757
|
+
)
|
|
1676
1758
|
|
|
1677
1759
|
try:
|
|
1678
1760
|
chat = await request.json()
|
|
@@ -1680,115 +1762,121 @@ def main():
|
|
|
1680
1762
|
return web.json_response(response)
|
|
1681
1763
|
except Exception as e:
|
|
1682
1764
|
return web.json_response({"error": str(e)}, status=500)
|
|
1683
|
-
|
|
1765
|
+
|
|
1766
|
+
app.router.add_post("/v1/chat/completions", chat_handler)
|
|
1684
1767
|
|
|
1685
1768
|
async def models_handler(request):
|
|
1686
1769
|
return web.json_response(get_models())
|
|
1687
|
-
|
|
1770
|
+
|
|
1771
|
+
app.router.add_get("/models/list", models_handler)
|
|
1688
1772
|
|
|
1689
1773
|
async def active_models_handler(request):
|
|
1690
1774
|
return web.json_response(get_active_models())
|
|
1691
|
-
|
|
1775
|
+
|
|
1776
|
+
app.router.add_get("/models", active_models_handler)
|
|
1692
1777
|
|
|
1693
1778
|
async def status_handler(request):
|
|
1694
1779
|
enabled, disabled = provider_status()
|
|
1695
|
-
return web.json_response(
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1780
|
+
return web.json_response(
|
|
1781
|
+
{
|
|
1782
|
+
"all": list(g_config["providers"].keys()),
|
|
1783
|
+
"enabled": enabled,
|
|
1784
|
+
"disabled": disabled,
|
|
1785
|
+
}
|
|
1786
|
+
)
|
|
1787
|
+
|
|
1788
|
+
app.router.add_get("/status", status_handler)
|
|
1701
1789
|
|
|
1702
1790
|
async def provider_handler(request):
|
|
1703
|
-
provider = request.match_info.get(
|
|
1791
|
+
provider = request.match_info.get("provider", "")
|
|
1704
1792
|
data = await request.json()
|
|
1705
1793
|
msg = None
|
|
1706
|
-
if provider:
|
|
1707
|
-
if data.get(
|
|
1794
|
+
if provider:
|
|
1795
|
+
if data.get("enable", False):
|
|
1708
1796
|
provider_config, msg = enable_provider(provider)
|
|
1709
1797
|
_log(f"Enabled provider {provider}")
|
|
1710
1798
|
await load_llms()
|
|
1711
|
-
elif data.get(
|
|
1799
|
+
elif data.get("disable", False):
|
|
1712
1800
|
disable_provider(provider)
|
|
1713
1801
|
_log(f"Disabled provider {provider}")
|
|
1714
1802
|
enabled, disabled = provider_status()
|
|
1715
|
-
return web.json_response(
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1803
|
+
return web.json_response(
|
|
1804
|
+
{
|
|
1805
|
+
"enabled": enabled,
|
|
1806
|
+
"disabled": disabled,
|
|
1807
|
+
"feedback": msg or "",
|
|
1808
|
+
}
|
|
1809
|
+
)
|
|
1810
|
+
|
|
1811
|
+
app.router.add_post("/providers/{provider}", provider_handler)
|
|
1721
1812
|
|
|
1722
1813
|
# OAuth handlers
|
|
1723
1814
|
async def github_auth_handler(request):
|
|
1724
1815
|
"""Initiate GitHub OAuth flow"""
|
|
1725
|
-
if
|
|
1816
|
+
if "auth" not in g_config or "github" not in g_config["auth"]:
|
|
1726
1817
|
return web.json_response({"error": "GitHub OAuth not configured"}, status=500)
|
|
1727
1818
|
|
|
1728
|
-
auth_config = g_config[
|
|
1729
|
-
client_id = auth_config.get(
|
|
1730
|
-
redirect_uri = auth_config.get(
|
|
1819
|
+
auth_config = g_config["auth"]["github"]
|
|
1820
|
+
client_id = auth_config.get("client_id", "")
|
|
1821
|
+
redirect_uri = auth_config.get("redirect_uri", "")
|
|
1731
1822
|
|
|
1732
1823
|
# Expand environment variables
|
|
1733
|
-
if client_id.startswith(
|
|
1734
|
-
client_id = os.environ.get(client_id[1:],
|
|
1735
|
-
if redirect_uri.startswith(
|
|
1736
|
-
redirect_uri = os.environ.get(redirect_uri[1:],
|
|
1824
|
+
if client_id.startswith("$"):
|
|
1825
|
+
client_id = os.environ.get(client_id[1:], "")
|
|
1826
|
+
if redirect_uri.startswith("$"):
|
|
1827
|
+
redirect_uri = os.environ.get(redirect_uri[1:], "")
|
|
1737
1828
|
|
|
1738
1829
|
if not client_id:
|
|
1739
1830
|
return web.json_response({"error": "GitHub client_id not configured"}, status=500)
|
|
1740
1831
|
|
|
1741
1832
|
# Generate CSRF state token
|
|
1742
1833
|
state = secrets.token_urlsafe(32)
|
|
1743
|
-
g_oauth_states[state] = {
|
|
1744
|
-
'created': time.time(),
|
|
1745
|
-
'redirect_uri': redirect_uri
|
|
1746
|
-
}
|
|
1834
|
+
g_oauth_states[state] = {"created": time.time(), "redirect_uri": redirect_uri}
|
|
1747
1835
|
|
|
1748
1836
|
# Clean up old states (older than 10 minutes)
|
|
1749
1837
|
current_time = time.time()
|
|
1750
|
-
expired_states = [s for s, data in g_oauth_states.items() if current_time - data[
|
|
1838
|
+
expired_states = [s for s, data in g_oauth_states.items() if current_time - data["created"] > 600]
|
|
1751
1839
|
for s in expired_states:
|
|
1752
1840
|
del g_oauth_states[s]
|
|
1753
1841
|
|
|
1754
1842
|
# Build GitHub authorization URL
|
|
1755
1843
|
params = {
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1844
|
+
"client_id": client_id,
|
|
1845
|
+
"redirect_uri": redirect_uri,
|
|
1846
|
+
"state": state,
|
|
1847
|
+
"scope": "read:user user:email",
|
|
1760
1848
|
}
|
|
1761
1849
|
auth_url = f"https://github.com/login/oauth/authorize?{urlencode(params)}"
|
|
1762
1850
|
|
|
1763
1851
|
return web.HTTPFound(auth_url)
|
|
1764
|
-
|
|
1852
|
+
|
|
1765
1853
|
def validate_user(github_username):
|
|
1766
|
-
auth_config = g_config[
|
|
1854
|
+
auth_config = g_config["auth"]["github"]
|
|
1767
1855
|
# Check if user is restricted
|
|
1768
|
-
restrict_to = auth_config.get(
|
|
1856
|
+
restrict_to = auth_config.get("restrict_to", "")
|
|
1769
1857
|
|
|
1770
1858
|
# Expand environment variables
|
|
1771
|
-
if restrict_to.startswith(
|
|
1772
|
-
restrict_to = os.environ.get(restrict_to[1:],
|
|
1859
|
+
if restrict_to.startswith("$"):
|
|
1860
|
+
restrict_to = os.environ.get(restrict_to[1:], "")
|
|
1773
1861
|
|
|
1774
1862
|
# If restrict_to is configured, validate the user
|
|
1775
1863
|
if restrict_to:
|
|
1776
1864
|
# Parse allowed users (comma or space delimited)
|
|
1777
|
-
allowed_users = [u.strip() for u in re.split(r
|
|
1865
|
+
allowed_users = [u.strip() for u in re.split(r"[,\s]+", restrict_to) if u.strip()]
|
|
1778
1866
|
|
|
1779
1867
|
# Check if user is in the allowed list
|
|
1780
1868
|
if not github_username or github_username not in allowed_users:
|
|
1781
1869
|
_log(f"Access denied for user: {github_username}. Not in allowed list: {allowed_users}")
|
|
1782
1870
|
return web.Response(
|
|
1783
1871
|
text=f"Access denied. User '{github_username}' is not authorized to access this application.",
|
|
1784
|
-
status=403
|
|
1872
|
+
status=403,
|
|
1785
1873
|
)
|
|
1786
1874
|
return None
|
|
1787
1875
|
|
|
1788
1876
|
async def github_callback_handler(request):
|
|
1789
1877
|
"""Handle GitHub OAuth callback"""
|
|
1790
|
-
code = request.query.get(
|
|
1791
|
-
state = request.query.get(
|
|
1878
|
+
code = request.query.get("code")
|
|
1879
|
+
state = request.query.get("state")
|
|
1792
1880
|
|
|
1793
1881
|
if not code or not state:
|
|
1794
1882
|
return web.Response(text="Missing code or state parameter", status=400)
|
|
@@ -1797,23 +1885,23 @@ def main():
|
|
|
1797
1885
|
if state not in g_oauth_states:
|
|
1798
1886
|
return web.Response(text="Invalid state parameter", status=400)
|
|
1799
1887
|
|
|
1800
|
-
|
|
1888
|
+
g_oauth_states.pop(state)
|
|
1801
1889
|
|
|
1802
|
-
if
|
|
1890
|
+
if "auth" not in g_config or "github" not in g_config["auth"]:
|
|
1803
1891
|
return web.json_response({"error": "GitHub OAuth not configured"}, status=500)
|
|
1804
1892
|
|
|
1805
|
-
auth_config = g_config[
|
|
1806
|
-
client_id = auth_config.get(
|
|
1807
|
-
client_secret = auth_config.get(
|
|
1808
|
-
redirect_uri = auth_config.get(
|
|
1893
|
+
auth_config = g_config["auth"]["github"]
|
|
1894
|
+
client_id = auth_config.get("client_id", "")
|
|
1895
|
+
client_secret = auth_config.get("client_secret", "")
|
|
1896
|
+
redirect_uri = auth_config.get("redirect_uri", "")
|
|
1809
1897
|
|
|
1810
1898
|
# Expand environment variables
|
|
1811
|
-
if client_id.startswith(
|
|
1812
|
-
client_id = os.environ.get(client_id[1:],
|
|
1813
|
-
if client_secret.startswith(
|
|
1814
|
-
client_secret = os.environ.get(client_secret[1:],
|
|
1815
|
-
if redirect_uri.startswith(
|
|
1816
|
-
redirect_uri = os.environ.get(redirect_uri[1:],
|
|
1899
|
+
if client_id.startswith("$"):
|
|
1900
|
+
client_id = os.environ.get(client_id[1:], "")
|
|
1901
|
+
if client_secret.startswith("$"):
|
|
1902
|
+
client_secret = os.environ.get(client_secret[1:], "")
|
|
1903
|
+
if redirect_uri.startswith("$"):
|
|
1904
|
+
redirect_uri = os.environ.get(redirect_uri[1:], "")
|
|
1817
1905
|
|
|
1818
1906
|
if not client_id or not client_secret:
|
|
1819
1907
|
return web.json_response({"error": "GitHub OAuth credentials not configured"}, status=500)
|
|
@@ -1822,45 +1910,42 @@ def main():
|
|
|
1822
1910
|
async with aiohttp.ClientSession() as session:
|
|
1823
1911
|
token_url = "https://github.com/login/oauth/access_token"
|
|
1824
1912
|
token_data = {
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1913
|
+
"client_id": client_id,
|
|
1914
|
+
"client_secret": client_secret,
|
|
1915
|
+
"code": code,
|
|
1916
|
+
"redirect_uri": redirect_uri,
|
|
1829
1917
|
}
|
|
1830
|
-
headers = {
|
|
1918
|
+
headers = {"Accept": "application/json"}
|
|
1831
1919
|
|
|
1832
1920
|
async with session.post(token_url, data=token_data, headers=headers) as resp:
|
|
1833
1921
|
token_response = await resp.json()
|
|
1834
|
-
access_token = token_response.get(
|
|
1922
|
+
access_token = token_response.get("access_token")
|
|
1835
1923
|
|
|
1836
1924
|
if not access_token:
|
|
1837
|
-
error = token_response.get(
|
|
1925
|
+
error = token_response.get("error_description", "Failed to get access token")
|
|
1838
1926
|
return web.Response(text=f"OAuth error: {error}", status=400)
|
|
1839
1927
|
|
|
1840
1928
|
# Fetch user info
|
|
1841
1929
|
user_url = "https://api.github.com/user"
|
|
1842
|
-
headers = {
|
|
1843
|
-
"Authorization": f"Bearer {access_token}",
|
|
1844
|
-
"Accept": "application/json"
|
|
1845
|
-
}
|
|
1930
|
+
headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"}
|
|
1846
1931
|
|
|
1847
1932
|
async with session.get(user_url, headers=headers) as resp:
|
|
1848
1933
|
user_data = await resp.json()
|
|
1849
1934
|
|
|
1850
1935
|
# Validate user
|
|
1851
|
-
error_response = validate_user(user_data.get(
|
|
1936
|
+
error_response = validate_user(user_data.get("login", ""))
|
|
1852
1937
|
if error_response:
|
|
1853
1938
|
return error_response
|
|
1854
1939
|
|
|
1855
1940
|
# Create session
|
|
1856
1941
|
session_token = secrets.token_urlsafe(32)
|
|
1857
1942
|
g_sessions[session_token] = {
|
|
1858
|
-
"userId": str(user_data.get(
|
|
1859
|
-
"userName": user_data.get(
|
|
1860
|
-
"displayName": user_data.get(
|
|
1861
|
-
"profileUrl": user_data.get(
|
|
1862
|
-
"email": user_data.get(
|
|
1863
|
-
"created": time.time()
|
|
1943
|
+
"userId": str(user_data.get("id", "")),
|
|
1944
|
+
"userName": user_data.get("login", ""),
|
|
1945
|
+
"displayName": user_data.get("name", ""),
|
|
1946
|
+
"profileUrl": user_data.get("avatar_url", ""),
|
|
1947
|
+
"email": user_data.get("email", ""),
|
|
1948
|
+
"created": time.time(),
|
|
1864
1949
|
}
|
|
1865
1950
|
|
|
1866
1951
|
# Redirect to UI with session token
|
|
@@ -1868,7 +1953,7 @@ def main():
|
|
|
1868
1953
|
|
|
1869
1954
|
async def session_handler(request):
|
|
1870
1955
|
"""Validate and return session info"""
|
|
1871
|
-
session_token = request.query.get(
|
|
1956
|
+
session_token = request.query.get("session") or request.headers.get("X-Session-Token")
|
|
1872
1957
|
|
|
1873
1958
|
if not session_token or session_token not in g_sessions:
|
|
1874
1959
|
return web.json_response({"error": "Invalid or expired session"}, status=401)
|
|
@@ -1877,18 +1962,15 @@ def main():
|
|
|
1877
1962
|
|
|
1878
1963
|
# Clean up old sessions (older than 24 hours)
|
|
1879
1964
|
current_time = time.time()
|
|
1880
|
-
expired_sessions = [token for token, data in g_sessions.items() if current_time - data[
|
|
1965
|
+
expired_sessions = [token for token, data in g_sessions.items() if current_time - data["created"] > 86400]
|
|
1881
1966
|
for token in expired_sessions:
|
|
1882
1967
|
del g_sessions[token]
|
|
1883
1968
|
|
|
1884
|
-
return web.json_response({
|
|
1885
|
-
**session_data,
|
|
1886
|
-
"sessionToken": session_token
|
|
1887
|
-
})
|
|
1969
|
+
return web.json_response({**session_data, "sessionToken": session_token})
|
|
1888
1970
|
|
|
1889
1971
|
async def logout_handler(request):
|
|
1890
1972
|
"""End OAuth session"""
|
|
1891
|
-
session_token = request.query.get(
|
|
1973
|
+
session_token = request.query.get("session") or request.headers.get("X-Session-Token")
|
|
1892
1974
|
|
|
1893
1975
|
if session_token and session_token in g_sessions:
|
|
1894
1976
|
del g_sessions[session_token]
|
|
@@ -1898,17 +1980,19 @@ def main():
|
|
|
1898
1980
|
async def auth_handler(request):
|
|
1899
1981
|
"""Check authentication status and return user info"""
|
|
1900
1982
|
# Check for OAuth session token
|
|
1901
|
-
session_token = request.query.get(
|
|
1983
|
+
session_token = request.query.get("session") or request.headers.get("X-Session-Token")
|
|
1902
1984
|
|
|
1903
1985
|
if session_token and session_token in g_sessions:
|
|
1904
1986
|
session_data = g_sessions[session_token]
|
|
1905
|
-
return web.json_response(
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
1987
|
+
return web.json_response(
|
|
1988
|
+
{
|
|
1989
|
+
"userId": session_data.get("userId", ""),
|
|
1990
|
+
"userName": session_data.get("userName", ""),
|
|
1991
|
+
"displayName": session_data.get("displayName", ""),
|
|
1992
|
+
"profileUrl": session_data.get("profileUrl", ""),
|
|
1993
|
+
"authProvider": "github",
|
|
1994
|
+
}
|
|
1995
|
+
)
|
|
1912
1996
|
|
|
1913
1997
|
# Check for API key in Authorization header
|
|
1914
1998
|
# auth_header = request.headers.get('Authorization', '')
|
|
@@ -1926,25 +2010,22 @@ def main():
|
|
|
1926
2010
|
# })
|
|
1927
2011
|
|
|
1928
2012
|
# Not authenticated - return error in expected format
|
|
1929
|
-
return web.json_response(
|
|
1930
|
-
"responseStatus": {
|
|
1931
|
-
|
|
1932
|
-
"message": "Not authenticated"
|
|
1933
|
-
}
|
|
1934
|
-
}, status=401)
|
|
2013
|
+
return web.json_response(
|
|
2014
|
+
{"responseStatus": {"errorCode": "Unauthorized", "message": "Not authenticated"}}, status=401
|
|
2015
|
+
)
|
|
1935
2016
|
|
|
1936
|
-
app.router.add_get(
|
|
1937
|
-
app.router.add_get(
|
|
1938
|
-
app.router.add_get(
|
|
1939
|
-
app.router.add_get(
|
|
1940
|
-
app.router.add_post(
|
|
2017
|
+
app.router.add_get("/auth", auth_handler)
|
|
2018
|
+
app.router.add_get("/auth/github", github_auth_handler)
|
|
2019
|
+
app.router.add_get("/auth/github/callback", github_callback_handler)
|
|
2020
|
+
app.router.add_get("/auth/session", session_handler)
|
|
2021
|
+
app.router.add_post("/auth/logout", logout_handler)
|
|
1941
2022
|
|
|
1942
2023
|
async def ui_static(request: web.Request) -> web.Response:
|
|
1943
2024
|
path = Path(request.match_info["path"])
|
|
1944
2025
|
|
|
1945
2026
|
try:
|
|
1946
2027
|
# Handle both Path objects and importlib.resources Traversable objects
|
|
1947
|
-
if hasattr(_ROOT,
|
|
2028
|
+
if hasattr(_ROOT, "joinpath"):
|
|
1948
2029
|
# importlib.resources Traversable
|
|
1949
2030
|
resource = _ROOT.joinpath("ui").joinpath(str(path))
|
|
1950
2031
|
if not resource.is_file():
|
|
@@ -1957,50 +2038,49 @@ def main():
|
|
|
1957
2038
|
raise web.HTTPNotFound
|
|
1958
2039
|
try:
|
|
1959
2040
|
resource.relative_to(Path(_ROOT)) # basic directory-traversal guard
|
|
1960
|
-
except ValueError:
|
|
1961
|
-
raise web.HTTPBadRequest(text="Invalid path")
|
|
2041
|
+
except ValueError as e:
|
|
2042
|
+
raise web.HTTPBadRequest(text="Invalid path") from e
|
|
1962
2043
|
content = resource.read_bytes()
|
|
1963
2044
|
|
|
1964
2045
|
content_type, _ = mimetypes.guess_type(str(path))
|
|
1965
2046
|
if content_type is None:
|
|
1966
2047
|
content_type = "application/octet-stream"
|
|
1967
2048
|
return web.Response(body=content, content_type=content_type)
|
|
1968
|
-
except (OSError, PermissionError, AttributeError):
|
|
1969
|
-
raise web.HTTPNotFound
|
|
2049
|
+
except (OSError, PermissionError, AttributeError) as e:
|
|
2050
|
+
raise web.HTTPNotFound from e
|
|
1970
2051
|
|
|
1971
2052
|
app.router.add_get("/ui/{path:.*}", ui_static, name="ui_static")
|
|
1972
|
-
|
|
2053
|
+
|
|
1973
2054
|
async def ui_config_handler(request):
|
|
1974
|
-
with open(g_ui_path, "
|
|
2055
|
+
with open(g_ui_path, encoding="utf-8") as f:
|
|
1975
2056
|
ui = json.load(f)
|
|
1976
|
-
if
|
|
1977
|
-
ui[
|
|
2057
|
+
if "defaults" not in ui:
|
|
2058
|
+
ui["defaults"] = g_config["defaults"]
|
|
1978
2059
|
enabled, disabled = provider_status()
|
|
1979
|
-
ui[
|
|
1980
|
-
"all": list(g_config['providers'].keys()),
|
|
1981
|
-
"enabled": enabled,
|
|
1982
|
-
"disabled": disabled
|
|
1983
|
-
}
|
|
2060
|
+
ui["status"] = {"all": list(g_config["providers"].keys()), "enabled": enabled, "disabled": disabled}
|
|
1984
2061
|
# Add auth configuration
|
|
1985
|
-
ui[
|
|
1986
|
-
ui[
|
|
2062
|
+
ui["requiresAuth"] = auth_enabled
|
|
2063
|
+
ui["authType"] = "oauth" if auth_enabled else "apikey"
|
|
1987
2064
|
return web.json_response(ui)
|
|
1988
|
-
|
|
2065
|
+
|
|
2066
|
+
app.router.add_get("/config", ui_config_handler)
|
|
1989
2067
|
|
|
1990
2068
|
async def not_found_handler(request):
|
|
1991
2069
|
return web.Response(text="404: Not Found", status=404)
|
|
1992
|
-
|
|
2070
|
+
|
|
2071
|
+
app.router.add_get("/favicon.ico", not_found_handler)
|
|
1993
2072
|
|
|
1994
2073
|
# Serve index.html from root
|
|
1995
2074
|
async def index_handler(request):
|
|
1996
2075
|
index_content = read_resource_file_bytes("index.html")
|
|
1997
2076
|
if index_content is None:
|
|
1998
2077
|
raise web.HTTPNotFound
|
|
1999
|
-
return web.Response(body=index_content, content_type=
|
|
2000
|
-
|
|
2078
|
+
return web.Response(body=index_content, content_type="text/html")
|
|
2079
|
+
|
|
2080
|
+
app.router.add_get("/", index_handler)
|
|
2001
2081
|
|
|
2002
2082
|
# Serve index.html as fallback route (SPA routing)
|
|
2003
|
-
app.router.add_route(
|
|
2083
|
+
app.router.add_route("*", "/{tail:.*}", index_handler)
|
|
2004
2084
|
|
|
2005
2085
|
# Setup file watcher for config files
|
|
2006
2086
|
async def start_background_tasks(app):
|
|
@@ -2011,28 +2091,28 @@ def main():
|
|
|
2011
2091
|
app.on_startup.append(start_background_tasks)
|
|
2012
2092
|
|
|
2013
2093
|
print(f"Starting server on port {port}...")
|
|
2014
|
-
web.run_app(app, host=
|
|
2094
|
+
web.run_app(app, host="0.0.0.0", port=port, print=_log)
|
|
2015
2095
|
exit(0)
|
|
2016
2096
|
|
|
2017
2097
|
if cli_args.enable is not None:
|
|
2018
|
-
if cli_args.enable.endswith(
|
|
2098
|
+
if cli_args.enable.endswith(","):
|
|
2019
2099
|
cli_args.enable = cli_args.enable[:-1].strip()
|
|
2020
2100
|
enable_providers = [cli_args.enable]
|
|
2021
|
-
all_providers = g_config[
|
|
2101
|
+
all_providers = g_config["providers"].keys()
|
|
2022
2102
|
msgs = []
|
|
2023
2103
|
if len(extra_args) > 0:
|
|
2024
2104
|
for arg in extra_args:
|
|
2025
|
-
if arg.endswith(
|
|
2105
|
+
if arg.endswith(","):
|
|
2026
2106
|
arg = arg[:-1].strip()
|
|
2027
2107
|
if arg in all_providers:
|
|
2028
2108
|
enable_providers.append(arg)
|
|
2029
2109
|
|
|
2030
2110
|
for provider in enable_providers:
|
|
2031
|
-
if provider not in g_config[
|
|
2111
|
+
if provider not in g_config["providers"]:
|
|
2032
2112
|
print(f"Provider {provider} not found")
|
|
2033
2113
|
print(f"Available providers: {', '.join(g_config['providers'].keys())}")
|
|
2034
2114
|
exit(1)
|
|
2035
|
-
if provider in g_config[
|
|
2115
|
+
if provider in g_config["providers"]:
|
|
2036
2116
|
provider_config, msg = enable_provider(provider)
|
|
2037
2117
|
print(f"\nEnabled provider {provider}:")
|
|
2038
2118
|
printdump(provider_config)
|
|
@@ -2045,19 +2125,19 @@ def main():
|
|
|
2045
2125
|
exit(0)
|
|
2046
2126
|
|
|
2047
2127
|
if cli_args.disable is not None:
|
|
2048
|
-
if cli_args.disable.endswith(
|
|
2128
|
+
if cli_args.disable.endswith(","):
|
|
2049
2129
|
cli_args.disable = cli_args.disable[:-1].strip()
|
|
2050
2130
|
disable_providers = [cli_args.disable]
|
|
2051
|
-
all_providers = g_config[
|
|
2131
|
+
all_providers = g_config["providers"].keys()
|
|
2052
2132
|
if len(extra_args) > 0:
|
|
2053
2133
|
for arg in extra_args:
|
|
2054
|
-
if arg.endswith(
|
|
2134
|
+
if arg.endswith(","):
|
|
2055
2135
|
arg = arg[:-1].strip()
|
|
2056
2136
|
if arg in all_providers:
|
|
2057
2137
|
disable_providers.append(arg)
|
|
2058
2138
|
|
|
2059
2139
|
for provider in disable_providers:
|
|
2060
|
-
if provider not in g_config[
|
|
2140
|
+
if provider not in g_config["providers"]:
|
|
2061
2141
|
print(f"Provider {provider} not found")
|
|
2062
2142
|
print(f"Available providers: {', '.join(g_config['providers'].keys())}")
|
|
2063
2143
|
exit(1)
|
|
@@ -2074,21 +2154,27 @@ def main():
|
|
|
2074
2154
|
print(f"Model {default_model} not found")
|
|
2075
2155
|
print(f"Available models: {', '.join(all_models)}")
|
|
2076
2156
|
exit(1)
|
|
2077
|
-
default_text = g_config[
|
|
2078
|
-
default_text[
|
|
2157
|
+
default_text = g_config["defaults"]["text"]
|
|
2158
|
+
default_text["model"] = default_model
|
|
2079
2159
|
save_config(g_config)
|
|
2080
2160
|
print(f"\nDefault model set to: {default_model}")
|
|
2081
2161
|
exit(0)
|
|
2082
2162
|
|
|
2083
|
-
if
|
|
2163
|
+
if (
|
|
2164
|
+
cli_args.chat is not None
|
|
2165
|
+
or cli_args.image is not None
|
|
2166
|
+
or cli_args.audio is not None
|
|
2167
|
+
or cli_args.file is not None
|
|
2168
|
+
or len(extra_args) > 0
|
|
2169
|
+
):
|
|
2084
2170
|
try:
|
|
2085
|
-
chat = g_config[
|
|
2171
|
+
chat = g_config["defaults"]["text"]
|
|
2086
2172
|
if cli_args.image is not None:
|
|
2087
|
-
chat = g_config[
|
|
2173
|
+
chat = g_config["defaults"]["image"]
|
|
2088
2174
|
elif cli_args.audio is not None:
|
|
2089
|
-
chat = g_config[
|
|
2175
|
+
chat = g_config["defaults"]["audio"]
|
|
2090
2176
|
elif cli_args.file is not None:
|
|
2091
|
-
chat = g_config[
|
|
2177
|
+
chat = g_config["defaults"]["file"]
|
|
2092
2178
|
if cli_args.chat is not None:
|
|
2093
2179
|
chat_path = os.path.join(os.path.dirname(__file__), cli_args.chat)
|
|
2094
2180
|
if not os.path.exists(chat_path):
|
|
@@ -2096,31 +2182,35 @@ def main():
|
|
|
2096
2182
|
exit(1)
|
|
2097
2183
|
_log(f"Using chat: {chat_path}")
|
|
2098
2184
|
|
|
2099
|
-
with open
|
|
2185
|
+
with open(chat_path) as f:
|
|
2100
2186
|
chat_json = f.read()
|
|
2101
2187
|
chat = json.loads(chat_json)
|
|
2102
2188
|
|
|
2103
2189
|
if cli_args.system is not None:
|
|
2104
|
-
chat[
|
|
2190
|
+
chat["messages"].insert(0, {"role": "system", "content": cli_args.system})
|
|
2105
2191
|
|
|
2106
2192
|
if len(extra_args) > 0:
|
|
2107
|
-
prompt =
|
|
2193
|
+
prompt = " ".join(extra_args)
|
|
2108
2194
|
# replace content of last message if exists, else add
|
|
2109
|
-
last_msg = chat[
|
|
2110
|
-
if last_msg and last_msg[
|
|
2111
|
-
if isinstance(last_msg[
|
|
2112
|
-
last_msg[
|
|
2195
|
+
last_msg = chat["messages"][-1] if "messages" in chat else None
|
|
2196
|
+
if last_msg and last_msg["role"] == "user":
|
|
2197
|
+
if isinstance(last_msg["content"], list):
|
|
2198
|
+
last_msg["content"][-1]["text"] = prompt
|
|
2113
2199
|
else:
|
|
2114
|
-
last_msg[
|
|
2200
|
+
last_msg["content"] = prompt
|
|
2115
2201
|
else:
|
|
2116
|
-
chat[
|
|
2202
|
+
chat["messages"].append({"role": "user", "content": prompt})
|
|
2117
2203
|
|
|
2118
2204
|
# Parse args parameters if provided
|
|
2119
2205
|
args = None
|
|
2120
2206
|
if cli_args.args is not None:
|
|
2121
2207
|
args = parse_args_params(cli_args.args)
|
|
2122
2208
|
|
|
2123
|
-
asyncio.run(
|
|
2209
|
+
asyncio.run(
|
|
2210
|
+
cli_chat(
|
|
2211
|
+
chat, image=cli_args.image, audio=cli_args.audio, file=cli_args.file, args=args, raw=cli_args.raw
|
|
2212
|
+
)
|
|
2213
|
+
)
|
|
2124
2214
|
exit(0)
|
|
2125
2215
|
except Exception as e:
|
|
2126
2216
|
print(f"{cli_args.logprefix}Error: {e}")
|
|
@@ -2132,5 +2222,5 @@ def main():
|
|
|
2132
2222
|
parser.print_help()
|
|
2133
2223
|
|
|
2134
2224
|
|
|
2135
|
-
if __name__ == "__main__":
|
|
2225
|
+
if __name__ == "__main__":
|
|
2136
2226
|
main()
|