gseai-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gseai-0.1.0/PKG-INFO +7 -0
- gseai-0.1.0/README.md +0 -0
- gseai-0.1.0/gseai/__init__.py +3 -0
- gseai-0.1.0/gseai/server.py +414 -0
- gseai-0.1.0/gseai.egg-info/PKG-INFO +7 -0
- gseai-0.1.0/gseai.egg-info/SOURCES.txt +9 -0
- gseai-0.1.0/gseai.egg-info/dependency_links.txt +1 -0
- gseai-0.1.0/gseai.egg-info/requires.txt +1 -0
- gseai-0.1.0/gseai.egg-info/top_level.txt +1 -0
- gseai-0.1.0/pyproject.toml +16 -0
- gseai-0.1.0/setup.cfg +4 -0
gseai-0.1.0/PKG-INFO
ADDED
gseai-0.1.0/README.md
ADDED
File without changes

gseai-0.1.0/gseai/server.py
ADDED
@@ -0,0 +1,414 @@
+"""Client for the GSE AI LMStudio server's REST API v1."""
+
+from collections.abc import Generator, Iterator
+from typing import Any
+
+import httpx
+
+
+class GSEAIServer:
+    """Client for the GSE AI LMStudio server.
+
+    Args:
+        api_token: Bearer token for authentication.
+        host: Hostname of the server.
+        port: Port the server listens on.
+    """
+
+    def __init__(
+        self,
+        api_token: str,
+        host: str = "gseai.gse.buffalo.edu",
+        port: int = 11434,
+    ) -> None:
+        self.base_url = f"http://{host}:{port}"
+        self._client = httpx.Client(
+            base_url=self.base_url,
+            headers={"Authorization": f"Bearer {api_token}"},
+        )
+
+    def close(self) -> None:
+        self._client.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args: Any) -> None:
+        self.close()
+
+    # -------------------------------------------------------------------------
+    # Native REST API v1 (/api/v1/)
+    # -------------------------------------------------------------------------
+
+    def list_models(self) -> dict:
+        """GET /api/v1/models — list available models."""
+        return self._client.get("/api/v1/models").raise_for_status().json()
+
+    def load_model(self, model: str, ttl: int | None = None) -> dict:
+        """POST /api/v1/models/load — load a model into memory.
+
+        Args:
+            model: Model identifier.
+            ttl: Idle time-to-live in seconds before the model is evicted.
+        """
+        body: dict[str, Any] = {"model": model}
+        if ttl is not None:
+            body["ttl"] = ttl
+        return self._client.post("/api/v1/models/load", json=body).raise_for_status().json()
+
+    def unload_model(self, model: str) -> dict:
+        """POST /api/v1/models/unload — unload a model from memory.
+
+        Args:
+            model: Model identifier.
+        """
+        return (
+            self._client.post("/api/v1/models/unload", json={"model": model})
+            .raise_for_status()
+            .json()
+        )
+
+    def download_model(self, model: str) -> dict:
+        """POST /api/v1/models/download — start downloading a model.
+
+        Args:
+            model: Model identifier to download.
+
+        Returns:
+            Dict containing a ``job_id`` for tracking download progress.
+        """
+        return (
+            self._client.post("/api/v1/models/download", json={"model": model})
+            .raise_for_status()
+            .json()
+        )
+
+    def get_download_status(self, job_id: str) -> dict:
+        """GET /api/v1/models/download/status/{job_id} — check download progress.
+
+        Args:
+            job_id: Job ID returned by :meth:`download_model`.
+        """
+        return (
+            self._client.get(f"/api/v1/models/download/status/{job_id}")
+            .raise_for_status()
+            .json()
+        )
+
+    def chat(
+        self,
+        model: str,
+        input: str | list[dict],
+        *,
+        system_prompt: str | None = None,
+        temperature: float | None = None,
+        top_p: float | None = None,
+        top_k: int | None = None,
+        min_p: float | None = None,
+        repeat_penalty: float | None = None,
+        max_output_tokens: int | None = None,
+        reasoning: str | None = None,
+        context_length: int | None = None,
+        store: bool = True,
+        previous_response_id: str | None = None,
+        ttl: int | None = None,
+        integrations: list[dict] | None = None,
+        stream: bool = False,
+    ) -> dict | Generator[dict, None, None]:
+        """POST /api/v1/chat — generate a chat response.
+
+        Args:
+            model: Model identifier.
+            input: A string prompt or a list of message dicts.
+            system_prompt: Optional system message.
+            temperature: Sampling temperature (0–2).
+            top_p: Nucleus sampling probability (0–1).
+            top_k: Top-k sampling limit.
+            min_p: Minimum probability threshold (0–1).
+            repeat_penalty: Repetition penalty.
+            max_output_tokens: Maximum number of tokens to generate.
+            reasoning: Reasoning effort — "off", "low", "medium", "high", or "on".
+            context_length: Context window size override.
+            store: Whether to persist the conversation server-side (default True).
+            previous_response_id: ID of a prior response to continue.
+            ttl: Model idle time-to-live in seconds.
+            integrations: MCP servers or plugin configs.
+            stream: If True, return a generator of Server-Sent Event dicts.
+
+        Returns:
+            Response dict, or a generator of SSE event dicts when ``stream=True``.
+        """
+        body: dict[str, Any] = {"model": model, "input": input, "store": store}
+        if system_prompt is not None:
+            body["system_prompt"] = system_prompt
+        if temperature is not None:
+            body["temperature"] = temperature
+        if top_p is not None:
+            body["top_p"] = top_p
+        if top_k is not None:
+            body["top_k"] = top_k
+        if min_p is not None:
+            body["min_p"] = min_p
+        if repeat_penalty is not None:
+            body["repeat_penalty"] = repeat_penalty
+        if max_output_tokens is not None:
+            body["max_output_tokens"] = max_output_tokens
+        if reasoning is not None:
+            body["reasoning"] = reasoning
+        if context_length is not None:
+            body["context_length"] = context_length
+        if previous_response_id is not None:
+            body["previous_response_id"] = previous_response_id
+        if ttl is not None:
+            body["ttl"] = ttl
+        if integrations is not None:
+            body["integrations"] = integrations
+
+        if stream:
+            body["stream"] = True
+            return self._stream_sse("/api/v1/chat", body)
+        else:
+            return self._client.post("/api/v1/chat", json=body).raise_for_status().json()
+
+    # -------------------------------------------------------------------------
+    # OpenAI-compatible endpoints (/v1/)
+    # -------------------------------------------------------------------------
+
+    def list_models_openai(self) -> dict:
+        """GET /v1/models — list models (OpenAI-compatible format)."""
+        return self._client.get("/v1/models").raise_for_status().json()
+
+    def chat_completions(
+        self,
+        model: str,
+        messages: list[dict],
+        *,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        stream: bool = False,
+        top_p: float | None = None,
+        top_k: int | None = None,
+        stop: str | list[str] | None = None,
+        presence_penalty: float | None = None,
+        frequency_penalty: float | None = None,
+        repeat_penalty: float | None = None,
+        logit_bias: dict | None = None,
+        seed: int | None = None,
+        response_format: dict | None = None,
+        tools: list[dict] | None = None,
+        tool_choice: str | None = None,
+        ttl: int | None = None,
+    ) -> dict | Generator[dict, None, None]:
+        """POST /v1/chat/completions — OpenAI-compatible chat completions.
+
+        Args:
+            model: Model identifier.
+            messages: List of message dicts with ``role`` and ``content``.
+            temperature: Sampling temperature (0–2).
+            max_tokens: Maximum tokens to generate.
+            stream: If True, return a generator of SSE event dicts.
+            top_p: Nucleus sampling (0–1).
+            top_k: Top-k sampling limit.
+            stop: Stop sequence(s).
+            presence_penalty: Presence penalty (-2 to 2).
+            frequency_penalty: Frequency penalty (-2 to 2).
+            repeat_penalty: Repetition penalty.
+            logit_bias: Token probability bias adjustments.
+            seed: Random seed for reproducibility.
+            response_format: JSON schema for structured output.
+            tools: Function definitions for tool/function calling.
+            tool_choice: Tool selection mode — "auto", "none", or "required".
+            ttl: Model idle time-to-live in seconds.
+        """
+        body: dict[str, Any] = {"model": model, "messages": messages}
+        if temperature is not None:
+            body["temperature"] = temperature
+        if max_tokens is not None:
+            body["max_tokens"] = max_tokens
+        if top_p is not None:
+            body["top_p"] = top_p
+        if top_k is not None:
+            body["top_k"] = top_k
+        if stop is not None:
+            body["stop"] = stop
+        if presence_penalty is not None:
+            body["presence_penalty"] = presence_penalty
+        if frequency_penalty is not None:
+            body["frequency_penalty"] = frequency_penalty
+        if repeat_penalty is not None:
+            body["repeat_penalty"] = repeat_penalty
+        if logit_bias is not None:
+            body["logit_bias"] = logit_bias
+        if seed is not None:
+            body["seed"] = seed
+        if response_format is not None:
+            body["response_format"] = response_format
+        if tools is not None:
+            body["tools"] = tools
+        if tool_choice is not None:
+            body["tool_choice"] = tool_choice
+        if ttl is not None:
+            body["ttl"] = ttl
+
+        if stream:
+            body["stream"] = True
+            return self._stream_sse("/v1/chat/completions", body)
+        else:
+            return (
+                self._client.post("/v1/chat/completions", json=body).raise_for_status().json()
+            )
+
+    def completions(
+        self,
+        model: str,
+        prompt: str | list,
+        *,
+        max_tokens: int | None = None,
+        temperature: float | None = None,
+        top_p: float | None = None,
+        top_k: int | None = None,
+        stop: str | list[str] | None = None,
+        frequency_penalty: float | None = None,
+        presence_penalty: float | None = None,
+        stream: bool = False,
+        seed: int | None = None,
+        ttl: int | None = None,
+    ) -> dict | Generator[dict, None, None]:
+        """POST /v1/completions — legacy text completions.
+
+        Args:
+            model: Model identifier.
+            prompt: Input text or list of texts.
+            max_tokens: Maximum tokens to generate.
+            temperature: Sampling temperature (0–2).
+            top_p: Nucleus sampling (0–1).
+            top_k: Top-k sampling limit.
+            stop: Stop sequence(s).
+            frequency_penalty: Frequency penalty (-2 to 2).
+            presence_penalty: Presence penalty (-2 to 2).
+            stream: If True, return a generator of SSE event dicts.
+            seed: Random seed.
+            ttl: Model idle time-to-live in seconds.
+        """
+        body: dict[str, Any] = {"model": model, "prompt": prompt}
+        if max_tokens is not None:
+            body["max_tokens"] = max_tokens
+        if temperature is not None:
+            body["temperature"] = temperature
+        if top_p is not None:
+            body["top_p"] = top_p
+        if top_k is not None:
+            body["top_k"] = top_k
+        if stop is not None:
+            body["stop"] = stop
+        if frequency_penalty is not None:
+            body["frequency_penalty"] = frequency_penalty
+        if presence_penalty is not None:
+            body["presence_penalty"] = presence_penalty
+        if seed is not None:
+            body["seed"] = seed
+        if ttl is not None:
+            body["ttl"] = ttl
+
+        if stream:
+            body["stream"] = True
+            return self._stream_sse("/v1/completions", body)
+        else:
+            return self._client.post("/v1/completions", json=body).raise_for_status().json()
+
+    def embeddings(
+        self,
+        model: str,
+        input: str | list[str],
+        *,
+        encoding_format: str | None = None,
+        dimensions: int | None = None,
+    ) -> dict:
+        """POST /v1/embeddings — generate text embeddings.
+
+        Args:
+            model: Model identifier.
+            input: Text or list of texts to embed.
+            encoding_format: Output format — "float" or "base64".
+            dimensions: Target embedding dimensionality.
+        """
+        body: dict[str, Any] = {"model": model, "input": input}
+        if encoding_format is not None:
+            body["encoding_format"] = encoding_format
+        if dimensions is not None:
+            body["dimensions"] = dimensions
+        return self._client.post("/v1/embeddings", json=body).raise_for_status().json()
+
+    def responses(
+        self,
+        model: str,
+        messages: list[dict],
+        **kwargs: Any,
+    ) -> dict:
+        """POST /v1/responses — stateful chat responses (OpenAI-compatible).
+
+        Args:
+            model: Model identifier.
+            messages: List of message dicts with ``role`` and ``content``.
+            **kwargs: Additional parameters forwarded to the endpoint.
+        """
+        body: dict[str, Any] = {"model": model, "messages": messages, **kwargs}
+        return self._client.post("/v1/responses", json=body).raise_for_status().json()
+
+    # -------------------------------------------------------------------------
+    # Anthropic-compatible endpoint (/v1/messages)
+    # -------------------------------------------------------------------------
+
+    def messages(
+        self,
+        model: str,
+        messages: list[dict],
+        max_tokens: int,
+        *,
+        system: str | None = None,
+        temperature: float | None = None,
+        top_p: float | None = None,
+        top_k: int | None = None,
+    ) -> dict:
+        """POST /v1/messages — Anthropic-compatible messages API.
+
+        Args:
+            model: Model identifier.
+            messages: List of message dicts with ``role`` and ``content``.
+            max_tokens: Maximum tokens to generate (required by the API).
+            system: System prompt.
+            temperature: Sampling temperature.
+            top_p: Nucleus sampling (0–1).
+            top_k: Top-k sampling limit.
+        """
+        body: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "max_tokens": max_tokens,
+        }
+        if system is not None:
+            body["system"] = system
+        if temperature is not None:
+            body["temperature"] = temperature
+        if top_p is not None:
+            body["top_p"] = top_p
+        if top_k is not None:
+            body["top_k"] = top_k
+        return self._client.post("/v1/messages", json=body).raise_for_status().json()
+
+    # -------------------------------------------------------------------------
+    # Internal helpers
+    # -------------------------------------------------------------------------
+
+    def _stream_sse(self, path: str, body: dict) -> Generator[dict, None, None]:
+        """POST to *path* and yield parsed Server-Sent Event data dicts."""
+        import json
+
+        with self._client.stream("POST", path, json=body) as response:
+            response.raise_for_status()
+            for line in response.iter_lines():
+                if line.startswith("data: "):
+                    payload = line[len("data: "):]
+                    if payload == "[DONE]":
+                        break
+                    yield json.loads(payload)
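For orientation, a minimal usage sketch of the client above. It is illustrative only: the API token and the model identifier are placeholders, not names shipped with or validated against the package.

    from gseai.server import GSEAIServer

    server = GSEAIServer(api_token="YOUR_TOKEN")  # placeholder token

    # Non-streaming call: returns the parsed JSON response as a dict.
    reply = server.chat("some-model-id", "Hello!", temperature=0.7)

    # Streaming call: stream=True returns a generator of parsed SSE event
    # dicts, which _stream_sse yields until the "[DONE]" sentinel arrives.
    for event in server.chat("some-model-id", "Hello!", stream=True):
        print(event)

    server.close()

Because _stream_sse is a generator, the POST request is only issued once the caller starts iterating; a generator that is never consumed never hits the endpoint.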
gseai-0.1.0/gseai.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
+
gseai-0.1.0/gseai.egg-info/requires.txt
ADDED
@@ -0,0 +1 @@
+httpx>=0.28.1
gseai-0.1.0/gseai.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+gseai
gseai-0.1.0/pyproject.toml
ADDED
@@ -0,0 +1,16 @@
+[project]
+name = "gseai"
+version = "0.1.0"
+description = "Python client for the GSE AI LMStudio server"
+readme = "README.md"
+requires-python = ">=3.14"
+dependencies = [
+    "httpx>=0.28.1",
+]
+
+[dependency-groups]
+dev = [
+    "sphinx>=9.1.0",
+    "sphinx-autodoc-typehints>=3.10.0",
+    "sphinx-rtd-theme>=3.1.0",
+]
gseai-0.1.0/setup.cfg
ADDED
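Taken together, the package is a thin wrapper with httpx as its sole runtime dependency, and the __enter__/__exit__ methods let the client be used as a context manager so the underlying httpx.Client is closed automatically. A sketch of the OpenAI-compatible path, again with placeholder token and model names:

    from gseai.server import GSEAIServer

    with GSEAIServer(api_token="YOUR_TOKEN") as server:
        result = server.chat_completions(
            model="some-chat-model",
            messages=[{"role": "user", "content": "One-line summary of SSE?"}],
            max_tokens=64,
        )
        vectors = server.embeddings("some-embedding-model", ["alpha", "beta"])

Both calls raise httpx.HTTPStatusError on non-2xx responses, since every request goes through raise_for_status().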