gseai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gseai-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.4
2
+ Name: gseai
3
+ Version: 0.1.0
4
+ Summary: Python client for the GSE AI LMStudio server
5
+ Requires-Python: >=3.14
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: httpx>=0.28.1
gseai-0.1.0/README.md ADDED
File without changes
@@ -0,0 +1,3 @@
1
+ from .server import GSEAIServer
2
+
3
+ __all__ = ["GSEAIServer"]
@@ -0,0 +1,414 @@
1
+ """Client for the GSE AI LMStudio server's REST API v1."""
2
+
3
+ from collections.abc import Generator, Iterator
4
+ from typing import Any
5
+
6
+ import httpx
7
+
8
+
9
+ class GSEAIServer:
10
+ """Client for the GSE AI LMStudio server.
11
+
12
+ Args:
13
+ api_token: Bearer token for authentication.
14
+ host: Hostname of the server.
15
+ port: Port the server listens on.
16
+ """
17
+
18
def __init__(
    self,
    api_token: str,
    host: str = "gseai.gse.buffalo.edu",
    port: int = 11434,
) -> None:
    """Build the base URL and an authenticated persistent HTTP client.

    Args:
        api_token: Bearer token for authentication.
        host: Hostname of the server.
        port: Port the server listens on.
    """
    self.base_url = f"http://{host}:{port}"
    # Every request carries the bearer token; the client keeps a connection pool.
    auth_headers = {"Authorization": f"Bearer {api_token}"}
    self._client = httpx.Client(base_url=self.base_url, headers=auth_headers)
29
+
30
def close(self) -> None:
    """Close the underlying HTTP client and release its connection pool."""
    self._client.close()
32
+
33
def __enter__(self):
    """Enter a ``with`` block, returning this client instance."""
    return self
35
+
36
def __exit__(self, *args: Any) -> None:
    """Exit a ``with`` block by closing the HTTP client."""
    self.close()
38
+
39
+ # -------------------------------------------------------------------------
40
+ # Native REST API v1 (/api/v1/)
41
+ # -------------------------------------------------------------------------
42
+
43
+ def list_models(self) -> dict:
44
+ """GET /api/v1/models — list available models."""
45
+ return self._client.get("/api/v1/models").raise_for_status().json()
46
+
47
+ def load_model(self, model: str, ttl: int | None = None) -> dict:
48
+ """POST /api/v1/models/load — load a model into memory.
49
+
50
+ Args:
51
+ model: Model identifier.
52
+ ttl: Idle time-to-live in seconds before the model is evicted.
53
+ """
54
+ body: dict[str, Any] = {"model": model}
55
+ if ttl is not None:
56
+ body["ttl"] = ttl
57
+ return self._client.post("/api/v1/models/load", json=body).raise_for_status().json()
58
+
59
+ def unload_model(self, model: str) -> dict:
60
+ """POST /api/v1/models/unload — unload a model from memory.
61
+
62
+ Args:
63
+ model: Model identifier.
64
+ """
65
+ return (
66
+ self._client.post("/api/v1/models/unload", json={"model": model})
67
+ .raise_for_status()
68
+ .json()
69
+ )
70
+
71
+ def download_model(self, model: str) -> dict:
72
+ """POST /api/v1/models/download — start downloading a model.
73
+
74
+ Args:
75
+ model: Model identifier to download.
76
+
77
+ Returns:
78
+ Dict containing a ``job_id`` for tracking download progress.
79
+ """
80
+ return (
81
+ self._client.post("/api/v1/models/download", json={"model": model})
82
+ .raise_for_status()
83
+ .json()
84
+ )
85
+
86
+ def get_download_status(self, job_id: str) -> dict:
87
+ """GET /api/v1/models/download/status/{job_id} — check download progress.
88
+
89
+ Args:
90
+ job_id: Job ID returned by :meth:`download_model`.
91
+ """
92
+ return (
93
+ self._client.get(f"/api/v1/models/download/status/{job_id}")
94
+ .raise_for_status()
95
+ .json()
96
+ )
97
+
98
+ def chat(
99
+ self,
100
+ model: str,
101
+ input: str | list[dict],
102
+ *,
103
+ system_prompt: str | None = None,
104
+ temperature: float | None = None,
105
+ top_p: float | None = None,
106
+ top_k: int | None = None,
107
+ min_p: float | None = None,
108
+ repeat_penalty: float | None = None,
109
+ max_output_tokens: int | None = None,
110
+ reasoning: str | None = None,
111
+ context_length: int | None = None,
112
+ store: bool = True,
113
+ previous_response_id: str | None = None,
114
+ ttl: int | None = None,
115
+ integrations: list[dict] | None = None,
116
+ stream: bool = False,
117
+ ) -> dict | Generator[dict, None, None]:
118
+ """POST /api/v1/chat — generate a chat response.
119
+
120
+ Args:
121
+ model: Model identifier.
122
+ input: A string prompt or a list of message dicts.
123
+ system_prompt: Optional system message.
124
+ temperature: Sampling temperature (0–2).
125
+ top_p: Nucleus sampling probability (0–1).
126
+ top_k: Top-k sampling limit.
127
+ min_p: Minimum probability threshold (0–1).
128
+ repeat_penalty: Repetition penalty.
129
+ max_output_tokens: Maximum number of tokens to generate.
130
+ reasoning: Reasoning effort — "off", "low", "medium", "high", or "on".
131
+ context_length: Context window size override.
132
+ store: Whether to persist the conversation server-side (default True).
133
+ previous_response_id: ID of a prior response to continue.
134
+ ttl: Model idle time-to-live in seconds.
135
+ integrations: MCP servers or plugin configs.
136
+ stream: If True, return a generator of Server-Sent Event dicts.
137
+
138
+ Returns:
139
+ Response dict, or a generator of SSE event dicts when ``stream=True``.
140
+ """
141
+ body: dict[str, Any] = {"model": model, "input": input, "store": store}
142
+ if system_prompt is not None:
143
+ body["system_prompt"] = system_prompt
144
+ if temperature is not None:
145
+ body["temperature"] = temperature
146
+ if top_p is not None:
147
+ body["top_p"] = top_p
148
+ if top_k is not None:
149
+ body["top_k"] = top_k
150
+ if min_p is not None:
151
+ body["min_p"] = min_p
152
+ if repeat_penalty is not None:
153
+ body["repeat_penalty"] = repeat_penalty
154
+ if max_output_tokens is not None:
155
+ body["max_output_tokens"] = max_output_tokens
156
+ if reasoning is not None:
157
+ body["reasoning"] = reasoning
158
+ if context_length is not None:
159
+ body["context_length"] = context_length
160
+ if previous_response_id is not None:
161
+ body["previous_response_id"] = previous_response_id
162
+ if ttl is not None:
163
+ body["ttl"] = ttl
164
+ if integrations is not None:
165
+ body["integrations"] = integrations
166
+
167
+ if stream:
168
+ body["stream"] = True
169
+ return self._stream_sse("/api/v1/chat", body)
170
+ else:
171
+ return self._client.post("/api/v1/chat", json=body).raise_for_status().json()
172
+
173
+ # -------------------------------------------------------------------------
174
+ # OpenAI-compatible endpoints (/v1/)
175
+ # -------------------------------------------------------------------------
176
+
177
+ def list_models_openai(self) -> dict:
178
+ """GET /v1/models — list models (OpenAI-compatible format)."""
179
+ return self._client.get("/v1/models").raise_for_status().json()
180
+
181
+ def chat_completions(
182
+ self,
183
+ model: str,
184
+ messages: list[dict],
185
+ *,
186
+ temperature: float | None = None,
187
+ max_tokens: int | None = None,
188
+ stream: bool = False,
189
+ top_p: float | None = None,
190
+ top_k: int | None = None,
191
+ stop: str | list[str] | None = None,
192
+ presence_penalty: float | None = None,
193
+ frequency_penalty: float | None = None,
194
+ repeat_penalty: float | None = None,
195
+ logit_bias: dict | None = None,
196
+ seed: int | None = None,
197
+ response_format: dict | None = None,
198
+ tools: list[dict] | None = None,
199
+ tool_choice: str | None = None,
200
+ ttl: int | None = None,
201
+ ) -> dict | Generator[dict, None, None]:
202
+ """POST /v1/chat/completions — OpenAI-compatible chat completions.
203
+
204
+ Args:
205
+ model: Model identifier.
206
+ messages: List of message dicts with ``role`` and ``content``.
207
+ temperature: Sampling temperature (0–2).
208
+ max_tokens: Maximum tokens to generate.
209
+ stream: If True, return a generator of SSE event dicts.
210
+ top_p: Nucleus sampling (0–1).
211
+ top_k: Top-k sampling limit.
212
+ stop: Stop sequence(s).
213
+ presence_penalty: Presence penalty (-2 to 2).
214
+ frequency_penalty: Frequency penalty (-2 to 2).
215
+ repeat_penalty: Repetition penalty.
216
+ logit_bias: Token probability bias adjustments.
217
+ seed: Random seed for reproducibility.
218
+ response_format: JSON schema for structured output.
219
+ tools: Function definitions for tool/function calling.
220
+ tool_choice: Tool selection mode — "auto", "none", or "required".
221
+ ttl: Model idle time-to-live in seconds.
222
+ """
223
+ body: dict[str, Any] = {"model": model, "messages": messages}
224
+ if temperature is not None:
225
+ body["temperature"] = temperature
226
+ if max_tokens is not None:
227
+ body["max_tokens"] = max_tokens
228
+ if top_p is not None:
229
+ body["top_p"] = top_p
230
+ if top_k is not None:
231
+ body["top_k"] = top_k
232
+ if stop is not None:
233
+ body["stop"] = stop
234
+ if presence_penalty is not None:
235
+ body["presence_penalty"] = presence_penalty
236
+ if frequency_penalty is not None:
237
+ body["frequency_penalty"] = frequency_penalty
238
+ if repeat_penalty is not None:
239
+ body["repeat_penalty"] = repeat_penalty
240
+ if logit_bias is not None:
241
+ body["logit_bias"] = logit_bias
242
+ if seed is not None:
243
+ body["seed"] = seed
244
+ if response_format is not None:
245
+ body["response_format"] = response_format
246
+ if tools is not None:
247
+ body["tools"] = tools
248
+ if tool_choice is not None:
249
+ body["tool_choice"] = tool_choice
250
+ if ttl is not None:
251
+ body["ttl"] = ttl
252
+
253
+ if stream:
254
+ body["stream"] = True
255
+ return self._stream_sse("/v1/chat/completions", body)
256
+ else:
257
+ return (
258
+ self._client.post("/v1/chat/completions", json=body).raise_for_status().json()
259
+ )
260
+
261
+ def completions(
262
+ self,
263
+ model: str,
264
+ prompt: str | list,
265
+ *,
266
+ max_tokens: int | None = None,
267
+ temperature: float | None = None,
268
+ top_p: float | None = None,
269
+ top_k: int | None = None,
270
+ stop: str | list[str] | None = None,
271
+ frequency_penalty: float | None = None,
272
+ presence_penalty: float | None = None,
273
+ stream: bool = False,
274
+ seed: int | None = None,
275
+ ttl: int | None = None,
276
+ ) -> dict | Generator[dict, None, None]:
277
+ """POST /v1/completions — legacy text completions.
278
+
279
+ Args:
280
+ model: Model identifier.
281
+ prompt: Input text or list of texts.
282
+ max_tokens: Maximum tokens to generate.
283
+ temperature: Sampling temperature (0–2).
284
+ top_p: Nucleus sampling (0–1).
285
+ top_k: Top-k sampling limit.
286
+ stop: Stop sequence(s).
287
+ frequency_penalty: Frequency penalty (-2 to 2).
288
+ presence_penalty: Presence penalty (-2 to 2).
289
+ stream: If True, return a generator of SSE event dicts.
290
+ seed: Random seed.
291
+ ttl: Model idle time-to-live in seconds.
292
+ """
293
+ body: dict[str, Any] = {"model": model, "prompt": prompt}
294
+ if max_tokens is not None:
295
+ body["max_tokens"] = max_tokens
296
+ if temperature is not None:
297
+ body["temperature"] = temperature
298
+ if top_p is not None:
299
+ body["top_p"] = top_p
300
+ if top_k is not None:
301
+ body["top_k"] = top_k
302
+ if stop is not None:
303
+ body["stop"] = stop
304
+ if frequency_penalty is not None:
305
+ body["frequency_penalty"] = frequency_penalty
306
+ if presence_penalty is not None:
307
+ body["presence_penalty"] = presence_penalty
308
+ if seed is not None:
309
+ body["seed"] = seed
310
+ if ttl is not None:
311
+ body["ttl"] = ttl
312
+
313
+ if stream:
314
+ body["stream"] = True
315
+ return self._stream_sse("/v1/completions", body)
316
+ else:
317
+ return self._client.post("/v1/completions", json=body).raise_for_status().json()
318
+
319
+ def embeddings(
320
+ self,
321
+ model: str,
322
+ input: str | list[str],
323
+ *,
324
+ encoding_format: str | None = None,
325
+ dimensions: int | None = None,
326
+ ) -> dict:
327
+ """POST /v1/embeddings — generate text embeddings.
328
+
329
+ Args:
330
+ model: Model identifier.
331
+ input: Text or list of texts to embed.
332
+ encoding_format: Output format — "float" or "base64".
333
+ dimensions: Target embedding dimensionality.
334
+ """
335
+ body: dict[str, Any] = {"model": model, "input": input}
336
+ if encoding_format is not None:
337
+ body["encoding_format"] = encoding_format
338
+ if dimensions is not None:
339
+ body["dimensions"] = dimensions
340
+ return self._client.post("/v1/embeddings", json=body).raise_for_status().json()
341
+
342
+ def responses(
343
+ self,
344
+ model: str,
345
+ messages: list[dict],
346
+ **kwargs: Any,
347
+ ) -> dict:
348
+ """POST /v1/responses — stateful chat responses (OpenAI-compatible).
349
+
350
+ Args:
351
+ model: Model identifier.
352
+ messages: List of message dicts with ``role`` and ``content``.
353
+ **kwargs: Additional parameters forwarded to the endpoint.
354
+ """
355
+ body: dict[str, Any] = {"model": model, "messages": messages, **kwargs}
356
+ return self._client.post("/v1/responses", json=body).raise_for_status().json()
357
+
358
+ # -------------------------------------------------------------------------
359
+ # Anthropic-compatible endpoint (/v1/messages)
360
+ # -------------------------------------------------------------------------
361
+
362
+ def messages(
363
+ self,
364
+ model: str,
365
+ messages: list[dict],
366
+ max_tokens: int,
367
+ *,
368
+ system: str | None = None,
369
+ temperature: float | None = None,
370
+ top_p: float | None = None,
371
+ top_k: int | None = None,
372
+ ) -> dict:
373
+ """POST /v1/messages — Anthropic-compatible messages API.
374
+
375
+ Args:
376
+ model: Model identifier.
377
+ messages: List of message dicts with ``role`` and ``content``.
378
+ max_tokens: Maximum tokens to generate (required by the API).
379
+ system: System prompt.
380
+ temperature: Sampling temperature.
381
+ top_p: Nucleus sampling (0–1).
382
+ top_k: Top-k sampling limit.
383
+ """
384
+ body: dict[str, Any] = {
385
+ "model": model,
386
+ "messages": messages,
387
+ "max_tokens": max_tokens,
388
+ }
389
+ if system is not None:
390
+ body["system"] = system
391
+ if temperature is not None:
392
+ body["temperature"] = temperature
393
+ if top_p is not None:
394
+ body["top_p"] = top_p
395
+ if top_k is not None:
396
+ body["top_k"] = top_k
397
+ return self._client.post("/v1/messages", json=body).raise_for_status().json()
398
+
399
+ # -------------------------------------------------------------------------
400
+ # Internal helpers
401
+ # -------------------------------------------------------------------------
402
+
403
+ def _stream_sse(self, path: str, body: dict) -> Generator[dict, None, None]:
404
+ """POST to *path* and yield parsed Server-Sent Event data dicts."""
405
+ import json
406
+
407
+ with self._client.stream("POST", path, json=body) as response:
408
+ response.raise_for_status()
409
+ for line in response.iter_lines():
410
+ if line.startswith("data: "):
411
+ payload = line[len("data: "):]
412
+ if payload == "[DONE]":
413
+ break
414
+ yield json.loads(payload)
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.4
2
+ Name: gseai
3
+ Version: 0.1.0
4
+ Summary: Python client for the GSE AI LMStudio server
5
+ Requires-Python: >=3.14
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: httpx>=0.28.1
@@ -0,0 +1,9 @@
1
+ README.md
2
+ pyproject.toml
3
+ gseai/__init__.py
4
+ gseai/server.py
5
+ gseai.egg-info/PKG-INFO
6
+ gseai.egg-info/SOURCES.txt
7
+ gseai.egg-info/dependency_links.txt
8
+ gseai.egg-info/requires.txt
9
+ gseai.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ httpx>=0.28.1
@@ -0,0 +1 @@
1
+ gseai
@@ -0,0 +1,16 @@
1
+ [project]
2
+ name = "gseai"
3
+ version = "0.1.0"
4
+ description = "Python client for the GSE AI LMStudio server"
5
+ readme = "README.md"
6
+ requires-python = ">=3.14"
7
+ dependencies = [
8
+ "httpx>=0.28.1",
9
+ ]
10
+
11
+ [dependency-groups]
12
+ dev = [
13
+ "sphinx>=9.1.0",
14
+ "sphinx-autodoc-typehints>=3.10.0",
15
+ "sphinx-rtd-theme>=3.1.0",
16
+ ]
gseai-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+