lmstd 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: lmstd
3
+ Version: 0.1.0
4
+ Summary: LM Studio v1 REST API Client Library
5
+ Author: LM Studio User
6
+ License: MIT
7
+ Requires-Python: >=3.7
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: requests
11
+ Dynamic: license-file
12
+
13
+ # LMStd
14
+
15
+ LM Studio v1 REST API Client Library
16
+
17
+ This single-file library provides a clean, fully documented Python interface
18
+ to interact with an LM Studio local server based on the v1 REST API endpoints.
19
+
20
+ ## Installation
21
+
22
+ Install the published package with pip (or run `pip install .` from a checkout of the source code):
23
+
24
+ ```bash
25
+ pip install lmstd
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ```python
31
+ import os
32
+ from lmstd import LMStd
33
+
34
+ # Initialize the client
35
+ client = LMStd(
36
+ base_url="http://localhost:1234",
37
+ api_token=os.environ.get("LMSTD_APIKEY")
38
+ )
39
+
40
+ # List available models
41
+ models = client.list_models()
42
+ print(models)
43
+ ```
44
+
45
+ ## Features
46
+
47
+ - **Stateful chats**: Fully utilize the stateful `/api/v1/chat` endpoint.
48
+ - **Model Context Protocol (MCP)**: Use integrations and MCP tools directly.
49
+ - **Advanced Model Management**: Load, unload, and download models programmatically.
50
+ - **Streaming Support**: Easy SSE-based chat streaming support.
51
+
52
+ ## License
53
+
54
+ MIT License
@@ -0,0 +1,6 @@
1
+ lmstd.py,sha256=Ouf69Brshw2UbdITAU1Y10eTpnTeCYI2QOjfuglcNAQ,14755
2
+ lmstd-0.1.0.dist-info/licenses/LICENSE,sha256=Ct45NP0cQPcrziLZ_ssab49Jtmx3jJd68BgxfKkIW-s,1090
3
+ lmstd-0.1.0.dist-info/METADATA,sha256=Au965gFgn7dx4z2JdqwUdUhoWAUaehsu4h8B-t4rfD8,1241
4
+ lmstd-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
5
+ lmstd-0.1.0.dist-info/top_level.txt,sha256=iWRpN_xU3aEt7BtqtMEJlyHh3Pq13KQ4OrE6PPlIsXo,6
6
+ lmstd-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 LM Studio User
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ lmstd
lmstd.py ADDED
@@ -0,0 +1,316 @@
1
+ """
2
+ LM Studio v1 REST API Client Library
3
+
4
+ This single-file library provides a clean, fully documented Python interface
5
+ to interact with an LM Studio local server based on the v1 REST API endpoints.
6
+
7
+ Features supported natively via the v1 API:
8
+ - Stateful chats
9
+ - Model Context Protocol (MCP) integrations via API
10
+ - Authentication configuration with API tokens
11
+ - Advanced model lifecycle management (download, load, unload)
12
+
13
+ Dependencies:
14
+ requests
15
+ """
16
+
17
+ import json
18
+ import os
19
+ from typing import Any, Dict, Iterator, List, Optional, Union
20
+
21
+
22
class LMStdError(Exception):
    """Exception raised for errors returned by the LM Studio API.

    Attributes:
        status_code (int): HTTP status code of the failed response.
        response_text (str): Raw body text of the failed response.
    """

    def __init__(self, status_code: int, response_text: str):
        self.status_code = status_code
        self.response_text = response_text
        super().__init__(f"API Error {status_code}: {response_text}")

    def __reduce__(self):
        # Exception.args only holds the formatted message, so the default
        # reduction would call LMStdError(<message>) and fail with TypeError.
        # Rebuild from the two real constructor arguments instead so that
        # copy.copy() and pickle round-trips work.
        return (type(self), (self.status_code, self.response_text))
28
+
29
+
30
class LMStd:
    """
    A client library for interacting with LM Studio's native v1 REST API.

    The client owns a ``requests.Session``. Call :meth:`close` (or use the
    client as a context manager) to release its connections when done.
    """

    # Optional /api/v1/chat fields that are forwarded only when not None.
    _CHAT_OPTION_KEYS = (
        "system_prompt", "integrations", "headers", "temperature", "top_p",
        "top_k", "min_p", "repeat_penalty", "max_output_tokens", "reasoning",
        "context_length", "previous_response_id",
    )

    def __init__(
        self,
        base_url: str = "http://localhost:1234",
        api_token: Optional[str] = None,
        timeout: Optional[float] = None,
    ):
        """
        Initializes the LM Studio API Client.

        Args:
            base_url (str): The base URL where your LM Studio local server is running.
                By default, the server is available at http://localhost:1234.
                A trailing slash is stripped.
            api_token (str, optional): The LM_API_TOKEN authorization bearer token if required.
                Passed as an Authorization header.
            timeout (float, optional): Timeout in seconds handed to every HTTP call.
                None (the default) waits indefinitely, as before.
        """
        # Imported lazily so the module itself can be imported without requests.
        import requests

        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({"Content-Type": "application/json"})
        if api_token:
            self.session.headers.update({"Authorization": f"Bearer {api_token}"})

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self) -> "LMStd":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    @staticmethod
    def _resolve_model(model: Optional[str]) -> str:
        """Return ``model`` or the LMSTD_MODEL environment fallback; raise ValueError if neither is set."""
        resolved = model or os.environ.get("LMSTD_MODEL")
        if not resolved:
            raise ValueError("Model must be provided or set via the LMSTD_MODEL environment variable.")
        return resolved

    def _build_chat_payload(
        self,
        model: Optional[str],
        input_data: Optional[Union[str, List[Dict[str, Any]]]],
        stream: bool,
        store: Any,
        options: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Validate the chat arguments and assemble the /api/v1/chat request body."""
        model = self._resolve_model(model)
        if input_data is None:
            raise ValueError("input_data must be provided.")

        payload: Dict[str, Any] = {
            "model": model,
            "input": input_data,
            "stream": stream,
            "store": store,
        }
        for key in self._CHAT_OPTION_KEYS:
            value = options.get(key)
            if value is not None:
                payload[key] = value
        return payload

    @staticmethod
    def _parse_sse_data(raw_line: bytes) -> Optional[str]:
        """Return the payload of an SSE ``data`` line, or None for any other line."""
        if not raw_line:
            return None
        line = raw_line.decode("utf-8")
        # The space after the colon is optional in the SSE wire format, so
        # match on "data:" and strip whatever whitespace follows.
        if not line.startswith("data:"):
            return None
        return line[len("data:"):].strip() or None

    def _request(self, method: str, endpoint: str, json_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Internal helper to process HTTP requests cleanly.

        Args:
            method (str): HTTP method name.
            endpoint (str): Path appended to ``base_url``.
            json_data (dict, optional): JSON request body.

        Returns:
            Dict[str, Any]: The decoded JSON response body.

        Raises:
            LMStdError: If the response status is neither 200 nor 201.
            RuntimeError: If sending the request or decoding the response fails;
                the original exception is attached as ``__cause__``.
        """
        url = f"{self.base_url}{endpoint}"
        try:
            response = self.session.request(method=method, url=url, json=json_data, timeout=self.timeout)
            rejected = response.status_code not in (200, 201)
            result = response.text if rejected else response.json()
        except Exception as e:
            raise RuntimeError(f"Failed to connect or process request to {url}: {e}") from e
        if rejected:
            raise LMStdError(response.status_code, result)
        return result

    def chat(
        self,
        model: Optional[str] = None,
        input_data: Optional[Union[str, List[Dict[str, Any]]]] = None,
        system_prompt: Optional[str] = None,
        integrations: Optional[List[Union[str, Dict[str, Any]]]] = None,
        headers: Optional[Dict[str, str]] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
        min_p: Optional[float] = None,
        repeat_penalty: Optional[float] = None,
        max_output_tokens: Optional[int] = None,
        reasoning: Optional[str] = None,
        context_length: Optional[int] = None,
        store: bool = True,
        previous_response_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        POST /api/v1/chat
        Send a message to a model and receive a full response.
        The /api/v1/chat endpoint is stateful by default, storing and managing context automatically.

        Args:
            model (str): Unique identifier for the model to use. Falls back to the
                LMSTD_MODEL environment variable when omitted.
            input_data (str or list): Text message string or an array of input items (messages/images).
                Images can be passed using 'type': 'image' and 'data_url'.
            system_prompt (str, optional): System message that sets model behavior or instructions.
            integrations (list, optional): List of integrations (plugins, ephemeral MCP servers) to enable for this request.
            headers (dict, optional): Custom HTTP headers to send with requests to the server.
            temperature (float, optional): Randomness in token selection (0 is deterministic, [0,1]).
            top_p (float, optional): Minimum cumulative probability for the possible next tokens [0,1].
            top_k (int, optional): Limits next token selection to top-k most probable tokens.
            min_p (float, optional): Minimum base probability for a token to be selected for output [0,1].
            repeat_penalty (float, optional): Penalty for repeating token sequences. 1 is no penalty.
            max_output_tokens (int, optional): Maximum number of tokens to generate.
            reasoning (str, optional): Reasoning setting ('off', 'low', 'medium', 'high', 'on').
            context_length (int, optional): Number of tokens to consider as context. Higher values recommended for MCP usage.
            store (bool, optional): Whether to store the chat. If set to true, response will return a 'response_id' field.
            previous_response_id (str, optional): Identifier of existing response to append to. Must start with "resp_".

        Returns:
            Dict[str, Any]: Response fields containing 'model_instance_id', an 'output' array (messages, tool_calls, reasoning),
                'stats' (token usage/metrics), and an optional 'response_id'.

        Raises:
            ValueError: If no model can be resolved or ``input_data`` is None.
            LMStdError: If the server answers with a status other than 200/201.
            RuntimeError: If the request cannot be sent or the response cannot be decoded.
        """
        options = {
            "system_prompt": system_prompt,
            "integrations": integrations,
            "headers": headers,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "min_p": min_p,
            "repeat_penalty": repeat_penalty,
            "max_output_tokens": max_output_tokens,
            "reasoning": reasoning,
            "context_length": context_length,
            "previous_response_id": previous_response_id,
        }
        payload = self._build_chat_payload(model, input_data, stream=False, store=store, options=options)
        return self._request("POST", "/api/v1/chat", json_data=payload)

    def chat_stream(
        self,
        model: Optional[str] = None,
        input_data: Optional[Union[str, List[Dict[str, Any]]]] = None,
        **kwargs
    ) -> Iterator[Dict[str, Any]]:
        """
        POST /api/v1/chat (Streaming)
        Send a message to a model with `stream` set to true. The response is sent as a stream of events using Server-Sent Events (SSE).

        This is a generator: argument validation and the HTTP request happen when
        iteration starts, and the streaming response is closed when the generator
        finishes, fails, or is closed early.

        Args:
            model (str): Unique identifier for the model to use. Falls back to the
                LMSTD_MODEL environment variable when omitted.
            input_data (str or list): Text message string or an array of input items.
            **kwargs: Additional parameters matching the `chat` function (e.g., system_prompt, integrations, store, temperature, etc.).
                Keys that `chat` does not define are ignored.

        Yields:
            Dict[str, Any]: Parsed JSON objects corresponding to streaming events. Events arrive in order and include:
                'chat.start', 'model_load.*', 'prompt_processing.*', 'reasoning.*', 'tool_call.*', 'message.*',
                'error', and finally 'chat.end'.

        Raises:
            ValueError: If no model can be resolved or ``input_data`` is None.
            LMStdError: If the server answers with a status other than 200/201.
            RuntimeError: If connecting, reading the stream, or decoding an event fails.
        """
        payload = self._build_chat_payload(
            model, input_data, stream=True, store=kwargs.get("store", True), options=kwargs
        )
        url = f"{self.base_url}/api/v1/chat"

        response = None
        try:
            try:
                response = self.session.post(url, json=payload, stream=True, timeout=self.timeout)
                rejected = response.status_code not in (200, 201)
                # Reading the body of a streamed response performs network I/O,
                # so it stays inside the guarded region.
                error_body = response.text if rejected else None
            except Exception as e:
                raise RuntimeError(f"Failed to connect or stream request to {url}: {e}") from e
            if rejected:
                raise LMStdError(response.status_code, error_body)

            try:
                for raw_line in response.iter_lines():
                    data_str = self._parse_sse_data(raw_line)
                    if data_str is not None:
                        yield json.loads(data_str)
            except Exception as e:
                raise RuntimeError(f"Failed to connect or stream request to {url}: {e}") from e
        finally:
            # Release the connection even when the consumer stops iterating early.
            if response is not None:
                response.close()

    def list_models(self) -> Dict[str, Any]:
        """
        GET /api/v1/models
        Get a list of available models on your system, including both LLMs and embedding models.

        Returns:
            Dict[str, Any]: JSON object containing a list of available models, their configs (context_length,
                architecture, format), and currently loaded instances.
        """
        return self._request("GET", "/api/v1/models")

    def load_model(
        self,
        model: Optional[str] = None,
        context_length: Optional[int] = None,
        eval_batch_size: Optional[int] = None,
        flash_attention: Optional[bool] = None,
        num_experts: Optional[int] = None,
        offload_kv_cache_to_gpu: Optional[bool] = None,
        echo_load_config: bool = False
    ) -> Dict[str, Any]:
        """
        POST /api/v1/models/load
        Load an LLM or Embedding model into memory with custom configuration for inference.

        Args:
            model (str): Unique identifier for the model to load. Falls back to the
                LMSTD_MODEL environment variable when omitted.
            context_length (int, optional): Maximum number of tokens that the model will consider.
            eval_batch_size (int, optional): Number of input tokens to process together in a single batch during evaluation.
            flash_attention (bool, optional): Whether to optimize attention computation. Can decrease memory usage and improve speed.
            num_experts (int, optional): Number of experts to use during inference for MoE (Mixture of Experts) models.
            offload_kv_cache_to_gpu (bool, optional): Whether KV cache is offloaded to GPU memory.
            echo_load_config (bool, optional): If true, echoes the final load configuration in the response.

        Returns:
            Dict[str, Any]: Response featuring 'type', 'instance_id', 'load_time_seconds', 'status', and optionally 'load_config'.

        Raises:
            ValueError: If no model can be resolved.
        """
        payload: Dict[str, Any] = {
            "model": self._resolve_model(model),
            "echo_load_config": echo_load_config,
        }
        optional_fields = {
            "context_length": context_length,
            "eval_batch_size": eval_batch_size,
            "flash_attention": flash_attention,
            "num_experts": num_experts,
            "offload_kv_cache_to_gpu": offload_kv_cache_to_gpu,
        }
        payload.update({key: value for key, value in optional_fields.items() if value is not None})
        return self._request("POST", "/api/v1/models/load", json_data=payload)

    def unload_model(self, instance_id: str) -> Dict[str, Any]:
        """
        POST /api/v1/models/unload
        Unload a loaded model from memory.

        Args:
            instance_id (str): Unique identifier of the model instance to unload.

        Returns:
            Dict[str, Any]: Confirmation of the unloaded model 'instance_id'.
        """
        return self._request("POST", "/api/v1/models/unload", json_data={"instance_id": instance_id})

    def download_model(self, model: Optional[str] = None, quantization: Optional[str] = None) -> Dict[str, Any]:
        """
        POST /api/v1/models/download
        Download LLMs and embedding models.

        Args:
            model (str): The model to download. Accepts model catalog identifiers and exact Hugging Face links.
                Falls back to the LMSTD_MODEL environment variable when omitted.
            quantization (str, optional): Quantization level of the model to download (e.g. 'Q4_K_M'). Only supported for Hugging Face links.

        Returns:
            Dict[str, Any]: Returns a download job status object (e.g., 'job_id', 'status', 'total_size_bytes', 'started_at').

        Raises:
            ValueError: If no model can be resolved.
        """
        payload = {"model": self._resolve_model(model)}
        if quantization is not None:
            payload["quantization"] = quantization
        return self._request("POST", "/api/v1/models/download", json_data=payload)

    def get_download_status(self, job_id: str) -> Dict[str, Any]:
        """
        GET /api/v1/models/download/status/:job_id
        Get the status of model downloads.

        Args:
            job_id (str): The unique identifier of the download job.

        Returns:
            Dict[str, Any]: Download job status object including 'status', 'bytes_per_second', 'total_size_bytes',
                'downloaded_bytes', 'estimated_completion', etc.
        """
        from urllib.parse import quote

        # Percent-encode the id so characters such as '/', '?' or spaces cannot
        # change which path is requested.
        return self._request("GET", f"/api/v1/models/download/status/{quote(str(job_id), safe='')}")
302
+
303
+
304
# --- Basic Usage Verification Example ---
if __name__ == "__main__":
    # Smoke test against the default local server; the API token is read from
    # the LMSTD_APIKEY environment variable when set.
    client = LMStd(api_token=os.environ.get("LMSTD_APIKEY"))

    print("1. Listing system models...")
    try:
        models = client.list_models()
        print(json.dumps(models, indent=2))
    except Exception as error:
        # Top-level boundary of the demo: report the failure instead of a traceback.
        print(f"Server communication failed: {error}")

    # Wait for Enter before exiting. input() raises EOFError when stdin is
    # closed or not interactive (e.g. piped runs), so treat that as "continue".
    try:
        input()
    except EOFError:
        pass