lmstd 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2026 LM Studio User
3
+ Copyright (c) 2026 EMuVi (emuvi@outlook.com.br)
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lmstd
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: LM Studio v1 REST API Client Library
5
5
  Author: LM Studio User
6
- License: MIT
6
+ License-Expression: MIT
7
7
  Requires-Python: >=3.7
8
8
  Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
@@ -51,4 +51,6 @@ print(models)
51
51
 
52
52
  ## License
53
53
 
54
- MIT License
54
+ Copyright (c) 2026 EMuVi (emuvi@outlook.com.br)
55
+
56
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -39,4 +39,6 @@ print(models)
39
39
 
40
40
  ## License
41
41
 
42
- MIT License
42
+ Copyright (c) 2026 EMuVi (emuvi@outlook.com.br)
43
+
44
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lmstd
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: LM Studio v1 REST API Client Library
5
5
  Author: LM Studio User
6
- License: MIT
6
+ License-Expression: MIT
7
7
  Requires-Python: >=3.7
8
8
  Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
@@ -51,4 +51,6 @@ print(models)
51
51
 
52
52
  ## License
53
53
 
54
- MIT License
54
+ Copyright (c) 2026 EMuVi (emuvi@outlook.com.br)
55
+
56
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
lmstd-0.2.0/lmstd.py ADDED
@@ -0,0 +1,612 @@
1
+ # Copyright (c) 2026 EMuVi (emuvi@outlook.com.br)
2
+ # Licensed under the MIT License.
3
+
4
+ """
5
+ LM Studio v1 REST API Client Library
6
+
7
+ This single-file library provides a clean, fully documented Python interface
8
+ to interact with an LM Studio local server based on the v1 REST API endpoints.
9
+
10
+ Features supported natively via the v1 API:
11
+ - Stateful chats
12
+ - Model Context Protocol (MCP) integrations via API
13
+ - Authentication configuration with API tokens
14
+ - Advanced model lifecycle management (download, load, unload)
15
+
16
+ Dependencies:
17
+ requests
18
+ """
19
+
20
+ import json
21
+ import os
22
+ from typing import Any, Dict, Iterator, List, Optional, Union, Literal, TypedDict
23
+
24
+ class TextInput(TypedDict):
25
+ type: Literal["message"]
26
+ content: str
27
+
28
+ class ImageInput(TypedDict):
29
+ type: Literal["image"]
30
+ data_url: str
31
+
32
+ InputItem = Union[TextInput, ImageInput]
33
+
34
+ class PluginIntegrationBase(TypedDict):
35
+ type: Literal["plugin"]
36
+ id: str
37
+
38
+ class PluginIntegration(PluginIntegrationBase, total=False):
39
+ allowed_tools: List[str]
40
+
41
+ class EphemeralMCPIntegrationBase(TypedDict):
42
+ type: Literal["ephemeral_mcp"]
43
+ server_label: str
44
+ server_url: str
45
+
46
+ class EphemeralMCPIntegration(EphemeralMCPIntegrationBase, total=False):
47
+ allowed_tools: List[str]
48
+
49
+ Integration = Union[str, PluginIntegration, EphemeralMCPIntegration]
50
+
51
+ class ProviderInfoPlugin(TypedDict):
52
+ type: Literal["plugin"]
53
+ plugin_id: str
54
+
55
+ class ProviderInfoEphemeralMCP(TypedDict):
56
+ type: Literal["ephemeral_mcp"]
57
+ server_label: str
58
+
59
+ ProviderInfo = Union[ProviderInfoPlugin, ProviderInfoEphemeralMCP]
60
+
61
+ class MessageOutput(TypedDict):
62
+ type: Literal["message"]
63
+ content: str
64
+
65
+ class ToolCallOutput(TypedDict, total=False):
66
+ type: Literal["tool_call"]
67
+ tool: str
68
+ arguments: Dict[str, Any]
69
+ output: str
70
+ provider_info: ProviderInfo
71
+
72
+ class ReasoningOutput(TypedDict):
73
+ type: Literal["reasoning"]
74
+ content: str
75
+
76
+ class InvalidToolCallMetadata(TypedDict, total=False):
77
+ type: Literal["invalid_name", "invalid_arguments"]
78
+ tool_name: str
79
+ arguments: Dict[str, Any]
80
+ provider_info: ProviderInfo
81
+
82
+ class InvalidToolCallOutput(TypedDict, total=False):
83
+ type: Literal["invalid_tool_call"]
84
+ reason: str
85
+ metadata: InvalidToolCallMetadata
86
+
87
+ OutputItem = Union[MessageOutput, ToolCallOutput, ReasoningOutput, InvalidToolCallOutput]
88
+
89
+ class ChatStats(TypedDict, total=False):
90
+ input_tokens: int
91
+ total_output_tokens: int
92
+ reasoning_output_tokens: int
93
+ tokens_per_second: float
94
+ time_to_first_token_seconds: float
95
+ model_load_time_seconds: float
96
+
97
+ class ChatResponse(TypedDict, total=False):
98
+ model_instance_id: str
99
+ output: List[OutputItem]
100
+ stats: ChatStats
101
+ response_id: str
102
+
103
+ class ChatStartEvent(TypedDict):
104
+ type: Literal["chat.start"]
105
+ model_instance_id: str
106
+
107
+ class ModelLoadStartEvent(TypedDict):
108
+ type: Literal["model_load.start"]
109
+ model_instance_id: str
110
+
111
+ class ModelLoadProgressEvent(TypedDict):
112
+ type: Literal["model_load.progress"]
113
+ model_instance_id: str
114
+ progress: float
115
+
116
+ class ModelLoadEndEvent(TypedDict):
117
+ type: Literal["model_load.end"]
118
+ model_instance_id: str
119
+ load_time_seconds: float
120
+
121
+ class PromptProcessingStartEvent(TypedDict):
122
+ type: Literal["prompt_processing.start"]
123
+
124
+ class PromptProcessingProgressEvent(TypedDict):
125
+ type: Literal["prompt_processing.progress"]
126
+ progress: float
127
+
128
+ class PromptProcessingEndEvent(TypedDict):
129
+ type: Literal["prompt_processing.end"]
130
+
131
+ class ReasoningStartEvent(TypedDict):
132
+ type: Literal["reasoning.start"]
133
+
134
+ class ReasoningDeltaEvent(TypedDict):
135
+ type: Literal["reasoning.delta"]
136
+ content: str
137
+
138
+ class ReasoningEndEvent(TypedDict):
139
+ type: Literal["reasoning.end"]
140
+
141
+ class ToolCallStartEvent(TypedDict):
142
+ type: Literal["tool_call.start"]
143
+ tool: str
144
+ provider_info: ProviderInfo
145
+
146
+ class ToolCallArgumentsEvent(TypedDict):
147
+ type: Literal["tool_call.arguments"]
148
+ tool: str
149
+ arguments: Dict[str, Any]
150
+ provider_info: ProviderInfo
151
+
152
+ class ToolCallSuccessEvent(TypedDict):
153
+ type: Literal["tool_call.success"]
154
+ tool: str
155
+ arguments: Dict[str, Any]
156
+ output: str
157
+ provider_info: ProviderInfo
158
+
159
+ class ToolCallFailureEvent(TypedDict, total=False):
160
+ type: Literal["tool_call.failure"]
161
+ reason: str
162
+ metadata: InvalidToolCallMetadata
163
+
164
+ class MessageStartEvent(TypedDict):
165
+ type: Literal["message.start"]
166
+
167
+ class MessageDeltaEvent(TypedDict):
168
+ type: Literal["message.delta"]
169
+ content: str
170
+
171
+ class MessageEndEvent(TypedDict):
172
+ type: Literal["message.end"]
173
+
174
+ class ErrorInfo(TypedDict, total=False):
175
+ type: Literal["invalid_request", "unknown", "mcp_connection_error", "plugin_connection_error", "not_implemented", "model_not_found", "job_not_found", "internal_error"]
176
+ message: str
177
+ code: str
178
+ param: str
179
+
180
+ class ErrorEvent(TypedDict):
181
+ type: Literal["error"]
182
+ error: ErrorInfo
183
+
184
+ class ChatEndEvent(TypedDict):
185
+ type: Literal["chat.end"]
186
+ result: ChatResponse
187
+
188
+ ChatStreamEvent = Union[
189
+ ChatStartEvent,
190
+ ModelLoadStartEvent,
191
+ ModelLoadProgressEvent,
192
+ ModelLoadEndEvent,
193
+ PromptProcessingStartEvent,
194
+ PromptProcessingProgressEvent,
195
+ PromptProcessingEndEvent,
196
+ ReasoningStartEvent,
197
+ ReasoningDeltaEvent,
198
+ ReasoningEndEvent,
199
+ ToolCallStartEvent,
200
+ ToolCallArgumentsEvent,
201
+ ToolCallSuccessEvent,
202
+ ToolCallFailureEvent,
203
+ MessageStartEvent,
204
+ MessageDeltaEvent,
205
+ MessageEndEvent,
206
+ ErrorEvent,
207
+ ChatEndEvent
208
+ ]
209
+
210
+ class ModelQuantization(TypedDict, total=False):
211
+ name: Optional[str]
212
+ bits_per_weight: Optional[int]
213
+
214
+ class ModelConfig(TypedDict, total=False):
215
+ context_length: int
216
+ eval_batch_size: int
217
+ parallel: int
218
+ flash_attention: bool
219
+ num_experts: int
220
+ offload_kv_cache_to_gpu: bool
221
+
222
+ class LoadedInstance(TypedDict):
223
+ id: str
224
+ config: ModelConfig
225
+
226
+ class ModelCapabilities(TypedDict, total=False):
227
+ vision: bool
228
+ trained_for_tool_use: bool
229
+
230
+ class ModelReasoning(TypedDict):
231
+ allowed_options: List[Literal["off", "on", "low", "medium", "high"]]
232
+ default: Literal["off", "on", "low", "medium", "high"]
233
+
234
+ class ModelInfo(TypedDict, total=False):
235
+ type: Literal["llm", "embedding"]
236
+ publisher: str
237
+ key: str
238
+ display_name: str
239
+ architecture: Optional[str]
240
+ quantization: Optional[ModelQuantization]
241
+ size_bytes: int
242
+ params_string: Optional[str]
243
+ loaded_instances: List[LoadedInstance]
244
+ max_context_length: int
245
+ format: Optional[Literal["gguf", "mlx"]]
246
+ capabilities: ModelCapabilities
247
+ reasoning: ModelReasoning
248
+ description: Optional[str]
249
+ variants: List[str]
250
+ selected_variant: str
251
+
252
+ class ListModelsResponse(TypedDict):
253
+ models: List[ModelInfo]
254
+
255
+ class LoadConfig(TypedDict, total=False):
256
+ context_length: int
257
+ eval_batch_size: int
258
+ flash_attention: bool
259
+ num_experts: int
260
+ offload_kv_cache_to_gpu: bool
261
+
262
+ class LoadModelResponse(TypedDict, total=False):
263
+ type: Literal["llm", "embedding"]
264
+ instance_id: str
265
+ load_time_seconds: float
266
+ status: Literal["loaded"]
267
+ load_config: LoadConfig
268
+
269
+ class UnloadModelResponse(TypedDict):
270
+ instance_id: str
271
+
272
+ class DownloadStatusResponse(TypedDict, total=False):
273
+ job_id: str
274
+ status: Literal["downloading", "paused", "completed", "failed", "already_downloaded"]
275
+ bytes_per_second: int
276
+ estimated_completion: str
277
+ completed_at: str
278
+ total_size_bytes: int
279
+ downloaded_bytes: int
280
+ started_at: str
281
+
282
+
283
+
284
+ class LMStdError(Exception):
285
+ """Exception raised for errors returned by the LM Studio API."""
286
+ def __init__(self, status_code: int, response_text: str):
287
+ self.status_code = status_code
288
+ self.response_text = response_text
289
+ super().__init__(f"API Error {status_code}: {response_text}")
290
+
291
+
292
+ class LMStd:
293
+ """
294
+ A client library for interacting with LM Studio's native v1 REST API.
295
+ """
296
+
297
+ def __init__(self, base_url: str = "http://localhost:1234", api_token: Optional[str] = None):
298
+ """
299
+ Initializes the LM Studio API Client.
300
+
301
+ Args:
302
+ base_url (str): The base URL where your LM Studio local server is running.
303
+ By default, the server is available at http://localhost:1234.
304
+ api_token (str, optional): The LM_API_TOKEN authorization bearer token if required.
305
+ Passed as an Authorization header.
306
+ """
307
+ import requests
308
+ self.base_url = base_url.rstrip('/')
309
+ self.session = requests.Session()
310
+
311
+ self.session.headers.update({
312
+ "Content-Type": "application/json"
313
+ })
314
+ if api_token:
315
+ self.session.headers.update({
316
+ "Authorization": f"Bearer {api_token}"
317
+ })
318
+
319
+ def _request(self, method: str, endpoint: str, json_data: Optional[Dict[str, Any]] = None) -> Any:
320
+ """Internal helper to process HTTP requests cleanly."""
321
+ url = f"{self.base_url}{endpoint}"
322
+ try:
323
+ response = self.session.request(method=method, url=url, json=json_data)
324
+ if response.status_code not in (200, 201):
325
+ raise LMStdError(response.status_code, response.text)
326
+ return response.json()
327
+ except Exception as e:
328
+ if isinstance(e, LMStdError):
329
+ raise e
330
+ raise RuntimeError(f"Failed to connect or process request to {url}: {e}")
331
+
332
+ def chat(
333
+ self,
334
+ model: Optional[str] = None,
335
+ input_data: Optional[Union[str, List[InputItem]]] = None,
336
+ system_prompt: Optional[str] = None,
337
+ integrations: Optional[List[Integration]] = None,
338
+ headers: Optional[Dict[str, str]] = None,
339
+ temperature: Optional[float] = None,
340
+ top_p: Optional[float] = None,
341
+ top_k: Optional[int] = None,
342
+ min_p: Optional[float] = None,
343
+ repeat_penalty: Optional[float] = None,
344
+ max_output_tokens: Optional[int] = None,
345
+ reasoning: Optional[Literal["off", "low", "medium", "high", "on"]] = None,
346
+ context_length: Optional[int] = None,
347
+ store: Optional[bool] = True,
348
+ previous_response_id: Optional[str] = None
349
+ ) -> ChatResponse:
350
+ """
351
+ POST /api/v1/chat
352
+ Send a message to a model and receive a full response.
353
+ The /api/v1/chat endpoint is stateful by default, storing and managing context automatically.
354
+
355
+ Args:
356
+ model (str): Unique identifier for the model to use.
357
+ input_data (str or list): Text message string or an array of input items (messages/images).
358
+ Images can be passed using 'type': 'image' and 'data_url'.
359
+ system_prompt (str, optional): System message that sets model behavior or instructions.
360
+ integrations (list, optional): List of integrations (plugins, ephemeral MCP servers) to enable for this request.
361
+ headers (dict, optional): Custom HTTP headers to send with requests to the server.
362
+ temperature (float, optional): Randomness in token selection (0 is deterministic, [0,1]).
363
+ top_p (float, optional): Minimum cumulative probability for the possible next tokens [0,1].
364
+ top_k (int, optional): Limits next token selection to top-k most probable tokens.
365
+ min_p (float, optional): Minimum base probability for a token to be selected for output [0,1].
366
+ repeat_penalty (float, optional): Penalty for repeating token sequences. 1 is no penalty.
367
+ max_output_tokens (int, optional): Maximum number of tokens to generate.
368
+ reasoning (str, optional): Reasoning setting ('off', 'low', 'medium', 'high', 'on').
369
+ context_length (int, optional): Number of tokens to consider as context. Higher values recommended for MCP usage.
370
+ store (bool, optional): Whether to store the chat. If set to true, response will return a 'response_id' field.
371
+ previous_response_id (str, optional): Identifier of existing response to append to. Must start with "resp_".
372
+
373
+ Returns:
374
+ ChatResponse: Response fields containing 'model_instance_id', an 'output' array (messages, tool_calls, reasoning),
375
+ 'stats' (token usage/metrics), and an optional 'response_id'.
376
+ """
377
+ model = model or os.environ.get("LMSTD_MODEL")
378
+ if not model:
379
+ raise ValueError("Model must be provided or set via the LMSTD_MODEL environment variable.")
380
+ if input_data is None:
381
+ raise ValueError("input_data must be provided.")
382
+
383
+ payload = {
384
+ "model": model,
385
+ "input": input_data,
386
+ "stream": False,
387
+ "store": store
388
+ }
389
+
390
+ if system_prompt is not None: payload["system_prompt"] = system_prompt
391
+ if integrations is not None: payload["integrations"] = integrations
392
+ if headers is not None: payload["headers"] = headers
393
+ if temperature is not None: payload["temperature"] = temperature
394
+ if top_p is not None: payload["top_p"] = top_p
395
+ if top_k is not None: payload["top_k"] = top_k
396
+ if min_p is not None: payload["min_p"] = min_p
397
+ if repeat_penalty is not None: payload["repeat_penalty"] = repeat_penalty
398
+ if max_output_tokens is not None: payload["max_output_tokens"] = max_output_tokens
399
+ if reasoning is not None: payload["reasoning"] = reasoning
400
+ if context_length is not None: payload["context_length"] = context_length
401
+ if previous_response_id is not None: payload["previous_response_id"] = previous_response_id
402
+
403
+ return self._request("POST", "/api/v1/chat", json_data=payload)
404
+
405
+ def chat_stream(
406
+ self,
407
+ model: Optional[str] = None,
408
+ input_data: Optional[Union[str, List[InputItem]]] = None,
409
+ system_prompt: Optional[str] = None,
410
+ integrations: Optional[List[Integration]] = None,
411
+ headers: Optional[Dict[str, str]] = None,
412
+ temperature: Optional[float] = None,
413
+ top_p: Optional[float] = None,
414
+ top_k: Optional[int] = None,
415
+ min_p: Optional[float] = None,
416
+ repeat_penalty: Optional[float] = None,
417
+ max_output_tokens: Optional[int] = None,
418
+ reasoning: Optional[Literal["off", "low", "medium", "high", "on"]] = None,
419
+ context_length: Optional[int] = None,
420
+ store: Optional[bool] = True,
421
+ previous_response_id: Optional[str] = None
422
+ ) -> Iterator[ChatStreamEvent]:
423
+ """
424
+ POST /api/v1/chat (Streaming)
425
+ Send a message to a model with `stream` set to true. The response is sent as a stream of events using Server-Sent Events (SSE).
426
+
427
+ Args:
428
+ model (str, optional): Unique identifier for the model to use. Can be an LLM or embedding model.
429
+ input_data (str | List[InputItem], optional): Message to send to the model. Text message string or an array of InputItem objects.
430
+ system_prompt (str, optional): System message that sets model behavior or instructions.
431
+ integrations (List[Integration], optional): List of integrations (plugins, ephemeral MCP servers, etc...) to enable for this request.
432
+ headers (dict, optional): Custom HTTP headers to send with requests to the server.
433
+ temperature (float, optional): Randomness in token selection. 0 is deterministic, higher values increase creativity [0,1].
434
+ top_p (float, optional): Minimum cumulative probability for the possible next tokens [0,1].
435
+ top_k (int, optional): Limits next token selection to top-k most probable tokens.
436
+ min_p (float, optional): Minimum base probability for a token to be selected for output [0,1].
437
+ repeat_penalty (float, optional): Penalty for repeating token sequences. 1 is no penalty, higher values discourage repetition.
438
+ max_output_tokens (int, optional): Maximum number of tokens to generate.
439
+ reasoning (Literal["off", "low", "medium", "high", "on"], optional): Reasoning setting. Will error if the model being used does not support the requested reasoning setting. Defaults to the automatically chosen setting for the model.
440
+ context_length (int, optional): Number of tokens to consider as context. Higher values recommended for MCP usage.
441
+ store (bool, optional): Whether to store the chat. If set, response will return a 'response_id' field. Default true.
442
+ previous_response_id (str, optional): Identifier of existing response to append to. Must start with "resp_".
443
+
444
+ Yields:
445
+ ChatStreamEvent: Parsed JSON objects corresponding to streaming events. Events arrive in order and may include multiple deltas.
446
+ Events: 'chat.start', 'model_load.start', 'model_load.progress', 'model_load.end',
447
+ 'prompt_processing.start', 'prompt_processing.progress', 'prompt_processing.end',
448
+ 'reasoning.start', 'reasoning.delta', 'reasoning.end',
449
+ 'tool_call.start', 'tool_call.arguments', 'tool_call.success', 'tool_call.failure',
450
+ 'message.start', 'message.delta', 'message.end', 'error', 'chat.end'.
451
+ """
452
+ model = model or os.environ.get("LMSTD_MODEL")
453
+ if not model:
454
+ raise ValueError("Model must be provided or set via the LMSTD_MODEL environment variable.")
455
+ if input_data is None:
456
+ raise ValueError("input_data must be provided.")
457
+
458
+ url = f"{self.base_url}/api/v1/chat"
459
+ payload = {
460
+ "model": model,
461
+ "input": input_data,
462
+ "stream": True,
463
+ "store": store
464
+ }
465
+
466
+ if system_prompt is not None: payload["system_prompt"] = system_prompt
467
+ if integrations is not None: payload["integrations"] = integrations
468
+ if headers is not None: payload["headers"] = headers
469
+ if temperature is not None: payload["temperature"] = temperature
470
+ if top_p is not None: payload["top_p"] = top_p
471
+ if top_k is not None: payload["top_k"] = top_k
472
+ if min_p is not None: payload["min_p"] = min_p
473
+ if repeat_penalty is not None: payload["repeat_penalty"] = repeat_penalty
474
+ if max_output_tokens is not None: payload["max_output_tokens"] = max_output_tokens
475
+ if reasoning is not None: payload["reasoning"] = reasoning
476
+ if context_length is not None: payload["context_length"] = context_length
477
+ if previous_response_id is not None: payload["previous_response_id"] = previous_response_id
478
+
479
+ try:
480
+ response = self.session.post(url, json=payload, stream=True)
481
+ if response.status_code not in (200, 201):
482
+ raise LMStdError(response.status_code, response.text)
483
+
484
+ for line in response.iter_lines():
485
+ if line:
486
+ decoded_line = line.decode('utf-8')
487
+ if decoded_line.startswith('data: '):
488
+ data_str = decoded_line[6:].strip()
489
+ if data_str:
490
+ yield json.loads(data_str)
491
+
492
+ except Exception as e:
493
+ if isinstance(e, LMStdError):
494
+ raise e
495
+ raise RuntimeError(f"Failed to connect or stream request to {url}: {e}")
496
+
497
+ def list_models(self) -> ListModelsResponse:
498
+ """
499
+ GET /api/v1/models
500
+ Get a list of available models on your system, including both LLMs and embedding models.
501
+
502
+ Returns:
503
+ ListModelsResponse: JSON object containing a list of available models, their configs (context_length,
504
+ architecture, format), and currently loaded instances.
505
+ """
506
+ return self._request("GET", "/api/v1/models")
507
+
508
+ def load_model(
509
+ self,
510
+ model: Optional[str] = None,
511
+ context_length: Optional[int] = None,
512
+ eval_batch_size: Optional[int] = None,
513
+ flash_attention: Optional[bool] = None,
514
+ num_experts: Optional[int] = None,
515
+ offload_kv_cache_to_gpu: Optional[bool] = None,
516
+ echo_load_config: Optional[bool] = False
517
+ ) -> LoadModelResponse:
518
+ """
519
+ POST /api/v1/models/load
520
+ Load an LLM or Embedding model into memory with custom configuration for inference.
521
+
522
+ Args:
523
+ model (str): Unique identifier for the model to load.
524
+ context_length (int, optional): Maximum number of tokens that the model will consider.
525
+ eval_batch_size (int, optional): Number of input tokens to process together in a single batch during evaluation.
526
+ flash_attention (bool, optional): Whether to optimize attention computation. Can decrease memory usage and improve speed.
527
+ num_experts (int, optional): Number of experts to use during inference for MoE (Mixture of Experts) models.
528
+ offload_kv_cache_to_gpu (bool, optional): Whether KV cache is offloaded to GPU memory.
529
+ echo_load_config (bool, optional): If true, echoes the final load configuration in the response.
530
+
531
+ Returns:
532
+ LoadModelResponse: Response featuring 'type', 'instance_id', 'load_time_seconds', 'status', and optionally 'load_config'.
533
+ """
534
+ model = model or os.environ.get("LMSTD_MODEL")
535
+ if not model:
536
+ raise ValueError("Model must be provided or set via the LMSTD_MODEL environment variable.")
537
+
538
+ payload = {
539
+ "model": model,
540
+ "echo_load_config": echo_load_config
541
+ }
542
+ if context_length is not None: payload["context_length"] = context_length
543
+ if eval_batch_size is not None: payload["eval_batch_size"] = eval_batch_size
544
+ if flash_attention is not None: payload["flash_attention"] = flash_attention
545
+ if num_experts is not None: payload["num_experts"] = num_experts
546
+ if offload_kv_cache_to_gpu is not None: payload["offload_kv_cache_to_gpu"] = offload_kv_cache_to_gpu
547
+
548
+ return self._request("POST", "/api/v1/models/load", json_data=payload)
549
+
550
+ def unload_model(self, instance_id: str) -> UnloadModelResponse:
551
+ """
552
+ POST /api/v1/models/unload
553
+ Unload a loaded model from memory.
554
+
555
+ Args:
556
+ instance_id (str): Unique identifier of the model instance to unload.
557
+
558
+ Returns:
559
+ UnloadModelResponse: Response containing the 'instance_id' of the unloaded model instance.
560
+ """
561
+ payload = {"instance_id": instance_id}
562
+ return self._request("POST", "/api/v1/models/unload", json_data=payload)
563
+
564
+ def download_model(self, model: Optional[str] = None, quantization: Optional[str] = None) -> DownloadStatusResponse:
565
+ """
566
+ POST /api/v1/models/download
567
+ Download LLMs and embedding models.
568
+
569
+ Args:
570
+ model (str, optional): The model to download. Accepts model catalog identifiers (e.g., openai/gpt-oss-20b) and exact Hugging Face links (e.g., https://huggingface.co/lmstudio-community/gpt-oss-20b-GGUF).
571
+ quantization (str, optional): Quantization level of the model to download (e.g., Q4_K_M). Only supported for Hugging Face links.
572
+
573
+ Returns:
574
+ DownloadStatusResponse: Returns a download job status object. The response varies based on the download status ('downloading', 'paused', 'completed', 'failed', 'already_downloaded').
575
+ """
576
+ model = model or os.environ.get("LMSTD_MODEL")
577
+ if not model:
578
+ raise ValueError("Model must be provided or set via the LMSTD_MODEL environment variable.")
579
+
580
+ payload = {"model": model}
581
+ if quantization is not None:
582
+ payload["quantization"] = quantization
583
+ return self._request("POST", "/api/v1/models/download", json_data=payload)
584
+
585
+ def get_download_status(self, job_id: str) -> DownloadStatusResponse:
586
+ """
587
+ GET /api/v1/models/download/status/:job_id
588
+ Get the status of model downloads.
589
+
590
+ Args:
591
+ job_id (str): The unique identifier of the download job.
592
+
593
+ Returns:
594
+ DownloadStatusResponse: Download job status object including 'status', 'bytes_per_second', 'total_size_bytes',
595
+ 'downloaded_bytes', 'estimated_completion', etc.
596
+ """
597
+ return self._request("GET", f"/api/v1/models/download/status/{job_id}")
598
+
599
+
600
+ # --- Basic Usage Verification Example ---
601
+ if __name__ == "__main__":
602
+ client = LMStd(api_token=os.environ.get("LMSTD_APIKEY"))
603
+
604
+ print("1. Listing system models...")
605
+ try:
606
+ models = client.list_models()
607
+ print(json.dumps(models, indent=2))
608
+
609
+ except Exception as error:
610
+ print(f"Server communication failed: {error}")
611
+
612
+ input()
@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "lmstd"
7
- version = "0.1.0"
7
+ version = "0.2.0"
8
8
  description = "LM Studio v1 REST API Client Library"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.7"
11
- license = {text = "MIT"}
11
+ license = "MIT"
12
12
  authors = [
13
13
  { name = "LM Studio User" }
14
14
  ]
lmstd-0.1.0/lmstd.py DELETED
@@ -1,316 +0,0 @@
1
- """
2
- LM Studio v1 REST API Client Library
3
-
4
- This single-file library provides a clean, fully documented Python interface
5
- to interact with an LM Studio local server based on the v1 REST API endpoints.
6
-
7
- Features supported natively via the v1 API:
8
- - Stateful chats
9
- - Model Context Protocol (MCP) integrations via API
10
- - Authentication configuration with API tokens
11
- - Advanced model lifecycle management (download, load, unload)
12
-
13
- Dependencies:
14
- requests
15
- """
16
-
17
- import json
18
- import os
19
- from typing import Any, Dict, Iterator, List, Optional, Union
20
-
21
-
22
- class LMStdError(Exception):
23
- """Exception raised for errors returned by the LM Studio API."""
24
- def __init__(self, status_code: int, response_text: str):
25
- self.status_code = status_code
26
- self.response_text = response_text
27
- super().__init__(f"API Error {status_code}: {response_text}")
28
-
29
-
30
- class LMStd:
31
- """
32
- A client library for interacting with LM Studio's native v1 REST API.
33
- """
34
-
35
- def __init__(self, base_url: str = "http://localhost:1234", api_token: Optional[str] = None):
36
- """
37
- Initializes the LM Studio API Client.
38
-
39
- Args:
40
- base_url (str): The base URL where your LM Studio local server is running.
41
- By default, the server is available at http://localhost:1234.
42
- api_token (str, optional): The LM_API_TOKEN authorization bearer token if required.
43
- Passed as an Authorization header[cite: 57, 58].
44
- """
45
- import requests
46
- self.base_url = base_url.rstrip('/')
47
- self.session = requests.Session()
48
-
49
- self.session.headers.update({
50
- "Content-Type": "application/json"
51
- })
52
- if api_token:
53
- self.session.headers.update({
54
- "Authorization": f"Bearer {api_token}"
55
- })
56
-
57
- def _request(self, method: str, endpoint: str, json_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
58
- """Internal helper to process HTTP requests cleanly."""
59
- url = f"{self.base_url}{endpoint}"
60
- try:
61
- response = self.session.request(method=method, url=url, json=json_data)
62
- if response.status_code not in (200, 201):
63
- raise LMStdError(response.status_code, response.text)
64
- return response.json()
65
- except Exception as e:
66
- if isinstance(e, LMStdError):
67
- raise e
68
- raise RuntimeError(f"Failed to connect or process request to {url}: {e}")
69
-
70
- def chat(
71
- self,
72
- model: Optional[str] = None,
73
- input_data: Optional[Union[str, List[Dict[str, Any]]]] = None,
74
- system_prompt: Optional[str] = None,
75
- integrations: Optional[List[Union[str, Dict[str, Any]]]] = None,
76
- headers: Optional[Dict[str, str]] = None,
77
- temperature: Optional[float] = None,
78
- top_p: Optional[float] = None,
79
- top_k: Optional[int] = None,
80
- min_p: Optional[float] = None,
81
- repeat_penalty: Optional[float] = None,
82
- max_output_tokens: Optional[int] = None,
83
- reasoning: Optional[str] = None,
84
- context_length: Optional[int] = None,
85
- store: bool = True,
86
- previous_response_id: Optional[str] = None
87
- ) -> Dict[str, Any]:
88
- """
89
- POST /api/v1/chat
90
- Send a message to a model and receive a full response.
91
- The /api/v1/chat endpoint is stateful by default, storing and managing context automatically.
92
-
93
- Args:
94
- model (str): Unique identifier for the model to use.
95
- input_data (str or list): Text message string or an array of input items (messages/images).
96
- Images can be passed using 'type': 'image' and 'data_url'[cite: 642, 656, 660].
97
- system_prompt (str, optional): System message that sets model behavior or instructions.
98
- integrations (list, optional): List of integrations (plugins, ephemeral MCP servers) to enable for this request.
99
- headers (dict, optional): Custom HTTP headers to send with requests to the server.
100
- temperature (float, optional): Randomness in token selection (0 is deterministic, [0,1]).
101
- top_p (float, optional): Minimum cumulative probability for the possible next tokens [0,1].
102
- top_k (int, optional): Limits next token selection to top-k most probable tokens.
103
- min_p (float, optional): Minimum base probability for a token to be selected for output [0,1].
104
- repeat_penalty (float, optional): Penalty for repeating token sequences. 1 is no penalty.
105
- max_output_tokens (int, optional): Maximum number of tokens to generate.
106
- reasoning (str, optional): Reasoning setting ('off', 'low', 'medium', 'high', 'on').
107
- context_length (int, optional): Number of tokens to consider as context. Higher values recommended for MCP usage.
108
- store (bool, optional): Whether to store the chat. If set to true, response will return a 'response_id' field.
109
- previous_response_id (str, optional): Identifier of existing response to append to. Must start with "resp_".
110
-
111
- Returns:
112
- Dict[str, Any]: Response fields containing 'model_instance_id', an 'output' array (messages, tool_calls, reasoning),
113
- 'stats' (token usage/metrics), and an optional 'response_id'[cite: 753, 756, 804, 837].
114
- """
115
- model = model or os.environ.get("LMSTD_MODEL")
116
- if not model:
117
- raise ValueError("Model must be provided or set via the LMSTD_MODEL environment variable.")
118
- if input_data is None:
119
- raise ValueError("input_data must be provided.")
120
-
121
- payload = {
122
- "model": model,
123
- "input": input_data,
124
- "stream": False,
125
- "store": store
126
- }
127
-
128
- if system_prompt is not None: payload["system_prompt"] = system_prompt
129
- if integrations is not None: payload["integrations"] = integrations
130
- if headers is not None: payload["headers"] = headers
131
- if temperature is not None: payload["temperature"] = temperature
132
- if top_p is not None: payload["top_p"] = top_p
133
- if top_k is not None: payload["top_k"] = top_k
134
- if min_p is not None: payload["min_p"] = min_p
135
- if repeat_penalty is not None: payload["repeat_penalty"] = repeat_penalty
136
- if max_output_tokens is not None: payload["max_output_tokens"] = max_output_tokens
137
- if reasoning is not None: payload["reasoning"] = reasoning
138
- if context_length is not None: payload["context_length"] = context_length
139
- if previous_response_id is not None: payload["previous_response_id"] = previous_response_id
140
-
141
- return self._request("POST", "/api/v1/chat", json_data=payload)
142
-
143
- def chat_stream(
144
- self,
145
- model: Optional[str] = None,
146
- input_data: Optional[Union[str, List[Dict[str, Any]]]] = None,
147
- **kwargs
148
- ) -> Iterator[Dict[str, Any]]:
149
- """
150
- POST /api/v1/chat (Streaming)
151
- Send a message to a model with `stream` set to true. The response is sent as a stream of events using Server-Sent Events (SSE).
152
-
153
- Args:
154
- model (str): Unique identifier for the model to use.
155
- input_data (str or list): Text message string or an array of input items.
156
- **kwargs: Additional parameters matching the `chat` function (e.g., system_prompt, integrations, store, temperature, etc.).
157
-
158
- Yields:
159
- Dict[str, Any]: Parsed JSON objects corresponding to streaming events. Events arrive in order and include:
160
- 'chat.start', 'model_load.*', 'prompt_processing.*', 'reasoning.*', 'tool_call.*', 'message.*',
161
- 'error', and finally 'chat.end'[cite: 211, 216, 217, 220, 227, 238].
162
- """
163
- model = model or os.environ.get("LMSTD_MODEL")
164
- if not model:
165
- raise ValueError("Model must be provided or set via the LMSTD_MODEL environment variable.")
166
- if input_data is None:
167
- raise ValueError("input_data must be provided.")
168
-
169
- url = f"{self.base_url}/api/v1/chat"
170
- payload = {
171
- "model": model,
172
- "input": input_data,
173
- "stream": True,
174
- "store": kwargs.get("store", True)
175
- }
176
-
177
- for key in ["system_prompt", "integrations", "headers", "temperature", "top_p", "top_k",
178
- "min_p", "repeat_penalty", "max_output_tokens", "reasoning",
179
- "context_length", "previous_response_id"]:
180
- if key in kwargs and kwargs[key] is not None:
181
- payload[key] = kwargs[key]
182
-
183
- try:
184
- response = self.session.post(url, json=payload, stream=True)
185
- if response.status_code not in (200, 201):
186
- raise LMStdError(response.status_code, response.text)
187
-
188
- for line in response.iter_lines():
189
- if line:
190
- decoded_line = line.decode('utf-8')
191
- if decoded_line.startswith('data: '):
192
- data_str = decoded_line[6:].strip()
193
- if data_str:
194
- yield json.loads(data_str)
195
-
196
- except Exception as e:
197
- if isinstance(e, LMStdError):
198
- raise e
199
- raise RuntimeError(f"Failed to connect or stream request to {url}: {e}")
200
-
201
- def list_models(self) -> Dict[str, Any]:
202
- """
203
- GET /api/v1/models
204
- Get a list of available models on your system, including both LLMs and embedding models.
205
-
206
- Returns:
207
- Dict[str, Any]: JSON object containing a list of available models, their configs (context_length,
208
- architecture, format), and currently loaded instances[cite: 1102, 1118, 1130, 1134].
209
- """
210
- return self._request("GET", "/api/v1/models")
211
-
212
- def load_model(
213
- self,
214
- model: Optional[str] = None,
215
- context_length: Optional[int] = None,
216
- eval_batch_size: Optional[int] = None,
217
- flash_attention: Optional[bool] = None,
218
- num_experts: Optional[int] = None,
219
- offload_kv_cache_to_gpu: Optional[bool] = None,
220
- echo_load_config: bool = False
221
- ) -> Dict[str, Any]:
222
- """
223
- POST /api/v1/models/load
224
- Load an LLM or Embedding model into memory with custom configuration for inference.
225
-
226
- Args:
227
- model (str): Unique identifier for the model to load.
228
- context_length (int, optional): Maximum number of tokens that the model will consider.
229
- eval_batch_size (int, optional): Number of input tokens to process together in a single batch during evaluation.
230
- flash_attention (bool, optional): Whether to optimize attention computation. Can decrease memory usage and improve speed.
231
- num_experts (int, optional): Number of experts to use during inference for MoE (Mixture of Experts) models.
232
- offload_kv_cache_to_gpu (bool, optional): Whether KV cache is offloaded to GPU memory.
233
- echo_load_config (bool, optional): If true, echoes the final load configuration in the response.
234
-
235
- Returns:
236
- Dict[str, Any]: Response featuring 'type', 'instance_id', 'load_time_seconds', 'status', and optionally 'load_config'[cite: 1251, 1261].
237
- """
238
- model = model or os.environ.get("LMSTD_MODEL")
239
- if not model:
240
- raise ValueError("Model must be provided or set via the LMSTD_MODEL environment variable.")
241
-
242
- payload = {
243
- "model": model,
244
- "echo_load_config": echo_load_config
245
- }
246
- if context_length is not None: payload["context_length"] = context_length
247
- if eval_batch_size is not None: payload["eval_batch_size"] = eval_batch_size
248
- if flash_attention is not None: payload["flash_attention"] = flash_attention
249
- if num_experts is not None: payload["num_experts"] = num_experts
250
- if offload_kv_cache_to_gpu is not None: payload["offload_kv_cache_to_gpu"] = offload_kv_cache_to_gpu
251
-
252
- return self._request("POST", "/api/v1/models/load", json_data=payload)
253
-
254
- def unload_model(self, instance_id: str) -> Dict[str, Any]:
255
- """
256
- POST /api/v1/models/unload
257
- Unload a loaded model from memory.
258
-
259
- Args:
260
- instance_id (str): Unique identifier of the model instance to unload.
261
-
262
- Returns:
263
- Dict[str, Any]: Confirmation of the unloaded model 'instance_id'.
264
- """
265
- payload = {"instance_id": instance_id}
266
- return self._request("POST", "/api/v1/models/unload", json_data=payload)
267
-
268
- def download_model(self, model: Optional[str] = None, quantization: Optional[str] = None) -> Dict[str, Any]:
269
- """
270
- POST /api/v1/models/download
271
- Download LLMs and embedding models.
272
-
273
- Args:
274
- model (str): The model to download. Accepts model catalog identifiers and exact Hugging Face links.
275
- quantization (str, optional): Quantization level of the model to download (e.g. 'Q4_K_M'). Only supported for Hugging Face links.
276
-
277
- Returns:
278
- Dict[str, Any]: Returns a download job status object (e.g., 'job_id', 'status', 'total_size_bytes', 'started_at')[cite: 1321, 1333, 1335].
279
- """
280
- model = model or os.environ.get("LMSTD_MODEL")
281
- if not model:
282
- raise ValueError("Model must be provided or set via the LMSTD_MODEL environment variable.")
283
-
284
- payload = {"model": model}
285
- if quantization is not None:
286
- payload["quantization"] = quantization
287
- return self._request("POST", "/api/v1/models/download", json_data=payload)
288
-
289
- def get_download_status(self, job_id: str) -> Dict[str, Any]:
290
- """
291
- GET /api/v1/models/download/status/:job_id
292
- Get the status of model downloads.
293
-
294
- Args:
295
- job_id (str): The unique identifier of the download job.
296
-
297
- Returns:
298
- Dict[str, Any]: Download job status object including 'status', 'bytes_per_second', 'total_size_bytes',
299
- 'downloaded_bytes', 'estimated_completion', etc[cite: 1391, 1394, 1396, 1400, 1402].
300
- """
301
- return self._request("GET", f"/api/v1/models/download/status/{job_id}")
302
-
303
-
304
- # --- Basic Usage Verification Example ---
305
- if __name__ == "__main__":
306
- client = LMStd(api_token=os.environ.get("LMSTD_APIKEY"))
307
-
308
- print("1. Listing system models...")
309
- try:
310
- models = client.list_models()
311
- print(json.dumps(models, indent=2))
312
-
313
- except Exception as error:
314
- print(f"Server communication failed: {error}")
315
-
316
- input()
File without changes
File without changes
File without changes