llm-sdk-py 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_sdk_py-0.1.0/LICENSE +21 -0
- llm_sdk_py-0.1.0/PKG-INFO +218 -0
- llm_sdk_py-0.1.0/README.md +197 -0
- llm_sdk_py-0.1.0/llm_sdk.py +1866 -0
- llm_sdk_py-0.1.0/llm_sdk_py.egg-info/PKG-INFO +218 -0
- llm_sdk_py-0.1.0/llm_sdk_py.egg-info/SOURCES.txt +10 -0
- llm_sdk_py-0.1.0/llm_sdk_py.egg-info/dependency_links.txt +1 -0
- llm_sdk_py-0.1.0/llm_sdk_py.egg-info/requires.txt +4 -0
- llm_sdk_py-0.1.0/llm_sdk_py.egg-info/top_level.txt +1 -0
- llm_sdk_py-0.1.0/pyproject.toml +34 -0
- llm_sdk_py-0.1.0/setup.cfg +4 -0
- llm_sdk_py-0.1.0/tests/test_llm_sdk.py +192 -0
llm_sdk_py-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Florian Gärtig
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llm-sdk-py
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Small, clean Python SDK for OpenAI-compatible LLM APIs.
|
|
5
|
+
Author: Florian Gaertig
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: openai>=1.0.0
|
|
18
|
+
Provides-Extra: pillow
|
|
19
|
+
Requires-Dist: pillow>=9.0.0; extra == "pillow"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# π¦ llm-sdk
|
|
23
|
+
|
|
24
|
+
Small Python SDK for OpenAI-compatible LLM APIs.
|
|
25
|
+
|
|
26
|
+
One file, clean API, boring on purpose. Use it with local servers, OpenAI-style endpoints, structured output, tool calls, vision inputs, and reasoning streams.
|
|
27
|
+
|
|
28
|
+
## ✨ Features
|
|
29
|
+
|
|
30
|
+
- Sync and async clients
|
|
31
|
+
- Streaming and non-streaming responses
|
|
32
|
+
- OpenAI Chat Completions support
|
|
33
|
+
- Optional OpenAI Responses API mode
|
|
34
|
+
- Structured output from JSON schema or typed Python classes
|
|
35
|
+
- Tool schema generation from Python callables
|
|
36
|
+
- Vision input normalization from URL, path, base64, or PIL image
|
|
37
|
+
- Thinking/reasoning token parsing
|
|
38
|
+
- Lightweight verbose stats for streams
|
|
39
|
+
|
|
40
|
+
## 🚀 Get Started
|
|
41
|
+
|
|
42
|
+
Install the only required dependency:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install openai
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Optional, only for PIL image inputs:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install pillow
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Drop `llm_sdk.py` into your project or import it from this repo:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from llm_sdk import LLM
|
|
58
|
+
|
|
59
|
+
llm = LLM(
|
|
60
|
+
model="qwen3.6-27b",
|
|
61
|
+
base_url="http://localhost:1234",
|
|
62
|
+
api_key="lm-studio",
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
response = llm.response(input="Write a tiny haiku about fast code.")
|
|
66
|
+
|
|
67
|
+
print(response["answer"])
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
By default, `base_url="http://localhost:1234/v1"` and `api_key="lm-studio"`, so local LM Studio-style servers work with very little setup.
|
|
71
|
+
|
|
72
|
+
All inference methods accept either `input="..."` for the common single-user-message case or a Chat Completions-style message list:
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
response = llm.response(messages=[
|
|
76
|
+
{"role": "system", "content": "Be concise."},
|
|
77
|
+
{"role": "user", "content": "Write a tiny haiku about fast code."},
|
|
78
|
+
])
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## 📡 Streaming
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
for event in llm.stream_response(input="Explain adapters in one paragraph."):
|
|
85
|
+
if event["type"] == "answer":
|
|
86
|
+
print(event["content"], end="", flush=True)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Events are small dictionaries:
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
{"type": "answer", "content": "..."}
|
|
93
|
+
{"type": "reasoning", "content": "..."}
|
|
94
|
+
{"type": "tool_call", "content": {"id": "...", "name": "...", "arguments": {...}}}
|
|
95
|
+
{"type": "verbose", "content": {"tokens": 42, "tokens_per_second": 91.3, "latency": 0.2, "prompt_tokens": 10, "completion_tokens": 32, "total_tokens": 42}}
|
|
96
|
+
{"type": "final", "content": {"answer": "..."}}
|
|
97
|
+
{"type": "done"}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Use `final=True` if you also want a final aggregated response event.
|
|
101
|
+
|
|
102
|
+
## ⏱️ Async
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
import asyncio
|
|
106
|
+
from llm_sdk import LLM
|
|
107
|
+
|
|
108
|
+
async def main():
|
|
109
|
+
async with LLM(model="gpt-5.5", api_key="sk-...", base_url="https://api.openai.com/v1", use_responses_api=True) as llm:
|
|
110
|
+
response = await llm.async_response(input="Give me a crisp project name.")
|
|
111
|
+
print(response["answer"])
|
|
112
|
+
|
|
113
|
+
asyncio.run(main())
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## π Structured Output
|
|
117
|
+
|
|
118
|
+
Pass a JSON schema or a typed class. Classes are converted into OpenAI-compatible JSON schema.
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
class Verdict:
|
|
122
|
+
sentiment: str
|
|
123
|
+
score: float
|
|
124
|
+
tags: list[str]
|
|
125
|
+
|
|
126
|
+
result = llm.response(
|
|
127
|
+
input="Review: fast, small, surprisingly nice.",
|
|
128
|
+
output_format=Verdict,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
print(result["answer"])
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## 🛠️ Tools
|
|
135
|
+
|
|
136
|
+
Pass Python callables or already-built OpenAI tool definitions. The SDK exposes tool definitions and returns streamed/final tool calls.
|
|
137
|
+
|
|
138
|
+
It does not execute tools for you. You stay in control.
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
def search_docs(query: str, limit: int = 5) -> str:
|
|
142
|
+
"""Search internal docs."""
|
|
143
|
+
return "..."
|
|
144
|
+
|
|
145
|
+
response = llm.response(
|
|
146
|
+
input="Find the auth setup notes.",
|
|
147
|
+
tools=[search_docs],
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
print(response.get("tool_calls", []))
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## 👁️ Vision
|
|
154
|
+
|
|
155
|
+
Image content can be a URL, a local path, base64, or a PIL image.
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
response = llm.response([
|
|
159
|
+
{
|
|
160
|
+
"role": "user",
|
|
161
|
+
"content": [
|
|
162
|
+
{"type": "text", "text": "What is in this image?"},
|
|
163
|
+
{"type": "image_path", "image_path": "photo.png"},
|
|
164
|
+
],
|
|
165
|
+
}
|
|
166
|
+
])
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Supported image forms include:
|
|
170
|
+
|
|
171
|
+
- `{"type": "image_url", "image_url": "https://..."}`
|
|
172
|
+
- `{"type": "image_path", "image_path": "local-file.png"}`
|
|
173
|
+
- `{"type": "image_base64", "image_base64": "..."}`
|
|
174
|
+
- `{"type": "image_pil", "image_pil": image}`
|
|
175
|
+
|
|
176
|
+
## π Responses API
|
|
177
|
+
|
|
178
|
+
Use `use_responses_api=True` for endpoints that prefer OpenAI's Responses API shape.
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
llm = LLM(
|
|
182
|
+
model="gpt-5.5",
|
|
183
|
+
api_key="sk-...",
|
|
184
|
+
base_url="https://api.openai.com/v1",
|
|
185
|
+
use_responses_api=True,
|
|
186
|
+
)
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## 🧠 Reasoning Effort
|
|
190
|
+
|
|
191
|
+
Use `reasoning_effort="high"` to set the model's reasoning effort.
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
response = llm.response(
|
|
195
|
+
input="...",
|
|
196
|
+
reasoning_effort="high"
|
|
197
|
+
)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## ⚙️ API
|
|
201
|
+
|
|
202
|
+
- `response(...)` and `stream_response(...)`
|
|
203
|
+
- `async_response(...)` and `async_stream_response(...)`
|
|
204
|
+
- `input="..."` or `messages=[...]` for all inference methods
|
|
205
|
+
- `list_models(fallback=[...])`
|
|
206
|
+
- `reasoning_effort="low|medium|high"` where supported
|
|
207
|
+
- `hide_thinking=False` to stream/return reasoning content
|
|
208
|
+
- `CustomThinkingToken(...)` for custom `<think>`-style parsing
|
|
209
|
+
- `verbose=True` for token-ish stream stats
|
|
210
|
+
- `with LLM(...) as llm:` / `async with LLM(...) as llm:` for cleanup
|
|
211
|
+
|
|
212
|
+
## 💡 Why
|
|
213
|
+
|
|
214
|
+
Most LLM wrappers either become frameworks or stay too close to raw HTTP. This sits in the middle: enough structure to be pleasant, little enough surface area to understand in one sitting.
|
|
215
|
+
|
|
216
|
+
## π License
|
|
217
|
+
|
|
218
|
+
MIT
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# π¦ llm-sdk
|
|
2
|
+
|
|
3
|
+
Small Python SDK for OpenAI-compatible LLM APIs.
|
|
4
|
+
|
|
5
|
+
One file, clean API, boring on purpose. Use it with local servers, OpenAI-style endpoints, structured output, tool calls, vision inputs, and reasoning streams.
|
|
6
|
+
|
|
7
|
+
## ✨ Features
|
|
8
|
+
|
|
9
|
+
- Sync and async clients
|
|
10
|
+
- Streaming and non-streaming responses
|
|
11
|
+
- OpenAI Chat Completions support
|
|
12
|
+
- Optional OpenAI Responses API mode
|
|
13
|
+
- Structured output from JSON schema or typed Python classes
|
|
14
|
+
- Tool schema generation from Python callables
|
|
15
|
+
- Vision input normalization from URL, path, base64, or PIL image
|
|
16
|
+
- Thinking/reasoning token parsing
|
|
17
|
+
- Lightweight verbose stats for streams
|
|
18
|
+
|
|
19
|
+
## 🚀 Get Started
|
|
20
|
+
|
|
21
|
+
Install the only required dependency:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install openai
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Optional, only for PIL image inputs:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install pillow
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Drop `llm_sdk.py` into your project or import it from this repo:
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from llm_sdk import LLM
|
|
37
|
+
|
|
38
|
+
llm = LLM(
|
|
39
|
+
model="qwen3.6-27b",
|
|
40
|
+
base_url="http://localhost:1234",
|
|
41
|
+
api_key="lm-studio",
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
response = llm.response(input="Write a tiny haiku about fast code.")
|
|
45
|
+
|
|
46
|
+
print(response["answer"])
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
By default, `base_url="http://localhost:1234/v1"` and `api_key="lm-studio"`, so local LM Studio-style servers work with very little setup.
|
|
50
|
+
|
|
51
|
+
All inference methods accept either `input="..."` for the common single-user-message case or a Chat Completions-style message list:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
response = llm.response(messages=[
|
|
55
|
+
{"role": "system", "content": "Be concise."},
|
|
56
|
+
{"role": "user", "content": "Write a tiny haiku about fast code."},
|
|
57
|
+
])
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## 📡 Streaming
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
for event in llm.stream_response(input="Explain adapters in one paragraph."):
|
|
64
|
+
if event["type"] == "answer":
|
|
65
|
+
print(event["content"], end="", flush=True)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Events are small dictionaries:
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
{"type": "answer", "content": "..."}
|
|
72
|
+
{"type": "reasoning", "content": "..."}
|
|
73
|
+
{"type": "tool_call", "content": {"id": "...", "name": "...", "arguments": {...}}}
|
|
74
|
+
{"type": "verbose", "content": {"tokens": 42, "tokens_per_second": 91.3, "latency": 0.2, "prompt_tokens": 10, "completion_tokens": 32, "total_tokens": 42}}
|
|
75
|
+
{"type": "final", "content": {"answer": "..."}}
|
|
76
|
+
{"type": "done"}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Use `final=True` if you also want a final aggregated response event.
|
|
80
|
+
|
|
81
|
+
## ⏱️ Async
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
import asyncio
|
|
85
|
+
from llm_sdk import LLM
|
|
86
|
+
|
|
87
|
+
async def main():
|
|
88
|
+
async with LLM(model="gpt-5.5", api_key="sk-...", base_url="https://api.openai.com/v1", use_responses_api=True) as llm:
|
|
89
|
+
response = await llm.async_response(input="Give me a crisp project name.")
|
|
90
|
+
print(response["answer"])
|
|
91
|
+
|
|
92
|
+
asyncio.run(main())
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## π Structured Output
|
|
96
|
+
|
|
97
|
+
Pass a JSON schema or a typed class. Classes are converted into OpenAI-compatible JSON schema.
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
class Verdict:
|
|
101
|
+
sentiment: str
|
|
102
|
+
score: float
|
|
103
|
+
tags: list[str]
|
|
104
|
+
|
|
105
|
+
result = llm.response(
|
|
106
|
+
input="Review: fast, small, surprisingly nice.",
|
|
107
|
+
output_format=Verdict,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
print(result["answer"])
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## 🛠️ Tools
|
|
114
|
+
|
|
115
|
+
Pass Python callables or already-built OpenAI tool definitions. The SDK exposes tool definitions and returns streamed/final tool calls.
|
|
116
|
+
|
|
117
|
+
It does not execute tools for you. You stay in control.
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
def search_docs(query: str, limit: int = 5) -> str:
|
|
121
|
+
"""Search internal docs."""
|
|
122
|
+
return "..."
|
|
123
|
+
|
|
124
|
+
response = llm.response(
|
|
125
|
+
input="Find the auth setup notes.",
|
|
126
|
+
tools=[search_docs],
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
print(response.get("tool_calls", []))
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## 👁️ Vision
|
|
133
|
+
|
|
134
|
+
Image content can be a URL, a local path, base64, or a PIL image.
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
response = llm.response([
|
|
138
|
+
{
|
|
139
|
+
"role": "user",
|
|
140
|
+
"content": [
|
|
141
|
+
{"type": "text", "text": "What is in this image?"},
|
|
142
|
+
{"type": "image_path", "image_path": "photo.png"},
|
|
143
|
+
],
|
|
144
|
+
}
|
|
145
|
+
])
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Supported image forms include:
|
|
149
|
+
|
|
150
|
+
- `{"type": "image_url", "image_url": "https://..."}`
|
|
151
|
+
- `{"type": "image_path", "image_path": "local-file.png"}`
|
|
152
|
+
- `{"type": "image_base64", "image_base64": "..."}`
|
|
153
|
+
- `{"type": "image_pil", "image_pil": image}`
|
|
154
|
+
|
|
155
|
+
## π Responses API
|
|
156
|
+
|
|
157
|
+
Use `use_responses_api=True` for endpoints that prefer OpenAI's Responses API shape.
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
llm = LLM(
|
|
161
|
+
model="gpt-5.5",
|
|
162
|
+
api_key="sk-...",
|
|
163
|
+
base_url="https://api.openai.com/v1",
|
|
164
|
+
use_responses_api=True,
|
|
165
|
+
)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## 🧠 Reasoning Effort
|
|
169
|
+
|
|
170
|
+
Use `reasoning_effort="high"` to set the model's reasoning effort.
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
response = llm.response(
|
|
174
|
+
input="...",
|
|
175
|
+
reasoning_effort="high"
|
|
176
|
+
)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## ⚙️ API
|
|
180
|
+
|
|
181
|
+
- `response(...)` and `stream_response(...)`
|
|
182
|
+
- `async_response(...)` and `async_stream_response(...)`
|
|
183
|
+
- `input="..."` or `messages=[...]` for all inference methods
|
|
184
|
+
- `list_models(fallback=[...])`
|
|
185
|
+
- `reasoning_effort="low|medium|high"` where supported
|
|
186
|
+
- `hide_thinking=False` to stream/return reasoning content
|
|
187
|
+
- `CustomThinkingToken(...)` for custom `<think>`-style parsing
|
|
188
|
+
- `verbose=True` for token-ish stream stats
|
|
189
|
+
- `with LLM(...) as llm:` / `async with LLM(...) as llm:` for cleanup
|
|
190
|
+
|
|
191
|
+
## 💡 Why
|
|
192
|
+
|
|
193
|
+
Most LLM wrappers either become frameworks or stay too close to raw HTTP. This sits in the middle: enough structure to be pleasant, little enough surface area to understand in one sitting.
|
|
194
|
+
|
|
195
|
+
## π License
|
|
196
|
+
|
|
197
|
+
MIT
|