lemonade-python-sdk 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lemonade_python_sdk-1.0.1/LICENSE +21 -0
- lemonade_python_sdk-1.0.1/PKG-INFO +180 -0
- lemonade_python_sdk-1.0.1/README.md +139 -0
- lemonade_python_sdk-1.0.1/lemonade_integration/__init__.py +6 -0
- lemonade_python_sdk-1.0.1/lemonade_integration/client.py +177 -0
- lemonade_python_sdk-1.0.1/lemonade_integration/model_discovery.py +131 -0
- lemonade_python_sdk-1.0.1/lemonade_integration/port_scanner.py +114 -0
- lemonade_python_sdk-1.0.1/lemonade_integration/request_builder.py +137 -0
- lemonade_python_sdk-1.0.1/lemonade_integration/utils.py +118 -0
- lemonade_python_sdk-1.0.1/lemonade_python_sdk.egg-info/PKG-INFO +180 -0
- lemonade_python_sdk-1.0.1/lemonade_python_sdk.egg-info/SOURCES.txt +14 -0
- lemonade_python_sdk-1.0.1/lemonade_python_sdk.egg-info/dependency_links.txt +1 -0
- lemonade_python_sdk-1.0.1/lemonade_python_sdk.egg-info/requires.txt +6 -0
- lemonade_python_sdk-1.0.1/lemonade_python_sdk.egg-info/top_level.txt +1 -0
- lemonade_python_sdk-1.0.1/setup.cfg +4 -0
- lemonade_python_sdk-1.0.1/setup.py +51 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Your Name or Organization
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lemonade-python-sdk
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: A clean interface for interacting with the Lemonade LLM server
|
|
5
|
+
Home-page: https://github.com/Tetramatrix/lemonade-python-sdk
|
|
6
|
+
Author: Your Name
|
|
7
|
+
Author-email: your.email@example.com
|
|
8
|
+
Project-URL: Bug Reports, https://github.com/Tetramatrix/lemonade-python-sdk/issues
|
|
9
|
+
Project-URL: Source, https://github.com/Tetramatrix/lemonade-python-sdk
|
|
10
|
+
Keywords: llm,ai,lemonade,sdk,api
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Requires-Python: >=3.8
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: requests>=2.25.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
27
|
+
Requires-Dist: flake8; extra == "dev"
|
|
28
|
+
Dynamic: author
|
|
29
|
+
Dynamic: author-email
|
|
30
|
+
Dynamic: classifier
|
|
31
|
+
Dynamic: description
|
|
32
|
+
Dynamic: description-content-type
|
|
33
|
+
Dynamic: home-page
|
|
34
|
+
Dynamic: keywords
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
Dynamic: project-url
|
|
37
|
+
Dynamic: provides-extra
|
|
38
|
+
Dynamic: requires-dist
|
|
39
|
+
Dynamic: requires-python
|
|
40
|
+
Dynamic: summary
|
|
41
|
+
|
|
42
|
+
# 🍋 Lemonade Python SDK
|
|
43
|
+
|
|
44
|
+
[](https://opensource.org/licenses/MIT)
|
|
45
|
+
[](https://www.python.org/downloads/)
|
|
46
|
+
|
|
47
|
+
A robust, production-grade Python wrapper for the **Lemonade C++ Backend**.
|
|
48
|
+
|
|
49
|
+
This SDK provides a clean, pythonic interface for interacting with local LLMs running on Lemonade. It was built to power **Sorana** (a visual workspace for AI), extracting the core integration logic into a standalone, open-source library for the developer community.
|
|
50
|
+
|
|
51
|
+
## 🚀 Key Features
|
|
52
|
+
|
|
53
|
+
* **Auto-Discovery:** Automatically scans multiple ports and hosts to find active Lemonade instances.
|
|
54
|
+
* **Low-Overhead Architecture:** Designed as a thin, efficient wrapper to leverage Lemonade's C++ performance with minimal Python latency.
|
|
55
|
+
* **Health Checks & Recovery:** Built-in utilities to verify server status and handle connection drops.
|
|
56
|
+
* **Type-Safe Client:** Full Python type hinting for better developer experience (IDE autocompletion).
|
|
57
|
+
* **Model Management:** Simple API to load, unload, and list models dynamically.
|
|
58
|
+
* **Embeddings API:** Generate text embeddings for semantic search, RAG, and clustering (FLM & llamacpp backends).
|
|
59
|
+
|
|
60
|
+
## 📦 Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install .
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Alternatively, you can install it directly from GitHub:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install git+[https://github.com/Tetramatrix/lemonade-python-sdk.git](https://github.com/Tetramatrix/lemonade-python-sdk.git)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## ⚡ Quick Start
|
|
73
|
+
|
|
74
|
+
### 1. Connecting to Lemonade
|
|
75
|
+
|
|
76
|
+
The SDK automatically handles port discovery, so you don't need to hardcode localhost:8000.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from lemonade_integration.client import LemonadeClient
|
|
80
|
+
from lemonade_integration.port_scanner import find_available_lemonade_port
|
|
81
|
+
|
|
82
|
+
# Auto-discover running instance
|
|
83
|
+
port = find_available_lemonade_port()
|
|
84
|
+
if port:
|
|
85
|
+
client = LemonadeClient(base_url=f"http://localhost:{port}")
|
|
86
|
+
if client.health_check():
|
|
87
|
+
print(f"Connected to Lemonade on port {port}")
|
|
88
|
+
else:
|
|
89
|
+
print("No Lemonade instance found.")
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 2. Chat Completion
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
response = client.chat_completion(
|
|
96
|
+
model="Llama-3-8B-Instruct",
|
|
97
|
+
messages=[
|
|
98
|
+
{"role": "system", "content": "You are a helpful coding assistant."},
|
|
99
|
+
{"role": "user", "content": "Write a Hello World in C++"}
|
|
100
|
+
],
|
|
101
|
+
temperature=0.7
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
print(response['choices'][0]['message']['content'])
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### 3. Model Management
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
# List all available models
|
|
111
|
+
models = client.list_models()
|
|
112
|
+
for m in models:
|
|
113
|
+
print(f"Found model: {m['id']}")
|
|
114
|
+
|
|
115
|
+
# Load a specific model into memory
|
|
116
|
+
client.load_model("Mistral-7B-v0.1")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### 4. Embeddings (NEW)
|
|
120
|
+
|
|
121
|
+
Generate text embeddings for semantic search, RAG pipelines, and clustering.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
# List available embedding models (filtered by 'embeddings' label)
|
|
125
|
+
embedding_models = client.list_embedding_models()
|
|
126
|
+
for model in embedding_models:
|
|
127
|
+
print(f"Embedding model: {model['id']}")
|
|
128
|
+
|
|
129
|
+
# Generate embeddings for single text
|
|
130
|
+
response = client.embeddings(
|
|
131
|
+
input="Hello, world!",
|
|
132
|
+
model="nomic-embed-text-v1-GGUF"
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
embedding_vector = response["data"][0]["embedding"]
|
|
136
|
+
print(f"Vector length: {len(embedding_vector)}")
|
|
137
|
+
|
|
138
|
+
# Generate embeddings for multiple texts
|
|
139
|
+
texts = ["Text 1", "Text 2", "Text 3"]
|
|
140
|
+
response = client.embeddings(
|
|
141
|
+
input=texts,
|
|
142
|
+
model="nomic-embed-text-v1-GGUF"
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
for item in response["data"]:
|
|
146
|
+
print(f"Text {item['index']}: {len(item['embedding'])} dimensions")
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Supported Backends:**
|
|
150
|
+
- ✅ **FLM (FastFlowLM)** - NPU-accelerated on Windows
|
|
151
|
+
- ✅ **llamacpp** (.GGUF models) - CPU/GPU
|
|
152
|
+
- ❌ ONNX/OGA - Not supported
|
|
153
|
+
|
|
154
|
+
### 🖼️ Production Showcase: [Sorana](https://tetramatrix.github.io/Sorana/)
|
|
155
|
+
This SDK was extracted from the core engine of [Sorana](https://tetramatrix.github.io/Sorana/), a professional visual workspace for AI. It demonstrates the SDK's capability to handle complex, real-world requirements on AMD Ryzen AI hardware:
|
|
156
|
+
|
|
157
|
+
* **Low Latency:** Powers sub-second response times for multi-model chat interfaces.
|
|
158
|
+
* **Dynamic Workflows:** Manages the loading and unloading of 20+ different LLMs based on user activity to optimize local NPU/GPU memory.
|
|
159
|
+
* **Zero-Config UX:** Uses the built-in port scanner to automatically connect the Sorana frontend to the Lemonade backend without user intervention.
|
|
160
|
+
|
|
161
|
+
## 🛠️ Project Structure
|
|
162
|
+
|
|
163
|
+
* **client.py:** Main entry point for API interactions (chat, embeddings, model management).
|
|
164
|
+
* **port_scanner.py:** Utilities for detecting Lemonade instances across ports (8000-9000).
|
|
165
|
+
* **model_discovery.py:** Logic for fetching and parsing model metadata.
|
|
166
|
+
* **request_builder.py:** Helper functions to construct compliant payloads (chat, embeddings).
|
|
167
|
+
* **utils.py:** Additional utility functions.
|
|
168
|
+
|
|
169
|
+
## 📚 Documentation
|
|
170
|
+
|
|
171
|
+
* **[Embeddings API](docs/embeddings_api.md)** - Complete guide for using embeddings
|
|
172
|
+
* [Lemonade Server Docs](https://lemonade-server.ai/docs/server/server_spec/) - Official Lemonade documentation
|
|
173
|
+
|
|
174
|
+
## 🤝 Contributing
|
|
175
|
+
|
|
176
|
+
Contributions are welcome! This project is intended to help the AMD Ryzen AI and Lemonade community build downstream applications faster.
|
|
177
|
+
|
|
178
|
+
## 📄 License
|
|
179
|
+
|
|
180
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# 🍋 Lemonade Python SDK
|
|
2
|
+
|
|
3
|
+
[](https://opensource.org/licenses/MIT)
|
|
4
|
+
[](https://www.python.org/downloads/)
|
|
5
|
+
|
|
6
|
+
A robust, production-grade Python wrapper for the **Lemonade C++ Backend**.
|
|
7
|
+
|
|
8
|
+
This SDK provides a clean, pythonic interface for interacting with local LLMs running on Lemonade. It was built to power **Sorana** (a visual workspace for AI), extracting the core integration logic into a standalone, open-source library for the developer community.
|
|
9
|
+
|
|
10
|
+
## 🚀 Key Features
|
|
11
|
+
|
|
12
|
+
* **Auto-Discovery:** Automatically scans multiple ports and hosts to find active Lemonade instances.
|
|
13
|
+
* **Low-Overhead Architecture:** Designed as a thin, efficient wrapper to leverage Lemonade's C++ performance with minimal Python latency.
|
|
14
|
+
* **Health Checks & Recovery:** Built-in utilities to verify server status and handle connection drops.
|
|
15
|
+
* **Type-Safe Client:** Full Python type hinting for better developer experience (IDE autocompletion).
|
|
16
|
+
* **Model Management:** Simple API to load, unload, and list models dynamically.
|
|
17
|
+
* **Embeddings API:** Generate text embeddings for semantic search, RAG, and clustering (FLM & llamacpp backends).
|
|
18
|
+
|
|
19
|
+
## 📦 Installation
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Alternatively, you can install it directly from GitHub:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install git+https://github.com/Tetramatrix/lemonade-python-sdk.git
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## ⚡ Quick Start
|
|
32
|
+
|
|
33
|
+
### 1. Connecting to Lemonade
|
|
34
|
+
|
|
35
|
+
The SDK automatically handles port discovery, so you don't need to hardcode localhost:8000.
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from lemonade_integration.client import LemonadeClient
|
|
39
|
+
from lemonade_integration.port_scanner import find_available_lemonade_port
|
|
40
|
+
|
|
41
|
+
# Auto-discover running instance
|
|
42
|
+
port = find_available_lemonade_port()
|
|
43
|
+
if port:
|
|
44
|
+
client = LemonadeClient(base_url=f"http://localhost:{port}")
|
|
45
|
+
if client.health_check():
|
|
46
|
+
print(f"Connected to Lemonade on port {port}")
|
|
47
|
+
else:
|
|
48
|
+
print("No Lemonade instance found.")
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### 2. Chat Completion
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
response = client.chat_completion(
|
|
55
|
+
model="Llama-3-8B-Instruct",
|
|
56
|
+
messages=[
|
|
57
|
+
{"role": "system", "content": "You are a helpful coding assistant."},
|
|
58
|
+
{"role": "user", "content": "Write a Hello World in C++"}
|
|
59
|
+
],
|
|
60
|
+
temperature=0.7
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
print(response['choices'][0]['message']['content'])
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### 3. Model Management
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
# List all available models
|
|
70
|
+
models = client.list_models()
|
|
71
|
+
for m in models:
|
|
72
|
+
print(f"Found model: {m['id']}")
|
|
73
|
+
|
|
74
|
+
# Load a specific model into memory
|
|
75
|
+
client.load_model("Mistral-7B-v0.1")
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 4. Embeddings (NEW)
|
|
79
|
+
|
|
80
|
+
Generate text embeddings for semantic search, RAG pipelines, and clustering.
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
# List available embedding models (filtered by 'embeddings' label)
|
|
84
|
+
embedding_models = client.list_embedding_models()
|
|
85
|
+
for model in embedding_models:
|
|
86
|
+
print(f"Embedding model: {model['id']}")
|
|
87
|
+
|
|
88
|
+
# Generate embeddings for single text
|
|
89
|
+
response = client.embeddings(
|
|
90
|
+
input="Hello, world!",
|
|
91
|
+
model="nomic-embed-text-v1-GGUF"
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
embedding_vector = response["data"][0]["embedding"]
|
|
95
|
+
print(f"Vector length: {len(embedding_vector)}")
|
|
96
|
+
|
|
97
|
+
# Generate embeddings for multiple texts
|
|
98
|
+
texts = ["Text 1", "Text 2", "Text 3"]
|
|
99
|
+
response = client.embeddings(
|
|
100
|
+
input=texts,
|
|
101
|
+
model="nomic-embed-text-v1-GGUF"
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
for item in response["data"]:
|
|
105
|
+
print(f"Text {item['index']}: {len(item['embedding'])} dimensions")
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**Supported Backends:**
|
|
109
|
+
- ✅ **FLM (FastFlowLM)** - NPU-accelerated on Windows
|
|
110
|
+
- ✅ **llamacpp** (.GGUF models) - CPU/GPU
|
|
111
|
+
- ❌ ONNX/OGA - Not supported
|
|
112
|
+
|
|
113
|
+
### 🖼️ Production Showcase: [Sorana](https://tetramatrix.github.io/Sorana/)
|
|
114
|
+
This SDK was extracted from the core engine of [Sorana](https://tetramatrix.github.io/Sorana/), a professional visual workspace for AI. It demonstrates the SDK's capability to handle complex, real-world requirements on AMD Ryzen AI hardware:
|
|
115
|
+
|
|
116
|
+
* **Low Latency:** Powers sub-second response times for multi-model chat interfaces.
|
|
117
|
+
* **Dynamic Workflows:** Manages the loading and unloading of 20+ different LLMs based on user activity to optimize local NPU/GPU memory.
|
|
118
|
+
* **Zero-Config UX:** Uses the built-in port scanner to automatically connect the Sorana frontend to the Lemonade backend without user intervention.
|
|
119
|
+
|
|
120
|
+
## 🛠️ Project Structure
|
|
121
|
+
|
|
122
|
+
* **client.py:** Main entry point for API interactions (chat, embeddings, model management).
|
|
123
|
+
* **port_scanner.py:** Utilities for detecting Lemonade instances across ports (8000-9000).
|
|
124
|
+
* **model_discovery.py:** Logic for fetching and parsing model metadata.
|
|
125
|
+
* **request_builder.py:** Helper functions to construct compliant payloads (chat, embeddings).
|
|
126
|
+
* **utils.py:** Additional utility functions.
|
|
127
|
+
|
|
128
|
+
## 📚 Documentation
|
|
129
|
+
|
|
130
|
+
* **[Embeddings API](docs/embeddings_api.md)** - Complete guide for using embeddings
|
|
131
|
+
* [Lemonade Server Docs](https://lemonade-server.ai/docs/server/server_spec/) - Official Lemonade documentation
|
|
132
|
+
|
|
133
|
+
## 🤝 Contributing
|
|
134
|
+
|
|
135
|
+
Contributions are welcome! This project is intended to help the AMD Ryzen AI and Lemonade community build downstream applications faster.
|
|
136
|
+
|
|
137
|
+
## 📄 License
|
|
138
|
+
|
|
139
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# Public package interface: re-export the SDK's primary entry points so
# users can write `from lemonade_integration import LemonadeClient`.
from .client import LemonadeClient
from .model_discovery import discover_lemonade_models
from .port_scanner import find_available_lemonade_port
from .request_builder import build_embedding_payload

# Names exported by `from lemonade_integration import *`.
__all__ = ['LemonadeClient', 'discover_lemonade_models', 'find_available_lemonade_port', 'build_embedding_payload']
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LemonadeClient - Main class for interacting with the Lemonade server
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
import json
|
|
7
|
+
from typing import Dict, List, Optional, Any
|
|
8
|
+
from .request_builder import build_chat_completion_payload, send_request, build_embedding_payload
|
|
9
|
+
from .model_discovery import get_active_model
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LemonadeClient:
    """
    A class for interacting with the Lemonade LLM Server.

    Wraps the server's ``/api/v1`` HTTP endpoints (model listing, chat
    completions, embeddings, model load/unload) behind one persistent
    ``requests.Session`` so TCP connections are reused across calls.
    """

    def __init__(self, base_url: str = "http://localhost:8000"):
        """
        Initializes the Lemonade Client with the base URL.

        Args:
            base_url (str): The base URL of the Lemonade server (default: http://localhost:8000)
        """
        # Drop any trailing slash so endpoint paths can be appended safely.
        self.base_url = base_url.rstrip('/')
        # One shared session reuses connections across requests.
        self.session = requests.Session()

    def list_models(self) -> List[Dict[str, Any]]:
        """
        Retrieves the list of available models from the Lemonade server.

        Returns:
            List[Dict[str, Any]]: List of available models (empty list on error)
        """
        url = f"{self.base_url}/api/v1/models"
        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            # Lemonade returns models under the 'data' key (OpenAI-style envelope).
            return data.get('data', [])
        except requests.exceptions.RequestException as e:
            print(f"Error retrieving models: {e}")
            return []
        except json.JSONDecodeError as e:
            print(f"Error parsing response: {e}")
            return []

    def chat_completion(self, model: str, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
        """
        Sends a chat completion request to the Lemonade server.

        Args:
            model (str): The name of the model to use
            messages (List[Dict[str, str]]): The messages for the conversation
            **kwargs: Additional parameters for the request

        Returns:
            Dict[str, Any]: The response from the server, or {"error": ...} on failure
        """
        url = f"{self.base_url}/api/v1/chat/completions"
        payload = build_chat_completion_payload(model, messages, **kwargs)
        try:
            return send_request(url, payload, session=self.session)
        except Exception as e:
            print(f"Error in chat completion request: {e}")
            return {"error": str(e)}

    def health_check(self) -> bool:
        """
        Checks if the Lemonade server is running and reachable.

        Returns:
            bool: True if the server answers the models endpoint with HTTP 200,
            otherwise False
        """
        # BUG FIX: the previous implementation returned `len(list_models()) >= 0`,
        # which is always True because list_models() swallows connection errors
        # and returns []. Probe the endpoint directly instead so an unreachable
        # server actually yields False.
        url = f"{self.base_url}/api/v1/models"
        try:
            response = self.session.get(url, timeout=5)
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def get_current_model(self) -> Optional[str]:
        """
        Retrieves the currently active model from the Lemonade server.

        Returns:
            Optional[str]: The name of the current model or None
        """
        return get_active_model(self.base_url)

    def load_model(self, model_name: str, **kwargs) -> Dict[str, Any]:
        """
        Loads a specific model on the Lemonade server.

        Args:
            model_name (str): The name of the model to load
            **kwargs: Additional parameters for loading the model

        Returns:
            Dict[str, Any]: Response from the server, or {"error": ...} on failure
        """
        url = f"{self.base_url}/api/v1/load_model"
        payload = {"model": model_name, **kwargs}
        try:
            return send_request(url, payload, session=self.session)
        except Exception as e:
            print(f"Error loading model: {e}")
            return {"error": str(e)}

    def unload_model(self) -> Dict[str, Any]:
        """
        Unloads the current model from the Lemonade server.

        Returns:
            Dict[str, Any]: Response from the server, or {"error": ...} on failure
        """
        url = f"{self.base_url}/api/v1/unload_model"
        try:
            return send_request(url, {}, session=self.session)
        except Exception as e:
            print(f"Error unloading model: {e}")
            return {"error": str(e)}

    def embeddings(self, input: Any, model: str, **kwargs) -> Dict[str, Any]:
        """
        Sends an embedding request to the Lemonade server.

        Args:
            input (str | List[str]): The text or list of texts to embed
            model (str): The name of the embedding model to use (e.g., "nomic-embed-text-v1-GGUF")
            **kwargs: Additional parameters for the request
                - encoding_format (str): "float" (default) or "base64"

        Returns:
            Dict[str, Any]: The response containing the embedding vectors
        """
        # NOTE: the parameter keeps the name `input` (shadowing the builtin)
        # for backward compatibility with existing keyword callers; its
        # annotation was corrected from `str` to `Any` since lists are accepted.
        url = f"{self.base_url}/api/v1/embeddings"
        payload = build_embedding_payload(input, model, **kwargs)
        try:
            return send_request(url, payload, session=self.session)
        except Exception as e:
            print(f"Error in embedding request: {e}")
            return {"error": str(e)}

    def list_embedding_models(self) -> List[Dict[str, Any]]:
        """
        Retrieves only the embedding models from the Lemonade server.
        Filters models by the 'embeddings' label.

        Returns:
            List[Dict[str, Any]]: List of available embedding models
        """
        all_models = self.list_models()
        # Keep only models whose 'labels' list contains "embeddings";
        # a missing or None 'labels' field is treated as "no labels".
        return [m for m in all_models if "embeddings" in (m.get("labels") or [])]
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for discovering and managing Lemonade models
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
import json
|
|
7
|
+
from typing import List, Dict, Any, Optional
|
|
8
|
+
from .port_scanner import find_available_lemonade_port
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def discover_lemonade_models(base_url: str = "http://localhost:8000") -> List[Dict[str, Any]]:
    """
    Scans for available models on the Lemonade server.

    Args:
        base_url (str): The base URL of the Lemonade server

    Returns:
        List[Dict[str, Any]]: List of found models with their properties
        (empty list on error)
    """
    # For local URLs, fall back to port auto-discovery when the configured
    # port is not serving a Lemonade instance.
    if "localhost" in base_url or "127.0.0.1" in base_url:
        import re
        port_match = re.search(r':(\d+)', base_url)
        if port_match:
            current_port = int(port_match.group(1))
            # BUG FIX: the old code scanned only [current_port], which can
            # return either current_port or None — so its
            # `available_port != current_port` branch was dead code and the
            # URL was never corrected. Now: verify the configured port first;
            # if it is dead, scan the default Lemonade port list instead.
            if find_available_lemonade_port(ports=[current_port]) is None:
                fallback_port = find_available_lemonade_port()
                if fallback_port and fallback_port != current_port:
                    base_url = base_url.replace(f':{current_port}', f':{fallback_port}')

    url = f"{base_url}/api/v1/models"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()

        # Lemonade returns models under the 'data' key (OpenAI-style envelope).
        models = data.get('data', [])

        # Normalize each entry to the flat structure downstream consumers expect.
        formatted_models = []
        for model in models:
            name = model.get('name', model.get('id', 'unknown'))
            formatted_models.append({
                'id': model.get('id', model.get('name', 'unknown')),
                'name': name,
                'object': model.get('object', 'model'),
                'created': model.get('created', 0),
                'owned_by': model.get('owned_by', 'unknown'),
                'source': 'external',
                'provider': 'Lemonade',
                'status': 'Available',
                'size_gb': 0,  # Lemonade doesn't provide size info in the API
                'local_path': f"lemonade://{name}",
                'backend': 'lemonade'
            })

        return formatted_models
    except requests.exceptions.RequestException as e:
        print(f"Error retrieving models from {base_url}: {e}")
        return []
    except json.JSONDecodeError as e:
        print(f"Error parsing response from {base_url}: {e}")
        return []
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_active_model(base_url: str = "http://localhost:8000") -> Optional[str]:
    """
    Retrieves the currently active model from the Lemonade server.

    Args:
        base_url (str): The base URL of the Lemonade server

    Returns:
        Optional[str]: The name of the active model or None
    """
    # Lemonade exposes no single canonical "current model" endpoint,
    # so probe a few likely candidates in order.
    candidate_urls = (
        f"{base_url}/api/v1/current_model",
        f"{base_url}/api/v1/model",
        f"{base_url}/api/v1/status",
    )

    for candidate in candidate_urls:
        try:
            reply = requests.get(candidate, timeout=5)
            if reply.status_code != 200:
                continue
            payload = reply.json()

            # The model name may hide under several possible field names.
            for key in ('model', 'current_model', 'active_model', 'name'):
                if key in payload:
                    return payload[key]

            # Some servers answer with a bare string.
            if isinstance(payload, str):
                return payload
        except (requests.exceptions.RequestException, json.JSONDecodeError):
            continue

    # Fallback: report the first model the discovery endpoint knows about.
    models = discover_lemonade_models(base_url)
    if models:
        return models[0]['name']

    return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def verify_model_availability(model_name: str, base_url: str = "http://localhost:8000") -> bool:
    """
    Checks if a specific model is available on the Lemonade server.

    Args:
        model_name (str): The name of the model to check
        base_url (str): The base URL of the Lemonade server

    Returns:
        bool: True if the model is available, otherwise False
    """
    # A model counts as available when either its name or its id matches exactly.
    return any(
        model_name in (entry['name'], entry['id'])
        for entry in discover_lemonade_models(base_url)
    )
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for scanning available Lemonade servers
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import socket
|
|
6
|
+
import requests
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def is_port_open(host: str, port: int, timeout: float = 0.5) -> bool:
    """
    Checks if a TCP port is reachable on the host.

    Args:
        host (str): The host on which to check the port
        port (int): The port to check
        timeout (float): Timeout for the connection (default: 0.5 seconds)

    Returns:
        bool: True if the port is reachable, otherwise False
    """
    # Any failure (refused, timed out, unresolvable host) means "not open".
    try:
        probe = socket.create_connection((host, port), timeout=timeout)
    except Exception:
        return False
    probe.close()
    return True
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def verify_lemonade_server(port: int, host: str = "127.0.0.1") -> bool:
    """
    Checks if a Lemonade server is running on the specified port.

    Args:
        port (int): The port to check
        host (str): The host (default: 127.0.0.1)

    Returns:
        bool: True if a Lemonade server is running on the port, otherwise False
    """
    endpoint = f"http://{host}:{port}/api/v1/models"

    try:
        reply = requests.get(endpoint, timeout=2)
        # A Lemonade server answers the models endpoint with JSON:
        # either an OpenAI-style {"data": [...]} envelope or a bare list.
        if reply.status_code == 200:
            try:
                body = reply.json()
                if "data" in body or isinstance(body, list):
                    return True
            except ValueError:
                # Non-JSON reply: some other service is squatting on the port.
                pass
    except requests.RequestException:
        pass

    return False
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def find_available_lemonade_port(
    host: str = "127.0.0.1",
    ports: Optional[List[int]] = None
) -> Optional[int]:
    """
    Finds the first available port on which a Lemonade server is running.

    Args:
        host (str): The host to search on (default: 127.0.0.1)
        ports (Optional[List[int]]): List of ports to check
            (default: [8000, 8020, 8040, 8060, 8080, 9000])

    Returns:
        Optional[int]: The first found port with a Lemonade server, or None
    """
    # Note: the annotation is Optional[List[int]] — the previous
    # `ports: List[int] = None` declared a default that violated its own type.
    if ports is None:
        # Standard Lemonade ports
        ports = [8000, 8020, 8040, 8060, 8080, 9000]

    for port in ports:
        # Cheap TCP probe first; only then issue the HTTP verification request.
        if is_port_open(host, port) and verify_lemonade_server(port, host):
            return port

    return None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def scan_multiple_hosts_for_lemonade(
    hosts: Optional[List[str]] = None,
    ports: Optional[List[int]] = None
) -> List[tuple]:
    """
    Scans multiple hosts and ports for available Lemonade servers.

    Args:
        hosts (Optional[List[str]]): List of hosts to scan
            (default: ["127.0.0.1", "localhost"])
        ports (Optional[List[int]]): List of ports to scan
            (default: [8000, 8020, 8040, 8060, 8080, 9000])

    Returns:
        List[tuple]: List of (host, port) tuples where Lemonade servers were found
    """
    # Annotations fixed to Optional[...] — the previous `List[str] = None`
    # defaults contradicted their declared types.
    if hosts is None:
        hosts = ["127.0.0.1", "localhost"]

    if ports is None:
        ports = [8000, 8020, 8040, 8060, 8080, 9000]

    available_servers = []

    for host in hosts:
        for port in ports:
            # Cheap TCP probe first; HTTP verification only if the port is open.
            if is_port_open(host, port) and verify_lemonade_server(port, host):
                available_servers.append((host, port))

    return available_servers
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for building API requests to the Lemonade server
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
import json
|
|
7
|
+
from typing import Dict, Any, List, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_chat_completion_payload(model: str, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
    """
    Creates the payload for chat completion requests.

    Args:
        model (str): The name of the model to use
        messages (List[Dict[str, str]]): The messages for the conversation
        **kwargs: Additional parameters for the request

    Returns:
        Dict[str, Any]: The finished payload for the request
    """
    body: Dict[str, Any] = {
        "model": model,
        "messages": messages,
        "stream": kwargs.get("stream", False),
    }

    # Copy through the sampling parameters the server understands, skipping
    # any that were not supplied or were explicitly set to None.
    body.update({
        name: kwargs[name]
        for name in (
            "temperature", "top_p", "top_k", "max_tokens", "stop",
            "presence_penalty", "frequency_penalty", "repetition_penalty",
        )
        if kwargs.get(name) is not None
    })

    # Lemonade-specific passthrough options.
    if "options" in kwargs:
        body["options"] = kwargs["options"]

    return body
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def build_model_load_payload(model_name: str, **kwargs) -> Dict[str, Any]:
    """
    Creates the payload for loading a model.

    Args:
        model_name (str): The name of the model to load
        **kwargs: Additional parameters for loading the model

    Returns:
        Dict[str, Any]: The finished payload for loading the model
    """
    # Extra keyword arguments are forwarded verbatim alongside the model name
    # (a later duplicate key in kwargs overrides, as with the original loop).
    return {"model": model_name, **kwargs}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def send_request(url: str, payload: Dict[str, Any], headers: Optional[Dict[str, str]] = None, session: Optional[requests.Session] = None, method: str = "POST") -> Dict[str, Any]:
    """
    Sends a request to the Lemonade server.

    Args:
        url (str): The target URL for the request
        payload (Dict[str, Any]): The payload to send
        headers (Optional[Dict[str, str]]): Optional headers for the request
        session (Optional[requests.Session]): Optional session for the request
        method (str): HTTP method to use ("POST" or "GET")

    Returns:
        Dict[str, Any]: The parsed JSON response, or {"error": ...} on failure
    """
    if headers is None:
        headers = {
            "Content-Type": "application/json"
        }

    # Use the caller's session when provided; otherwise create a temporary
    # one that is closed again in the finally block below.
    req_session = session or requests.Session()

    try:
        if method.upper() == "POST":
            response = req_session.post(url, json=payload, headers=headers, timeout=30)
        elif method.upper() == "GET":
            response = req_session.get(url, headers=headers, timeout=30, params=payload if payload else None)
        else:
            return {"error": f"Unsupported HTTP method: {method}"}

        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error in request to {url}: {e}")
        # e.response is the Response object that triggered raise_for_status().
        if e.response is not None:
            print(f"Response body: {e.response.text}")
        return {"error": f"HTTP Error: {e}"}
    except json.JSONDecodeError as e:
        # Must be caught BEFORE RequestException: modern requests raises
        # requests.exceptions.JSONDecodeError, which subclasses both
        # json.JSONDecodeError and RequestException — with the original
        # ordering this branch was unreachable.
        print(f"Error parsing response from {url}: {e}")
        return {"error": f"JSON Decode Error: {e}"}
    except requests.exceptions.RequestException as e:
        print(f"Error in request to {url}: {e}")
        return {"error": f"Request Error: {e}"}
    finally:
        # If no session was passed, close the temporary one here
        if session is None:
            req_session.close()
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def build_embedding_payload(input_text: str, model: str, **kwargs) -> Dict[str, Any]:
|
|
117
|
+
"""
|
|
118
|
+
Creates the payload for embedding requests.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
input_text (str): The text for the embedding
|
|
122
|
+
model (str): The name of the model to use
|
|
123
|
+
**kwargs: Additional parameters for the request
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
Dict[str, Any]: The finished payload for the embedding request
|
|
127
|
+
"""
|
|
128
|
+
payload = {
|
|
129
|
+
"input": input_text,
|
|
130
|
+
"model": model
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
# Add optional parameters
|
|
134
|
+
for key, value in kwargs.items():
|
|
135
|
+
payload[key] = value
|
|
136
|
+
|
|
137
|
+
return payload
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Helper functions for Lemonade integration
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import List, Dict, Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def format_messages_for_lemonade(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """
    Formats messages in Lemonade-compatible format.

    Args:
        messages (List[Dict[str, str]]): The messages to format

    Returns:
        List[Dict[str, str]]: The formatted messages
    """
    # Lemonade expects OpenAI-style messages: every entry must carry a
    # 'role' and a 'content' key, with sensible fallbacks when missing.
    return [
        {
            "role": entry.get("role", "user"),
            "content": entry.get("content", ""),
        }
        for entry in messages
    ]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def extract_model_info_from_response(response: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extracts relevant model information from the response.

    Args:
        response (Dict[str, Any]): The response from the server

    Returns:
        Dict[str, Any]: Extracted model information
    """
    # Only these fields are of interest; "data" covers model-list responses.
    relevant_keys = ("model", "usage", "choices", "created", "data")
    return {key: response[key] for key in relevant_keys if key in response}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def validate_lemonade_response(response: Dict[str, Any]) -> bool:
    """
    Validates whether the response from the Lemonade server has the expected format.

    Args:
        response (Dict[str, Any]): The response from the server

    Returns:
        bool: True if the response is valid, otherwise False
    """
    # Anything that is not a mapping cannot be a valid server reply.
    if not isinstance(response, dict):
        return False

    # A chat-completion reply must carry a non-empty list of choices.
    if "choices" in response:
        choices = response["choices"]
        if not (isinstance(choices, list) and choices):
            return False

    # A model-listing reply must carry a list under "data".
    if "data" in response and not isinstance(response["data"], list):
        return False

    return True
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def sanitize_model_name(model_name: str) -> str:
    """
    Cleans the model name of invalid characters.

    Args:
        model_name (str): The model name to clean

    Returns:
        str: The cleaned model name
    """
    # Trim surrounding whitespace, then replace interior spaces with
    # underscores so the name is safe to use in URLs and payloads.
    return model_name.strip().replace(" ", "_")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def format_error_message(error: Exception) -> str:
    """
    Formats an error message for output.

    Args:
        error (Exception): The exception that occurred

    Returns:
        str: Formatted error message
    """
    # Include both the exception class name and its message text.
    return f"Error: {type(error).__name__} - {error}"
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lemonade-python-sdk
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: A clean interface for interacting with the Lemonade LLM server
|
|
5
|
+
Home-page: https://github.com/Tetramatrix/lemonade-python-sdk
|
|
6
|
+
Author: Your Name
|
|
7
|
+
Author-email: your.email@example.com
|
|
8
|
+
Project-URL: Bug Reports, https://github.com/Tetramatrix/lemonade-python-sdk/issues
|
|
9
|
+
Project-URL: Source, https://github.com/Tetramatrix/lemonade-python-sdk
|
|
10
|
+
Keywords: llm,ai,lemonade,sdk,api
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Requires-Python: >=3.8
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: requests>=2.25.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
27
|
+
Requires-Dist: flake8; extra == "dev"
|
|
28
|
+
Dynamic: author
|
|
29
|
+
Dynamic: author-email
|
|
30
|
+
Dynamic: classifier
|
|
31
|
+
Dynamic: description
|
|
32
|
+
Dynamic: description-content-type
|
|
33
|
+
Dynamic: home-page
|
|
34
|
+
Dynamic: keywords
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
Dynamic: project-url
|
|
37
|
+
Dynamic: provides-extra
|
|
38
|
+
Dynamic: requires-dist
|
|
39
|
+
Dynamic: requires-python
|
|
40
|
+
Dynamic: summary
|
|
41
|
+
|
|
42
|
+
# 🍋 Lemonade Python SDK
|
|
43
|
+
|
|
44
|
+
[](https://opensource.org/licenses/MIT)
|
|
45
|
+
[](https://www.python.org/downloads/)
|
|
46
|
+
|
|
47
|
+
A robust, production-grade Python wrapper for the **Lemonade C++ Backend**.
|
|
48
|
+
|
|
49
|
+
This SDK provides a clean, pythonic interface for interacting with local LLMs running on Lemonade. It was built to power **Sorana** (a visual workspace for AI), extracting the core integration logic into a standalone, open-source library for the developer community.
|
|
50
|
+
|
|
51
|
+
## 🚀 Key Features
|
|
52
|
+
|
|
53
|
+
* **Auto-Discovery:** Automatically scans multiple ports and hosts to find active Lemonade instances.
|
|
54
|
+
* **Low-Overhead Architecture:** Designed as a thin, efficient wrapper to leverage Lemonade's C++ performance with minimal Python latency.
|
|
55
|
+
* **Health Checks & Recovery:** Built-in utilities to verify server status and handle connection drops.
|
|
56
|
+
* **Type-Safe Client:** Full Python type hinting for better developer experience (IDE autocompletion).
|
|
57
|
+
* **Model Management:** Simple API to load, unload, and list models dynamically.
|
|
58
|
+
* **Embeddings API:** Generate text embeddings for semantic search, RAG, and clustering (FLM & llamacpp backends).
|
|
59
|
+
|
|
60
|
+
## 📦 Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install .
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Alternatively, you can install it directly from GitHub:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install git+https://github.com/Tetramatrix/lemonade-python-sdk.git
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## ⚡ Quick Start
|
|
73
|
+
|
|
74
|
+
### 1. Connecting to Lemonade
|
|
75
|
+
|
|
76
|
+
The SDK automatically handles port discovery, so you don't need to hardcode localhost:8000.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from lemonade_integration.client import LemonadeClient
|
|
80
|
+
from lemonade_integration.port_scanner import find_available_lemonade_port
|
|
81
|
+
|
|
82
|
+
# Auto-discover running instance
|
|
83
|
+
port = find_available_lemonade_port()
|
|
84
|
+
if port:
|
|
85
|
+
client = LemonadeClient(base_url=f"http://localhost:{port}")
|
|
86
|
+
if client.health_check():
|
|
87
|
+
print(f"Connected to Lemonade on port {port}")
|
|
88
|
+
else:
|
|
89
|
+
print("No Lemonade instance found.")
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 2. Chat Completion
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
response = client.chat_completion(
|
|
96
|
+
model="Llama-3-8B-Instruct",
|
|
97
|
+
messages=[
|
|
98
|
+
{"role": "system", "content": "You are a helpful coding assistant."},
|
|
99
|
+
{"role": "user", "content": "Write a Hello World in C++"}
|
|
100
|
+
],
|
|
101
|
+
temperature=0.7
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
print(response['choices'][0]['message']['content'])
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### 3. Model Management
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
# List all available models
|
|
111
|
+
models = client.list_models()
|
|
112
|
+
for m in models:
|
|
113
|
+
print(f"Found model: {m['id']}")
|
|
114
|
+
|
|
115
|
+
# Load a specific model into memory
|
|
116
|
+
client.load_model("Mistral-7B-v0.1")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### 4. Embeddings (NEW)
|
|
120
|
+
|
|
121
|
+
Generate text embeddings for semantic search, RAG pipelines, and clustering.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
# List available embedding models (filtered by 'embeddings' label)
|
|
125
|
+
embedding_models = client.list_embedding_models()
|
|
126
|
+
for model in embedding_models:
|
|
127
|
+
print(f"Embedding model: {model['id']}")
|
|
128
|
+
|
|
129
|
+
# Generate embeddings for single text
|
|
130
|
+
response = client.embeddings(
|
|
131
|
+
input="Hello, world!",
|
|
132
|
+
model="nomic-embed-text-v1-GGUF"
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
embedding_vector = response["data"][0]["embedding"]
|
|
136
|
+
print(f"Vector length: {len(embedding_vector)}")
|
|
137
|
+
|
|
138
|
+
# Generate embeddings for multiple texts
|
|
139
|
+
texts = ["Text 1", "Text 2", "Text 3"]
|
|
140
|
+
response = client.embeddings(
|
|
141
|
+
input=texts,
|
|
142
|
+
model="nomic-embed-text-v1-GGUF"
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
for item in response["data"]:
|
|
146
|
+
print(f"Text {item['index']}: {len(item['embedding'])} dimensions")
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Supported Backends:**
|
|
150
|
+
- ✅ **FLM (FastFlowLM)** - NPU-accelerated on Windows
|
|
151
|
+
- ✅ **llamacpp** (.GGUF models) - CPU/GPU
|
|
152
|
+
- ❌ ONNX/OGA - Not supported
|
|
153
|
+
|
|
154
|
+
### 🖼️ Production Showcase: [Sorana](https://tetramatrix.github.io/Sorana/)
|
|
155
|
+
This SDK was extracted from the core engine of [Sorana](https://tetramatrix.github.io/Sorana/), a professional visual workspace for AI. It demonstrates the SDK's capability to handle complex, real-world requirements on AMD Ryzen AI hardware:
|
|
156
|
+
|
|
157
|
+
* **Low Latency:** Powers sub-second response times for multi-model chat interfaces.
|
|
158
|
+
* **Dynamic Workflows:** Manages the loading and unloading of 20+ different LLMs based on user activity to optimize local NPU/GPU memory.
|
|
159
|
+
* **Zero-Config UX:** Uses the built-in port scanner to automatically connect the Sorana frontend to the Lemonade backend without user intervention.
|
|
160
|
+
|
|
161
|
+
## 🛠️ Project Structure
|
|
162
|
+
|
|
163
|
+
* **client.py:** Main entry point for API interactions (chat, embeddings, model management).
|
|
164
|
+
* **port_scanner.py:** Utilities for detecting Lemonade instances across ports (8000-9000).
|
|
165
|
+
* **model_discovery.py:** Logic for fetching and parsing model metadata.
|
|
166
|
+
* **request_builder.py:** Helper functions to construct compliant payloads (chat, embeddings).
|
|
167
|
+
* **utils.py:** Additional utility functions.
|
|
168
|
+
|
|
169
|
+
## 📚 Documentation
|
|
170
|
+
|
|
171
|
+
* **[Embeddings API](docs/embeddings_api.md)** - Complete guide for using embeddings
|
|
172
|
+
* [Lemonade Server Docs](https://lemonade-server.ai/docs/server/server_spec/) - Official Lemonade documentation
|
|
173
|
+
|
|
174
|
+
## 🤝 Contributing
|
|
175
|
+
|
|
176
|
+
Contributions are welcome! This project is intended to help the AMD Ryzen AI and Lemonade community build downstream applications faster.
|
|
177
|
+
|
|
178
|
+
## 📄 License
|
|
179
|
+
|
|
180
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
lemonade_integration/__init__.py
|
|
5
|
+
lemonade_integration/client.py
|
|
6
|
+
lemonade_integration/model_discovery.py
|
|
7
|
+
lemonade_integration/port_scanner.py
|
|
8
|
+
lemonade_integration/request_builder.py
|
|
9
|
+
lemonade_integration/utils.py
|
|
10
|
+
lemonade_python_sdk.egg-info/PKG-INFO
|
|
11
|
+
lemonade_python_sdk.egg-info/SOURCES.txt
|
|
12
|
+
lemonade_python_sdk.egg-info/dependency_links.txt
|
|
13
|
+
lemonade_python_sdk.egg-info/requires.txt
|
|
14
|
+
lemonade_python_sdk.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
lemonade_integration
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Setup file for the Lemonade integration module
|
|
3
|
+
This file allows installation as a standalone package
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from setuptools import setup, find_packages
|
|
7
|
+
|
|
8
|
+
with open("README.md", "r", encoding="utf-8") as fh:
|
|
9
|
+
long_description = fh.read()
|
|
10
|
+
|
|
11
|
+
with open("LICENSE", "r", encoding="utf-8") as fh:
|
|
12
|
+
license_content = fh.read()
|
|
13
|
+
|
|
14
|
+
setup(
|
|
15
|
+
name="lemonade-python-sdk",
|
|
16
|
+
version="1.0.1",
|
|
17
|
+
author="Your Name",
|
|
18
|
+
author_email="your.email@example.com",
|
|
19
|
+
description="A clean interface for interacting with the Lemonade LLM server",
|
|
20
|
+
long_description=long_description,
|
|
21
|
+
long_description_content_type="text/markdown",
|
|
22
|
+
url="https://github.com/Tetramatrix/lemonade-python-sdk",
|
|
23
|
+
packages=find_packages(),
|
|
24
|
+
classifiers=[
|
|
25
|
+
"Development Status :: 4 - Beta",
|
|
26
|
+
"Intended Audience :: Developers",
|
|
27
|
+
"License :: OSI Approved :: MIT License",
|
|
28
|
+
"Operating System :: OS Independent",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Programming Language :: Python :: 3.8",
|
|
31
|
+
"Programming Language :: Python :: 3.9",
|
|
32
|
+
"Programming Language :: Python :: 3.10",
|
|
33
|
+
"Programming Language :: Python :: 3.11",
|
|
34
|
+
],
|
|
35
|
+
python_requires=">=3.8",
|
|
36
|
+
install_requires=[
|
|
37
|
+
"requests>=2.25.0",
|
|
38
|
+
],
|
|
39
|
+
extras_require={
|
|
40
|
+
"dev": [
|
|
41
|
+
"pytest>=6.0",
|
|
42
|
+
"pytest-cov",
|
|
43
|
+
"flake8",
|
|
44
|
+
],
|
|
45
|
+
},
|
|
46
|
+
keywords="llm, ai, lemonade, sdk, api",
|
|
47
|
+
project_urls={
|
|
48
|
+
"Bug Reports": "https://github.com/Tetramatrix/lemonade-python-sdk/issues",
|
|
49
|
+
"Source": "https://github.com/Tetramatrix/lemonade-python-sdk",
|
|
50
|
+
},
|
|
51
|
+
)
|