lemonade-python-sdk 1.0.4__tar.gz → 1.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/PKG-INFO +9 -6
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/README.md +8 -5
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_python_sdk.egg-info/PKG-INFO +9 -6
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/setup.py +1 -1
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/LICENSE +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_python_sdk.egg-info/SOURCES.txt +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_python_sdk.egg-info/dependency_links.txt +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_python_sdk.egg-info/requires.txt +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_python_sdk.egg-info/top_level.txt +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_sdk/__init__.py +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_sdk/audio_stream.py +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_sdk/client.py +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_sdk/model_discovery.py +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_sdk/port_scanner.py +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_sdk/request_builder.py +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_sdk/utils.py +0 -0
- {lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-python-sdk
|
|
3
|
-
Version: 1.0.4
|
|
3
|
+
Version: 1.0.5
|
|
4
4
|
Summary: A clean interface for interacting with the Lemonade LLM server
|
|
5
5
|
Home-page: https://github.com/Tetramatrix/lemonade-python-sdk
|
|
6
6
|
Author: Tetramatrix
|
|
@@ -52,8 +52,7 @@ This SDK provides a clean, pythonic interface for interacting with local LLMs ru
|
|
|
52
52
|
|
|
53
53
|
* **Auto-Discovery:** Automatically scans multiple ports and hosts to find active Lemonade instances.
|
|
54
54
|
* **Low-Overhead Architecture:** Designed as a thin, efficient wrapper to leverage Lemonade's C++ performance with minimal Python latency.
|
|
55
|
-
* **Health Checks & Stats:** Lightweight `/api/v1/health` endpoint
|
|
56
|
-
* **Server Statistics:** Retrieve token usage, requests served, and performance metrics via `get_stats()`.
|
|
55
|
+
* **Health Checks & Server Stats:** Lightweight `/api/v1/health` endpoint plus `get_stats()` for token usage, requests served, and performance metrics.
|
|
57
56
|
* **Type-Safe Client:** Full Python type hinting for better developer experience (IDE autocompletion).
|
|
58
57
|
* **Model Management:** Simple API to load, unload, and list models dynamically.
|
|
59
58
|
* **Embeddings API:** Generate text embeddings for semantic search, RAG, and clustering (FLM & llamacpp backends).
|
|
@@ -100,14 +99,18 @@ else:
|
|
|
100
99
|
if client.health_check():
|
|
101
100
|
print("Lemonade is running!")
|
|
102
101
|
|
|
103
|
-
# Get server statistics
|
|
102
|
+
# Get server statistics (performance metrics from last request)
|
|
104
103
|
stats = client.get_stats()
|
|
105
104
|
if stats:
|
|
106
|
-
print(f"
|
|
105
|
+
print(f"Time to first token: {stats.get('time_to_first_token', 0):.2f}s")
|
|
107
106
|
print(f"Tokens/sec: {stats.get('tokens_per_second', 0):.1f}")
|
|
108
|
-
print(f"
|
|
107
|
+
print(f"Input tokens: {stats.get('input_tokens', 0)}")
|
|
108
|
+
print(f"Output tokens: {stats.get('output_tokens', 0)}")
|
|
109
|
+
print(f"Prompt tokens: {stats.get('prompt_tokens', 0)}")
|
|
109
110
|
```
|
|
110
111
|
|
|
112
|
+
**Available stats fields:** `time_to_first_token`, `tokens_per_second`, `input_tokens`, `output_tokens`, `decode_token_times`, `prompt_tokens`.
|
|
113
|
+
|
|
111
114
|
### 2. Chat Completion
|
|
112
115
|
|
|
113
116
|
```python
|
|
@@ -11,8 +11,7 @@ This SDK provides a clean, pythonic interface for interacting with local LLMs ru
|
|
|
11
11
|
|
|
12
12
|
* **Auto-Discovery:** Automatically scans multiple ports and hosts to find active Lemonade instances.
|
|
13
13
|
* **Low-Overhead Architecture:** Designed as a thin, efficient wrapper to leverage Lemonade's C++ performance with minimal Python latency.
|
|
14
|
-
* **Health Checks & Stats:** Lightweight `/api/v1/health` endpoint
|
|
15
|
-
* **Server Statistics:** Retrieve token usage, requests served, and performance metrics via `get_stats()`.
|
|
14
|
+
* **Health Checks & Server Stats:** Lightweight `/api/v1/health` endpoint plus `get_stats()` for token usage, requests served, and performance metrics.
|
|
16
15
|
* **Type-Safe Client:** Full Python type hinting for better developer experience (IDE autocompletion).
|
|
17
16
|
* **Model Management:** Simple API to load, unload, and list models dynamically.
|
|
18
17
|
* **Embeddings API:** Generate text embeddings for semantic search, RAG, and clustering (FLM & llamacpp backends).
|
|
@@ -59,14 +58,18 @@ else:
|
|
|
59
58
|
if client.health_check():
|
|
60
59
|
print("Lemonade is running!")
|
|
61
60
|
|
|
62
|
-
# Get server statistics
|
|
61
|
+
# Get server statistics (performance metrics from last request)
|
|
63
62
|
stats = client.get_stats()
|
|
64
63
|
if stats:
|
|
65
|
-
print(f"
|
|
64
|
+
print(f"Time to first token: {stats.get('time_to_first_token', 0):.2f}s")
|
|
66
65
|
print(f"Tokens/sec: {stats.get('tokens_per_second', 0):.1f}")
|
|
67
|
-
print(f"
|
|
66
|
+
print(f"Input tokens: {stats.get('input_tokens', 0)}")
|
|
67
|
+
print(f"Output tokens: {stats.get('output_tokens', 0)}")
|
|
68
|
+
print(f"Prompt tokens: {stats.get('prompt_tokens', 0)}")
|
|
68
69
|
```
|
|
69
70
|
|
|
71
|
+
**Available stats fields:** `time_to_first_token`, `tokens_per_second`, `input_tokens`, `output_tokens`, `decode_token_times`, `prompt_tokens`.
|
|
72
|
+
|
|
70
73
|
### 2. Chat Completion
|
|
71
74
|
|
|
72
75
|
```python
|
{lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_python_sdk.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-python-sdk
|
|
3
|
-
Version: 1.0.4
|
|
3
|
+
Version: 1.0.5
|
|
4
4
|
Summary: A clean interface for interacting with the Lemonade LLM server
|
|
5
5
|
Home-page: https://github.com/Tetramatrix/lemonade-python-sdk
|
|
6
6
|
Author: Tetramatrix
|
|
@@ -52,8 +52,7 @@ This SDK provides a clean, pythonic interface for interacting with local LLMs ru
|
|
|
52
52
|
|
|
53
53
|
* **Auto-Discovery:** Automatically scans multiple ports and hosts to find active Lemonade instances.
|
|
54
54
|
* **Low-Overhead Architecture:** Designed as a thin, efficient wrapper to leverage Lemonade's C++ performance with minimal Python latency.
|
|
55
|
-
* **Health Checks & Stats:** Lightweight `/api/v1/health` endpoint
|
|
56
|
-
* **Server Statistics:** Retrieve token usage, requests served, and performance metrics via `get_stats()`.
|
|
55
|
+
* **Health Checks & Server Stats:** Lightweight `/api/v1/health` endpoint plus `get_stats()` for token usage, requests served, and performance metrics.
|
|
57
56
|
* **Type-Safe Client:** Full Python type hinting for better developer experience (IDE autocompletion).
|
|
58
57
|
* **Model Management:** Simple API to load, unload, and list models dynamically.
|
|
59
58
|
* **Embeddings API:** Generate text embeddings for semantic search, RAG, and clustering (FLM & llamacpp backends).
|
|
@@ -100,14 +99,18 @@ else:
|
|
|
100
99
|
if client.health_check():
|
|
101
100
|
print("Lemonade is running!")
|
|
102
101
|
|
|
103
|
-
# Get server statistics
|
|
102
|
+
# Get server statistics (performance metrics from last request)
|
|
104
103
|
stats = client.get_stats()
|
|
105
104
|
if stats:
|
|
106
|
-
print(f"
|
|
105
|
+
print(f"Time to first token: {stats.get('time_to_first_token', 0):.2f}s")
|
|
107
106
|
print(f"Tokens/sec: {stats.get('tokens_per_second', 0):.1f}")
|
|
108
|
-
print(f"
|
|
107
|
+
print(f"Input tokens: {stats.get('input_tokens', 0)}")
|
|
108
|
+
print(f"Output tokens: {stats.get('output_tokens', 0)}")
|
|
109
|
+
print(f"Prompt tokens: {stats.get('prompt_tokens', 0)}")
|
|
109
110
|
```
|
|
110
111
|
|
|
112
|
+
**Available stats fields:** `time_to_first_token`, `tokens_per_second`, `input_tokens`, `output_tokens`, `decode_token_times`, `prompt_tokens`.
|
|
113
|
+
|
|
111
114
|
### 2. Chat Completion
|
|
112
115
|
|
|
113
116
|
```python
|
|
@@ -13,7 +13,7 @@ with open("LICENSE", "r", encoding="utf-8") as fh:
|
|
|
13
13
|
|
|
14
14
|
setup(
|
|
15
15
|
name="lemonade-python-sdk",
|
|
16
|
-
version="1.0.4",
|
|
16
|
+
version="1.0.5",
|
|
17
17
|
author="Tetramatrix",
|
|
18
18
|
author_email="contact@tetramatrix.dev",
|
|
19
19
|
description="A clean interface for interacting with the Lemonade LLM server",
|
|
File without changes
|
{lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_python_sdk.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_python_sdk.egg-info/requires.txt
RENAMED
|
File without changes
|
{lemonade_python_sdk-1.0.4 → lemonade_python_sdk-1.0.5}/lemonade_python_sdk.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|