pendra 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pendra-0.1.0/PKG-INFO +144 -0
- pendra-0.1.0/README.md +117 -0
- pendra-0.1.0/pendra/__init__.py +22 -0
- pendra-0.1.0/pendra/_client.py +110 -0
- pendra-0.1.0/pendra/_exceptions.py +32 -0
- pendra-0.1.0/pendra/_streaming.py +38 -0
- pendra-0.1.0/pendra/resources/__init__.py +4 -0
- pendra-0.1.0/pendra/resources/async_chat.py +87 -0
- pendra-0.1.0/pendra/resources/async_models.py +21 -0
- pendra-0.1.0/pendra/resources/chat/__init__.py +3 -0
- pendra-0.1.0/pendra/resources/chat/chat.py +6 -0
- pendra-0.1.0/pendra/resources/chat/completions.py +136 -0
- pendra-0.1.0/pendra/resources/models.py +37 -0
- pendra-0.1.0/pendra/types/__init__.py +19 -0
- pendra-0.1.0/pendra/types/chat.py +110 -0
- pendra-0.1.0/pendra.egg-info/PKG-INFO +144 -0
- pendra-0.1.0/pendra.egg-info/SOURCES.txt +29 -0
- pendra-0.1.0/pendra.egg-info/dependency_links.txt +1 -0
- pendra-0.1.0/pendra.egg-info/requires.txt +6 -0
- pendra-0.1.0/pendra.egg-info/top_level.txt +1 -0
- pendra-0.1.0/pyproject.toml +37 -0
- pendra-0.1.0/setup.cfg +4 -0
- pendra-0.1.0/setup.py +4 -0
- pendra-0.1.0/tests/test_async_chat_completions.py +59 -0
- pendra-0.1.0/tests/test_async_models.py +30 -0
- pendra-0.1.0/tests/test_chat_completions.py +80 -0
- pendra-0.1.0/tests/test_client.py +55 -0
- pendra-0.1.0/tests/test_exceptions.py +38 -0
- pendra-0.1.0/tests/test_models.py +36 -0
- pendra-0.1.0/tests/test_streaming.py +97 -0
- pendra-0.1.0/tests/test_types.py +81 -0
pendra-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pendra
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK for Pendra — UK-based, privacy-first LLM inference
|
|
5
|
+
Author-email: Pendra <hello@pendra.ai>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://pendra.ai
|
|
8
|
+
Project-URL: Documentation, https://docs.pendra.ai
|
|
9
|
+
Project-URL: Repository, https://github.com/Pendra-Cloud/pendra
|
|
10
|
+
Keywords: llm,ai,inference,uk,openai-compatible
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: httpx>=0.27
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
|
|
26
|
+
Requires-Dist: respx>=0.22; extra == "dev"
|
|
27
|
+
|
|
28
|
+
# pendra-python
|
|
29
|
+
|
|
30
|
+
Official Python SDK for [Pendra](https://pendra.ai) — UK-based, privacy-first LLM inference.
|
|
31
|
+
|
|
32
|
+
Your data is processed in the UK, never stored, never shared with US cloud providers.
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install pendra
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Quick Start
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
import pendra
|
|
44
|
+
|
|
45
|
+
client = pendra.Pendra(
|
|
46
|
+
api_key="pdr_sk_...", # or set PENDRA_API_KEY env var
|
|
47
|
+
base_url="http://localhost", # your self-hosted instance
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
response = client.chat.completions.create(
|
|
51
|
+
model="llama3.2",
|
|
52
|
+
messages=[
|
|
53
|
+
{"role": "system", "content": "You are a helpful assistant."},
|
|
54
|
+
{"role": "user", "content": "What is the capital of the UK?"},
|
|
55
|
+
],
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
print(response.choices[0].message.content)
|
|
59
|
+
# → London is the capital of the United Kingdom.
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Streaming
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
with client.chat.completions.create(
|
|
66
|
+
model="llama3.2",
|
|
67
|
+
messages=[{"role": "user", "content": "Write me a short poem about London."}],
|
|
68
|
+
stream=True,
|
|
69
|
+
) as stream:
|
|
70
|
+
for chunk in stream:
|
|
71
|
+
print(chunk.choices[0].delta.content or "", end="", flush=True)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Async
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import asyncio
|
|
78
|
+
import pendra
|
|
79
|
+
|
|
80
|
+
async def main():
|
|
81
|
+
async with pendra.AsyncPendra(api_key="pdr_sk_...") as client:
|
|
82
|
+
# Non-streaming
|
|
83
|
+
response = await client.chat.completions.create(
|
|
84
|
+
model="llama3.2",
|
|
85
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
86
|
+
)
|
|
87
|
+
print(response.choices[0].message.content)
|
|
88
|
+
|
|
89
|
+
# Streaming
|
|
90
|
+
stream = await client.chat.completions.create(
|
|
91
|
+
model="llama3.2",
|
|
92
|
+
messages=[{"role": "user", "content": "Count to 5"}],
|
|
93
|
+
stream=True,
|
|
94
|
+
)
|
|
95
|
+
async for chunk in stream:
|
|
96
|
+
print(chunk.choices[0].delta.content or "", end="", flush=True)
|
|
97
|
+
|
|
98
|
+
asyncio.run(main())
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## List Models
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
models = client.models.list()
|
|
105
|
+
for model in models:
|
|
106
|
+
print(model.id)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Environment Variables
|
|
110
|
+
|
|
111
|
+
| Variable | Description |
|
|
112
|
+
|----------|-------------|
|
|
113
|
+
| `PENDRA_API_KEY` | Your Pendra API key (`pdr_sk_...`) |
|
|
114
|
+
|
|
115
|
+
## OpenAI Compatibility
|
|
116
|
+
|
|
117
|
+
The Pendra SDK is fully compatible with the OpenAI Python SDK interface. To migrate:
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
# Before
|
|
121
|
+
from openai import OpenAI
|
|
122
|
+
client = OpenAI(api_key="sk-...")
|
|
123
|
+
|
|
124
|
+
# After
|
|
125
|
+
from pendra import Pendra
|
|
126
|
+
client = Pendra(api_key="pdr_sk_...", base_url="https://your-pendra-instance.com")
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
The `client.chat.completions.create()` interface is identical.
|
|
130
|
+
|
|
131
|
+
## Self-Hosting
|
|
132
|
+
|
|
133
|
+
Pendra is fully open source. Run your own instance:
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
git clone https://github.com/pendra-ai/pendra
|
|
137
|
+
cd pendra
|
|
138
|
+
cp .env.example .env
|
|
139
|
+
docker compose up -d
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Licence
|
|
143
|
+
|
|
144
|
+
MIT
|
pendra-0.1.0/README.md
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# pendra-python
|
|
2
|
+
|
|
3
|
+
Official Python SDK for [Pendra](https://pendra.ai) — UK-based, privacy-first LLM inference.
|
|
4
|
+
|
|
5
|
+
Your data is processed in the UK, never stored, never shared with US cloud providers.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install pendra
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import pendra
|
|
17
|
+
|
|
18
|
+
client = pendra.Pendra(
|
|
19
|
+
api_key="pdr_sk_...", # or set PENDRA_API_KEY env var
|
|
20
|
+
base_url="http://localhost", # your self-hosted instance
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
response = client.chat.completions.create(
|
|
24
|
+
model="llama3.2",
|
|
25
|
+
messages=[
|
|
26
|
+
{"role": "system", "content": "You are a helpful assistant."},
|
|
27
|
+
{"role": "user", "content": "What is the capital of the UK?"},
|
|
28
|
+
],
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
print(response.choices[0].message.content)
|
|
32
|
+
# → London is the capital of the United Kingdom.
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Streaming
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
with client.chat.completions.create(
|
|
39
|
+
model="llama3.2",
|
|
40
|
+
messages=[{"role": "user", "content": "Write me a short poem about London."}],
|
|
41
|
+
stream=True,
|
|
42
|
+
) as stream:
|
|
43
|
+
for chunk in stream:
|
|
44
|
+
print(chunk.choices[0].delta.content or "", end="", flush=True)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Async
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
import asyncio
|
|
51
|
+
import pendra
|
|
52
|
+
|
|
53
|
+
async def main():
|
|
54
|
+
async with pendra.AsyncPendra(api_key="pdr_sk_...") as client:
|
|
55
|
+
# Non-streaming
|
|
56
|
+
response = await client.chat.completions.create(
|
|
57
|
+
model="llama3.2",
|
|
58
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
59
|
+
)
|
|
60
|
+
print(response.choices[0].message.content)
|
|
61
|
+
|
|
62
|
+
# Streaming
|
|
63
|
+
stream = await client.chat.completions.create(
|
|
64
|
+
model="llama3.2",
|
|
65
|
+
messages=[{"role": "user", "content": "Count to 5"}],
|
|
66
|
+
stream=True,
|
|
67
|
+
)
|
|
68
|
+
async for chunk in stream:
|
|
69
|
+
print(chunk.choices[0].delta.content or "", end="", flush=True)
|
|
70
|
+
|
|
71
|
+
asyncio.run(main())
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## List Models
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
models = client.models.list()
|
|
78
|
+
for model in models:
|
|
79
|
+
print(model.id)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Environment Variables
|
|
83
|
+
|
|
84
|
+
| Variable | Description |
|
|
85
|
+
|----------|-------------|
|
|
86
|
+
| `PENDRA_API_KEY` | Your Pendra API key (`pdr_sk_...`) |
|
|
87
|
+
|
|
88
|
+
## OpenAI Compatibility
|
|
89
|
+
|
|
90
|
+
The Pendra SDK is fully compatible with the OpenAI Python SDK interface. To migrate:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
# Before
|
|
94
|
+
from openai import OpenAI
|
|
95
|
+
client = OpenAI(api_key="sk-...")
|
|
96
|
+
|
|
97
|
+
# After
|
|
98
|
+
from pendra import Pendra
|
|
99
|
+
client = Pendra(api_key="pdr_sk_...", base_url="https://your-pendra-instance.com")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
The `client.chat.completions.create()` interface is identical.
|
|
103
|
+
|
|
104
|
+
## Self-Hosting
|
|
105
|
+
|
|
106
|
+
Pendra is fully open source. Run your own instance:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
git clone https://github.com/pendra-ai/pendra
|
|
110
|
+
cd pendra
|
|
111
|
+
cp .env.example .env
|
|
112
|
+
docker compose up -d
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Licence
|
|
116
|
+
|
|
117
|
+
MIT
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Pendra Python SDK — UK-based LLM inference, OpenAI-compatible."""
|
|
2
|
+
|
|
3
|
+
from ._client import AsyncPendra, Pendra
|
|
4
|
+
from ._exceptions import (
|
|
5
|
+
APIConnectionError,
|
|
6
|
+
APIError,
|
|
7
|
+
APIStatusError,
|
|
8
|
+
AuthenticationError,
|
|
9
|
+
RateLimitError,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__version__ = "0.1.0"
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"Pendra",
|
|
16
|
+
"AsyncPendra",
|
|
17
|
+
"APIError",
|
|
18
|
+
"APIStatusError",
|
|
19
|
+
"APIConnectionError",
|
|
20
|
+
"AuthenticationError",
|
|
21
|
+
"RateLimitError",
|
|
22
|
+
]
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from .resources.chat import Chat
|
|
8
|
+
from .resources.models import Models
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Pendra:
|
|
12
|
+
"""
|
|
13
|
+
Synchronous Pendra client.
|
|
14
|
+
|
|
15
|
+
Usage::
|
|
16
|
+
|
|
17
|
+
import pendra
|
|
18
|
+
|
|
19
|
+
client = pendra.Pendra(
|
|
20
|
+
api_key="pdr_sk_...",
|
|
21
|
+
base_url="http://localhost", # or your Pendra instance
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
response = client.chat.completions.create(
|
|
25
|
+
model="llama3.2",
|
|
26
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
27
|
+
)
|
|
28
|
+
print(response.choices[0].message.content)
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
api_key: str | None = None,
|
|
34
|
+
base_url: str = "https://api.pendra.ai",
|
|
35
|
+
timeout: float = 120.0,
|
|
36
|
+
):
|
|
37
|
+
self.api_key = api_key or os.environ.get("PENDRA_API_KEY")
|
|
38
|
+
if not self.api_key:
|
|
39
|
+
raise ValueError(
|
|
40
|
+
"No API key provided. Pass api_key= or set the "
|
|
41
|
+
"PENDRA_API_KEY environment variable."
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
self.base_url = base_url.rstrip("/")
|
|
45
|
+
self._http = httpx.Client(timeout=timeout)
|
|
46
|
+
|
|
47
|
+
self.chat = Chat(self)
|
|
48
|
+
self.models = Models(self)
|
|
49
|
+
|
|
50
|
+
def __enter__(self):
|
|
51
|
+
return self
|
|
52
|
+
|
|
53
|
+
def __exit__(self, *_):
|
|
54
|
+
self._http.close()
|
|
55
|
+
|
|
56
|
+
def close(self):
|
|
57
|
+
self._http.close()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class AsyncPendra:
|
|
61
|
+
"""
|
|
62
|
+
Async Pendra client for use with asyncio.
|
|
63
|
+
|
|
64
|
+
Usage::
|
|
65
|
+
|
|
66
|
+
import asyncio
|
|
67
|
+
import pendra
|
|
68
|
+
|
|
69
|
+
async def main():
|
|
70
|
+
async with pendra.AsyncPendra(api_key="pdr_sk_...") as client:
|
|
71
|
+
response = await client.chat.completions.create(
|
|
72
|
+
model="llama3.2",
|
|
73
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
74
|
+
)
|
|
75
|
+
print(response.choices[0].message.content)
|
|
76
|
+
|
|
77
|
+
asyncio.run(main())
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
def __init__(
|
|
81
|
+
self,
|
|
82
|
+
api_key: str | None = None,
|
|
83
|
+
base_url: str = "https://api.pendra.ai",
|
|
84
|
+
timeout: float = 120.0,
|
|
85
|
+
):
|
|
86
|
+
self.api_key = api_key or os.environ.get("PENDRA_API_KEY")
|
|
87
|
+
if not self.api_key:
|
|
88
|
+
raise ValueError(
|
|
89
|
+
"No API key provided. Pass api_key= or set the "
|
|
90
|
+
"PENDRA_API_KEY environment variable."
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
self.base_url = base_url.rstrip("/")
|
|
94
|
+
self._http = httpx.AsyncClient(timeout=timeout)
|
|
95
|
+
|
|
96
|
+
# Async resources share the sync interface via separate implementation
|
|
97
|
+
from .resources.async_chat import AsyncChat
|
|
98
|
+
from .resources.async_models import AsyncModels
|
|
99
|
+
|
|
100
|
+
self.chat = AsyncChat(self)
|
|
101
|
+
self.models = AsyncModels(self)
|
|
102
|
+
|
|
103
|
+
async def __aenter__(self):
|
|
104
|
+
return self
|
|
105
|
+
|
|
106
|
+
async def __aexit__(self, *_):
|
|
107
|
+
await self._http.aclose()
|
|
108
|
+
|
|
109
|
+
async def close(self):
|
|
110
|
+
await self._http.aclose()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
class APIError(Exception):
|
|
2
|
+
"""Base exception for all Pendra API errors."""
|
|
3
|
+
|
|
4
|
+
def __init__(self, message: str, status_code: int | None = None):
|
|
5
|
+
super().__init__(message)
|
|
6
|
+
self.message = message
|
|
7
|
+
self.status_code = status_code
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class APIConnectionError(APIError):
|
|
11
|
+
"""Raised when the API cannot be reached."""
|
|
12
|
+
|
|
13
|
+
def __init__(self, message: str = "Failed to connect to the Pendra API"):
|
|
14
|
+
super().__init__(message)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class APIStatusError(APIError):
|
|
18
|
+
"""Raised when the API returns a non-2xx response."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class AuthenticationError(APIStatusError):
|
|
22
|
+
"""Raised on 401 Unauthorized."""
|
|
23
|
+
|
|
24
|
+
def __init__(self):
|
|
25
|
+
super().__init__("Invalid API key — check your PENDRA_API_KEY", status_code=401)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class RateLimitError(APIStatusError):
|
|
29
|
+
"""Raised on 429 Too Many Requests."""
|
|
30
|
+
|
|
31
|
+
def __init__(self):
|
|
32
|
+
super().__init__("Rate limit exceeded", status_code=429)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from .types.chat import ChatCompletionChunk
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Stream:
|
|
12
|
+
"""Iterates over SSE chunks from a streaming chat completion."""
|
|
13
|
+
|
|
14
|
+
def __init__(self, ctx_manager, response: "httpx.Response"):
|
|
15
|
+
self._ctx = ctx_manager # keeps the httpx stream context alive
|
|
16
|
+
self._response = response
|
|
17
|
+
self._iter = response.iter_lines()
|
|
18
|
+
|
|
19
|
+
def __iter__(self) -> Iterator[ChatCompletionChunk]:
|
|
20
|
+
for line in self._iter:
|
|
21
|
+
line = line.strip()
|
|
22
|
+
if not line or not line.startswith("data: "):
|
|
23
|
+
continue
|
|
24
|
+
data = line[6:]
|
|
25
|
+
if data == "[DONE]":
|
|
26
|
+
break
|
|
27
|
+
try:
|
|
28
|
+
chunk = ChatCompletionChunk.from_dict(json.loads(data))
|
|
29
|
+
yield chunk
|
|
30
|
+
except (json.JSONDecodeError, KeyError):
|
|
31
|
+
continue
|
|
32
|
+
|
|
33
|
+
def __enter__(self):
|
|
34
|
+
return self
|
|
35
|
+
|
|
36
|
+
def __exit__(self, *args):
|
|
37
|
+
self._response.close()
|
|
38
|
+
self._ctx.__exit__(*args)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations

import json
from collections.abc import AsyncIterator
from typing import TYPE_CHECKING, Literal, Optional, Union, overload

import httpx

from .._exceptions import APIConnectionError, APIStatusError, AuthenticationError, RateLimitError
from .._streaming import Stream
from ..types.chat import ChatCompletion, ChatCompletionChunk
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from .._client import AsyncPendra
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AsyncCompletions:
    """Async chat-completions resource, exposed as ``client.chat.completions``."""

    def __init__(self, client: "AsyncPendra"):
        self._client = client

    async def create(
        self,
        *,
        model: str,
        messages: list[dict],
        stream: bool = False,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        top_p: Optional[float] = None,
        stop: Optional[Union[str, list[str]]] = None,
        **kwargs,
    ) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
        """Create a chat completion.

        Args:
            model: Model identifier, e.g. ``"llama3.2"``.
            messages: OpenAI-style message dicts with ``role``/``content``.
            stream: When True, return an async iterator of
                ``ChatCompletionChunk`` instead of a single ``ChatCompletion``.
            temperature: Optional sampling temperature (omitted when ``None``).
            max_tokens: Optional completion-length cap (omitted when ``None``).
            top_p: Optional nucleus-sampling value (omitted when ``None``).
            stop: Optional stop string or list of stop strings.
            **kwargs: Extra fields merged verbatim into the request body.

        Raises:
            AuthenticationError: On HTTP 401.
            RateLimitError: On HTTP 429.
            APIStatusError: On any other non-2xx response.
            APIConnectionError: When the server cannot be reached.
        """
        payload: dict = {"model": model, "messages": messages, "stream": stream}
        if temperature is not None:
            payload["temperature"] = temperature
        if max_tokens is not None:
            payload["max_tokens"] = max_tokens
        if top_p is not None:
            payload["top_p"] = top_p
        if stop is not None:
            payload["stop"] = stop
        payload.update(kwargs)

        url = f"{self._client.base_url}/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self._client.api_key}",
            "Content-Type": "application/json",
        }

        if stream:
            # Return the async generator un-awaited so callers can
            # ``async for`` directly over the awaited result of create().
            return self._stream(url, payload, headers)

        try:
            resp = await self._client._http.post(url, json=payload, headers=headers)
        except httpx.TransportError as exc:
            # Surface network-level failures (DNS, refused connection,
            # timeout) as the SDK's connection error.
            raise APIConnectionError() from exc
        self._raise_for_status(resp)
        return ChatCompletion.from_dict(resp.json())

    async def _stream(
        self, url: str, payload: dict, headers: dict
    ) -> AsyncIterator[ChatCompletionChunk]:
        """Yield parsed SSE chunks from a streaming completions request."""
        try:
            async with self._client._http.stream(
                "POST", url, json=payload, headers=headers
            ) as resp:
                if resp.status_code >= 400:
                    # BUGFIX: a streamed response must be read before
                    # .json()/.text are available; without this,
                    # _raise_for_status hit httpx.ResponseNotRead instead of
                    # raising the intended APIStatusError.
                    await resp.aread()
                    self._raise_for_status(resp)
                async for line in resp.aiter_lines():
                    line = line.strip()
                    if not line.startswith("data: "):
                        continue  # blank keep-alives / non-data SSE fields
                    data = line[6:]
                    if data == "[DONE]":
                        break
                    try:
                        yield ChatCompletionChunk.from_dict(json.loads(data))
                    except (json.JSONDecodeError, KeyError):
                        continue  # tolerate malformed chunks
        except httpx.TransportError as exc:
            raise APIConnectionError() from exc

    def _raise_for_status(self, response) -> None:
        """Map a non-2xx response to an SDK exception; no-op on success."""
        if response.status_code == 401:
            raise AuthenticationError()
        if response.status_code == 429:
            raise RateLimitError()
        if response.status_code >= 400:
            try:
                # Prefer the server's structured {"detail": ...} message.
                detail = response.json().get("detail", response.text)
            except Exception:
                detail = response.text
            raise APIStatusError(detail, status_code=response.status_code)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class AsyncChat:
    """Namespace grouping the async chat endpoints (``client.chat``)."""

    def __init__(self, client: "AsyncPendra"):
        # Only one sub-resource today: ``client.chat.completions``.
        self.completions = AsyncCompletions(client)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from .models import Model
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from .._client import AsyncPendra
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AsyncModels:
    """Async models resource, exposed as ``client.models``."""

    def __init__(self, client: "AsyncPendra"):
        self._client = client

    async def list(self) -> list[Model]:
        """List all available models on this Pendra instance."""
        client = self._client
        resp = await client._http.get(
            f"{client.base_url}/api/v1/models",
            headers={"Authorization": f"Bearer {client.api_key}"},
        )
        resp.raise_for_status()
        entries = resp.json().get("data", [])
        return [Model.from_dict(entry) for entry in entries]
|