mantis-gw 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mantis_gw-1.0.0/PKG-INFO +152 -0
- mantis_gw-1.0.0/README.md +142 -0
- mantis_gw-1.0.0/pyproject.toml +31 -0
- mantis_gw-1.0.0/src/mantis_gw/__init__.py +5 -0
- mantis_gw-1.0.0/src/mantis_gw/gateway.py +41 -0
mantis_gw-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: mantis-gw
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Mantis SDK
|
|
5
|
+
Author: Mantis Team
|
|
6
|
+
Author-email: Mantis Team <rizwansyed876@gmail.com>
|
|
7
|
+
Requires-Dist: httpx
|
|
8
|
+
Requires-Python: >=3.13
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# Mantis SDK
|
|
12
|
+
|
|
13
|
+
Python SDK for sending chat completion requests to the Mantis `llm-gateway`
|
|
14
|
+
`/v1/chat/completions` endpoint.
|
|
15
|
+
|
|
16
|
+
## Requirements
|
|
17
|
+
|
|
18
|
+
- Python 3.13+
|
|
19
|
+
- A running `llm-gateway` service
|
|
20
|
+
- A gateway API token
|
|
21
|
+
|
|
22
|
+
## Install
|
|
23
|
+
|
|
24
|
+
From this repo:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
uv sync
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Non-streaming
|
|
31
|
+
|
|
32
|
+
`url` should be the gateway base URL and should not include `/v1/chat/completions`.
|
|
33
|
+
The SDK appends that endpoint path automatically.
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import asyncio
|
|
37
|
+
|
|
38
|
+
from mantis_gw import gateway
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
async def main() -> None:
|
|
42
|
+
client = gateway.Gateway(
|
|
43
|
+
url="https://gateway.example.com",
|
|
44
|
+
token="gw_token-id_token-secret",
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
response = await client.send(
|
|
48
|
+
{
|
|
49
|
+
"messages": [
|
|
50
|
+
{"role": "user", "content": "Write a short project summary."},
|
|
51
|
+
],
|
|
52
|
+
"stream": False,
|
|
53
|
+
"temperature": 0.5,
|
|
54
|
+
"max_tokens": 256,
|
|
55
|
+
"system": "Answer clearly and concisely.",
|
|
56
|
+
},
|
|
57
|
+
metadata={"task-type": "summarization"},
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
print(response)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
asyncio.run(main())
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Streaming
|
|
67
|
+
|
|
68
|
+
When `stream` is `True`, `send()` returns an async iterator of text chunks.
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import asyncio
|
|
72
|
+
|
|
73
|
+
from mantis_gw import gateway
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
async def main() -> None:
|
|
77
|
+
client = gateway.Gateway(
|
|
78
|
+
url="https://gateway.example.com",
|
|
79
|
+
token="gw_token-id_token-secret"
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
chunks = await client.send(
|
|
83
|
+
{
|
|
84
|
+
"messages": [
|
|
85
|
+
{"role": "user", "content": "Write a short project summary."},
|
|
86
|
+
],
|
|
87
|
+
"stream": True,
|
|
88
|
+
"temperature": 0.5,
|
|
89
|
+
"max_tokens": 256,
|
|
90
|
+
"system": "Answer clearly and concisely.",
|
|
91
|
+
},
|
|
92
|
+
metadata={"task-type": "summarization"},
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
async for chunk in chunks:
|
|
96
|
+
print(chunk, end="")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
asyncio.run(main())
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Request Payload
|
|
103
|
+
|
|
104
|
+
The SDK sends the request payload to `llm-gateway` as-is. Gateway-side validation
|
|
105
|
+
still applies to the endpoint:
|
|
106
|
+
|
|
107
|
+
- `messages` is required and must contain at least one message.
|
|
108
|
+
- Each message must only contain `role` and `content`.
|
|
109
|
+
- `role` must be `"user"` or `"assistant"`.
|
|
110
|
+
- `content` must be a non-empty string after trimming whitespace.
|
|
111
|
+
- Extra top-level request fields are rejected.
|
|
112
|
+
- `stream` defaults to `False`.
|
|
113
|
+
- `temperature` can be omitted, `None`, or a number from `0.0` to `2.0`.
|
|
114
|
+
- `max_tokens` can be omitted, `None`, or an integer greater than `0`.
|
|
115
|
+
- `system` can be omitted, `None`, or a non-empty string after trimming whitespace.
|
|
116
|
+
|
|
117
|
+
## Metadata
|
|
118
|
+
|
|
119
|
+
Pass routing metadata with the `metadata` keyword argument. Keys and values must be strings.
|
|
120
|
+
The SDK sends it as the gateway's `metadata` HTTP header.
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
await client.send(
|
|
124
|
+
{
|
|
125
|
+
"messages": [{"role": "user", "content": "Generate a Python function."}],
|
|
126
|
+
"stream": False,
|
|
127
|
+
},
|
|
128
|
+
metadata={"task-type": "code_generation"},
|
|
129
|
+
)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Errors
|
|
133
|
+
|
|
134
|
+
Gateway responses with 4xx or 5xx status codes raise httpx.HTTPStatusError.
|
|
135
|
+
|
|
136
|
+
## Integration Tests
|
|
137
|
+
|
|
138
|
+
The integration tests read gateway credentials from a local `.env` file in the
|
|
139
|
+
repo root:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
MANTIS_GATEWAY_URL=https://gateway.example.com
|
|
143
|
+
MANTIS_GATEWAY_TOKEN=gw_token-id_token-secret
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Run them with:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
uv run pytest
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
The `.env` file is ignored by Git so real credentials stay local.
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# Mantis SDK
|
|
2
|
+
|
|
3
|
+
Python SDK for sending chat completion requests to the Mantis `llm-gateway`
|
|
4
|
+
`/v1/chat/completions` endpoint.
|
|
5
|
+
|
|
6
|
+
## Requirements
|
|
7
|
+
|
|
8
|
+
- Python 3.13+
|
|
9
|
+
- A running `llm-gateway` service
|
|
10
|
+
- A gateway API token
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
From this repo:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
uv sync
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Non-streaming
|
|
21
|
+
|
|
22
|
+
`url` should be the gateway base URL and should not include `/v1/chat/completions`.
|
|
23
|
+
The SDK appends that endpoint path automatically.
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
import asyncio
|
|
27
|
+
|
|
28
|
+
from mantis_gw import gateway
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
async def main() -> None:
|
|
32
|
+
client = gateway.Gateway(
|
|
33
|
+
url="https://gateway.example.com",
|
|
34
|
+
token="gw_token-id_token-secret",
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
response = await client.send(
|
|
38
|
+
{
|
|
39
|
+
"messages": [
|
|
40
|
+
{"role": "user", "content": "Write a short project summary."},
|
|
41
|
+
],
|
|
42
|
+
"stream": False,
|
|
43
|
+
"temperature": 0.5,
|
|
44
|
+
"max_tokens": 256,
|
|
45
|
+
"system": "Answer clearly and concisely.",
|
|
46
|
+
},
|
|
47
|
+
metadata={"task-type": "summarization"},
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
print(response)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
asyncio.run(main())
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Streaming
|
|
57
|
+
|
|
58
|
+
When `stream` is `True`, `send()` returns an async iterator of text chunks.
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import asyncio
|
|
62
|
+
|
|
63
|
+
from mantis_gw import gateway
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
async def main() -> None:
|
|
67
|
+
client = gateway.Gateway(
|
|
68
|
+
url="https://gateway.example.com",
|
|
69
|
+
token="gw_token-id_token-secret"
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
chunks = await client.send(
|
|
73
|
+
{
|
|
74
|
+
"messages": [
|
|
75
|
+
{"role": "user", "content": "Write a short project summary."},
|
|
76
|
+
],
|
|
77
|
+
"stream": True,
|
|
78
|
+
"temperature": 0.5,
|
|
79
|
+
"max_tokens": 256,
|
|
80
|
+
"system": "Answer clearly and concisely.",
|
|
81
|
+
},
|
|
82
|
+
metadata={"task-type": "summarization"},
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
async for chunk in chunks:
|
|
86
|
+
print(chunk, end="")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
asyncio.run(main())
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Request Payload
|
|
93
|
+
|
|
94
|
+
The SDK sends the request payload to `llm-gateway` as-is. Gateway-side validation
|
|
95
|
+
still applies to the endpoint:
|
|
96
|
+
|
|
97
|
+
- `messages` is required and must contain at least one message.
|
|
98
|
+
- Each message must only contain `role` and `content`.
|
|
99
|
+
- `role` must be `"user"` or `"assistant"`.
|
|
100
|
+
- `content` must be a non-empty string after trimming whitespace.
|
|
101
|
+
- Extra top-level request fields are rejected.
|
|
102
|
+
- `stream` defaults to `False`.
|
|
103
|
+
- `temperature` can be omitted, `None`, or a number from `0.0` to `2.0`.
|
|
104
|
+
- `max_tokens` can be omitted, `None`, or an integer greater than `0`.
|
|
105
|
+
- `system` can be omitted, `None`, or a non-empty string after trimming whitespace.
|
|
106
|
+
|
|
107
|
+
## Metadata
|
|
108
|
+
|
|
109
|
+
Pass routing metadata with the `metadata` keyword argument. Keys and values must be strings.
|
|
110
|
+
The SDK sends it as the gateway's `metadata` HTTP header.
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
await client.send(
|
|
114
|
+
{
|
|
115
|
+
"messages": [{"role": "user", "content": "Generate a Python function."}],
|
|
116
|
+
"stream": False,
|
|
117
|
+
},
|
|
118
|
+
metadata={"task-type": "code_generation"},
|
|
119
|
+
)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Errors
|
|
123
|
+
|
|
124
|
+
Gateway responses with 4xx or 5xx status codes raise httpx.HTTPStatusError.
|
|
125
|
+
|
|
126
|
+
## Integration Tests
|
|
127
|
+
|
|
128
|
+
The integration tests read gateway credentials from a local `.env` file in the
|
|
129
|
+
repo root:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
MANTIS_GATEWAY_URL=https://gateway.example.com
|
|
133
|
+
MANTIS_GATEWAY_TOKEN=gw_token-id_token-secret
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Run them with:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
uv run pytest
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
The `.env` file is ignored by Git so real credentials stay local.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mantis-gw"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Mantis SDK"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Mantis Team", email = "rizwansyed876@gmail.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.13"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"httpx",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[dependency-groups]
|
|
15
|
+
dev = [
|
|
16
|
+
"pre-commit",
|
|
17
|
+
"pytest",
|
|
18
|
+
"python-dotenv",
|
|
19
|
+
"ruff",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[build-system]
|
|
23
|
+
requires = ["uv_build>=0.10.9,<0.11.0"]
|
|
24
|
+
build-backend = "uv_build"
|
|
25
|
+
|
|
26
|
+
[tool.ruff]
|
|
27
|
+
line-length = 100
|
|
28
|
+
target-version = "py313"
|
|
29
|
+
|
|
30
|
+
[tool.ruff.lint]
|
|
31
|
+
select = ["E", "F", "I"]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from collections.abc import AsyncIterator
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
# Path appended to the gateway base URL to form the chat completions endpoint.
_CHAT_COMPLETIONS_PATH = "/v1/chat/completions"
|
|
8
|
+
|
|
9
|
+
class Gateway:
|
|
10
|
+
def __init__(self, *, url, token):
|
|
11
|
+
self.url = url.rstrip("/")
|
|
12
|
+
self.token = token
|
|
13
|
+
|
|
14
|
+
async def send(self, request, *, metadata=None) -> AsyncIterator[str] | dict[str, Any]:
|
|
15
|
+
headers = _build_headers(self.token, metadata)
|
|
16
|
+
endpoint = f"{self.url}{_CHAT_COMPLETIONS_PATH}"
|
|
17
|
+
|
|
18
|
+
if request.get("stream") is True:
|
|
19
|
+
return _stream(endpoint, headers, request)
|
|
20
|
+
|
|
21
|
+
async with httpx.AsyncClient() as client:
|
|
22
|
+
response = await client.post(endpoint, headers=headers, json=request)
|
|
23
|
+
response.raise_for_status()
|
|
24
|
+
return response.json()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def _stream(endpoint, headers, request) -> AsyncIterator[str]:
    """Yield the gateway's streamed response body as decoded text chunks.

    Opens a fresh ``httpx.AsyncClient``, POSTs ``request`` as JSON to
    ``endpoint`` with ``headers``, and yields every non-empty chunk produced
    by ``response.aiter_text()``.

    Args:
        endpoint: Full URL of the chat completions endpoint.
        headers: HTTP headers to send with the request.
        request: Mapping sent as the JSON request body.

    Raises:
        httpx.HTTPStatusError: If the gateway answers with an unsuccessful
            status. The response body is read before raising so that
            ``exc.response.text`` is available to the caller.
    """
    async with httpx.AsyncClient() as client:
        async with client.stream("POST", endpoint, headers=headers, json=request) as response:
            if not response.is_success:
                # A streamed response's body is not loaded automatically and the
                # response is closed once this context exits. Read it now so the
                # raised HTTPStatusError still carries the gateway's error payload.
                await response.aread()
            response.raise_for_status()
            async for chunk in response.aiter_text():
                if chunk:
                    yield chunk
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _build_headers(token, metadata):
|
|
37
|
+
headers = {"Authorization": f"Bearer {token}"}
|
|
38
|
+
if metadata is not None:
|
|
39
|
+
headers["metadata"] = json.dumps(metadata)
|
|
40
|
+
return headers
|
|
41
|
+
|