modelgov 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modelgov/__init__.py +82 -0
- modelgov/client.py +564 -0
- modelgov/errors.py +96 -0
- modelgov/py.typed +0 -0
- modelgov/types.py +236 -0
- modelgov-1.0.0.dist-info/METADATA +272 -0
- modelgov-1.0.0.dist-info/RECORD +8 -0
- modelgov-1.0.0.dist-info/WHEEL +4 -0
modelgov/__init__.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Modelgov Python SDK.
|
|
2
|
+
|
|
3
|
+
A typed, idiomatic Python client for the Modelgov AI policy gateway. Mirrors
|
|
4
|
+
the TypeScript SDK's surface (``@modelgov/sdk``).
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
>>> from modelgov import ModelgovClient
|
|
8
|
+
>>> client = ModelgovClient(base_url="http://localhost:3000", api_key="sk-...")
|
|
9
|
+
>>> res = client.chat(
|
|
10
|
+
... user_id="user_123",
|
|
11
|
+
... user_type="logged_in",
|
|
12
|
+
... feature="support_chat",
|
|
13
|
+
... model_class="cheap",
|
|
14
|
+
... messages=[{"role": "user", "content": "Help me reset my password"}],
|
|
15
|
+
... )
|
|
16
|
+
>>> print(res["message"]["content"])
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from .client import ModelgovClient
|
|
20
|
+
from .errors import ModelgovError, PolicyBlockedError, SafetyBlockedError
|
|
21
|
+
from .types import (
|
|
22
|
+
BudgetRemaining,
|
|
23
|
+
ChatMessage,
|
|
24
|
+
ChatResponse,
|
|
25
|
+
ChatResult,
|
|
26
|
+
ContentPart,
|
|
27
|
+
Cost,
|
|
28
|
+
EmbeddingsResponse,
|
|
29
|
+
EmbeddingsResult,
|
|
30
|
+
EmbeddingsUsage,
|
|
31
|
+
ExplainBudget,
|
|
32
|
+
ExplainBudgetUsed,
|
|
33
|
+
ExplainCost,
|
|
34
|
+
ExplainRequested,
|
|
35
|
+
ExplainResolved,
|
|
36
|
+
ExplainResponse,
|
|
37
|
+
ExplainResult,
|
|
38
|
+
ExplainSafety,
|
|
39
|
+
ImagePart,
|
|
40
|
+
ImageUrl,
|
|
41
|
+
ResponseMessage,
|
|
42
|
+
Safety,
|
|
43
|
+
TextPart,
|
|
44
|
+
Usage,
|
|
45
|
+
UsageResponse,
|
|
46
|
+
UsageResult,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
__version__ = "1.0.0"
|
|
50
|
+
|
|
51
|
+
__all__ = [
|
|
52
|
+
"ModelgovClient",
|
|
53
|
+
"ModelgovError",
|
|
54
|
+
"PolicyBlockedError",
|
|
55
|
+
"SafetyBlockedError",
|
|
56
|
+
"ChatMessage",
|
|
57
|
+
"ChatResponse",
|
|
58
|
+
"ChatResult",
|
|
59
|
+
"TextPart",
|
|
60
|
+
"ImageUrl",
|
|
61
|
+
"ImagePart",
|
|
62
|
+
"ContentPart",
|
|
63
|
+
"EmbeddingsResponse",
|
|
64
|
+
"EmbeddingsResult",
|
|
65
|
+
"EmbeddingsUsage",
|
|
66
|
+
"Usage",
|
|
67
|
+
"Cost",
|
|
68
|
+
"BudgetRemaining",
|
|
69
|
+
"Safety",
|
|
70
|
+
"ResponseMessage",
|
|
71
|
+
"ExplainRequested",
|
|
72
|
+
"ExplainResolved",
|
|
73
|
+
"ExplainSafety",
|
|
74
|
+
"ExplainCost",
|
|
75
|
+
"ExplainBudgetUsed",
|
|
76
|
+
"ExplainBudget",
|
|
77
|
+
"ExplainResponse",
|
|
78
|
+
"ExplainResult",
|
|
79
|
+
"UsageResponse",
|
|
80
|
+
"UsageResult",
|
|
81
|
+
"__version__",
|
|
82
|
+
]
|
modelgov/client.py
ADDED
|
@@ -0,0 +1,564 @@
|
|
|
1
|
+
"""Synchronous Modelgov API client.
|
|
2
|
+
|
|
3
|
+
A thin, typed HTTP client over the Modelgov REST API. Policy enforcement is
|
|
4
|
+
always server-side; this client just shapes requests and maps errors. It
|
|
5
|
+
mirrors the TypeScript SDK (``packages/sdk-typescript``) in API surface and
|
|
6
|
+
ergonomics, adapted to idiomatic Python:
|
|
7
|
+
|
|
8
|
+
* keyword-only, snake_case call signatures (``user_id`` -> JSON ``userId``);
|
|
9
|
+
* structured exceptions (:class:`~modelgov.errors.ModelgovError` and
|
|
10
|
+
subclasses) carrying the API error envelope;
|
|
11
|
+
* a streaming generator (:meth:`ModelgovClient.chat_stream`) over SSE.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
from types import TracebackType
|
|
18
|
+
from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Type, Union
|
|
19
|
+
|
|
20
|
+
import httpx
|
|
21
|
+
|
|
22
|
+
from .errors import ModelgovError, PolicyBlockedError, SafetyBlockedError
|
|
23
|
+
from .types import (
|
|
24
|
+
ChatMessage,
|
|
25
|
+
ChatResult,
|
|
26
|
+
EmbeddingsResult,
|
|
27
|
+
ExplainResult,
|
|
28
|
+
UsageResult,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
__all__ = ["ModelgovClient"]
|
|
32
|
+
|
|
33
|
+
DEFAULT_TIMEOUT = 30.0


class ModelgovClient:
    """Synchronous client for the Modelgov API.

    Args:
        base_url: Base URL of the Modelgov API (e.g. ``http://localhost:3000``).
            Trailing slashes are stripped.
        api_key: Sent as ``Authorization: Bearer <api_key>`` when provided.
        timeout: Request timeout in seconds (or any value ``httpx`` accepts).
            Defaults to 30s.
        http_client: Optional pre-built ``httpx.Client`` for custom transports
            or test injection. When provided, ``timeout`` is ignored and the
            caller owns the client's lifecycle.

    Example:
        >>> client = ModelgovClient(base_url="http://localhost:3000", api_key="sk-...")
        >>> res = client.chat(
        ...     user_id="user_123",
        ...     user_type="logged_in",
        ...     feature="support_chat",
        ...     messages=[{"role": "user", "content": "Hello"}],
        ... )
        >>> res["message"]["content"]

    The client is a context manager; use ``with ModelgovClient(...) as c:`` to
    close the underlying connection pool automatically.
    """

    def __init__(
        self,
        base_url: str,
        api_key: Optional[str] = None,
        *,
        timeout: Union[float, httpx.Timeout, None] = DEFAULT_TIMEOUT,
        http_client: Optional[httpx.Client] = None,
    ) -> None:
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        # Ownership and selection must use the same ``is None`` test. Selecting
        # with ``http_client or ...`` would discard a falsy injected client and
        # build an internal one that close() never releases, because
        # ``_owns_client`` would still be False.
        self._owns_client = http_client is None
        self._client = httpx.Client(timeout=timeout) if http_client is None else http_client

    # -- lifecycle ----------------------------------------------------------

    def close(self) -> None:
        """Close the underlying HTTP client (only if this instance owns it)."""
        if self._owns_client:
            self._client.close()

    def __enter__(self) -> "ModelgovClient":
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc: Optional[BaseException],
        tb: Optional[TracebackType],
    ) -> None:
        self.close()

    # -- headers ------------------------------------------------------------

    def _headers(self, extra: Optional[Mapping[str, str]] = None) -> Dict[str, str]:
        """Build request headers: JSON content type, bearer auth, then ``extra``.

        Entries of ``extra`` whose value is ``None`` are dropped; the remaining
        entries override the defaults on key collision.
        """
        headers: Dict[str, str] = {"content-type": "application/json"}
        if self.api_key:
            headers["authorization"] = f"Bearer {self.api_key}"
        if extra:
            headers.update({k: v for k, v in extra.items() if v is not None})
        return headers

    # -- public API ---------------------------------------------------------

    def chat(
        self,
        *,
        user_id: str,
        user_type: str,
        feature: str,
        messages: Sequence[ChatMessage],
        context: Optional[Sequence[str]] = None,
        model_class: Optional[str] = None,
        requested_model_class: Optional[str] = None,
        input_tokens_estimate: Optional[int] = None,
        temperature: Optional[float] = None,
        project_id: Optional[str] = None,
        environment: Optional[str] = None,
        idempotency_key: Optional[str] = None,
        metadata: Optional[Mapping[str, Any]] = None,
    ) -> ChatResult:
        """Run a guarded chat completion (``POST /v1/chat``).

        Args:
            user_id: Your end-user id. Required.
            user_type: Must match a user type in ``modelgov.yaml``. Required.
            feature: Registered feature name. Required.
            messages: List of ``{"role", "content"}`` messages. ``content`` is a
                string, or a list of content parts (``{"type": "text", ...}`` /
                ``{"type": "image_url", "image_url": {"url": ...}}``) for vision.
            context: Retrieved passages for a grounded feature (safety
                ``grounding: strict``). The gateway answers ONLY from these,
                forces verbatim citations, and verifies them; unverifiable
                answers become a safe refusal.
            model_class: Requested model class (e.g. ``"cheap"``). Maps to the
                API's ``modelClass`` field.
            requested_model_class: Alias for ``model_class``; if both are given,
                ``model_class`` wins. Provided for parity with callers that use
                the more explicit name.
            input_tokens_estimate: Optional pre-estimate for budget checks.
            temperature: Sampling temperature (0-2).
            project_id: Optional project scope.
            environment: Optional environment tag.
            idempotency_key: Sent as the ``Idempotency-Key`` header. Retrying
                with the same key + body replays the first result instead of
                re-charging budget or re-calling the model.
            metadata: Arbitrary key/value data stored on the audit log
                (max 32 keys). Does not affect policy.

        Returns:
            The decoded :class:`~modelgov.types.ChatResponse` body.

        Raises:
            SafetyBlockedError: 403 ``safety_blocked``.
            PolicyBlockedError: 403 ``policy_blocked`` / ``budget_exceeded``.
            ModelgovError: any other non-2xx response.
        """
        body = self._chat_body(
            user_id=user_id,
            user_type=user_type,
            feature=feature,
            messages=messages,
            context=context,
            model_class=model_class,
            requested_model_class=requested_model_class,
            input_tokens_estimate=input_tokens_estimate,
            temperature=temperature,
            project_id=project_id,
            environment=environment,
            metadata=metadata,
        )
        extra = {"idempotency-key": idempotency_key} if idempotency_key else None
        response = self._client.post(
            f"{self.base_url}/v1/chat",
            headers=self._headers(extra),
            json=body,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    def chat_stream(
        self,
        *,
        user_id: str,
        user_type: str,
        feature: str,
        messages: Sequence[ChatMessage],
        context: Optional[Sequence[str]] = None,
        model_class: Optional[str] = None,
        requested_model_class: Optional[str] = None,
        input_tokens_estimate: Optional[int] = None,
        temperature: Optional[float] = None,
        project_id: Optional[str] = None,
        environment: Optional[str] = None,
        idempotency_key: Optional[str] = None,
        metadata: Optional[Mapping[str, Any]] = None,
    ) -> Iterator[str]:
        """Stream a guarded chat completion as incremental text chunks.

        Sends the same body as :meth:`chat` plus ``"stream": true`` and yields
        text deltas as they arrive. This is a generator: the request is only
        issued on first iteration, and the HTTP connection stays open until the
        generator is fully consumed (or closed). Use it in a ``for`` loop or
        wrap it in ``contextlib.closing`` if you may abandon it early.

        SSE framing assumption:
            The server responds with ``Content-Type: text/event-stream`` and
            OpenAI-style Server-Sent Events — one event per ``data:`` line,
            terminated by a literal ``data: [DONE]`` sentinel. Each non-sentinel
            ``data:`` payload is JSON. This client extracts the incremental
            text from, in order of preference:

            * ``chunk["choices"][0]["delta"]["content"]`` (OpenAI chat delta),
            * ``chunk["delta"]`` or ``chunk["content"]`` or ``chunk["text"]``
              (simpler shapes the Modelgov API may emit).

            If a ``data:`` payload is not valid JSON it is yielded as a text
            chunk (tolerant of a plain-text delta stream). Empty deltas are
            skipped.

        Policy/safety blocks that happen *before* streaming starts are returned
        as a normal non-2xx JSON response and raised as the usual typed errors.

        Yields:
            ``str`` chunks of assistant text, in order.

        Raises:
            SafetyBlockedError / PolicyBlockedError / ModelgovError: on a non-2xx
                response received before the stream body begins.
        """
        body = self._chat_body(
            user_id=user_id,
            user_type=user_type,
            feature=feature,
            messages=messages,
            context=context,
            model_class=model_class,
            requested_model_class=requested_model_class,
            input_tokens_estimate=input_tokens_estimate,
            temperature=temperature,
            project_id=project_id,
            environment=environment,
            metadata=metadata,
        )
        body["stream"] = True

        extra: Dict[str, str] = {"accept": "text/event-stream"}
        if idempotency_key:
            extra["idempotency-key"] = idempotency_key

        with self._client.stream(
            "POST",
            f"{self.base_url}/v1/chat",
            headers=self._headers(extra),
            json=body,
        ) as response:
            if response.status_code < 200 or response.status_code >= 300:
                # A streamed response has no body loaded yet; materialize it so
                # the error envelope can be decoded, then map to a typed error.
                response.read()
                self._raise_for_status(response)

            for line in response.iter_lines():
                chunk = _parse_sse_line(line)
                if chunk is _DONE:
                    break
                if chunk:
                    yield chunk

    def explain(
        self,
        *,
        user_id: str,
        user_type: str,
        feature: str,
        model_class: Optional[str] = None,
        requested_model_class: Optional[str] = None,
        input_tokens_estimate: Optional[int] = None,
        project_id: Optional[str] = None,
        environment: Optional[str] = None,
    ) -> ExplainResult:
        """Dry-run policy evaluation (``POST /v1/explain``).

        Returns the decision, resolved model, safety plan, and a live budget
        snapshot *without* calling the model or reserving budget. Same identity
        fields as :meth:`chat`, but no ``messages``.

        Raises:
            ModelgovError: (or a subclass) on any non-2xx response.
        """
        body: Dict[str, Any] = {
            "userId": user_id,
            "userType": user_type,
            "feature": feature,
        }
        resolved_model_class = model_class or requested_model_class
        if resolved_model_class is not None:
            body["modelClass"] = resolved_model_class
        if input_tokens_estimate is not None:
            body["inputTokensEstimate"] = input_tokens_estimate
        if project_id is not None:
            body["projectId"] = project_id
        if environment is not None:
            body["environment"] = environment

        response = self._client.post(
            f"{self.base_url}/v1/explain",
            headers=self._headers(),
            json=body,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    def embed(
        self,
        *,
        user_id: str,
        user_type: str,
        feature: str,
        input: Union[str, Sequence[str]],
        model_class: Optional[str] = None,
        requested_model_class: Optional[str] = None,
        input_tokens_estimate: Optional[int] = None,
        project_id: Optional[str] = None,
        environment: Optional[str] = None,
        metadata: Optional[Mapping[str, Any]] = None,
    ) -> EmbeddingsResult:
        """Embed one or more texts through the gateway (``POST /v1/embeddings``).

        Policy-checked (``feature`` + ``user_type``), budget-reserved, and
        audited exactly like :meth:`chat`, and raises the same typed errors on a
        ``403`` policy/budget block.

        Args:
            input: A single text, or a batch of texts to embed. A string is sent
                as-is; any other sequence is sent as a JSON array (one vector is
                returned per input, in order).

        Returns:
            The decoded :class:`~modelgov.types.EmbeddingsResponse` body, whose
            ``embeddings`` is one vector per input in request order.
        """
        body: Dict[str, Any] = {
            "userId": user_id,
            "userType": user_type,
            "feature": feature,
            "input": input if isinstance(input, str) else list(input),
        }
        resolved_model_class = model_class or requested_model_class
        if resolved_model_class is not None:
            body["modelClass"] = resolved_model_class
        if input_tokens_estimate is not None:
            body["inputTokensEstimate"] = input_tokens_estimate
        if project_id is not None:
            body["projectId"] = project_id
        if environment is not None:
            body["environment"] = environment
        if metadata is not None:
            body["metadata"] = dict(metadata)

        response = self._client.post(
            f"{self.base_url}/v1/embeddings",
            headers=self._headers(),
            json=body,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    def get_usage(
        self,
        *,
        user_id: Optional[str] = None,
        feature: Optional[str] = None,
        project_id: Optional[str] = None,
    ) -> UsageResult:
        """Fetch budget counters and recent stats (``GET /v1/usage``).

        Requires an API key with the ``usage:read`` permission. Filters left as
        ``None`` are omitted from the query string.
        """
        params: Dict[str, str] = {}
        if user_id is not None:
            params["userId"] = user_id
        if feature is not None:
            params["feature"] = feature
        if project_id is not None:
            params["projectId"] = project_id

        response = self._client.get(
            f"{self.base_url}/v1/usage",
            headers=self._headers(),
            params=params,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    def get_usage_summary(
        self,
        *,
        feature: Optional[str] = None,
        user_type: Optional[str] = None,
        since: Optional[str] = None,
        project_id: Optional[str] = None,
    ) -> UsageResult:
        """Fetch aggregated cost/request summary (``GET /v1/usage/summary``).

        Args:
            since: ``"24h"``, ``"7d"``, or an ISO-8601 timestamp (default
                ``"24h"`` server-side).
        """
        params: Dict[str, str] = {}
        if feature is not None:
            params["feature"] = feature
        if user_type is not None:
            params["userType"] = user_type
        if since is not None:
            params["since"] = since
        if project_id is not None:
            params["projectId"] = project_id

        response = self._client.get(
            f"{self.base_url}/v1/usage/summary",
            headers=self._headers(),
            params=params,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    # -- internals ----------------------------------------------------------

    @staticmethod
    def _chat_body(
        *,
        user_id: str,
        user_type: str,
        feature: str,
        messages: Sequence[ChatMessage],
        context: Optional[Sequence[str]],
        model_class: Optional[str],
        requested_model_class: Optional[str],
        input_tokens_estimate: Optional[int],
        temperature: Optional[float],
        project_id: Optional[str],
        environment: Optional[str],
        metadata: Optional[Mapping[str, Any]],
    ) -> Dict[str, Any]:
        """Build the camelCase JSON body the API expects, omitting None fields."""
        body: Dict[str, Any] = {
            "userId": user_id,
            "userType": user_type,
            "feature": feature,
            # Shallow-copy each message so the caller's mappings are not shared.
            "messages": [dict(m) for m in messages],
        }
        if context is not None:
            body["context"] = list(context)
        resolved_model_class = model_class or requested_model_class
        if resolved_model_class is not None:
            body["modelClass"] = resolved_model_class
        if input_tokens_estimate is not None:
            body["inputTokensEstimate"] = input_tokens_estimate
        if temperature is not None:
            body["temperature"] = temperature
        if project_id is not None:
            body["projectId"] = project_id
        if environment is not None:
            body["environment"] = environment
        if metadata is not None:
            body["metadata"] = dict(metadata)
        return body

    def _handle_json(self, response: httpx.Response) -> Any:
        """Return the parsed JSON body, or raise a typed error on non-2xx.

        A 2xx response whose body is not valid JSON yields ``{}``.
        """
        if response.status_code < 200 or response.status_code >= 300:
            self._raise_for_status(response)
        try:
            return response.json()
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return {}

    @staticmethod
    def _raise_for_status(response: httpx.Response) -> None:
        """Map a non-2xx response to the appropriate ModelgovError subclass.

        Always raises; an undecodable body is treated as an empty envelope.
        """
        try:
            body = response.json()
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            body = {}

        code = _error_code(body)
        if code == "safety_blocked":
            raise SafetyBlockedError(response.status_code, code, body)
        if code in ("policy_blocked", "budget_exceeded"):
            raise PolicyBlockedError(response.status_code, code, body)
        raise ModelgovError(response.status_code, code, body)
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _error_code(body: Any) -> str:
|
|
487
|
+
"""Extract ``error.code`` from the envelope, tolerating loose shapes."""
|
|
488
|
+
if isinstance(body, dict):
|
|
489
|
+
error = body.get("error")
|
|
490
|
+
if isinstance(error, str):
|
|
491
|
+
return error
|
|
492
|
+
if isinstance(error, dict):
|
|
493
|
+
code = error.get("code")
|
|
494
|
+
if isinstance(code, str):
|
|
495
|
+
return code
|
|
496
|
+
return "error"
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
# Sentinel returned by _parse_sse_line for the terminal `data: [DONE]` event.
|
|
500
|
+
_DONE = object()
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def _parse_sse_line(line: str) -> Any:
|
|
504
|
+
"""Parse one SSE line into a text chunk, ``""``, or the ``_DONE`` sentinel.
|
|
505
|
+
|
|
506
|
+
Returns ``_DONE`` for the ``[DONE]`` sentinel, ``""`` for lines with no
|
|
507
|
+
text delta (comments, blank lines, non-``data:`` fields, empty deltas), and
|
|
508
|
+
the extracted text chunk otherwise. See :meth:`ModelgovClient.chat_stream`
|
|
509
|
+
for the framing assumptions.
|
|
510
|
+
"""
|
|
511
|
+
if not line:
|
|
512
|
+
return ""
|
|
513
|
+
stripped = line.strip()
|
|
514
|
+
if not stripped or stripped.startswith(":"):
|
|
515
|
+
# Blank line (event separator) or SSE comment.
|
|
516
|
+
return ""
|
|
517
|
+
if not stripped.startswith("data:"):
|
|
518
|
+
# Ignore other SSE fields (event:, id:, retry:).
|
|
519
|
+
return ""
|
|
520
|
+
|
|
521
|
+
data = stripped[len("data:"):].strip()
|
|
522
|
+
if data == "[DONE]":
|
|
523
|
+
return _DONE
|
|
524
|
+
if not data:
|
|
525
|
+
return ""
|
|
526
|
+
|
|
527
|
+
try:
|
|
528
|
+
payload = json.loads(data)
|
|
529
|
+
except (json.JSONDecodeError, ValueError):
|
|
530
|
+
# Tolerate a plain-text delta stream.
|
|
531
|
+
return data
|
|
532
|
+
|
|
533
|
+
return _extract_delta(payload)
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def _extract_delta(payload: Any) -> str:
|
|
537
|
+
"""Pull the incremental text out of a decoded SSE JSON payload."""
|
|
538
|
+
if not isinstance(payload, dict):
|
|
539
|
+
return ""
|
|
540
|
+
|
|
541
|
+
# OpenAI-style: choices[0].delta.content
|
|
542
|
+
choices = payload.get("choices")
|
|
543
|
+
if isinstance(choices, list) and choices:
|
|
544
|
+
first = choices[0]
|
|
545
|
+
if isinstance(first, dict):
|
|
546
|
+
delta = first.get("delta")
|
|
547
|
+
if isinstance(delta, dict):
|
|
548
|
+
content = delta.get("content")
|
|
549
|
+
if isinstance(content, str):
|
|
550
|
+
return content
|
|
551
|
+
# Non-streaming-style fallback within a choice.
|
|
552
|
+
message = first.get("message")
|
|
553
|
+
if isinstance(message, dict) and isinstance(message.get("content"), str):
|
|
554
|
+
return message["content"]
|
|
555
|
+
if isinstance(first.get("text"), str):
|
|
556
|
+
return first["text"]
|
|
557
|
+
|
|
558
|
+
# Simpler shapes the Modelgov API may emit.
|
|
559
|
+
for key in ("delta", "content", "text"):
|
|
560
|
+
value = payload.get(key)
|
|
561
|
+
if isinstance(value, str):
|
|
562
|
+
return value
|
|
563
|
+
|
|
564
|
+
return ""
|
modelgov/errors.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Typed exceptions raised by :class:`modelgov.client.ModelgovClient`.
|
|
2
|
+
|
|
3
|
+
Mirrors the TypeScript SDK's error hierarchy (``ModelgovError`` /
|
|
4
|
+
``PolicyBlockedError`` / ``SafetyBlockedError``) while surfacing the API's
|
|
5
|
+
structured error envelope (``code``, ``message``, ``details``, ``requestId``,
|
|
6
|
+
and block metadata from ``details``) as first-class attributes.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Any, Dict, Optional
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ModelgovError(Exception):
    """Base error carrying the HTTP status and the API's structured error body.

    The Modelgov error envelope looks like::

        {
          "error": {
            "code": "policy_blocked",
            "message": "...",
            "details": {
              "auditRequestId": "req_42"   # audit-log row (block/safety only)
            },
            "requestId": "550e8400-..."    # HTTP trace id (UUID)
          }
        }

    Attributes:
        status: HTTP status code, as passed by the raiser (by convention 0 when
            the request never got a response).
        code: The stable ``error.code`` string (e.g. ``"policy_blocked"``).
        message: The explicit ``message`` argument if truthy, else
            ``error.message``, else ``code``.
        details: The ``error.details`` object when it is a dict, else ``None``.
        request_id: ``error.requestId`` — the HTTP trace id (UUID).
        audit_request_id: ``error.details.auditRequestId`` when it is a string,
            otherwise ``error.auditRequestId``. The ``req_<n>`` audit id,
            present on policy/safety/budget blocks. Use with
            ``modelgov requests show``.
        reason_code: ``error.details.reasonCode`` — stable machine-readable
            block reason (e.g. ``"daily_budget_exceeded"``), when present.
        budget_remaining: ``error.details.budgetRemaining`` — remaining budget
            headroom at decision time, when the API reports it.
        body: The full parsed response body.
    """

    def __init__(
        self,
        status: int,
        code: str,
        body: Any = None,
        *,
        message: Optional[str] = None,
    ) -> None:
        self.status = status
        self.code = code
        self.body = body

        # Anything that is not a dict-shaped envelope is treated as empty.
        raw_error = body.get("error") if isinstance(body, dict) else None
        envelope: Dict[str, Any] = raw_error if isinstance(raw_error, dict) else {}

        self.message: str = message or envelope.get("message") or code

        raw_details = envelope.get("details")
        self.details: Optional[Dict[str, Any]] = (
            raw_details if isinstance(raw_details, dict) else None
        )
        self.request_id: Optional[str] = envelope.get("requestId")

        block_info: Dict[str, Any] = {} if self.details is None else self.details

        # Prefer the audit id nested under details; fall back to a top-level one.
        nested_audit_id = block_info.get("auditRequestId")
        self.audit_request_id: Optional[str] = (
            nested_audit_id
            if isinstance(nested_audit_id, str)
            else envelope.get("auditRequestId")
        )

        # Block metadata surfaced from `error.details` as first-class attributes,
        # mirroring the TypeScript SDK's `reasonCode` / `budgetRemaining`.
        reason = block_info.get("reasonCode")
        self.reason_code: Optional[str] = reason if isinstance(reason, str) else None
        headroom = block_info.get("budgetRemaining")
        self.budget_remaining: Optional[Dict[str, Any]] = (
            headroom if isinstance(headroom, dict) else None
        )

        super().__init__(f"modelgov request failed ({status}): {code} - {self.message}")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class PolicyBlockedError(ModelgovError):
    """Error for a 403 whose code is ``policy_blocked`` or ``budget_exceeded``.

    Adds no behavior of its own; it exists so callers can catch policy and
    budget blocks separately. The block reason is available through
    :attr:`~ModelgovError.body` / :attr:`~ModelgovError.details`, and the audit
    id through :attr:`~ModelgovError.audit_request_id`.
    """
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class SafetyBlockedError(ModelgovError):
    """Error for a 403 whose code is ``safety_blocked`` (PII or prompt injection).

    Adds no behavior of its own; it exists so callers can catch safety blocks
    separately from other :class:`ModelgovError` failures.
    """
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
__all__ = ["ModelgovError", "PolicyBlockedError", "SafetyBlockedError"]
|
modelgov/py.typed
ADDED
|
File without changes
|
modelgov/types.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""Typed request/response models for the Modelgov API.
|
|
2
|
+
|
|
3
|
+
These mirror ``packages/sdk-typescript/src/types.ts`` and the shapes in
|
|
4
|
+
``packages/api/openapi.json``. Response models are :class:`typing.TypedDict`
|
|
5
|
+
so a decoded JSON body *is* the typed object with no conversion step — the
|
|
6
|
+
API already returns camelCase keys and the SDK returns them unchanged.
|
|
7
|
+
|
|
8
|
+
``ChatResult`` / ``EmbeddingsResult`` / ``ExplainResult`` / ``UsageResult`` are exported aliases used
|
|
9
|
+
by the client's return-type annotations.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import sys
|
|
15
|
+
from typing import Any, Dict, List, Optional, Union
|
|
16
|
+
|
|
17
|
+
if sys.version_info >= (3, 11):
|
|
18
|
+
from typing import NotRequired, TypedDict
|
|
19
|
+
else: # pragma: no cover - exercised on 3.9/3.10 runtimes
|
|
20
|
+
from typing_extensions import NotRequired, TypedDict # type: ignore
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# --- Chat -------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TextPart(TypedDict):
    """A text segment of a multimodal message (``type`` is ``"text"``)."""

    # Discriminator for the part kind; annotated as ``str``, so the ``"text"``
    # value is a convention and is not enforced by the type.
    type: str
    # The text carried by this segment.
    text: str
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ImageUrl(TypedDict):
    """Image reference held in the ``image_url`` field of an :class:`ImagePart`."""

    # Location of the image.
    url: str
    # Optional key; may be omitted from the dict entirely.
    detail: NotRequired[str]  # "low" | "high" | "auto"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ImagePart(TypedDict):
    """An image segment of a multimodal message (``type`` is ``"image_url"``).

    ``image_url.url`` is an http(s) URL or a ``data:`` URI (base64) — e.g. a
    page scan for OCR. Passed through to a vision model; the gateway still
    governs budget, audit, and text-part safety.
    """

    # Discriminator for the part kind; annotated as ``str``, so the
    # ``"image_url"`` value is a convention and is not enforced by the type.
    type: str
    # Nested object holding the image location and optional detail level.
    image_url: ImageUrl
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# One element of a multimodal ``content`` list: either a text part or an
# image part.
ContentPart = Union[TextPart, ImagePart]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ChatMessage(TypedDict):
    """A single chat message. ``role`` is one of system/user/assistant/tool.

    ``content`` is a plain string, or a list of OpenAI-style content parts
    (text + images) for vision / multimodal features.
    """

    # Annotated as plain ``str``; the allowed role names listed above are not
    # enforced by the type.
    role: str
    # Either the whole message as one string, or a list of content parts.
    content: Union[str, List[ContentPart]]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Usage(TypedDict):
    """Token counts of a chat call (the ``usage`` field of :class:`ChatResponse`).

    Both keys are required, but either value may be ``None``.
    """

    # NOTE(review): presumably ``None`` when the count was not reported —
    # confirm against the OpenAPI spec.
    inputTokens: Optional[int]
    outputTokens: Optional[int]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class Cost(TypedDict):
    """Cost figures used as the ``cost`` field of chat and embeddings responses.

    Carries two amounts, an estimate and an actual figure; the field names
    indicate US dollars.
    """

    estimatedUsd: float
    actualUsd: float
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class BudgetRemaining(TypedDict):
    """Remaining budget headroom.

    Used as the ``budgetRemaining`` field of :class:`ChatResponse` and
    :class:`EmbeddingsResponse`, and as ``remaining`` in
    :class:`ExplainBudget`. The three USD keys are always present; the three
    token keys may be omitted.
    """

    userDailyUsd: float
    # null when no cap is configured (monthly_usd: 0).
    featureMonthlyUsd: Optional[float]
    globalMonthlyUsd: Optional[float]
    # Token headroom; present when a token cap is configured, null otherwise.
    userDailyTokens: NotRequired[Optional[int]]
    featureMonthlyTokens: NotRequired[Optional[int]]
    globalMonthlyTokens: NotRequired[Optional[int]]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class Safety(TypedDict):
    """Safety flags of a chat call (the ``safety`` field of :class:`ChatResponse`)."""

    # NOTE(review): presumably report whether PII masking / prompt-injection
    # blocking was applied to this request — confirm against the OpenAPI spec.
    piiMasked: bool
    injectionBlocked: bool
    # Present only for grounded features: whether the answer's citations were
    # verified against the provided context.
    grounded: NotRequired[bool]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ResponseMessage(TypedDict):
    """The ``message`` object of a :class:`ChatResponse`.

    Unlike :class:`ChatMessage`, ``content`` here is always a plain string,
    never a list of content parts.
    """

    role: str
    content: str
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class ChatResponse(TypedDict):
    """``200`` body of ``POST /v1/chat``.

    Every key is required except ``reason``, which may be omitted.
    """

    message: ResponseMessage
    model: str
    # Provider of the model that ran, e.g. "openai", "openrouter", "ollama".
    provider: str
    decision: str  # "allow" | "degrade" | "fallback"
    # Optional key; may be absent from the body.
    reason: NotRequired[str]
    usage: Usage
    cost: Cost
    # null under hierarchical budgets (the node tree is the authority).
    budgetRemaining: Optional[BudgetRemaining]
    safety: Safety
    requestId: str  # audit id ("req_<n>")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# --- Explain ----------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class ExplainRequested(TypedDict):
    """The ``requested`` section of an :class:`ExplainResponse`.

    NOTE(review): presumably echoes the user, user type, feature and model
    class the caller asked for — confirm against the OpenAPI spec.
    """

    userId: str
    userType: str
    feature: str
    modelClass: str
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class ExplainResolved(TypedDict):
    """The ``resolved`` section of an :class:`ExplainResponse`.

    Holds a model class, model and provider, plus an optional fallback model.
    """

    modelClass: str
    model: str
    provider: str
    # Optional key; may be absent from the body.
    fallbackModel: NotRequired[str]
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class ExplainSafety(TypedDict):
    """The ``safety`` section of an :class:`ExplainResponse`.

    The first three fields are plain strings; their allowed values are not
    defined in this module.
    """

    preset: str
    pii: str
    promptInjection: str
    maxOutputTokens: int
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class ExplainCost(TypedDict):
    """The ``cost`` section of an :class:`ExplainResponse`.

    Carries only an estimate; unlike :class:`Cost` there is no ``actualUsd``.
    """

    estimatedUsd: float
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class ExplainBudgetUsed(TypedDict):
    """The ``used`` part of an :class:`ExplainBudget`.

    All four keys are required and non-nullable, unlike the monthly fields of
    :class:`BudgetRemaining`.
    """

    userDailyUsd: float
    userDailyRequests: int
    featureMonthlyUsd: float
    globalMonthlyUsd: float
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class ExplainBudget(TypedDict):
    """The ``budget`` section of an :class:`ExplainResponse`.

    Combines remaining and used budget with the permitted model list and the
    daily request limit figures.
    """

    remaining: BudgetRemaining
    used: ExplainBudgetUsed
    permittedModels: List[str]
    dailyRequestLimit: int
    dailyRequestsRemaining: int
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class ExplainResponse(TypedDict):
    """``200`` body of ``POST /v1/explain``.

    Every key is required except ``reason`` and ``reasonCode``, which may be
    omitted.
    """

    decision: str  # "allow" | "block" | "degrade" | "fallback"
    # Optional keys; may be absent from the body.
    reason: NotRequired[str]
    reasonCode: NotRequired[str]
    requested: ExplainRequested
    resolved: ExplainResolved
    safety: ExplainSafety
    cost: ExplainCost
    budget: ExplainBudget
    wouldCallModel: bool
    summary: str
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# --- Embeddings -------------------------------------------------------------
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class EmbeddingsUsage(TypedDict):
    """Token usage of an embeddings call (``usage`` in :class:`EmbeddingsResponse`).

    Only an input count is carried; the value may be ``None``.
    """

    inputTokens: Optional[int]
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class EmbeddingsResponse(TypedDict):
    """``200`` body of ``POST /v1/embeddings``.

    Every key is required except ``reason``, which may be omitted.
    """

    embeddings: List[List[float]]  # one vector per input, in request order
    model: str
    provider: str
    decision: str  # "allow" | "degrade" | "fallback"
    # Optional key; may be absent from the body.
    reason: NotRequired[str]
    usage: EmbeddingsUsage
    cost: Cost
    # null under hierarchical budgets (the node tree is the authority).
    budgetRemaining: Optional[BudgetRemaining]
    requestId: str
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# --- Usage ------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
# The /v1/usage and /v1/usage/summary bodies are operator-facing and not fully
# fixed in the OpenAPI spec, so they are typed as a loose mapping: no key is
# guaranteed by this type and every value is ``Any``.
UsageResponse = Dict[str, Any]
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# --- Public aliases (match the naming used in the client signatures) --------
|
|
203
|
+
|
|
204
|
+
# Each ``*Result`` name is bound to the very same object as its ``*Response``
# counterpart (plain assignment, not a new type), so the two names are
# interchangeable in annotations.
ChatResult = ChatResponse
ExplainResult = ExplainResponse
EmbeddingsResult = EmbeddingsResponse
UsageResult = UsageResponse
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Explicit public surface of this module: every TypedDict, the ``ContentPart``
# union, the loose ``UsageResponse`` mapping, and the four ``*Result`` aliases.
__all__ = [
    "ChatMessage",
    "TextPart",
    "ImageUrl",
    "ImagePart",
    "ContentPart",
    "Usage",
    "Cost",
    "BudgetRemaining",
    "Safety",
    "ResponseMessage",
    "ChatResponse",
    "ChatResult",
    "EmbeddingsUsage",
    "EmbeddingsResponse",
    "EmbeddingsResult",
    "ExplainRequested",
    "ExplainResolved",
    "ExplainSafety",
    "ExplainCost",
    "ExplainBudgetUsed",
    "ExplainBudget",
    "ExplainResponse",
    "ExplainResult",
    "UsageResponse",
    "UsageResult",
]
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: modelgov
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Modelgov API client. `feature` and `userType` are mandatory on every request.
|
|
5
|
+
Project-URL: Homepage, https://github.com/mml555/modelgov
|
|
6
|
+
Author: Modelgov
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: budget,gateway,llm,modelgov,policy,safety
|
|
9
|
+
Requires-Python: >=3.9
|
|
10
|
+
Requires-Dist: httpx>=0.27
|
|
11
|
+
Requires-Dist: typing-extensions>=4.0; python_version < '3.11'
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
14
|
+
Requires-Dist: respx; extra == 'dev'
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# Modelgov Python SDK
|
|
18
|
+
|
|
19
|
+
Package: `modelgov` (module `modelgov`). The Python counterpart to
|
|
20
|
+
[`@modelgov/sdk`](../sdk-typescript).
|
|
21
|
+
|
|
22
|
+
The SDK is a **thin HTTP client** to the Modelgov API. Policy enforcement is
|
|
23
|
+
always server-side. Every request declares a **user**, **user type**, and
|
|
24
|
+
**feature**; policy is checked **before** the model call.
|
|
25
|
+
|
|
26
|
+
## Install
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install modelgov
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
> Note: `modelgov` is not yet published to PyPI. Until then, install from
|
|
33
|
+
> source with the editable install below (see also [self-host.md](../../docs/self-host.md)).
|
|
34
|
+
|
|
35
|
+
From the monorepo (editable, with test deps):
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install -e "packages/sdk-python[dev]"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Requires Python >= 3.9. Depends on [`httpx`](https://www.python-httpx.org/).
|
|
42
|
+
|
|
43
|
+
## Create a client
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
import os
|
|
47
|
+
from modelgov import ModelgovClient
|
|
48
|
+
|
|
49
|
+
ai = ModelgovClient(
|
|
50
|
+
base_url=os.environ.get("MODELGOV_URL", "http://localhost:3000"),
|
|
51
|
+
api_key=os.environ["MODELGOV_API_KEY"],
|
|
52
|
+
)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
`ModelgovClient` is a context manager and closes its connection pool on exit:
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
with ModelgovClient(base_url=..., api_key=...) as ai:
|
|
59
|
+
...
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Chat
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
res = ai.chat(
|
|
66
|
+
user_id="user_123", # your end-user id
|
|
67
|
+
user_type="logged_in", # must match modelgov.yaml budgets
|
|
68
|
+
feature="support_chat", # required — registered feature
|
|
69
|
+
model_class="cheap",
|
|
70
|
+
messages=[{"role": "user", "content": "Help me reset my password"}],
|
|
71
|
+
# optional:
|
|
72
|
+
# input_tokens_estimate=120,
|
|
73
|
+
# temperature=0.7,
|
|
74
|
+
# project_id="checkout",
|
|
75
|
+
# environment="production",
|
|
76
|
+
# metadata={"trace_id": "abc"},
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
print(res["message"]["content"])
|
|
80
|
+
print(res["model"], res["decision"], res["requestId"])
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Snake_case keyword args are converted to the camelCase JSON the API expects
|
|
84
|
+
(`user_id` → `userId`, `model_class` → `modelClass`, etc.). `None`-valued
|
|
85
|
+
optional args are omitted from the request body.
|
|
86
|
+
|
|
87
|
+
### Response
|
|
88
|
+
|
|
89
|
+
`chat()` returns a `ChatResponse` (a `TypedDict`), so it is a plain `dict` with
|
|
90
|
+
typed keys:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
{
|
|
94
|
+
"message": {"role": "assistant", "content": "..."},
|
|
95
|
+
"model": "openai/gpt-4o-mini",
"provider": "openai",
|
|
96
|
+
"decision": "allow", # "allow" | "degrade" | "fallback"
|
|
97
|
+
"usage": {"inputTokens": 12, "outputTokens": 8},
|
|
98
|
+
"cost": {"estimatedUsd": 0.0001, "actualUsd": 0.00008},
|
|
99
|
+
"budgetRemaining": {"userDailyUsd": 0.24, "featureMonthlyUsd": None, "globalMonthlyUsd": 499.5},
|
|
100
|
+
"safety": {"piiMasked": False, "injectionBlocked": False},
|
|
101
|
+
"requestId": "req_42", # audit id — log with your domain ids
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Vision (multimodal)
|
|
106
|
+
|
|
107
|
+
Pass content parts instead of a string to send images to a vision model. The
|
|
108
|
+
gateway governs budget/audit and still runs safety on the text parts:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
res = ai.chat(
|
|
112
|
+
user_id="user_123",
|
|
113
|
+
user_type="logged_in",
|
|
114
|
+
feature="document_extraction",
|
|
115
|
+
messages=[{
|
|
116
|
+
"role": "user",
|
|
117
|
+
"content": [
|
|
118
|
+
{"type": "text", "text": "Extract the total from this receipt."},
|
|
119
|
+
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
|
|
120
|
+
],
|
|
121
|
+
}],
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Grounding
|
|
126
|
+
|
|
127
|
+
For a feature with safety `grounding: strict`, pass retrieved passages as
|
|
128
|
+
`context`. The gateway answers only from them, forces verbatim citations, and
|
|
129
|
+
verifies them — unverifiable answers become a safe refusal, and
|
|
130
|
+
`res["safety"]["grounded"]` reports whether the citations checked out:
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
res = ai.chat(
|
|
134
|
+
user_id="user_123",
|
|
135
|
+
user_type="logged_in",
|
|
136
|
+
feature="grounded_support",
|
|
137
|
+
messages=[{"role": "user", "content": "How long do refunds take?"}],
|
|
138
|
+
context=["Refunds are issued within 5 business days of approval."],
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Streaming
|
|
143
|
+
|
|
144
|
+
`chat_stream()` yields incremental text chunks over Server-Sent Events. It
|
|
145
|
+
sends `"stream": true` and iterates `data:` lines until the `[DONE]` sentinel.
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
for chunk in ai.chat_stream(
|
|
149
|
+
user_id="user_123",
|
|
150
|
+
user_type="logged_in",
|
|
151
|
+
feature="support_chat",
|
|
152
|
+
messages=[{"role": "user", "content": "Write a haiku about budgets"}],
|
|
153
|
+
):
|
|
154
|
+
print(chunk, end="", flush=True)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**SSE framing assumption:** OpenAI-style events — one JSON payload per `data:`
|
|
158
|
+
line, terminated by `data: [DONE]`. Text is read from
|
|
159
|
+
`choices[0].delta.content` (or a simpler `delta` / `content` / `text` field).
|
|
160
|
+
Non-JSON `data:` payloads are yielded verbatim. See the `chat_stream` docstring
|
|
161
|
+
if the server's framing differs.
|
|
162
|
+
|
|
163
|
+
The generator holds the connection open until fully consumed. Policy/safety
|
|
164
|
+
blocks that occur before the stream begins raise the usual typed errors.
|
|
165
|
+
|
|
166
|
+
## Embeddings
|
|
167
|
+
|
|
168
|
+
`embed()` runs governed embeddings (`POST /v1/embeddings`) — policy-checked,
|
|
169
|
+
budget-reserved, and audited like `chat()`. Pass one string or a batch:
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
res = ai.embed(
|
|
173
|
+
user_id="user_123",
|
|
174
|
+
user_type="logged_in",
|
|
175
|
+
feature="rag_ingest",
|
|
176
|
+
input=["first passage", "second passage"], # or a single string
|
|
177
|
+
)
|
|
178
|
+
vectors = res["embeddings"] # one vector per input, in request order
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Idempotency
|
|
182
|
+
|
|
183
|
+
Pass a stable key to retry safely without double-charging budget or re-calling
|
|
184
|
+
the model:
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
ai.chat(
|
|
188
|
+
user_id="user_123",
|
|
189
|
+
user_type="logged_in",
|
|
190
|
+
feature="support_chat",
|
|
191
|
+
messages=[{"role": "user", "content": "..."}],
|
|
192
|
+
idempotency_key=f"chat-{user_id}-{session_id}",
|
|
193
|
+
)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
The API returns `x-idempotent-replay: true` on cache hits; a same-key request
|
|
197
|
+
with a different body returns `422 idempotency_key_reuse`.
|
|
198
|
+
|
|
199
|
+
## Explain (dry run)
|
|
200
|
+
|
|
201
|
+
Evaluate policy without calling the model or reserving budget:
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
plan = ai.explain(
|
|
205
|
+
user_id="user_123",
|
|
206
|
+
user_type="logged_in",
|
|
207
|
+
feature="support_chat",
|
|
208
|
+
model_class="premium",
|
|
209
|
+
)
|
|
210
|
+
print(plan["decision"], plan["summary"])
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## Usage
|
|
214
|
+
|
|
215
|
+
Requires an API key with `usage:read`.
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
usage = ai.get_usage(user_id="user_123")
|
|
219
|
+
summary = ai.get_usage_summary(feature="support_chat", since="7d")
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## Errors
|
|
223
|
+
|
|
224
|
+
| Class | When |
|
|
225
|
+
| --- | --- |
|
|
226
|
+
| `PolicyBlockedError` | 403 `policy_blocked` or `budget_exceeded` |
|
|
227
|
+
| `SafetyBlockedError` | 403 `safety_blocked` (PII or prompt injection) |
|
|
228
|
+
| `ModelgovError` | Other 4xx / 5xx |
|
|
229
|
+
|
|
230
|
+
`PolicyBlockedError` and `SafetyBlockedError` subclass `ModelgovError`. Each
|
|
231
|
+
error carries the API's structured envelope:
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
from modelgov import ModelgovError, PolicyBlockedError, SafetyBlockedError
|
|
235
|
+
|
|
236
|
+
try:
|
|
237
|
+
ai.chat(
|
|
238
|
+
user_id="user_123",
|
|
239
|
+
user_type="logged_in",
|
|
240
|
+
feature="support_chat",
|
|
241
|
+
messages=[{"role": "user", "content": "..."}],
|
|
242
|
+
)
|
|
243
|
+
except PolicyBlockedError as err:
|
|
244
|
+
print(err.status) # 403
|
|
245
|
+
print(err.code) # "policy_blocked" | "budget_exceeded"
|
|
246
|
+
print(err.message) # human-readable
|
|
247
|
+
print(err.details) # error.details object
|
|
248
|
+
print(err.audit_request_id) # "req_<n>" — modelgov requests show
|
|
249
|
+
print(err.request_id) # HTTP trace id (UUID)
|
|
250
|
+
print(err.body) # full parsed envelope
|
|
251
|
+
except ModelgovError as err:
|
|
252
|
+
...
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## Integration pattern
|
|
256
|
+
|
|
257
|
+
```text
|
|
258
|
+
1. Authenticate user (your app)
|
|
259
|
+
2. Authorize product action (your app)
|
|
260
|
+
3. ai.chat(user_id=..., user_type=..., feature=..., messages=...)
|
|
261
|
+
4. Return res["message"]["content"] to the user
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
Never call Modelgov before your app has decided the user may use this feature.
|
|
265
|
+
|
|
266
|
+
## Development
|
|
267
|
+
|
|
268
|
+
```bash
|
|
269
|
+
pip install -e "packages/sdk-python[dev]"
|
|
270
|
+
cd packages/sdk-python
|
|
271
|
+
pytest
|
|
272
|
+
```
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
modelgov/__init__.py,sha256=TG3dZTOrt7LyUpvmT0GC-p8QGL9sDqgCcqpqA3PgNew,1824
|
|
2
|
+
modelgov/client.py,sha256=-GLUV2aZW9iJIpbHnMDqU97Pn2gc1kB3J1UmA9bj8KU,21036
|
|
3
|
+
modelgov/errors.py,sha256=GkfgnYbcCbD9Yw5zQrmpmlIGLKYYiimO_3hSePng-P8,3827
|
|
4
|
+
modelgov/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
modelgov/types.py,sha256=O2LPIj4yTjOvIYJZ5GEcUOBJgNgtMbScE9MQlqGLSr8,5994
|
|
6
|
+
modelgov-1.0.0.dist-info/METADATA,sha256=fD6eei3pu4SQmF9STK2LkpXMRZiFd1WKMfYuVtQLhm0,7824
|
|
7
|
+
modelgov-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
8
|
+
modelgov-1.0.0.dist-info/RECORD,,
|