modelgov 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modelgov/__init__.py ADDED
@@ -0,0 +1,82 @@
1
+ """Modelgov Python SDK.
2
+
3
+ A typed, idiomatic Python client for the Modelgov AI policy gateway. Mirrors
4
+ the TypeScript SDK's surface (``@modelgov/sdk``).
5
+
6
+ Example:
7
+ >>> from modelgov import ModelgovClient
8
+ >>> client = ModelgovClient(base_url="http://localhost:3000", api_key="sk-...")
9
+ >>> res = client.chat(
10
+ ... user_id="user_123",
11
+ ... user_type="logged_in",
12
+ ... feature="support_chat",
13
+ ... model_class="cheap",
14
+ ... messages=[{"role": "user", "content": "Help me reset my password"}],
15
+ ... )
16
+ >>> print(res["message"]["content"])
17
+ """
18
+
19
+ from .client import ModelgovClient
20
+ from .errors import ModelgovError, PolicyBlockedError, SafetyBlockedError
21
+ from .types import (
22
+ BudgetRemaining,
23
+ ChatMessage,
24
+ ChatResponse,
25
+ ChatResult,
26
+ ContentPart,
27
+ Cost,
28
+ EmbeddingsResponse,
29
+ EmbeddingsResult,
30
+ EmbeddingsUsage,
31
+ ExplainBudget,
32
+ ExplainBudgetUsed,
33
+ ExplainCost,
34
+ ExplainRequested,
35
+ ExplainResolved,
36
+ ExplainResponse,
37
+ ExplainResult,
38
+ ExplainSafety,
39
+ ImagePart,
40
+ ImageUrl,
41
+ ResponseMessage,
42
+ Safety,
43
+ TextPart,
44
+ Usage,
45
+ UsageResponse,
46
+ UsageResult,
47
+ )
48
+
49
+ __version__ = "1.0.0"
50
+
51
+ __all__ = [
52
+ "ModelgovClient",
53
+ "ModelgovError",
54
+ "PolicyBlockedError",
55
+ "SafetyBlockedError",
56
+ "ChatMessage",
57
+ "ChatResponse",
58
+ "ChatResult",
59
+ "TextPart",
60
+ "ImageUrl",
61
+ "ImagePart",
62
+ "ContentPart",
63
+ "EmbeddingsResponse",
64
+ "EmbeddingsResult",
65
+ "EmbeddingsUsage",
66
+ "Usage",
67
+ "Cost",
68
+ "BudgetRemaining",
69
+ "Safety",
70
+ "ResponseMessage",
71
+ "ExplainRequested",
72
+ "ExplainResolved",
73
+ "ExplainSafety",
74
+ "ExplainCost",
75
+ "ExplainBudgetUsed",
76
+ "ExplainBudget",
77
+ "ExplainResponse",
78
+ "ExplainResult",
79
+ "UsageResponse",
80
+ "UsageResult",
81
+ "__version__",
82
+ ]
modelgov/client.py ADDED
@@ -0,0 +1,564 @@
1
+ """Synchronous Modelgov API client.
2
+
3
+ A thin, typed HTTP client over the Modelgov REST API. Policy enforcement is
4
+ always server-side; this client just shapes requests and maps errors. It
5
+ mirrors the TypeScript SDK (``packages/sdk-typescript``) in API surface and
6
+ ergonomics, adapted to idiomatic Python:
7
+
8
+ * keyword-only, snake_case call signatures (``user_id`` -> JSON ``userId``);
9
+ * structured exceptions (:class:`~modelgov.errors.ModelgovError` and
10
+ subclasses) carrying the API error envelope;
11
+ * a streaming generator (:meth:`ModelgovClient.chat_stream`) over SSE.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ from types import TracebackType
18
+ from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Type, Union
19
+
20
+ import httpx
21
+
22
+ from .errors import ModelgovError, PolicyBlockedError, SafetyBlockedError
23
+ from .types import (
24
+ ChatMessage,
25
+ ChatResult,
26
+ EmbeddingsResult,
27
+ ExplainResult,
28
+ UsageResult,
29
+ )
30
+
31
+ __all__ = ["ModelgovClient"]
32
+
33
DEFAULT_TIMEOUT = 30.0


class ModelgovClient:
    """Synchronous client for the Modelgov API.

    Args:
        base_url: Base URL of the Modelgov API (e.g. ``http://localhost:3000``).
            Trailing slashes are stripped.
        api_key: Sent as ``Authorization: Bearer <api_key>`` when provided.
        timeout: Request timeout in seconds (or any value ``httpx`` accepts).
            Defaults to 30s.
        http_client: Optional pre-built ``httpx.Client`` for custom transports
            or test injection. When provided, ``timeout`` is ignored and the
            caller owns the client's lifecycle.

    Example:
        >>> client = ModelgovClient(base_url="http://localhost:3000", api_key="sk-...")
        >>> res = client.chat(
        ...     user_id="user_123",
        ...     user_type="logged_in",
        ...     feature="support_chat",
        ...     messages=[{"role": "user", "content": "Hello"}],
        ... )
        >>> res["message"]["content"]

    The client is a context manager; use ``with ModelgovClient(...) as c:`` to
    close the underlying connection pool automatically.
    """

    def __init__(
        self,
        base_url: str,
        api_key: Optional[str] = None,
        *,
        timeout: Union[float, httpx.Timeout, None] = DEFAULT_TIMEOUT,
        http_client: Optional[httpx.Client] = None,
    ) -> None:
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        # Ownership and client selection must use the same ``is None`` test.
        # A truthiness check (``http_client or ...``) would replace an injected
        # client that happens to evaluate falsy with a fresh pool that
        # ``close()`` would then never release.
        self._owns_client = http_client is None
        if http_client is None:
            self._client = httpx.Client(timeout=timeout)
        else:
            self._client = http_client

    # -- lifecycle ----------------------------------------------------------

    def close(self) -> None:
        """Close the underlying HTTP client (only if this instance owns it)."""
        if self._owns_client:
            self._client.close()

    def __enter__(self) -> "ModelgovClient":
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc: Optional[BaseException],
        tb: Optional[TracebackType],
    ) -> None:
        """Close the client on leaving the ``with`` block; never suppresses errors."""
        self.close()

    # -- headers ------------------------------------------------------------

    def _headers(self, extra: Optional[Mapping[str, str]] = None) -> Dict[str, str]:
        """Build the request headers.

        Always sets ``content-type: application/json``; adds the bearer
        ``authorization`` header when an API key is configured; then merges
        ``extra``, skipping entries whose value is ``None``.
        """
        headers: Dict[str, str] = {"content-type": "application/json"}
        if self.api_key:
            headers["authorization"] = f"Bearer {self.api_key}"
        if extra:
            headers.update({k: v for k, v in extra.items() if v is not None})
        return headers

    # -- public API ---------------------------------------------------------

    def chat(
        self,
        *,
        user_id: str,
        user_type: str,
        feature: str,
        messages: Sequence[ChatMessage],
        context: Optional[Sequence[str]] = None,
        model_class: Optional[str] = None,
        requested_model_class: Optional[str] = None,
        input_tokens_estimate: Optional[int] = None,
        temperature: Optional[float] = None,
        project_id: Optional[str] = None,
        environment: Optional[str] = None,
        idempotency_key: Optional[str] = None,
        metadata: Optional[Mapping[str, Any]] = None,
    ) -> ChatResult:
        """Run a guarded chat completion (``POST /v1/chat``).

        Args:
            user_id: Your end-user id. Required.
            user_type: Must match a user type in ``modelgov.yaml``. Required.
            feature: Registered feature name. Required.
            messages: List of ``{"role", "content"}`` messages. ``content`` is a
                string, or a list of content parts (``{"type": "text", ...}`` /
                ``{"type": "image_url", "image_url": {"url": ...}}``) for vision.
            context: Retrieved passages for a grounded feature (safety
                ``grounding: strict``). The gateway answers ONLY from these,
                forces verbatim citations, and verifies them; unverifiable
                answers become a safe refusal.
            model_class: Requested model class (e.g. ``"cheap"``). Maps to the
                API's ``modelClass`` field.
            requested_model_class: Alias for ``model_class``; if both are given,
                ``model_class`` wins. Provided for parity with callers that use
                the more explicit name.
            input_tokens_estimate: Optional pre-estimate for budget checks.
            temperature: Sampling temperature (0-2).
            project_id: Optional project scope.
            environment: Optional environment tag.
            idempotency_key: Sent as the ``Idempotency-Key`` header. Retrying
                with the same key + body replays the first result instead of
                re-charging budget or re-calling the model.
            metadata: Arbitrary key/value data stored on the audit log
                (max 32 keys). Does not affect policy.

        Returns:
            The decoded :class:`~modelgov.types.ChatResponse` body.

        Raises:
            SafetyBlockedError: 403 ``safety_blocked``.
            PolicyBlockedError: 403 ``policy_blocked`` / ``budget_exceeded``.
            ModelgovError: any other non-2xx response.
        """
        body = self._chat_body(
            user_id=user_id,
            user_type=user_type,
            feature=feature,
            messages=messages,
            context=context,
            model_class=model_class,
            requested_model_class=requested_model_class,
            input_tokens_estimate=input_tokens_estimate,
            temperature=temperature,
            project_id=project_id,
            environment=environment,
            metadata=metadata,
        )
        extra = {"idempotency-key": idempotency_key} if idempotency_key else None
        response = self._client.post(
            f"{self.base_url}/v1/chat",
            headers=self._headers(extra),
            json=body,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    def chat_stream(
        self,
        *,
        user_id: str,
        user_type: str,
        feature: str,
        messages: Sequence[ChatMessage],
        context: Optional[Sequence[str]] = None,
        model_class: Optional[str] = None,
        requested_model_class: Optional[str] = None,
        input_tokens_estimate: Optional[int] = None,
        temperature: Optional[float] = None,
        project_id: Optional[str] = None,
        environment: Optional[str] = None,
        idempotency_key: Optional[str] = None,
        metadata: Optional[Mapping[str, Any]] = None,
    ) -> Iterator[str]:
        """Stream a guarded chat completion as incremental text chunks.

        Sends the same body as :meth:`chat` plus ``"stream": true`` and yields
        text deltas as they arrive. The generator holds the HTTP connection
        open until it is fully consumed (or closed); use it in a ``for`` loop
        or wrap it in ``contextlib.closing`` if you may abandon it early.

        SSE framing assumption:
            The server responds with ``Content-Type: text/event-stream`` and
            OpenAI-style Server-Sent Events — one event per ``data:`` line,
            terminated by a literal ``data: [DONE]`` sentinel. Each non-sentinel
            ``data:`` payload is JSON. This client extracts the incremental
            text from, in order of preference:

            * ``chunk["choices"][0]["delta"]["content"]`` (OpenAI chat delta),
            * ``chunk["delta"]`` or ``chunk["content"]`` or ``chunk["text"]``
              (simpler shapes the Modelgov API may emit).

            If a ``data:`` payload is not valid JSON it is yielded as a text
            chunk (tolerant of a plain-text delta stream). Empty deltas are
            skipped. Multi-line ``data:`` events are not reassembled; each
            line is parsed on its own.

        Policy/safety blocks that happen *before* streaming starts are returned
        as a normal non-2xx JSON response and raised as the usual typed errors.

        Yields:
            ``str`` chunks of assistant text, in order.

        Raises:
            SafetyBlockedError / PolicyBlockedError / ModelgovError: on a non-2xx
                response received before the stream body begins.
        """
        body = self._chat_body(
            user_id=user_id,
            user_type=user_type,
            feature=feature,
            messages=messages,
            context=context,
            model_class=model_class,
            requested_model_class=requested_model_class,
            input_tokens_estimate=input_tokens_estimate,
            temperature=temperature,
            project_id=project_id,
            environment=environment,
            metadata=metadata,
        )
        body["stream"] = True

        extra: Dict[str, str] = {"accept": "text/event-stream"}
        if idempotency_key:
            extra["idempotency-key"] = idempotency_key

        with self._client.stream(
            "POST",
            f"{self.base_url}/v1/chat",
            headers=self._headers(extra),
            json=body,
        ) as response:
            if response.status_code < 200 or response.status_code >= 300:
                # A streamed response has no body loaded yet; materialize it
                # so the error envelope can be decoded and mapped.
                response.read()
                self._raise_for_status(response)

            for line in response.iter_lines():
                chunk = _parse_sse_line(line)
                if chunk is _DONE:
                    break
                if chunk:
                    yield chunk

    def explain(
        self,
        *,
        user_id: str,
        user_type: str,
        feature: str,
        model_class: Optional[str] = None,
        requested_model_class: Optional[str] = None,
        input_tokens_estimate: Optional[int] = None,
        project_id: Optional[str] = None,
        environment: Optional[str] = None,
    ) -> ExplainResult:
        """Dry-run policy evaluation (``POST /v1/explain``).

        Returns the decision, resolved model, safety plan, and a live budget
        snapshot *without* calling the model or reserving budget. Same identity
        fields as :meth:`chat`, but no ``messages``.

        Args:
            user_id: Sent as ``userId``.
            user_type: Sent as ``userType``.
            feature: Sent as ``feature``.
            model_class: Sent as ``modelClass`` when set.
            requested_model_class: Alias used when ``model_class`` is not set.
            input_tokens_estimate: Sent as ``inputTokensEstimate`` when set.
            project_id: Sent as ``projectId`` when set.
            environment: Sent as ``environment`` when set.

        Returns:
            The decoded :class:`~modelgov.types.ExplainResponse` body.

        Raises:
            ModelgovError: (or a subclass) on any non-2xx response.
        """
        body: Dict[str, Any] = {
            "userId": user_id,
            "userType": user_type,
            "feature": feature,
        }
        resolved_model_class = model_class or requested_model_class
        if resolved_model_class is not None:
            body["modelClass"] = resolved_model_class
        if input_tokens_estimate is not None:
            body["inputTokensEstimate"] = input_tokens_estimate
        if project_id is not None:
            body["projectId"] = project_id
        if environment is not None:
            body["environment"] = environment

        response = self._client.post(
            f"{self.base_url}/v1/explain",
            headers=self._headers(),
            json=body,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    def embed(
        self,
        *,
        user_id: str,
        user_type: str,
        feature: str,
        input: Union[str, Sequence[str]],
        model_class: Optional[str] = None,
        requested_model_class: Optional[str] = None,
        input_tokens_estimate: Optional[int] = None,
        project_id: Optional[str] = None,
        environment: Optional[str] = None,
        metadata: Optional[Mapping[str, Any]] = None,
    ) -> EmbeddingsResult:
        """Embed one or more texts through the gateway (``POST /v1/embeddings``).

        Policy-checked (``feature`` + ``user_type``), budget-reserved, and
        audited exactly like :meth:`chat`, and raises the same typed errors on a
        ``403`` policy/budget block.

        Args:
            user_id: Sent as ``userId``.
            user_type: Sent as ``userType``.
            feature: Sent as ``feature``.
            input: A single text, or a batch of texts to embed. A string is
                sent as-is; any other sequence is sent as a JSON array (one
                vector is returned per input, in order).
            model_class: Sent as ``modelClass`` when set.
            requested_model_class: Alias used when ``model_class`` is not set.
            input_tokens_estimate: Sent as ``inputTokensEstimate`` when set.
            project_id: Sent as ``projectId`` when set.
            environment: Sent as ``environment`` when set.
            metadata: Copied into the body as ``metadata`` when set.

        Returns:
            The decoded :class:`~modelgov.types.EmbeddingsResponse` body, whose
            ``embeddings`` is one vector per input in request order.
        """
        body: Dict[str, Any] = {
            "userId": user_id,
            "userType": user_type,
            "feature": feature,
            "input": input if isinstance(input, str) else list(input),
        }
        resolved_model_class = model_class or requested_model_class
        if resolved_model_class is not None:
            body["modelClass"] = resolved_model_class
        if input_tokens_estimate is not None:
            body["inputTokensEstimate"] = input_tokens_estimate
        if project_id is not None:
            body["projectId"] = project_id
        if environment is not None:
            body["environment"] = environment
        if metadata is not None:
            body["metadata"] = dict(metadata)

        response = self._client.post(
            f"{self.base_url}/v1/embeddings",
            headers=self._headers(),
            json=body,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    def get_usage(
        self,
        *,
        user_id: Optional[str] = None,
        feature: Optional[str] = None,
        project_id: Optional[str] = None,
    ) -> UsageResult:
        """Fetch budget counters and recent stats (``GET /v1/usage``).

        Requires an API key with the ``usage:read`` permission.

        Args:
            user_id: Optional ``userId`` query filter.
            feature: Optional ``feature`` query filter.
            project_id: Optional ``projectId`` query filter.

        Returns:
            The decoded JSON body as a loose mapping.
        """
        params: Dict[str, str] = {}
        if user_id is not None:
            params["userId"] = user_id
        if feature is not None:
            params["feature"] = feature
        if project_id is not None:
            params["projectId"] = project_id

        response = self._client.get(
            f"{self.base_url}/v1/usage",
            headers=self._headers(),
            params=params,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    def get_usage_summary(
        self,
        *,
        feature: Optional[str] = None,
        user_type: Optional[str] = None,
        since: Optional[str] = None,
        project_id: Optional[str] = None,
    ) -> UsageResult:
        """Fetch aggregated cost/request summary (``GET /v1/usage/summary``).

        Args:
            feature: Optional ``feature`` query filter.
            user_type: Optional ``userType`` query filter.
            since: ``"24h"``, ``"7d"``, or an ISO-8601 timestamp (default
                ``"24h"`` server-side).
            project_id: Optional ``projectId`` query filter.

        Returns:
            The decoded JSON body as a loose mapping.
        """
        params: Dict[str, str] = {}
        if feature is not None:
            params["feature"] = feature
        if user_type is not None:
            params["userType"] = user_type
        if since is not None:
            params["since"] = since
        if project_id is not None:
            params["projectId"] = project_id

        response = self._client.get(
            f"{self.base_url}/v1/usage/summary",
            headers=self._headers(),
            params=params,
        )
        return self._handle_json(response)  # type: ignore[return-value]

    # -- internals ----------------------------------------------------------

    @staticmethod
    def _chat_body(
        *,
        user_id: str,
        user_type: str,
        feature: str,
        messages: Sequence[ChatMessage],
        context: Optional[Sequence[str]],
        model_class: Optional[str],
        requested_model_class: Optional[str],
        input_tokens_estimate: Optional[int],
        temperature: Optional[float],
        project_id: Optional[str],
        environment: Optional[str],
        metadata: Optional[Mapping[str, Any]],
    ) -> Dict[str, Any]:
        """Build the camelCase JSON body the API expects, omitting None fields.

        Messages, ``context`` and ``metadata`` are shallow-copied so the
        caller's containers are never mutated (``chat_stream`` adds a
        ``stream`` key to the returned dict).
        """
        body: Dict[str, Any] = {
            "userId": user_id,
            "userType": user_type,
            "feature": feature,
            "messages": [dict(m) for m in messages],
        }
        if context is not None:
            body["context"] = list(context)
        # ``model_class`` wins; the alias is only consulted when it is unset.
        resolved_model_class = model_class or requested_model_class
        if resolved_model_class is not None:
            body["modelClass"] = resolved_model_class
        if input_tokens_estimate is not None:
            body["inputTokensEstimate"] = input_tokens_estimate
        if temperature is not None:
            body["temperature"] = temperature
        if project_id is not None:
            body["projectId"] = project_id
        if environment is not None:
            body["environment"] = environment
        if metadata is not None:
            body["metadata"] = dict(metadata)
        return body

    def _handle_json(self, response: httpx.Response) -> Any:
        """Return the parsed JSON body, or raise a typed error on non-2xx.

        A 2xx response whose body is not valid JSON yields an empty dict.
        """
        if response.status_code < 200 or response.status_code >= 300:
            self._raise_for_status(response)
        try:
            return response.json()
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return {}

    @staticmethod
    def _raise_for_status(response: httpx.Response) -> None:
        """Map a non-2xx response to the appropriate ModelgovError subclass.

        Always raises. An undecodable error body is treated as ``{}``, which
        produces the generic ``"error"`` code.
        """
        try:
            body = response.json()
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            body = {}

        code = _error_code(body)
        if code == "safety_blocked":
            raise SafetyBlockedError(response.status_code, code, body)
        if code in ("policy_blocked", "budget_exceeded"):
            raise PolicyBlockedError(response.status_code, code, body)
        raise ModelgovError(response.status_code, code, body)
484
+
485
+
486
def _error_code(body: Any) -> str:
    """Return the error code carried by a decoded error body.

    Accepts two envelope shapes: ``{"error": "<code>"}`` and
    ``{"error": {"code": "<code>"}}``. Anything else (non-dict body, missing
    or non-string code) yields the generic ``"error"``.
    """
    error = body.get("error") if isinstance(body, dict) else None
    if isinstance(error, str):
        return error
    if isinstance(error, dict) and isinstance(error.get("code"), str):
        return error["code"]
    return "error"
497
+
498
+
499
# Sentinel returned by _parse_sse_line for the terminal `data: [DONE]` event.
_DONE = object()


def _parse_sse_line(line: str) -> Any:
    """Parse one SSE line into a text chunk, ``""``, or the ``_DONE`` sentinel.

    Returns ``_DONE`` for the ``[DONE]`` sentinel, ``""`` for lines with no
    text delta (comments, blank lines, non-``data:`` fields, empty or
    whitespace-only payloads, JSON objects without a recognised text field),
    and the extracted text chunk otherwise.

    A payload that is not a JSON object is treated as a plain-text delta and
    returned with its whitespace intact: only the single optional space after
    ``data:`` is dropped, as the SSE field syntax prescribes. Stripping more
    would glue words together when the caller concatenates chunks such as
    ``"Hello"`` and ``" world"``. This also covers plain-text tokens that
    happen to be valid JSON scalars (``42``, ``true``), which would otherwise
    be decoded and discarded.
    """
    if not line:
        return ""

    # Leading indentation before the field name is tolerated.
    field = line.lstrip()
    if not field or field.startswith(":"):
        # Blank line (event separator) or SSE comment.
        return ""
    if not field.startswith("data:"):
        # Ignore other SSE fields (event:, id:, retry:).
        return ""

    raw = field[len("data:"):].rstrip("\r\n")
    if raw.startswith(" "):
        raw = raw[1:]

    # The trimmed form is only used for sentinel detection and JSON decoding.
    data = raw.strip()
    if data == "[DONE]":
        return _DONE
    if not data:
        return ""

    try:
        payload = json.loads(data)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        # Tolerate a plain-text delta stream.
        return raw

    if not isinstance(payload, dict):
        # JSON scalar/array: no event shape we know, so it is plain text.
        return raw

    return _extract_delta(payload)


def _extract_delta(payload: Any) -> str:
    """Pull the incremental text out of a decoded SSE JSON payload.

    Lookup order: ``choices[0].delta.content``, ``choices[0].message.content``,
    ``choices[0].text``, then the top-level ``delta`` / ``content`` / ``text``
    keys. Only string values count; returns ``""`` when nothing matches or
    ``payload`` is not a dict.
    """
    if not isinstance(payload, dict):
        return ""

    # OpenAI-style: choices[0].delta.content
    choices = payload.get("choices")
    if isinstance(choices, list) and choices:
        first = choices[0]
        if isinstance(first, dict):
            delta = first.get("delta")
            if isinstance(delta, dict):
                content = delta.get("content")
                if isinstance(content, str):
                    return content
            # Non-streaming-style fallback within a choice.
            message = first.get("message")
            if isinstance(message, dict) and isinstance(message.get("content"), str):
                return message["content"]
            if isinstance(first.get("text"), str):
                return first["text"]

    # Simpler shapes the Modelgov API may emit.
    for key in ("delta", "content", "text"):
        value = payload.get(key)
        if isinstance(value, str):
            return value

    return ""
modelgov/errors.py ADDED
@@ -0,0 +1,96 @@
1
+ """Typed exceptions raised by :class:`modelgov.client.ModelgovClient`.
2
+
3
+ Mirrors the TypeScript SDK's error hierarchy (``ModelgovError`` /
4
+ ``PolicyBlockedError`` / ``SafetyBlockedError``) while surfacing the API's
5
+ structured error envelope (``code``, ``message``, ``details``, ``requestId``,
6
+ and block metadata from ``details``) as first-class attributes.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any, Dict, Optional
12
+
13
+
14
class ModelgovError(Exception):
    """Base error carrying the HTTP status and the API's structured error body.

    The Modelgov error envelope looks like::

        {
          "error": {
            "code": "policy_blocked",
            "message": "...",
            "details": {
              "auditRequestId": "req_42"   # audit-log row (block/safety only)
            },
            "requestId": "550e8400-..."    # HTTP trace id (UUID)
          }
        }

    Instances survive ``pickle`` / ``copy`` round-trips, so they can cross
    process boundaries (e.g. ``multiprocessing`` or process-pool workers).

    Args:
        status: HTTP status code of the failed response.
        code: Stable error code string.
        body: The parsed response body, if any.
        message: Optional override for the human-readable message.

    Attributes:
        status: HTTP status code (0 if the request never got a response).
        code: The stable ``error.code`` string (e.g. ``"policy_blocked"``).
        message: The ``message`` override if given, else ``error.message``,
            else ``code``.
        details: The ``error.details`` object, if present and a dict.
        request_id: ``error.requestId`` — the HTTP trace id (UUID).
        audit_request_id: ``error.details.auditRequestId`` — the ``req_<n>``
            audit id, present on policy/safety/budget blocks. Falls back to
            ``error.auditRequestId``. Use with ``modelgov requests show``.
        reason_code: ``error.details.reasonCode`` — stable machine-readable
            block reason (e.g. ``"daily_budget_exceeded"``), when present.
        budget_remaining: ``error.details.budgetRemaining`` — remaining budget
            headroom at decision time, when the API reports it.
        body: The full parsed response body.
    """

    def __init__(
        self,
        status: int,
        code: str,
        body: Any = None,
        *,
        message: Optional[str] = None,
    ) -> None:
        self.status = status
        self.code = code
        self.body = body

        # Anything but a dict under "error" is treated as "no envelope".
        error = body.get("error") if isinstance(body, dict) else None
        error_obj: Dict[str, Any] = error if isinstance(error, dict) else {}

        self.message: str = message or error_obj.get("message") or code
        raw_details = error_obj.get("details")
        self.details: Optional[Dict[str, Any]] = (
            raw_details if isinstance(raw_details, dict) else None
        )
        self.request_id: Optional[str] = error_obj.get("requestId")

        details: Dict[str, Any] = self.details if self.details is not None else {}
        self.audit_request_id: Optional[str] = (
            details.get("auditRequestId")
            if isinstance(details.get("auditRequestId"), str)
            else error_obj.get("auditRequestId")
        )
        # Block metadata surfaced from `error.details` as first-class attributes,
        # mirroring the TypeScript SDK's `reasonCode` / `budgetRemaining`.
        reason_code = details.get("reasonCode")
        self.reason_code: Optional[str] = reason_code if isinstance(reason_code, str) else None
        budget_remaining = details.get("budgetRemaining")
        self.budget_remaining: Optional[Dict[str, Any]] = (
            budget_remaining if isinstance(budget_remaining, dict) else None
        )

        super().__init__(f"modelgov request failed ({status}): {code} - {self.message}")

    def __reduce__(self) -> tuple:
        """Describe how to rebuild this error for ``pickle`` and ``copy``.

        The default ``BaseException`` reduction replays ``self.args`` — here a
        single formatted string — into ``__init__``, which fails because
        ``code`` is required. Replaying the constructor inputs instead
        restores every derived attribute. ``self.message`` is passed as the
        override so the rebuilt message and ``str()`` are identical.
        """
        return (
            _rebuild_error,
            (type(self), self.status, self.code, self.body, self.message),
        )


def _rebuild_error(
    cls: type,
    status: int,
    code: str,
    body: Any,
    message: Optional[str],
) -> ModelgovError:
    """Reconstruct a :class:`ModelgovError` (or subclass) during unpickling.

    Kept at module level so ``pickle`` can reference it by qualified name; it
    exists only to forward the keyword-only ``message`` argument.
    """
    return cls(status, code, body, message=message)


class PolicyBlockedError(ModelgovError):
    """Raised on 403 ``policy_blocked`` or ``budget_exceeded``.

    Inspect :attr:`~ModelgovError.body` / :attr:`~ModelgovError.details` for the
    block reason, and :attr:`~ModelgovError.audit_request_id` for the audit id.
    """


class SafetyBlockedError(ModelgovError):
    """Raised on 403 ``safety_blocked`` (PII or prompt injection)."""
94
+
95
+
96
+ __all__ = ["ModelgovError", "PolicyBlockedError", "SafetyBlockedError"]
modelgov/py.typed ADDED
File without changes
modelgov/types.py ADDED
@@ -0,0 +1,236 @@
1
+ """Typed request/response models for the Modelgov API.
2
+
3
+ These mirror ``packages/sdk-typescript/src/types.ts`` and the shapes in
4
+ ``packages/api/openapi.json``. Response models are :class:`typing.TypedDict`
5
+ so a decoded JSON body *is* the typed object with no conversion step — the
6
+ API already returns camelCase keys and the SDK returns them unchanged.
7
+
8
+ ``ChatResult`` / ``ExplainResult`` / ``UsageResult`` are exported aliases used
9
+ by the client's return-type annotations.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import sys
15
+ from typing import Any, Dict, List, Optional, Union
16
+
17
+ if sys.version_info >= (3, 11):
18
+ from typing import NotRequired, TypedDict
19
+ else: # pragma: no cover - exercised on 3.9/3.10 runtimes
20
+ from typing_extensions import NotRequired, TypedDict # type: ignore
21
+
22
+
23
+ # --- Chat -------------------------------------------------------------------
24
+
25
+
26
+ class TextPart(TypedDict):
27
+ """A text segment of a multimodal message (``type`` is ``"text"``)."""
28
+
29
+ type: str
30
+ text: str
31
+
32
+
33
+ class ImageUrl(TypedDict):
34
+ url: str
35
+ detail: NotRequired[str] # "low" | "high" | "auto"
36
+
37
+
38
+ class ImagePart(TypedDict):
39
+ """An image segment of a multimodal message (``type`` is ``"image_url"``).
40
+
41
+ ``image_url.url`` is an http(s) URL or a ``data:`` URI (base64) — e.g. a
42
+ page scan for OCR. Passed through to a vision model; the gateway still
43
+ governs budget, audit, and text-part safety.
44
+ """
45
+
46
+ type: str
47
+ image_url: ImageUrl
48
+
49
+
50
+ ContentPart = Union[TextPart, ImagePart]
51
+
52
+
53
+ class ChatMessage(TypedDict):
54
+ """A single chat message. ``role`` is one of system/user/assistant/tool.
55
+
56
+ ``content`` is a plain string, or a list of OpenAI-style content parts
57
+ (text + images) for vision / multimodal features.
58
+ """
59
+
60
+ role: str
61
+ content: Union[str, List[ContentPart]]
62
+
63
+
64
+ class Usage(TypedDict):
65
+ inputTokens: Optional[int]
66
+ outputTokens: Optional[int]
67
+
68
+
69
+ class Cost(TypedDict):
70
+ estimatedUsd: float
71
+ actualUsd: float
72
+
73
+
74
+ class BudgetRemaining(TypedDict):
75
+ userDailyUsd: float
76
+ # null when no cap is configured (monthly_usd: 0).
77
+ featureMonthlyUsd: Optional[float]
78
+ globalMonthlyUsd: Optional[float]
79
+ # Token headroom; present when a token cap is configured, null otherwise.
80
+ userDailyTokens: NotRequired[Optional[int]]
81
+ featureMonthlyTokens: NotRequired[Optional[int]]
82
+ globalMonthlyTokens: NotRequired[Optional[int]]
83
+
84
+
85
+ class Safety(TypedDict):
86
+ piiMasked: bool
87
+ injectionBlocked: bool
88
+ # Present only for grounded features: whether the answer's citations were
89
+ # verified against the provided context.
90
+ grounded: NotRequired[bool]
91
+
92
+
93
+ class ResponseMessage(TypedDict):
94
+ role: str
95
+ content: str
96
+
97
+
98
+ class ChatResponse(TypedDict):
99
+ """``200`` body of ``POST /v1/chat``."""
100
+
101
+ message: ResponseMessage
102
+ model: str
103
+ # Provider of the model that ran, e.g. "openai", "openrouter", "ollama".
104
+ provider: str
105
+ decision: str # "allow" | "degrade" | "fallback"
106
+ reason: NotRequired[str]
107
+ usage: Usage
108
+ cost: Cost
109
+ # null under hierarchical budgets (the node tree is the authority).
110
+ budgetRemaining: Optional[BudgetRemaining]
111
+ safety: Safety
112
+ requestId: str # audit id ("req_<n>")
113
+
114
+
115
+ # --- Explain ----------------------------------------------------------------
116
+
117
+
118
+ class ExplainRequested(TypedDict):
119
+ userId: str
120
+ userType: str
121
+ feature: str
122
+ modelClass: str
123
+
124
+
125
+ class ExplainResolved(TypedDict):
126
+ modelClass: str
127
+ model: str
128
+ provider: str
129
+ fallbackModel: NotRequired[str]
130
+
131
+
132
+ class ExplainSafety(TypedDict):
133
+ preset: str
134
+ pii: str
135
+ promptInjection: str
136
+ maxOutputTokens: int
137
+
138
+
139
+ class ExplainCost(TypedDict):
140
+ estimatedUsd: float
141
+
142
+
143
+ class ExplainBudgetUsed(TypedDict):
144
+ userDailyUsd: float
145
+ userDailyRequests: int
146
+ featureMonthlyUsd: float
147
+ globalMonthlyUsd: float
148
+
149
+
150
+ class ExplainBudget(TypedDict):
151
+ remaining: BudgetRemaining
152
+ used: ExplainBudgetUsed
153
+ permittedModels: List[str]
154
+ dailyRequestLimit: int
155
+ dailyRequestsRemaining: int
156
+
157
+
158
+ class ExplainResponse(TypedDict):
159
+ """``200`` body of ``POST /v1/explain``."""
160
+
161
+ decision: str # "allow" | "block" | "degrade" | "fallback"
162
+ reason: NotRequired[str]
163
+ reasonCode: NotRequired[str]
164
+ requested: ExplainRequested
165
+ resolved: ExplainResolved
166
+ safety: ExplainSafety
167
+ cost: ExplainCost
168
+ budget: ExplainBudget
169
+ wouldCallModel: bool
170
+ summary: str
171
+
172
+
173
+ # --- Embeddings -------------------------------------------------------------
174
+
175
+
176
+ class EmbeddingsUsage(TypedDict):
177
+ inputTokens: Optional[int]
178
+
179
+
180
+ class EmbeddingsResponse(TypedDict):
181
+ """``200`` body of ``POST /v1/embeddings``."""
182
+
183
+ embeddings: List[List[float]] # one vector per input, in request order
184
+ model: str
185
+ provider: str
186
+ decision: str # "allow" | "degrade" | "fallback"
187
+ reason: NotRequired[str]
188
+ usage: EmbeddingsUsage
189
+ cost: Cost
190
+ # null under hierarchical budgets (the node tree is the authority).
191
+ budgetRemaining: Optional[BudgetRemaining]
192
+ requestId: str
193
+
194
+
195
+ # --- Usage ------------------------------------------------------------------
196
+
197
+ # The /v1/usage and /v1/usage/summary bodies are operator-facing and not fully
198
+ # fixed in the OpenAPI spec, so they are typed as a loose mapping.
199
+ UsageResponse = Dict[str, Any]
200
+
201
+
202
+ # --- Public aliases (match the naming used in the task/client signatures) ---
203
+
204
+ ChatResult = ChatResponse
205
+ ExplainResult = ExplainResponse
206
+ EmbeddingsResult = EmbeddingsResponse
207
+ UsageResult = UsageResponse
208
+
209
+
210
+ __all__ = [
211
+ "ChatMessage",
212
+ "TextPart",
213
+ "ImageUrl",
214
+ "ImagePart",
215
+ "ContentPart",
216
+ "Usage",
217
+ "Cost",
218
+ "BudgetRemaining",
219
+ "Safety",
220
+ "ResponseMessage",
221
+ "ChatResponse",
222
+ "ChatResult",
223
+ "EmbeddingsUsage",
224
+ "EmbeddingsResponse",
225
+ "EmbeddingsResult",
226
+ "ExplainRequested",
227
+ "ExplainResolved",
228
+ "ExplainSafety",
229
+ "ExplainCost",
230
+ "ExplainBudgetUsed",
231
+ "ExplainBudget",
232
+ "ExplainResponse",
233
+ "ExplainResult",
234
+ "UsageResponse",
235
+ "UsageResult",
236
+ ]
@@ -0,0 +1,272 @@
1
+ Metadata-Version: 2.4
2
+ Name: modelgov
3
+ Version: 1.0.0
4
+ Summary: Modelgov API client. `feature` and `userType` are mandatory on every request.
5
+ Project-URL: Homepage, https://github.com/mml555/modelgov
6
+ Author: Modelgov
7
+ License: MIT
8
+ Keywords: budget,gateway,llm,modelgov,policy,safety
9
+ Requires-Python: >=3.9
10
+ Requires-Dist: httpx>=0.27
11
+ Requires-Dist: typing-extensions>=4.0; python_version < '3.11'
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest; extra == 'dev'
14
+ Requires-Dist: respx; extra == 'dev'
15
+ Description-Content-Type: text/markdown
16
+
17
+ # Modelgov Python SDK
18
+
19
+ Package: `modelgov` (module `modelgov`). The Python counterpart to
20
+ [`@modelgov/sdk`](../sdk-typescript).
21
+
22
+ The SDK is a **thin HTTP client** to the Modelgov API. Policy enforcement is
23
+ always server-side. Every request declares a **user**, **user type**, and
24
+ **feature**; policy is checked **before** the model call.
25
+
26
+ ## Install
27
+
28
+ ```bash
29
+ pip install modelgov
30
+ ```
31
+
32
+ > Note: `modelgov` is not yet published to PyPI. Until then, install from
33
+ > source with the editable install below (see also [self-host.md](../../docs/self-host.md)).
34
+
35
+ From the monorepo (editable, with test deps):
36
+
37
+ ```bash
38
+ pip install -e "packages/sdk-python[dev]"
39
+ ```
40
+
41
+ Requires Python >= 3.9. Depends on [`httpx`](https://www.python-httpx.org/) and, on Python < 3.11, `typing-extensions`.
42
+
43
+ ## Create a client
44
+
45
+ ```python
46
+ import os
47
+ from modelgov import ModelgovClient
48
+
49
+ ai = ModelgovClient(
50
+ base_url=os.environ.get("MODELGOV_URL", "http://localhost:3000"),
51
+ api_key=os.environ["MODELGOV_API_KEY"],
52
+ )
53
+ ```
54
+
55
+ `ModelgovClient` is a context manager and closes its connection pool on exit:
56
+
57
+ ```python
58
+ with ModelgovClient(base_url=..., api_key=...) as ai:
59
+ ...
60
+ ```
61
+
62
+ ## Chat
63
+
64
+ ```python
65
+ res = ai.chat(
66
+ user_id="user_123", # your end-user id
67
+ user_type="logged_in", # must match modelgov.yaml budgets
68
+ feature="support_chat", # required — registered feature
69
+ model_class="cheap",
70
+ messages=[{"role": "user", "content": "Help me reset my password"}],
71
+ # optional:
72
+ # input_tokens_estimate=120,
73
+ # temperature=0.7,
74
+ # project_id="checkout",
75
+ # environment="production",
76
+ # metadata={"trace_id": "abc"},
77
+ )
78
+
79
+ print(res["message"]["content"])
80
+ print(res["model"], res["decision"], res["requestId"])
81
+ ```
82
+
83
+ Snake_case keyword args are converted to the camelCase JSON the API expects
84
+ (`user_id` → `userId`, `model_class` → `modelClass`, etc.). `None`-valued
85
+ optional args are omitted from the request body.
86
+
87
+ ### Response
88
+
89
+ `chat()` returns a `ChatResponse` (a `TypedDict`), so it is a plain `dict` with
90
+ typed keys:
91
+
92
+ ```python
93
+ {
94
+ "message": {"role": "assistant", "content": "..."},
95
+ "model": "openai/gpt-4o-mini",
96
+ "decision": "allow", # "allow" | "degrade" | "fallback"
97
+ "usage": {"inputTokens": 12, "outputTokens": 8},
98
+ "cost": {"estimatedUsd": 0.0001, "actualUsd": 0.00008},
99
+ "budgetRemaining": {"userDailyUsd": 0.24, "featureMonthlyUsd": None, "globalMonthlyUsd": 499.5},
100
+ "safety": {"piiMasked": False, "injectionBlocked": False},
101
+ "requestId": "req_42", # audit id — log with your domain ids
102
+ }
103
+ ```
104
+
105
+ ### Vision (multimodal)
106
+
107
+ Pass content parts instead of a string to send images to a vision model. The
108
+ gateway governs budget/audit and still runs safety on the text parts:
109
+
110
+ ```python
111
+ res = ai.chat(
112
+ user_id="user_123",
113
+ user_type="logged_in",
114
+ feature="document_extraction",
115
+ messages=[{
116
+ "role": "user",
117
+ "content": [
118
+ {"type": "text", "text": "Extract the total from this receipt."},
119
+ {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
120
+ ],
121
+ }],
122
+ )
123
+ ```
124
+
125
+ ### Grounding
126
+
127
+ For a feature with safety `grounding: strict`, pass retrieved passages as
128
+ `context`. The gateway answers only from them, forces verbatim citations, and
129
+ verifies them — unverifiable answers become a safe refusal, and
130
+ `res["safety"]["grounded"]` reports whether the citations checked out:
131
+
132
+ ```python
133
+ res = ai.chat(
134
+ user_id="user_123",
135
+ user_type="logged_in",
136
+ feature="grounded_support",
137
+ messages=[{"role": "user", "content": "How long do refunds take?"}],
138
+ context=["Refunds are issued within 5 business days of approval."],
139
+ )
140
+ ```
141
+
142
+ ## Streaming
143
+
144
+ `chat_stream()` yields incremental text chunks over Server-Sent Events. It
145
+ sends `"stream": true` and iterates `data:` lines until the `[DONE]` sentinel.
146
+
147
+ ```python
148
+ for chunk in ai.chat_stream(
149
+ user_id="user_123",
150
+ user_type="logged_in",
151
+ feature="support_chat",
152
+ messages=[{"role": "user", "content": "Write a haiku about budgets"}],
153
+ ):
154
+ print(chunk, end="", flush=True)
155
+ ```
156
+
157
+ **SSE framing assumption:** OpenAI-style events — one JSON payload per `data:`
158
+ line, terminated by `data: [DONE]`. Text is read from
159
+ `choices[0].delta.content` (or a simpler `delta` / `content` / `text` field).
160
+ Non-JSON `data:` payloads are yielded verbatim. See the `chat_stream` docstring
161
+ if the server's framing differs.
162
+
163
+ The generator holds the connection open until fully consumed. Policy/safety
164
+ blocks that occur before the stream begins raise the usual typed errors.
165
+
166
+ ## Embeddings
167
+
168
+ `embed()` runs governed embeddings (`POST /v1/embeddings`) — policy-checked,
169
+ budget-reserved, and audited like `chat()`. Pass one string or a batch:
170
+
171
+ ```python
172
+ res = ai.embed(
173
+ user_id="user_123",
174
+ user_type="logged_in",
175
+ feature="rag_ingest",
176
+ input=["first passage", "second passage"], # or a single string
177
+ )
178
+ vectors = res["embeddings"] # one vector per input, in request order
179
+ ```
180
+
181
+ ## Idempotency
182
+
183
+ Pass a stable key to retry safely without double-charging budget or re-calling
184
+ the model:
185
+
186
+ ```python
187
+ ai.chat(
188
+ user_id="user_123",
189
+ user_type="logged_in",
190
+ feature="support_chat",
191
+ messages=[{"role": "user", "content": "..."}],
192
+ idempotency_key=f"chat-{user_id}-{session_id}", # user_id / session_id: your own stable ids
193
+ )
194
+ ```
195
+
196
+ The API returns `x-idempotent-replay: true` on cache hits; a same-key request
197
+ with a different body returns `422 idempotency_key_reuse`.
198
+
199
+ ## Explain (dry run)
200
+
201
+ Evaluate policy without calling the model or reserving budget:
202
+
203
+ ```python
204
+ plan = ai.explain(
205
+ user_id="user_123",
206
+ user_type="logged_in",
207
+ feature="support_chat",
208
+ model_class="premium",
209
+ )
210
+ print(plan["decision"], plan["summary"])
211
+ ```
212
+
213
+ ## Usage
214
+
215
+ Requires an API key with `usage:read`.
216
+
217
+ ```python
218
+ usage = ai.get_usage(user_id="user_123")
219
+ summary = ai.get_usage_summary(feature="support_chat", since="7d")
220
+ ```
221
+
222
+ ## Errors
223
+
224
+ | Class | When |
225
+ | --- | --- |
226
+ | `PolicyBlockedError` | 403 `policy_blocked` or `budget_exceeded` |
227
+ | `SafetyBlockedError` | 403 `safety_blocked` (PII or prompt injection) |
228
+ | `ModelgovError` | Other 4xx / 5xx |
229
+
230
+ `PolicyBlockedError` and `SafetyBlockedError` subclass `ModelgovError`. Each
231
+ error carries the API's structured envelope:
232
+
233
+ ```python
234
+ from modelgov import ModelgovError, PolicyBlockedError, SafetyBlockedError
235
+
236
+ try:
237
+ ai.chat(
238
+ user_id="user_123",
239
+ user_type="logged_in",
240
+ feature="support_chat",
241
+ messages=[{"role": "user", "content": "..."}],
242
+ )
243
+ except PolicyBlockedError as err:
244
+ print(err.status) # 403
245
+ print(err.code) # "policy_blocked" | "budget_exceeded"
246
+ print(err.message) # human-readable
247
+ print(err.details) # error.details object
248
+ print(err.audit_request_id) # "req_<n>" — audit id (see `modelgov requests show`)
249
+ print(err.request_id) # HTTP trace id (UUID)
250
+ print(err.body) # full parsed envelope
251
+ except ModelgovError as err:
252
+ ...
253
+ ```
254
+
255
+ ## Integration pattern
256
+
257
+ ```text
258
+ 1. Authenticate user (your app)
259
+ 2. Authorize product action (your app)
260
+ 3. ai.chat(user_id=..., user_type=..., feature=..., messages=...)
261
+ 4. Return res["message"]["content"] to the user
262
+ ```
263
+
264
+ Never call Modelgov before your app has decided the user may use this feature.
265
+
266
+ ## Development
267
+
268
+ ```bash
269
+ pip install -e "packages/sdk-python[dev]"
270
+ cd packages/sdk-python
271
+ pytest
272
+ ```
@@ -0,0 +1,8 @@
1
+ modelgov/__init__.py,sha256=TG3dZTOrt7LyUpvmT0GC-p8QGL9sDqgCcqpqA3PgNew,1824
2
+ modelgov/client.py,sha256=-GLUV2aZW9iJIpbHnMDqU97Pn2gc1kB3J1UmA9bj8KU,21036
3
+ modelgov/errors.py,sha256=GkfgnYbcCbD9Yw5zQrmpmlIGLKYYiimO_3hSePng-P8,3827
4
+ modelgov/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ modelgov/types.py,sha256=O2LPIj4yTjOvIYJZ5GEcUOBJgNgtMbScE9MQlqGLSr8,5994
6
+ modelgov-1.0.0.dist-info/METADATA,sha256=fD6eei3pu4SQmF9STK2LkpXMRZiFd1WKMfYuVtQLhm0,7824
7
+ modelgov-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
8
+ modelgov-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any