semql-auth 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
semql_auth/__init__.py ADDED
@@ -0,0 +1,35 @@
1
+ """Public surface of the semql-auth package.
2
+
3
+ Reference ``TokenVerifier`` / ``TokenMapper`` implementations for turning
4
+ a transport credential (bearer token, mTLS client cert) into a
5
+ :class:`semql.model.AuthContext` — the identity ``semql`` threads through
6
+ ``Catalog.compile(viewer=...)`` for ``required_roles`` visibility and
7
+ ``security_sql`` row scoping.
8
+
9
+ ``AuthContext`` itself lives in ``semql.model`` (the compiler depends on
10
+ it); this package is only the credential→identity adapters, which carry
11
+ optional third-party deps (PyJWT, httpx, cryptography) the pure core
12
+ shouldn't.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from semql_auth.auth import (
18
+ DictMapper,
19
+ HMACVerifier,
20
+ IntrospectMapper,
21
+ JWKSVerifier,
22
+ TokenMapper,
23
+ TokenVerifier,
24
+ X509Mapper,
25
+ )
26
+
27
+ __all__ = [
28
+ "DictMapper",
29
+ "HMACVerifier",
30
+ "IntrospectMapper",
31
+ "JWKSVerifier",
32
+ "TokenMapper",
33
+ "TokenVerifier",
34
+ "X509Mapper",
35
+ ]
semql_auth/auth.py ADDED
@@ -0,0 +1,734 @@
1
+ # mypy: disable-error-code=unused-ignore
2
+ # pyright: reportAttributeAccessIssue=false, reportUnknownMemberType=false, reportUnusedImport=false, reportUnknownVariableType=false, reportUnknownArgumentType=false
3
+ """``TokenVerifier`` / ``TokenMapper`` Protocols + reference implementations.
4
+
5
+ Two sibling integration points that turn an inbound auth identity
6
+ into a fully-populated :class:`~semql.model.AuthContext`:
7
+
8
+ - :class:`TokenVerifier` is the JWT-shaped path
9
+ (``verify(token: str) -> AuthContext``). Reference impls:
10
+ :class:`HMACVerifier` (HS256/384/512) and :class:`JWKSVerifier`
11
+ (RS256 / ES256 against a JWKS endpoint).
12
+ - :class:`TokenMapper` is the structured-identity path
13
+ (``verify(identity: object) -> AuthContext``) for stacks that
14
+ don't return a string token. Reference impls:
15
+ :class:`DictMapper` (always available, no extras — for tests and
16
+ pre-shaped introspect responses),
17
+ :class:`IntrospectMapper` (RFC 7662 OAuth 2.0 Token Introspection
18
+ — ``httpx`` required, ``semql-auth[introspect]`` extras),
19
+ :class:`X509Mapper` (mTLS client cert subject / SAN mapping —
20
+ ``cryptography`` required, ``semql-auth[x509]`` extras).
21
+
22
+ Callers wire either protocol into their request middleware;
23
+ downstream code (compile, prompt, MCP) just receives the
24
+ ``AuthContext`` and never sees the token.
25
+
26
+ Reference claim mapping (used by ``HMACVerifier``,
27
+ ``JWKSVerifier``, ``DictMapper``, ``IntrospectMapper``):
28
+
29
+ - ``sub`` (or ``username``, or ``client_id`` for client-credentials
30
+ grants) → ``AuthContext.viewer_id``
31
+ - ``roles`` claim (list[str]) or RFC 7662 ``scope`` (space-delimited)
32
+ → ``AuthContext.roles``
33
+ - everything else → ``AuthContext.attrs``, with the original JSON
34
+ type preserved (list, bool, int, str, dict).
35
+
36
+ Convention (documented, not enforced): namespace claim names
37
+ k8s-style (e.g. ``acme/allowed_regions``) to avoid collisions
38
+ with standard JWT claims like ``sub`` / ``iss`` / ``exp``.
39
+
40
+ The Protocols are the integration point: callers with their own
41
+ auth stack implement either one without depending on this module.
42
+
43
+ ``HMACVerifier`` and ``JWKSVerifier`` require ``PyJWT``;
44
+ ``JWKSVerifier`` additionally requires ``httpx``; ``X509Mapper``
45
+ requires ``cryptography``. Each is import-guarded with an
46
+ actionable message — install the matching extras group to enable it.
47
+ """
48
+
49
+ from __future__ import annotations
50
+
51
+ import json
52
+ from collections.abc import Mapping
53
+ from typing import Any, Protocol, runtime_checkable
54
+
55
+ from semql.errors import AuthError
56
+ from semql.model import AuthContext
57
+
58
+ # Reserved claims — never copied into ``attrs`` so a token can't
59
+ # shadow the structural fields of ``AuthContext``. Note: ``iss``,
60
+ # ``aud``, ``exp``, ``iat``, ``nbf``, ``jti`` are NOT reserved
61
+ # here — they're preserved in ``attrs`` so callers can reference
62
+ # them (e.g. introspect-endpoint ``exp`` for cache TTL). The
63
+ # JWT path's PyJWT-decoded payload and the introspect path's
64
+ # flat JSON response share the same claim-mapping contract.
65
+ _RESERVED_CLAIMS = frozenset(
66
+ {
67
+ "sub", # mapped to AuthContext.viewer_id
68
+ "roles", # mapped to AuthContext.roles
69
+ "metadata", # mapped to AuthContext.metadata
70
+ }
71
+ )
72
+
73
+
74
def _payload_to_auth_context(payload: dict[str, Any]) -> AuthContext:
    """Build an ``AuthContext`` from an already-verified claims dict.

    Mapping rules:

    - ``sub`` (required, truthy) is stringified into ``viewer_id``.
    - ``roles`` (optional) must be a ``list`` of ``str``; it defaults to
      an empty list when absent.
    - ``metadata`` (optional) is stringified key-by-key into
      ``AuthContext.metadata``; a non-dict value is ignored.
    - Every claim not listed in ``_RESERVED_CLAIMS`` is copied into
      ``attrs`` unchanged. That includes registered JWT claims such as
      ``exp`` / ``iat``: this helper does not validate them, it only
      carries them along.

    Raises:
        AuthError: ``reason="missing_sub"`` when ``sub`` is absent or
            falsy; ``reason="bad_roles_claim"`` when ``roles`` is not a
            list of strings.
    """
    subject = payload.get("sub")
    if not subject:
        raise AuthError("Identity is missing required 'sub' claim.", reason="missing_sub")

    raw_roles: Any = payload.get("roles", [])
    roles_are_valid = isinstance(raw_roles, list) and all(
        isinstance(entry, str) for entry in raw_roles
    )
    if not roles_are_valid:
        raise AuthError(
            "Identity 'roles' claim must be a list[str].",
            reason="bad_roles_claim",
        )

    raw_metadata: Any = payload.get("metadata", {})
    metadata: dict[str, str] = {}
    if isinstance(raw_metadata, dict):
        # Keys and values are both coerced to ``str``.
        for key, value in raw_metadata.items():
            metadata[str(key)] = str(value)

    extra_claims: dict[str, Any] = {
        name: value for name, value in payload.items() if name not in _RESERVED_CLAIMS
    }
    return AuthContext(
        viewer_id=str(subject),
        roles=list(raw_roles),
        attrs=extra_claims,
        metadata=metadata,
    )
113
+
114
+
115
@runtime_checkable
class TokenVerifier(Protocol):
    """Structural interface for "bearer token string in, ``AuthContext`` out".

    Anything with a compatible ``verify`` method satisfies the protocol;
    no inheritance from this class is needed. Implementations signal an
    invalid, expired, or otherwise unverifiable token by raising
    :class:`~semql.errors.AuthError`.
    """

    def verify(self, token: str) -> AuthContext:
        """Return the ``AuthContext`` for ``token`` or raise ``AuthError``."""
        ...
126
+
127
+
128
@runtime_checkable
class TokenMapper(Protocol):
    """Structural interface for "structured identity in, ``AuthContext`` out".

    This is the counterpart of :class:`TokenVerifier` for auth stacks
    whose native identity is not a token string, for example an OAuth
    introspection response, an mTLS client certificate, a SAML assertion
    or an opaque session token.

    The parameter is typed ``object`` on purpose: each concrete mapper
    narrows it to the type it understands with a runtime check.

    Contract for implementations:

    - Return an :class:`~semql.model.AuthContext` for a valid identity.
    - Raise :class:`~semql.errors.AuthError` for malformed or
      unidentifiable input.
    - Never fall back to a "default" ``AuthContext``. A missing viewer id
      is an authentication error.

    Request middleware is expected to call one of ``TokenVerifier`` /
    ``TokenMapper``; code further down (compile, prompt, MCP) only ever
    receives the resulting ``AuthContext``.
    """

    def verify(self, identity: object) -> AuthContext:
        """Return the ``AuthContext`` for ``identity`` or raise ``AuthError``."""
        ...
157
+
158
+
159
class DictMapper:
    """Dependency-free :class:`TokenMapper` for ``Mapping``-shaped identities.

    Intended for tests, for middleware that has already decoded a token
    and hands over the claims dict, and for custom auth stacks that can
    expose their identity as a ``Mapping``.

    Claims are mapped exactly as on the JWT path: ``sub`` →
    ``viewer_id``, ``roles`` (list[str]) → ``roles``, ``metadata`` →
    ``metadata``, and everything else → ``attrs``.
    """

    def verify(self, identity: object) -> AuthContext:
        """Convert ``identity`` to an ``AuthContext``.

        Raises:
            AuthError: ``reason="bad_identity_type"`` when ``identity``
                is not a ``Mapping``; otherwise whatever
                ``_payload_to_auth_context`` raises for the claims.
        """
        if isinstance(identity, Mapping):
            # ``Mapping`` does not guarantee string keys, so stringify
            # them while copying into the plain dict the shared helper
            # expects.
            source: Any = identity
            claims: dict[str, Any] = dict((str(key), value) for key, value in source.items())
            return _payload_to_auth_context(claims)
        raise AuthError(
            "DictMapper.verify requires a Mapping-shaped identity "
            f"(got {type(identity).__name__}).",
            reason="bad_identity_type",
        )
190
+
191
+
192
+ # ---------------------------------------------------------------------------
193
+ # HMACVerifier
194
+ # ---------------------------------------------------------------------------
195
+
196
+
197
class HMACVerifier:
    """HMAC shared-secret JWT verification (HS256 / HS384 / HS512).

    Reference implementation of :class:`TokenVerifier`. Suits
    single-tenant deployments and any context where the platform
    issues its own tokens. For multi-tenant with rotating keys,
    use :class:`JWKSVerifier`.

    Args:
        secret: HMAC shared secret; must be non-empty. ``str`` values
            are UTF-8 encoded. RFC 7518 §3.2 requires a key at least as
            long as the hash output: 32 bytes for HS256, 48 for HS384,
            64 for HS512. NOTE(review): whether PyJWT rejects or only
            warns about shorter secrets depends on the installed
            version — confirm before relying on it.
        algorithm: One of ``"HS256"`` / ``"HS384"`` / ``"HS512"``.
            Defaults to ``"HS256"``.
        audience: Optional ``aud`` claim to enforce.
        issuer: Optional ``iss`` claim to enforce.

    Raises:
        ValueError: ``algorithm`` is not one of the three HMAC
            algorithms, or ``secret`` is empty.
        ImportError: PyJWT is not installed.
    """

    # Closed allow-list. Validating here means ``"none"`` or an
    # asymmetric algorithm name can never reach ``jwt.decode`` paired
    # with the shared secret.
    _SUPPORTED_ALGORITHMS = frozenset({"HS256", "HS384", "HS512"})

    def __init__(
        self,
        secret: bytes | str,
        *,
        algorithm: str = "HS256",
        audience: str | None = None,
        issuer: str | None = None,
    ) -> None:
        # Argument validation runs before the optional-dependency guard
        # so configuration mistakes surface even without PyJWT.
        if algorithm not in self._SUPPORTED_ALGORITHMS:
            raise ValueError(
                f"HMACVerifier only supports HS256 / HS384 / HS512 (got {algorithm!r})."
            )
        if not secret:
            raise ValueError("HMACVerifier requires a non-empty secret.")
        try:
            import jwt  # noqa: F401 — import-time guard
        except ImportError as exc:
            # The wheel metadata declares PyJWT under the ``jwks`` extra
            # of the ``semql-auth`` distribution.
            raise ImportError(
                "HMACVerifier requires PyJWT. Install with `pip install semql-auth[jwks]`."
            ) from exc
        if isinstance(secret, str):
            secret = secret.encode("utf-8")
        self._secret = secret
        self._algorithm = algorithm
        self._audience = audience
        self._issuer = issuer

    def verify(self, token: str) -> AuthContext:
        """Verify ``token`` and map its payload to an ``AuthContext``.

        Raises:
            AuthError: with ``reason`` one of ``expired``,
                ``bad_signature``, ``bad_audience``, ``bad_issuer``,
                ``malformed`` or ``invalid``, plus anything raised by
                ``_payload_to_auth_context`` for the decoded claims.
        """
        import jwt

        decode_kwargs: dict[str, Any] = {
            "key": self._secret,
            "algorithms": [self._algorithm],
        }
        # ``aud`` / ``iss`` are only passed when configured.
        if self._audience is not None:
            decode_kwargs["audience"] = self._audience
        if self._issuer is not None:
            decode_kwargs["issuer"] = self._issuer
        try:
            payload = jwt.decode(token, **decode_kwargs)
        # Specific handlers come first; ``InvalidTokenError`` is the
        # catch-all for the remaining token-validation failures.
        except jwt.ExpiredSignatureError as exc:
            raise AuthError("Token has expired.", reason="expired") from exc
        except jwt.InvalidSignatureError as exc:
            raise AuthError("Token signature is invalid.", reason="bad_signature") from exc
        except jwt.InvalidAudienceError as exc:
            raise AuthError("Token audience is invalid.", reason="bad_audience") from exc
        except jwt.InvalidIssuerError as exc:
            raise AuthError("Token issuer is invalid.", reason="bad_issuer") from exc
        except jwt.DecodeError as exc:
            raise AuthError("Token is malformed.", reason="malformed") from exc
        except jwt.InvalidTokenError as exc:
            raise AuthError(f"Token failed verification: {exc}", reason="invalid") from exc
        return _payload_to_auth_context(payload)
264
+
265
+
266
+ # ---------------------------------------------------------------------------
267
+ # JWKSVerifier — RS256 / ES256 against a JWKS endpoint
268
+ # ---------------------------------------------------------------------------
269
+
270
+
271
class JWKSVerifier:
    """Asymmetric JWT verification (RS256 / ES256 …) against a JWKS endpoint.

    Reference implementation of :class:`TokenVerifier` for the case
    where an external identity provider signs tokens with an
    asymmetric key. The JWKS document is fetched over HTTP and cached
    for ``ttl`` seconds. A token whose ``kid`` is missing from a
    *cached* document triggers one forced refetch, so key rotations
    between TTL windows are picked up.

    Args:
        jwks_url: URL of the JWKS endpoint (e.g. an OIDC provider's
            ``/jwks.json`` route).
        algorithms: Allowed signature algorithms. Defaults to
            ``("RS256",)``. ``none`` and the symmetric ``HS*`` family
            are rejected: the key material comes from a remote
            document, so only asymmetric algorithms make sense here.
        audience: Optional ``aud`` claim to enforce.
        issuer: Optional ``iss`` claim to enforce.
        ttl: Seconds the cached JWKS lives before refetching. Default
            300 (5 minutes). Set ``0`` to disable caching.

    Raises:
        ValueError: ``algorithms`` is empty or contains ``none`` / an
            ``HS*`` algorithm.
        ImportError: PyJWT or httpx is not installed.
    """

    def __init__(
        self,
        jwks_url: str,
        *,
        algorithms: tuple[str, ...] = ("RS256",),
        audience: str | None = None,
        issuer: str | None = None,
        ttl: int = 300,
    ) -> None:
        # A bare string would otherwise be iterated character by character.
        if isinstance(algorithms, str):
            algorithms = (algorithms,)
        algorithms = tuple(algorithms)
        if not algorithms:
            raise ValueError("JWKSVerifier requires at least one allowed algorithm.")
        lowered = [name.lower() for name in algorithms]
        if "none" in lowered:
            raise ValueError(
                "JWKSVerifier must never accept the 'none' algorithm — "
                "this is a security invariant, not a config knob."
            )
        if any(name.startswith("hs") for name in lowered):
            raise ValueError(
                "JWKSVerifier only supports asymmetric algorithms; "
                "use HMACVerifier for HS256 / HS384 / HS512."
            )
        try:
            import httpx  # noqa: F401
            import jwt  # noqa: F401
        except ImportError as exc:
            # The wheel metadata declares both dependencies under the
            # ``jwks`` extra of the ``semql-auth`` distribution.
            raise ImportError(
                "JWKSVerifier requires PyJWT + httpx. "
                "Install with `pip install semql-auth[jwks]`."
            ) from exc
        self._jwks_url = jwks_url
        self._algorithms = algorithms
        self._audience = audience
        self._issuer = issuer
        self._ttl = ttl
        self._cached_jwks: dict[str, Any] | None = None
        self._cached_at: float = 0.0

    def _fetch_jwks(self) -> dict[str, Any]:
        """Return the JWKS document, from cache while it is fresh.

        When the cache is still valid the *same object* that is stored
        in ``self._cached_jwks`` is returned; ``verify`` relies on that
        identity to tell a cache hit from a fresh download. HTTP errors
        from ``httpx`` propagate unchanged.
        """
        import time

        import httpx

        now = time.monotonic()
        cached = self._cached_jwks
        if cached is not None and self._ttl > 0 and (now - self._cached_at) < self._ttl:
            return cached
        response = httpx.get(self._jwks_url, timeout=10.0)
        response.raise_for_status()
        new_jwks: dict[str, Any] = response.json()
        self._cached_jwks = new_jwks
        self._cached_at = now
        return new_jwks

    @staticmethod
    def _select_key(jwks: object, kid: object) -> dict[str, Any] | None:
        """Return the JWK dict whose ``kid`` equals ``kid``, else ``None``.

        Tolerates a malformed document: a non-dict document, a non-list
        ``keys`` member and non-dict entries are all treated as "no
        match" instead of raising.
        """
        if not isinstance(jwks, dict):
            return None
        candidates = jwks.get("keys", [])
        if not isinstance(candidates, list):
            return None
        for candidate in candidates:
            if isinstance(candidate, dict) and candidate.get("kid") == kid:
                return candidate
        return None

    def verify(self, token: str) -> AuthContext:
        """Verify ``token`` against the JWKS and map it to an ``AuthContext``.

        Raises:
            AuthError: with ``reason`` one of ``malformed``,
                ``missing_kid``, ``unknown_kid``, ``bad_jwk``,
                ``expired``, ``bad_signature``, ``bad_audience``,
                ``bad_issuer`` or ``invalid``, plus anything raised by
                ``_payload_to_auth_context`` for the decoded claims.
        """
        import jwt

        try:
            unverified_header = jwt.get_unverified_header(token)
        except jwt.InvalidTokenError as exc:
            # ``DecodeError`` is a subclass; catching the base also covers
            # header-validation failures such as a non-string ``kid``.
            raise AuthError("Token header is malformed.", reason="malformed") from exc
        kid = unverified_header.get("kid")
        if not kid:
            raise AuthError(
                "Token header is missing 'kid' (key id).",
                reason="missing_kid",
            )

        previously_cached = self._cached_jwks
        jwks = self._fetch_jwks()
        key = self._select_key(jwks, kid)
        if key is None and jwks is previously_cached:
            # The miss came from a cached document — refetch once before
            # giving up, since key rotations land between TTL windows.
            # When the document was downloaded just now, a second request
            # would return the same keys, so it is skipped.
            self._cached_jwks = None
            key = self._select_key(self._fetch_jwks(), kid)
        if key is None:
            raise AuthError(
                f"No JWKS key matches token kid={kid!r}.",
                reason="unknown_kid",
            )
        if key.get("kty") == "oct":
            # Symmetric keys have no place in an asymmetric verifier.
            raise AuthError(
                f"JWKS key kid={kid!r} is a symmetric key and cannot be used.",
                reason="bad_jwk",
            )

        try:
            # ``PyJWK`` picks the key class from the JWK's own ``kty`` /
            # ``crv`` / ``alg`` members, so RSA and EC keys both load.
            public_key: Any = jwt.PyJWK(key).key
        except (jwt.PyJWTError, ValueError, TypeError) as exc:
            raise AuthError(
                f"JWKS key kid={kid!r} is not usable: {exc}",
                reason="bad_jwk",
            ) from exc

        decode_kwargs: dict[str, Any] = {
            "key": public_key,
            "algorithms": list(self._algorithms),
        }
        if self._audience is not None:
            decode_kwargs["audience"] = self._audience
        if self._issuer is not None:
            decode_kwargs["issuer"] = self._issuer
        try:
            payload = jwt.decode(token, **decode_kwargs)
        # Specific handlers come first; ``InvalidTokenError`` is the
        # catch-all for the remaining token-validation failures.
        except jwt.ExpiredSignatureError as exc:
            raise AuthError("Token has expired.", reason="expired") from exc
        except jwt.InvalidSignatureError as exc:
            raise AuthError("Token signature is invalid.", reason="bad_signature") from exc
        except jwt.InvalidAudienceError as exc:
            raise AuthError("Token audience is invalid.", reason="bad_audience") from exc
        except jwt.InvalidIssuerError as exc:
            raise AuthError("Token issuer is invalid.", reason="bad_issuer") from exc
        except jwt.DecodeError as exc:
            raise AuthError("Token is malformed.", reason="malformed") from exc
        except jwt.InvalidTokenError as exc:
            raise AuthError(f"Token failed verification: {exc}", reason="invalid") from exc
        return _payload_to_auth_context(payload)
389
+
390
+
391
+ # ---------------------------------------------------------------------------
392
+ # IntrospectMapper — RFC 7662 OAuth 2.0 Token Introspection
393
+ # ---------------------------------------------------------------------------
394
+
395
+
396
class _IntrospectHttpClient(Protocol):
    """Minimal HTTP-client shape :class:`IntrospectMapper` depends on.

    The default implementation wraps ``httpx``; tests can supply any
    object with a matching ``post`` method.
    """

    def post(
        self,
        url: str,
        *,
        data: dict[str, str],
        auth: tuple[str, str],
        timeout: float,
    ) -> Mapping[str, object]:
        """POST form ``data`` to ``url`` and return the decoded JSON object."""
        ...
409
+
410
+
411
def _default_introspect_client() -> _IntrospectHttpClient:
    """Build the default ``httpx``-backed introspection client.

    ``httpx`` is imported here rather than at module import, so a
    missing dependency only fails when an :class:`IntrospectMapper` is
    constructed without an explicit ``http_client``.

    Returns:
        An object whose ``post`` method sends the form and returns the
        decoded JSON body of a successful response.

    Raises:
        ImportError: ``httpx`` is not installed.
    """
    try:
        import httpx  # type: ignore[import-not-found]
    except ImportError as exc:
        # The wheel metadata declares httpx under the ``introspect``
        # extra of the ``semql-auth`` distribution.
        raise ImportError(
            "IntrospectMapper requires httpx. "
            "Install with `pip install semql-auth[introspect]`."
        ) from exc

    class _HttpxAdapter:
        """Adapt module-level ``httpx.post`` to ``_IntrospectHttpClient``."""

        def post(
            self,
            url: str,
            *,
            data: dict[str, str],
            auth: tuple[str, str],
            timeout: float,
        ) -> Mapping[str, object]:
            response = httpx.post(url, data=data, auth=auth, timeout=timeout)
            # Fail on 4xx / 5xx. Without this, a JSON error body such as
            # ``{"error": "invalid_client"}`` would be parsed like a
            # normal answer.
            response.raise_for_status()
            return response.json()  # type: ignore[no-any-return]

    return _HttpxAdapter()
438
+
439
+
440
class IntrospectMapper:
    """OAuth 2.0 Token Introspection (RFC 7662) → ``AuthContext``.

    Reference implementation of :class:`TokenMapper` for the
    case where the auth stack has an OAuth 2.0 introspection
    endpoint. ``verify(access_token)`` POSTs the token to the
    configured endpoint with HTTP Basic client credentials,
    parses the JSON response, and maps it to an ``AuthContext``.

    Requires ``httpx`` unless an ``http_client`` is injected — guarded
    at construction time with an actionable ``ImportError``
    (``pip install semql-auth[introspect]``).

    Args:
        introspect_url: The introspection endpoint URL (e.g.
            ``https://idp.example.com/oauth2/introspect``).
        client_id: The OAuth client's id.
        client_secret: The OAuth client's secret.
        http_client: An optional HTTP client. Defaults to
            ``httpx``. Tests inject a fake with the shape
            ``post(url, *, data, auth, timeout) -> Any``.
        timeout: HTTP request timeout in seconds. Default 5.0.
    """

    def __init__(
        self,
        introspect_url: str,
        *,
        client_id: str,
        client_secret: str,
        http_client: _IntrospectHttpClient | None = None,
        timeout: float = 5.0,
    ) -> None:
        self._introspect_url = introspect_url
        self._client_id = client_id
        self._client_secret = client_secret
        self._timeout = timeout
        self._http_client: _IntrospectHttpClient = (
            http_client if http_client is not None else _default_introspect_client()
        )

    @staticmethod
    def _is_active(response: Mapping[str, object]) -> bool:
        """Return ``True`` only when ``active`` is the boolean ``True``.

        RFC 7662 §2.2 defines ``active`` as a boolean. A truthiness test
        would accept non-boolean values such as the string ``"false"``,
        so anything other than a real ``True`` counts as inactive.
        """
        return response.get("active") is True

    @staticmethod
    def _claims_from_response(response: Mapping[str, object]) -> dict[str, Any]:
        """Reshape an introspection response into a JWT-style claims dict.

        - When ``sub`` is missing or empty it falls back to ``username``
          and then to ``client_id``.
        - A string ``scope`` is split on whitespace and *replaces* any
          ``roles`` claim.

        The input mapping is never mutated.
        """
        claims: dict[str, Any] = dict(response)
        if not claims.get("sub"):
            fallback = claims.get("username") or claims.get("client_id")
            if fallback:
                claims["sub"] = fallback
        scope_value = claims.get("scope")
        if isinstance(scope_value, str):
            claims["roles"] = scope_value.split()
        return claims

    def verify(self, identity: object) -> AuthContext:
        """Introspect the access token ``identity`` and map the answer.

        Raises:
            AuthError: ``bad_identity_type`` for a non-string identity;
                ``introspect_failed`` when the HTTP client raises;
                ``introspect_bad_response`` when the answer is not a
                JSON object; ``inactive`` when ``active`` is not
                ``true``; plus anything raised by
                ``_payload_to_auth_context`` for the reshaped claims.
        """
        # The IntrospectMapper only handles string access tokens.
        if not isinstance(identity, str):
            raise AuthError(
                "IntrospectMapper.verify requires a string access token "
                f"(got {type(identity).__name__}).",
                reason="bad_identity_type",
            )
        try:
            # Annotated ``object`` (not ``Mapping``) so the runtime
            # ``isinstance`` below is not narrowed away statically.
            response: object = self._http_client.post(  # type: ignore[assignment]
                self._introspect_url,
                data={"token": identity, "token_type_hint": "access_token"},
                auth=(self._client_id, self._client_secret),
                timeout=self._timeout,
            )
        except Exception as exc:
            # Deliberately broad: this wraps whatever an injected client
            # raises and keeps the cause chained.
            raise AuthError(
                f"Introspection endpoint returned an error: {exc}",
                reason="introspect_failed",
            ) from exc
        # Checked at runtime because fakes and custom clients may not
        # honour the static ``Mapping`` contract.
        if not isinstance(response, Mapping):  # pyright: ignore[reportUnnecessaryIsInstance]
            raise AuthError(
                "Introspection endpoint did not return a JSON object.",
                reason="introspect_bad_response",
            )
        if not self._is_active(response):
            raise AuthError(
                "Introspection reported token inactive (expired, revoked, or unknown).",
                reason="inactive",
            )
        # Reuse the shared mapping so claims are handled as on the JWT path.
        return _payload_to_auth_context(self._claims_from_response(response))
554
+
555
+
556
+ # ---------------------------------------------------------------------------
557
+ # X509Mapper — mTLS client cert subject / SAN mapping
558
+ # ---------------------------------------------------------------------------
559
+
560
+
561
class _X509Cert(Protocol):
    """Read-only attribute shape :class:`X509Mapper` works with.

    Callers decode the client certificate into any object exposing
    these six attributes; tests can pass a plain stand-in object, so
    the mapper itself has no hard dependency on ``cryptography``.
    """

    @property
    def subject_cn(self) -> str:
        """Subject Common Name."""
        ...

    @property
    def subject_ou(self) -> tuple[str, ...]:
        """Subject Organizational Unit values."""
        ...

    @property
    def subject_o(self) -> str | None:
        """Subject Organization, if any."""
        ...

    @property
    def subject_c(self) -> str | None:
        """Subject Country, if any."""
        ...

    @property
    def sans(self) -> tuple[str, ...]:
        """Subject Alternative Name entries as strings."""
        ...

    @property
    def fingerprint(self) -> str | None:
        """Certificate fingerprint, if the caller supplies one."""
        ...
585
+
586
+
587
class X509Mapper:
    """Map an mTLS client x509 cert to an ``AuthContext``.

    Reference implementation of :class:`TokenMapper` for
    mTLS deployments where the caller's middleware extracts
    the client cert from the TLS handshake. ``verify(cert)``
    reads the cert's subject Common Name (preferred) or its
    Subject Alternative Names (URI > DNS > email) and maps it
    to ``viewer_id``.

    The mapper is structurally typed — the call site is
    responsible for decoding the cert into a shape with the
    right attributes (``subject_cn``, ``subject_ou``,
    ``subject_o``, ``subject_c``, ``sans``, ``fingerprint``).
    The ``cryptography`` adapter is shipped as a small
    reference function in ``semql_auth.auth._cryptography_adapter``
    (separate file, import-guarded) for the common case;
    call sites with their own cert decoder don't need it.

    Args:
        ou_to_role: If ``True``, each ``OU`` (Organizational
            Unit) in the cert subject becomes a role in the
            resulting ``AuthContext``. Default ``False`` —
            ``OU``s land in ``attrs`` instead. Opt in to the
            structural mapping when your PKI uses ``OU`` as
            the team / project grouping.
    """

    def __init__(self, *, ou_to_role: bool = False) -> None:
        self._ou_to_role = ou_to_role

    def verify(self, identity: object) -> AuthContext:
        """Derive an ``AuthContext`` from a cert-like ``identity``.

        Raises:
            AuthError: ``bad_cert_shape`` when ``identity`` exposes
                neither ``subject_cn`` nor ``sans``; ``no_identity``
                when no viewer id can be derived from them.
        """
        cert = self._coerce(identity)
        viewer_id = self._viewer_id(cert)
        if not viewer_id:
            raise AuthError(
                "X509 cert has no usable identity (empty subject CN and no "
                "URI / DNS / email SAN). Cannot derive viewer_id.",
                reason="no_identity",
            )
        # Only attributes that are actually present are recorded.
        attrs: dict[str, Any] = {}
        if cert.subject_o is not None:
            attrs["subject_o"] = cert.subject_o
        if cert.subject_c is not None:
            attrs["subject_c"] = cert.subject_c
        if cert.subject_ou:
            attrs["subject_ou"] = list(cert.subject_ou)
        if cert.fingerprint is not None:
            attrs["fingerprint"] = cert.fingerprint
        if cert.sans:
            attrs["sans"] = list(cert.sans)
        roles: list[str] = list(cert.subject_ou) if self._ou_to_role and cert.subject_ou else []
        return AuthContext(viewer_id=viewer_id, roles=roles, attrs=attrs)

    @staticmethod
    def _as_str_tuple(value: object) -> tuple[str, ...]:
        """Normalise a multi-valued cert attribute to a tuple of strings.

        A lone string is wrapped rather than iterated, so
        ``"engineering"`` stays one OU instead of eleven one-character
        roles. ``None``, non-iterables, non-string items and empty
        strings are dropped.
        """
        if value is None:
            return ()
        if isinstance(value, str):
            return (value,) if value else ()
        try:
            items = tuple(value)  # type: ignore[arg-type]
        except TypeError:
            return ()
        return tuple(item for item in items if isinstance(item, str) and item)

    @staticmethod
    def _coerce(identity: object) -> _X509Cert:
        """Copy the duck-typed ``identity`` into a uniform ``_CertAdapter``.

        An object exposing neither identity-bearing attribute
        (``subject_cn``, ``sans``) is rejected up front with
        ``bad_cert_shape``. That covers ``None`` or an undecoded
        certificate object, and gives the caller a clearer error than
        ``no_identity``.
        """
        if not (hasattr(identity, "subject_cn") or hasattr(identity, "sans")):
            raise AuthError(
                "X509Mapper.verify requires a cert-like object with "
                "attributes subject_cn / subject_ou / subject_o / "
                f"subject_c / sans / fingerprint. (got {type(identity).__name__})",
                reason="bad_cert_shape",
            )
        cn = getattr(identity, "subject_cn", "")
        if not isinstance(cn, str):
            cn = ""
        return _CertAdapter(  # type: ignore[return-value]
            subject_cn=cn,
            subject_ou=X509Mapper._as_str_tuple(getattr(identity, "subject_ou", ())),
            subject_o=getattr(identity, "subject_o", None),
            subject_c=getattr(identity, "subject_c", None),
            sans=X509Mapper._as_str_tuple(getattr(identity, "sans", ())),
            fingerprint=getattr(identity, "fingerprint", None),
        )

    @staticmethod
    def _viewer_id(cert: _X509Cert) -> str:
        """Pick the viewer id: CN, then URI SAN, DNS SAN, email SAN.

        Returns ``""`` when nothing matches; ``verify`` turns that into
        an ``AuthError``.
        """
        # CN first is this package's convention for client certs. RFC
        # 6125 §6.4.4 concerns *server* identity checks and treats CN as
        # a last resort there, so the ordering here is a local choice.
        # Reordering it would change existing viewer ids.
        if cert.subject_cn:
            return cert.subject_cn
        for san in cert.sans:
            if san.startswith(("spiffe://", "https://", "urn:")):
                return san
        for san in cert.sans:
            if "." in san and "@" not in san and ":" not in san:
                # DNS-style heuristic: contains a dot, no @, no colon.
                return san
        for san in cert.sans:
            if "@" in san:
                # Email-style: use the *full* address as the viewer
                # id. The local part alone is not unique —
                # alice@a.com and alice@b.com are different
                # principals, and collapsing them to "alice" would
                # cross-wire row-level security between tenants.
                return san
        return ""
691
+
692
+
693
class _CertAdapter:
    """Plain slotted holder for the six cert attributes.

    ``X509Mapper`` copies a duck-typed cert into this class so the rest
    of the mapper can read the attributes uniformly. Internal helper —
    not exported.
    """

    __slots__ = ("subject_cn", "subject_ou", "subject_o", "subject_c", "sans", "fingerprint")

    def __init__(
        self,
        *,
        subject_cn: str,
        subject_ou: tuple[str, ...],
        subject_o: str | None,
        subject_c: str | None,
        sans: tuple[str, ...],
        fingerprint: str | None,
    ) -> None:
        # Subject components.
        self.subject_cn, self.subject_ou = subject_cn, subject_ou
        self.subject_o, self.subject_c = subject_o, subject_c
        # Alternative names and fingerprint.
        self.sans, self.fingerprint = sans, fingerprint
723
+
724
+
725
+ __all__ = [
726
+ "DictMapper",
727
+ "HMACVerifier",
728
+ "IntrospectMapper",
729
+ "JWKSVerifier",
730
+ "TokenMapper",
731
+ "TokenVerifier",
732
+ "X509Mapper",
733
+ "_payload_to_auth_context",
734
+ ]
semql_auth/py.typed ADDED
File without changes
@@ -0,0 +1,83 @@
1
+ Metadata-Version: 2.4
2
+ Name: semql-auth
3
+ Version: 0.3.0
4
+ Summary: Credential→identity adapters for semql: bearer-token verifiers (HMAC, JWKS) and mappers (dict, OAuth2 introspection, mTLS x509) that produce a semql AuthContext.
5
+ Author: Nikhil Pallamreddy
6
+ Author-email: Nikhil Pallamreddy <nikhil.pallamreddy+git@gmail.com>
7
+ License-Expression: BSD-3-Clause
8
+ License-File: LICENSE
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Database
16
+ Classifier: Topic :: Security
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Classifier: Typing :: Typed
19
+ Requires-Dist: semql>=0.3.0,<0.4
20
+ Requires-Dist: httpx>=0.27 ; extra == 'introspect'
21
+ Requires-Dist: pyjwt[crypto]>=2.8 ; extra == 'jwks'
22
+ Requires-Dist: httpx>=0.27 ; extra == 'jwks'
23
+ Requires-Dist: cryptography>=42 ; extra == 'x509'
24
+ Requires-Python: >=3.12
25
+ Project-URL: Homepage, https://github.com/npalladium/semql
26
+ Project-URL: Repository, https://github.com/npalladium/semql
27
+ Project-URL: Issues, https://github.com/npalladium/semql/issues
28
+ Provides-Extra: introspect
29
+ Provides-Extra: jwks
30
+ Provides-Extra: x509
31
+ Description-Content-Type: text/markdown
32
+
33
+ # semql-auth
34
+
35
+ Credential→identity adapters for [semql](https://github.com/npalladium/semql).
36
+
37
+ `semql` threads an `AuthContext` (identity + roles) through
38
+ `Catalog.compile(viewer=...)` to enforce `required_roles` cube/field
39
+ visibility and `security_sql` row-level scoping. This package turns a
40
+ transport credential into that `AuthContext`:
41
+
42
+ - **`TokenVerifier`** — verify a bearer token and return the resulting `AuthContext`.
43
+ - `HMACVerifier` — symmetric HS256/384/512.
44
+ - `JWKSVerifier` — asymmetric RS/ES, fetching keys from a JWKS URL
45
+ (needs the `jwks` extra: `pip install semql-auth[jwks]`).
46
+ - **`TokenMapper`** — map a verified credential to an `AuthContext`.
47
+ - `DictMapper` — static, in-memory `token → AuthContext` table.
48
+ - `IntrospectMapper` — OAuth2 token introspection (`introspect` extra).
49
+ - `X509Mapper` — derive identity from an mTLS client cert subject / SAN
50
+ (the reference cryptography decoder needs the `x509` extra).
51
+
52
+ `AuthContext` itself lives in `semql.model` — the compiler depends on it,
53
+ so it stays in the pure core. This package holds only the adapters, which
54
+ carry optional third-party dependencies (PyJWT, httpx, cryptography) that
55
+ the core shouldn't.
56
+
57
+ ## Install
58
+
59
+ ```sh
60
+ pip install semql-auth
61
+ pip install semql-auth[jwks] # JWKS verifier (PyJWT + httpx)
62
+ pip install semql-auth[introspect] # OAuth2 introspection
63
+ pip install semql-auth[x509] # mTLS client cert decoder
64
+ ```
65
+
66
+ ## Quick start
67
+
68
+ ```python
69
+ from semql import Catalog
70
+ from semql_auth import HMACVerifier, DictMapper
71
+
72
+ verifier = HMACVerifier(secret="...")
73
+ mapper = DictMapper({"tok-abc": ...})
74
+ # In your transport: verify the token, map to AuthContext, then
75
+ # catalog.compile(query, viewer=auth_context)
76
+ ```
77
+
78
+ See [API reference](../../docs/api/semql_auth.md) for the full adapter
79
+ surface.
80
+
81
+ ## License
82
+
83
+ BSD-3-Clause.
@@ -0,0 +1,7 @@
1
+ semql_auth/__init__.py,sha256=jNobqTFJKOVZntKCX_burgQbwq6ZF82-kJQO1-srHEE,941
2
+ semql_auth/auth.py,sha256=tSYxleLG0tmQfJGZ6_mSF73a8p28jbU5DZ3JjeAn_r8,29242
3
+ semql_auth/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ semql_auth-0.3.0.dist-info/licenses/LICENSE,sha256=AdcAzanKVr3cVSrhBpG6gytjG0Ss1SBTQDAavLe0CRc,1505
5
+ semql_auth-0.3.0.dist-info/WHEEL,sha256=wXwAVsgVaOZ_pwDFqQm5Rd6PID-Fc74nkLc8X8gHiDo,81
6
+ semql_auth-0.3.0.dist-info/METADATA,sha256=dUcIw8voZcaKq4uGc16Ypo-A8Cq4mlWm6MMAU52PAvM,3192
7
+ semql_auth-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.11.19
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2026, Nikhil Pallamreddy
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.