leancontext 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ """Run LeanContext's reduction alongside Anthropic's native context editing.
2
+
3
+ LeanContext reduces tool outputs by content on the way in; Anthropic's context
4
+ editing clears old tool results by age as the window grows. They're complementary,
5
+ and this module turns both on for one client:
6
+
7
+ from leancontext.integrations.anthropic_native import wrap_anthropic_native
8
+ client = wrap_anthropic_native(anthropic.Anthropic(),
9
+ trigger_input_tokens=30000, keep_tool_uses=3)
10
+ # every messages.create now: (1) LeanContext-reduces tool_result blocks,
11
+ # (2) enables clear_tool_uses_20250919,
12
+ # (3) sends the context-management beta header.
13
+
14
+ Schema verified against platform.claude.com/docs (context-editing), 2026-06.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from collections.abc import Iterable
20
+ from typing import Any
21
+
22
+ from ._common import wrap_messages_create
23
+
24
#: Beta header required to enable context management on the Messages API.
BETA_HEADER = "context-management-2025-06-27"

#: Tool-result clearing strategy identifier (verbatim from the API).
CLEAR_TOOL_USES = "clear_tool_uses_20250919"


def context_management(
    *,
    trigger_input_tokens: int | None = None,
    keep_tool_uses: int | None = None,
    clear_at_least_input_tokens: int | None = None,
    exclude_tools: Iterable[str] | None = None,
    clear_tool_inputs: bool | None = None,
) -> dict:
    """Build the ``context_management`` request param for tool-result clearing.

    Every argument is optional; an argument left as ``None`` is simply not
    written into the edit. With no args this returns the minimal
    ``{"edits": [{"type": "clear_tool_uses_20250919"}]}``.

    Args:
        trigger_input_tokens: Written as ``trigger`` (type ``input_tokens``).
        keep_tool_uses: Written as ``keep`` (type ``tool_uses``).
        clear_at_least_input_tokens: Written as ``clear_at_least``
            (type ``input_tokens``).
        exclude_tools: Tool names written as the ``exclude_tools`` list. A
            single bare string is accepted and treated as one tool name.
        clear_tool_inputs: Written as the ``clear_tool_inputs`` boolean.

    Returns:
        A dict of the form ``{"edits": [edit]}``.
    """
    edit: dict[str, Any] = {"type": CLEAR_TOOL_USES}
    if trigger_input_tokens is not None:
        edit["trigger"] = {"type": "input_tokens", "value": int(trigger_input_tokens)}
    if keep_tool_uses is not None:
        edit["keep"] = {"type": "tool_uses", "value": int(keep_tool_uses)}
    if clear_at_least_input_tokens is not None:
        edit["clear_at_least"] = {"type": "input_tokens", "value": int(clear_at_least_input_tokens)}
    if exclude_tools is not None:
        # A str is itself an iterable of characters, so list("web_search")
        # would produce one "tool" per letter. Treat it as a single name.
        if isinstance(exclude_tools, str):
            edit["exclude_tools"] = [exclude_tools]
        else:
            edit["exclude_tools"] = list(exclude_tools)
    if clear_tool_inputs is not None:
        edit["clear_tool_inputs"] = bool(clear_tool_inputs)
    return {"edits": [edit]}


def beta_headers(extra: dict | None = None) -> dict:
    """Return a copy of ``extra`` with the context-management beta flag enabled.

    ``extra`` itself is never mutated. If it already carries an
    ``anthropic-beta`` header (matched case-insensitively) holding other flags,
    the context-management flag is appended to that comma-separated list rather
    than dropped, so the caller's own beta features keep working.

    Args:
        extra: Existing request headers, or ``None``.

    Returns:
        A new headers dict that includes :data:`BETA_HEADER`.
    """
    headers = dict(extra or {})
    # Header names are case-insensitive; reuse the caller's spelling if present.
    existing_key = next(
        (k for k in headers if isinstance(k, str) and k.lower() == "anthropic-beta"),
        None,
    )
    if existing_key is None:
        headers["anthropic-beta"] = BETA_HEADER
        return headers

    current = headers[existing_key]
    if not isinstance(current, str):
        # Not a plain header value (e.g. a sentinel object); leave it alone.
        return headers

    flags = [flag.strip() for flag in current.split(",") if flag.strip()]
    if BETA_HEADER not in flags:
        flags.append(BETA_HEADER)
        headers[existing_key] = ",".join(flags)
    return headers
63
+
64
+
65
def wrap_anthropic_native(client: Any, *, reduce: bool = True, send_beta: bool = True, **cm) -> Any:
    """Patch ``client.messages.create`` to combine reduction with native clearing.

    The ``cm`` keyword arguments are handed to :func:`context_management` once,
    up front. Before each wrapped call the resulting config is filled in as
    ``context_management`` unless the caller already supplied one, and, when
    ``send_beta`` is true, ``extra_headers`` is rebuilt via :func:`beta_headers`.

    Patching is fail-open: if the client cannot be wrapped, it is returned
    unmodified. The same ``client`` object is always returned.
    """
    native_config = context_management(**cm)

    def inject(kwargs: dict) -> None:
        # A caller-provided context_management always wins over ours.
        if "context_management" not in kwargs:
            kwargs["context_management"] = native_config
        if not send_beta:
            return
        kwargs["extra_headers"] = beta_headers(kwargs.get("extra_headers"))

    try:
        original_create = client.messages.create
        client.messages.create = wrap_messages_create(
            original_create, fmt="anthropic", opts={}, reduce=reduce, before=inject
        )
    except Exception:
        pass  # fail open
    return client
@@ -0,0 +1,58 @@
1
+ """SDK client wrappers for OpenAI, Anthropic, and Gemini.
2
+
3
+ Wraps the provider's call (OpenAI ``chat.completions.create``, Anthropic
4
+ ``messages.create``, Gemini ``models.generate_content``) so tool outputs in the
5
+ outbound request are reduced before they're sent. Contract-preserving and
6
+ fail-open: anything unexpected leaves the original call untouched. Reductions are
7
+ deterministic, so the prompt-cache prefix stays stable.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Any
13
+
14
+ from ._common import wrap_messages_create
15
+
16
+
17
def wrap_openai(client: Any, **opts) -> Any:
    """Patch ``client.chat.completions.create`` with the reducing wrapper.

    ``opts`` is passed through as the wrapper's ``opts``. Fail-open: any error
    while patching leaves the client as it was. Returns the same ``client``.
    """
    try:
        completions = client.chat.completions
        reducing_create = wrap_messages_create(completions.create, fmt="openai", opts=opts)
        completions.create = reducing_create
    except Exception:
        pass  # fail open
    return client
25
+
26
+
27
def wrap_anthropic(client: Any, **opts) -> Any:
    """Patch ``client.messages.create`` with the reducing wrapper.

    ``opts`` is passed through as the wrapper's ``opts``. Fail-open: any error
    while patching leaves the client as it was. Returns the same ``client``.
    """
    try:
        original_create = client.messages.create
        client.messages.create = wrap_messages_create(
            original_create, fmt="anthropic", opts=opts
        )
    except Exception:
        pass  # fail open
    return client
34
+
35
+
36
def wrap_gemini(client: Any, **opts) -> Any:
    """Patch ``client.models.generate_content`` with the reducing wrapper.

    The wrapper is told the request payload lives under ``contents``. Fail-open:
    any error while patching leaves the client as it was. Returns ``client``.
    """
    try:
        models = client.models
        reducing_generate = wrap_messages_create(
            models.generate_content, fmt="gemini", opts=opts, key="contents"
        )
        models.generate_content = reducing_generate
    except Exception:
        pass  # fail open
    return client
46
+
47
+
48
def looks_like_openai(obj: Any) -> bool:
    """Return True when ``obj`` has a ``chat`` attribute exposing ``completions``."""
    if not hasattr(obj, "chat"):
        return False
    return hasattr(obj.chat, "completions")
50
+
51
+
52
def looks_like_anthropic(obj: Any) -> bool:
    """Return True when ``obj`` has ``messages.create`` and is not OpenAI-shaped."""
    if not hasattr(obj, "messages"):
        return False
    if not hasattr(obj.messages, "create"):
        return False
    # An object that also passes the OpenAI check is not treated as Anthropic.
    return not looks_like_openai(obj)
55
+
56
+
57
def looks_like_gemini(obj: Any) -> bool:
    """Return True when ``obj`` has a ``models`` attribute exposing ``generate_content``."""
    if not hasattr(obj, "models"):
        return False
    return hasattr(obj.models, "generate_content")
@@ -0,0 +1,103 @@
1
+ """Framework-agnostic integration surfaces.
2
+
3
+ These never change a tool's contract: a tool that returns ``str`` still returns
4
+ ``str``; anything non-string is passed through untouched. The agent cannot tell
5
+ LeanContext is present. See AGENTS.md §5B/§5D.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import functools
11
+ import inspect
12
+ from collections.abc import Callable
13
+ from typing import Any
14
+
15
+ from ..core import reduce_text
16
+ from ._common import is_wrapped, mark
17
+
18
+
19
def _reduced(result: Any, opts: dict) -> Any:
    """Reduce ``result`` if it is a string; hand back anything else untouched."""
    if not isinstance(result, str):
        return result
    return reduce_text(result, **opts).text
21
+
22
+
23
def wrap_callable(fn: Callable, **opts) -> Callable:
    """Return a marked wrapper around ``fn`` that reduces string results.

    Coroutine functions get an ``async`` wrapper that awaits ``fn`` first; all
    other callables get a plain wrapper. In both cases only ``str`` results are
    reduced (with ``opts``); any other return value passes through unchanged.
    """
    if not inspect.iscoroutinefunction(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            raw = fn(*args, **kwargs)
            return _reduced(raw, opts)

        return mark(wrapper)

    @functools.wraps(fn)
    async def awrapper(*args, **kwargs):
        raw = await fn(*args, **kwargs)
        return _reduced(raw, opts)

    return mark(awrapper)
40
+
41
+
42
def wrap(target: Any, **opts) -> Any:
    """Best-effort universal wrap.

    Handles, in this order: a list/tuple of tools (each element wrapped, same
    container type returned), LangChain/Agno tool objects, OpenAI/Anthropic/Gemini
    SDK clients, plain callables, and finally any object exposing its callable
    on a known attribute. Whatever is not recognised comes back unchanged, and
    a failure inside an adapter is swallowed so the next strategy can run.
    """
    if isinstance(target, (list, tuple)):
        container = type(target)
        return container(wrap(item, **opts) for item in target)

    # Framework tool objects go first: several are callable themselves, so they
    # must be wrapped in place (keeping their schema) before the callable path.
    try:
        from .frameworks import (
            looks_like_agno_tool,
            looks_like_langchain_tool,
            wrap_agno,
            wrap_langchain,
        )

        framework_adapters = (
            (looks_like_langchain_tool, wrap_langchain),
            (looks_like_agno_tool, wrap_agno),
        )
        for matches, adapt in framework_adapters:
            if matches(target):
                return adapt(target, **opts)
    except Exception:
        pass  # fail open

    # SDK clients (OpenAI / Anthropic / Gemini): reduce messages on the call.
    try:
        from .clients import (
            looks_like_anthropic,
            looks_like_gemini,
            looks_like_openai,
            wrap_anthropic,
            wrap_gemini,
            wrap_openai,
        )

        client_adapters = (
            (looks_like_openai, wrap_openai),
            (looks_like_anthropic, wrap_anthropic),
            (looks_like_gemini, wrap_gemini),
        )
        for matches, adapt in client_adapters:
            if matches(target):
                return adapt(target, **opts)
    except Exception:
        pass  # fail open

    # Plain callable tools (classes are deliberately excluded).
    if callable(target) and not isinstance(target, type):
        if is_wrapped(target):
            return target
        return wrap_callable(target, **opts)

    # Fallback for other tool objects: wrap the user callable in place. These
    # are the attribute names frameworks expose their tool function on
    # (LlamaIndex .fn, Pydantic AI .function, OpenAI Agents SDK .on_invoke_tool, etc.).
    for attr in ("func", "coroutine", "entrypoint", "fn", "function", "on_invoke_tool"):
        inner = getattr(target, attr, None)
        if not callable(inner) or is_wrapped(inner):
            continue
        try:
            setattr(target, attr, wrap_callable(inner, **opts))
        except Exception:
            pass  # immutable attr -> leave as-is (fail open)
    return target
@@ -0,0 +1,58 @@
1
+ """Framework adapters: reduce a framework's tool outputs.
2
+
3
+ Each framework wraps a tool differently, and several tool objects are themselves
4
+ callable, so we wrap the underlying user function *in place* and return the same
5
+ object (keeping its name, schema, and metadata).
6
+
7
+ Covered:
8
+ - LangChain (``StructuredTool``/``Tool`` via ``.func`` / ``.coroutine``)
9
+ - LangGraph (uses LangChain tools, so the LangChain adapter applies)
10
+ - Agno (``Function`` via ``.entrypoint``)
11
+
12
+ Plain functions don't need an adapter — ``leancontext.wrap`` / ``@reduce`` handle them.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from typing import Any
18
+
19
+ from ._common import is_wrapped
20
+ from .decorator import wrap_callable
21
+
22
+
23
def _module_root(obj: Any) -> str:
    """Return the top-level package name of the module defining ``obj``'s type."""
    module_path = type(obj).__module__
    root, _, _ = module_path.partition(".")
    return root
25
+
26
+
27
def looks_like_langchain_tool(obj: Any) -> bool:
    """Return True for objects from ``langchain``/``langchain_core`` that expose ``func``."""
    if _module_root(obj) not in ("langchain", "langchain_core"):
        return False
    return hasattr(obj, "func")
29
+
30
+
31
def looks_like_agno_tool(obj: Any) -> bool:
    """Return True for objects from the ``agno`` package that expose ``entrypoint``."""
    if _module_root(obj) != "agno":
        return False
    return hasattr(obj, "entrypoint")
33
+
34
+
35
def _wrap_attr_in_place(obj: Any, attr: str, opts: dict) -> None:
    """Replace ``obj.<attr>`` with a reducing wrapper, if it is an unwrapped callable.

    Missing, non-callable, or already-wrapped attributes are left alone, and so
    is any attribute whose replacement raises (fail open).
    """
    current = getattr(obj, attr, None)
    if not callable(current) or is_wrapped(current):
        return
    try:
        setattr(obj, attr, wrap_callable(current, **opts))
    except Exception:
        pass  # immutable/validated field -> leave as-is (fail open)
42
+
43
+
44
def wrap_langchain(tool: Any, **opts) -> Any:
    """Wrap a LangChain/LangGraph tool's ``func`` and ``coroutine`` in place.

    A list or tuple is processed element by element and rebuilt as the same
    container type; a single tool is returned as the same object.
    """
    if isinstance(tool, (list, tuple)):
        container = type(tool)
        return container(wrap_langchain(item, **opts) for item in tool)
    for attr in ("func", "coroutine"):
        _wrap_attr_in_place(tool, attr, opts)
    return tool
51
+
52
+
53
def wrap_agno(tool: Any, **opts) -> Any:
    """Wrap an Agno tool's ``entrypoint`` in place.

    A list or tuple is processed element by element and rebuilt as the same
    container type; a single tool is returned as the same object.
    """
    if isinstance(tool, (list, tuple)):
        container = type(tool)
        return container(wrap_agno(item, **opts) for item in tool)
    _wrap_attr_in_place(tool, "entrypoint", opts)
    return tool
@@ -0,0 +1,80 @@
1
+ """LiteLLM integration — gateway/proxy and SDK.
2
+
3
+ Verified against LiteLLM docs (docs.litellm.ai/docs/proxy/call_hooks):
4
+ a proxy callback subclasses ``CustomLogger`` and implements
5
+ ``async_pre_call_hook(self, user_api_key_dict, cache, data, call_type)``,
6
+ mutates ``data["messages"]``, and returns ``data``.
7
+
8
+ Nothing here is imported by ``leancontext`` at package load — ``litellm`` stays an
9
+ optional dependency. Import this module explicitly only when you use LiteLLM.
10
+
11
+ Proxy usage (config.yaml)::
12
+
13
+ litellm_settings:
14
+ callbacks: leancontext.integrations.litellm.proxy_handler_instance
15
+
16
+ SDK usage::
17
+
18
+ import leancontext.integrations.litellm as ll
19
+ ll.patch() # reduce messages on every litellm.completion call
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import functools
25
+
26
+ from ._common import mark, reduce_messages_in, wrap_messages_create
27
+
28
# Proxy call types whose request payload is worth reducing. The async route
# names are listed alongside the sync ones so the hook does not silently skip
# chat traffic if the proxy reports "acompletion" rather than "completion".
# NOTE(review): confirm the exact call_type values against the LiteLLM version in use.
_REDUCIBLE_CALLS = ("completion", "text_completion", "acompletion", "atext_completion")
29
+
30
+
31
def make_handler(**opts):
    """Create a LiteLLM ``CustomLogger`` instance whose pre-call hook reduces requests.

    ``litellm`` is imported here rather than at module load, so it stays an
    optional dependency. ``opts`` is passed to every reduction.
    """
    from litellm.integrations.custom_logger import CustomLogger  # optional dependency

    class LeanContextHandler(CustomLogger):
        async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
            # Call types outside the reducible set go through untouched.
            if call_type not in _REDUCIBLE_CALLS:
                return data
            reduce_messages_in(data, "auto", opts)  # fail-open in-place
            return data

    handler = LeanContextHandler()
    return handler
42
+
43
+
44
def patch(**opts) -> None:
    """Replace ``litellm.completion`` (and ``acompletion``, if present) with reducing versions.

    A ``_leancontext_patched`` flag on the ``litellm`` module makes repeat calls
    a no-op. ``opts`` is passed to every reduction.
    """
    import litellm

    already_patched = getattr(litellm, "_leancontext_patched", False)
    if already_patched:
        return

    sync_original = litellm.completion
    litellm.completion = wrap_messages_create(sync_original, fmt="auto", opts=opts)

    if hasattr(litellm, "acompletion"):
        async_original = litellm.acompletion

        @functools.wraps(async_original)
        async def acompletion(*args, **kwargs):
            # Only keyword arguments are inspected for reducible content here.
            reduce_messages_in(kwargs, "auto", opts)
            response = await async_original(*args, **kwargs)
            return response

        litellm.acompletion = mark(acompletion)

    litellm._leancontext_patched = True
64
+
65
+
66
def unpatch() -> None:
    """Restore ``litellm.completion``/``acompletion`` after :func:`patch`.

    Does nothing unless the ``_leancontext_patched`` flag is set on the
    ``litellm`` module, so calling it on an unpatched module, or calling it
    twice, is safe. When patched, each function is replaced by its
    ``__wrapped__`` original (where one is recorded) and the flag is cleared.
    """
    import litellm

    # ``__wrapped__`` is set by any functools.wraps-style decorator, not only
    # ours. Unwrapping without this guard could strip a wrapper LeanContext
    # never installed.
    if not getattr(litellm, "_leancontext_patched", False):
        return

    for name in ("completion", "acompletion"):
        fn = getattr(litellm, name, None)
        orig = getattr(fn, "__wrapped__", None)
        if orig is not None:
            setattr(litellm, name, orig)
    litellm._leancontext_patched = False
75
+
76
+
77
# Convenience instance for config.yaml. It stays None whenever the handler
# cannot be built (for example, litellm is not installed).
proxy_handler_instance = None
try:
    proxy_handler_instance = make_handler()
except Exception:  # pragma: no cover - litellm not installed
    pass
@@ -0,0 +1,64 @@
1
+ """MCP server: expose LeanContext as tools any MCP client can call.
2
+
3
+ Three tools:
4
+ - ``reduce`` : shrink a tool-output payload to its signal, return the text.
5
+ - ``expand`` : fetch the original content behind a paging reference (lc://<id>).
6
+ - ``stats`` : report what a reduction would save, without changing anything.
7
+
8
+ The handlers below are plain functions (easy to test). ``mcp`` is imported lazily
9
+ inside ``create_server`` so this module stays import-safe without the ``mcp`` extra.
10
+
11
+ Run it::
12
+
13
+ pip install "leancontext[mcp]"
14
+ python -m leancontext.integrations.mcp_server # serves over stdio
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from typing import Any
20
+
21
+ import leancontext
22
+ from leancontext import paging
23
+
24
+
25
def reduce(text: str, kind: str = "auto") -> str:
    """Reduce a tool-output payload (log, json, diff, stack trace, html, table)."""
    outcome = leancontext.reduce(text, kind=kind)
    return outcome.text
28
+
29
+
30
def expand(ref: str) -> str:
    """Return the original content for a LeanContext reference like 'lc://a1b2c3d4'."""
    original = paging.expand(ref)
    if original is None:
        # Unknown reference: answer with a readable message rather than None.
        return f"No content found for ref {ref!r}."
    return original
34
+
35
+
36
def stats(text: str, kind: str = "auto") -> dict[str, Any]:
    """Report what reducing ``text`` would save, without changing it."""
    outcome = leancontext.reduce(text, kind=kind)
    report: dict[str, Any] = {}
    report["kind"] = outcome.kind
    report["tokens_before"] = outcome.tokens_before
    report["tokens_after"] = outcome.tokens_after
    # Ratio and fidelity are rounded to four decimal places for the report.
    report["ratio"] = round(outcome.ratio, 4)
    report["fidelity"] = round(outcome.fidelity, 4)
    return report
46
+
47
+
48
def create_server(name: str = "leancontext"):
    """Build an MCP server exposing the tools above. Requires the ``mcp`` extra."""
    from mcp.server.fastmcp import FastMCP

    server = FastMCP(name)
    # Register the three handlers in a fixed order: reduce, expand, stats.
    for handler in (reduce, expand, stats):
        register = server.tool()
        register(handler)
    return server
57
+
58
+
59
def main() -> None:
    """Build the default server and run it."""
    server = create_server()
    server.run()


# Allow `python -m ...` / direct execution to start the server.
if __name__ == "__main__":
    main()
@@ -0,0 +1,78 @@
1
+ """OpenTelemetry integration — emit reduction savings as standard telemetry.
2
+
3
+ Follows the OpenTelemetry GenAI semantic-conventions posture (converged industry
4
+ standard as of early 2026): emit **metrics** for token usage/savings, and attach a
5
+ **content-free span event** to the active span if one is recording. We never put
6
+ payload content in attributes (that is the documented anti-pattern — size/PII).
7
+
8
+ Import-safe: ``opentelemetry`` is imported lazily inside ``instrument`` only, so it
9
+ stays an optional dependency (``pip install leancontext[otel]``).
10
+
11
+ Usage::
12
+
13
+ import leancontext.integrations.otel as lc_otel
14
+ lc_otel.instrument() # uses the global MeterProvider/TracerProvider
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from typing import Any
20
+
21
+ from ..core import on_reduction, remove_reduction_hook
22
+
23
# Module-level registry: the active reduction hook is stored under the "hook"
# key so instrument() can detect a prior install and uninstrument() can find it.
_INSTALLED: dict[str, Any] = {}
24
+
25
+
26
def instrument(meter_provider: Any = None) -> Any:
    """Install (once) a reduction hook that reports to OpenTelemetry.

    The hook adds to four counters (tokens before/after/saved, reduction count)
    and records two histograms (ratio, fidelity), all tagged with the reduction
    kind. If the current span is recording, it also gets a
    ``leancontext.reduction`` event carrying the same numbers — never payload
    content.

    A second call returns the hook installed by the first; ``meter_provider``
    is only consulted on the call that actually installs.

    Returns:
        The registered hook callable.
    """
    try:
        return _INSTALLED["hook"]
    except KeyError:
        pass  # nothing installed yet -> fall through and install

    from opentelemetry import metrics, trace

    meter = metrics.get_meter("leancontext", meter_provider=meter_provider)

    tokens_before = meter.create_counter(
        "leancontext.tokens.before", unit="token",
        description="Input tokens before reduction",
    )
    tokens_after = meter.create_counter(
        "leancontext.tokens.after", unit="token",
        description="Input tokens after reduction",
    )
    tokens_saved = meter.create_counter(
        "leancontext.tokens.saved", unit="token",
        description="Input tokens saved by reduction",
    )
    reductions = meter.create_counter(
        "leancontext.reductions", unit="1",
        description="Number of applied reductions",
    )
    ratio_hist = meter.create_histogram(
        "leancontext.reduction.ratio", unit="1",
        description="Fraction of tokens saved (0..1)",
    )
    fidelity_hist = meter.create_histogram(
        "leancontext.reduction.fidelity", unit="1",
        description="Signal preserved (0..1)",
    )

    def _hook(r) -> None:
        labels = {"leancontext.kind": r.kind}
        saved = r.tokens_saved
        tokens_before.add(r.tokens_before, labels)
        tokens_after.add(r.tokens_after, labels)
        tokens_saved.add(saved, labels)
        reductions.add(1, labels)
        ratio_hist.record(r.ratio, labels)
        fidelity_hist.record(r.fidelity, labels)

        span = trace.get_current_span()
        if span is None or not span.is_recording():
            return
        # Metadata only — never the payload (GenAI semconv: no content in attributes).
        event_attributes = {
            "leancontext.kind": r.kind,
            "gen_ai.usage.input_tokens.before": r.tokens_before,
            "gen_ai.usage.input_tokens.after": r.tokens_after,
            "leancontext.tokens.saved": saved,
            "leancontext.reduction.ratio": r.ratio,
            "leancontext.reduction.fidelity": r.fidelity,
        }
        span.add_event("leancontext.reduction", event_attributes)

    on_reduction(_hook)
    _INSTALLED["hook"] = _hook
    return _hook
73
+
74
+
75
def uninstrument() -> None:
    """Remove the hook installed by :func:`instrument`, if there is one."""
    if "hook" not in _INSTALLED:
        return
    hook = _INSTALLED.pop("hook")
    if hook is not None:
        remove_reduction_hook(hook)
@@ -0,0 +1,90 @@
1
+ """Standalone OpenAI-compatible reducing proxy (FastAPI/ASGI).
2
+
3
+ Point any client's ``base_url`` at this proxy and tool outputs in ``messages`` are
4
+ reduced before being forwarded upstream. Any language, any framework, no code change.
5
+
6
+ It forwards the caller's auth headers, supports streaming responses, and turns
7
+ upstream failures into a clean 502 instead of crashing. FastAPI is imported lazily
8
+ inside ``create_app`` so this module stays import-safe without the proxy extra.
9
+
10
+ from leancontext.integrations.proxy import create_app
11
+ app = create_app() # forwards to $LEANCONTEXT_UPSTREAM
12
+ # uvicorn leancontext.integrations.proxy:app (after `app = create_app()`)
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import inspect
18
+ import os
19
+ from collections.abc import Callable
20
+ from typing import Any
21
+
22
+ from ._common import reduce_messages_in
23
+
24
# Headers we pass through to the upstream provider (auth + provider version flags).
_FORWARD = ("authorization", "api-key", "x-api-key", "anthropic-version", "anthropic-beta")


def _forward_headers(request: Any) -> dict:
    """Build the header set sent upstream for one proxied request.

    Always sets ``content-type: application/json`` and copies every non-empty
    header named in ``_FORWARD`` from ``request``. The server-side
    ``OPENAI_API_KEY`` is added as a bearer token only when the caller supplied
    no credential header at all (``authorization``, ``api-key`` or
    ``x-api-key``); a caller who authenticated some other way never has the
    server's key attached to their request.

    Args:
        request: Incoming request exposing ``.headers.get(name)``, or ``None``.

    Returns:
        A new dict of headers for the upstream call.
    """
    headers: dict[str, str] = {"content-type": "application/json"}
    if request is not None:
        for name in _FORWARD:
            value = request.headers.get(name)
            if value:
                headers[name] = value

    # Any of these means the caller brought their own credentials.
    credential_headers = ("authorization", "api-key", "x-api-key")
    if not any(k.lower() in credential_headers for k in headers):
        key = os.environ.get("OPENAI_API_KEY")
        if key:
            headers["Authorization"] = f"Bearer {key}"
    return headers
41
+
42
+
43
def create_app(forwarder: Callable[[dict, dict], Any] | None = None,
               upstream: str | None = None):
    """Build the FastAPI app for the reducing proxy.

    Args:
        forwarder: Optional ``forwarder(payload, headers)`` used instead of the
            built-in httpx forwarder (handy for tests). It may be sync or
            async, and may return a response object or a JSON-serialisable value.
        upstream: Base URL of the upstream API. Defaults to
            ``$LEANCONTEXT_UPSTREAM``, then ``https://api.openai.com``.
            ``/v1/chat/completions`` is appended.

    Returns:
        The configured ``FastAPI`` application, exposing ``GET /healthz`` and
        ``POST /v1/chat/completions``.
    """
    from fastapi import Body, FastAPI, Request
    from fastapi.concurrency import run_in_threadpool
    from fastapi.responses import JSONResponse, StreamingResponse

    # Make the string annotation `Request` resolvable under `from __future__ import annotations`.
    globals()["Request"] = Request

    app = FastAPI(title="LeanContext proxy")
    url = (upstream or os.environ.get("LEANCONTEXT_UPSTREAM", "https://api.openai.com")).rstrip("/")
    url += "/v1/chat/completions"

    def _httpx_forward(payload: dict, headers: dict) -> Any:
        import httpx

        if payload.get("stream"):
            # Open the upstream stream eagerly so that connection failures are
            # raised here (and become a 502 in the handler) and so the upstream
            # status code can be relayed instead of always answering 200.
            client = httpx.Client(timeout=120)
            try:
                upstream_request = client.build_request(
                    "POST", url, json=payload, headers=headers
                )
                resp = client.send(upstream_request, stream=True)
            except Exception:
                client.close()
                raise

            def body():
                try:
                    # Upstream Content-Encoding is not forwarded, so relay
                    # decoded bytes rather than the raw wire bytes.
                    yield from resp.iter_bytes()
                finally:
                    resp.close()
                    client.close()

            return StreamingResponse(
                body(),
                status_code=resp.status_code,
                media_type=resp.headers.get("content-type", "text/event-stream"),
            )

        with httpx.Client(timeout=120) as client:
            resp = client.post(url, json=payload, headers=headers)
        return JSONResponse(resp.json(), status_code=resp.status_code)

    forward = forwarder or _httpx_forward

    @app.get("/healthz")
    async def healthz():
        return {"ok": True}

    @app.post("/v1/chat/completions")
    async def chat_completions(request: Request, payload: dict = Body(...)):
        reduce_messages_in(payload, "openai", {})  # fail-open, in-place
        try:
            headers = _forward_headers(request)
            if inspect.iscoroutinefunction(forward):
                result = await forward(payload, headers)
            else:
                # A synchronous forwarder (including the built-in httpx one)
                # would block the event loop for the whole upstream call, so
                # run it on a worker thread.
                result = await run_in_threadpool(forward, payload, headers)
                if inspect.isawaitable(result):
                    result = await result
        except Exception as exc:
            return JSONResponse(
                {"error": {"message": str(exc), "type": "upstream_error"}}, status_code=502
            )
        if isinstance(result, (JSONResponse, StreamingResponse)):
            return result
        return JSONResponse(result)

    return app