leancontext 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- leancontext/__init__.py +104 -0
- leancontext/cli.py +36 -0
- leancontext/core.py +214 -0
- leancontext/cost.py +104 -0
- leancontext/fidelity.py +108 -0
- leancontext/integrations/__init__.py +8 -0
- leancontext/integrations/_common.py +62 -0
- leancontext/integrations/anthropic_native.py +83 -0
- leancontext/integrations/clients.py +58 -0
- leancontext/integrations/decorator.py +103 -0
- leancontext/integrations/frameworks.py +58 -0
- leancontext/integrations/litellm.py +80 -0
- leancontext/integrations/mcp_server.py +64 -0
- leancontext/integrations/otel.py +78 -0
- leancontext/integrations/proxy.py +90 -0
- leancontext/messages.py +152 -0
- leancontext/paging.py +104 -0
- leancontext/py.typed +0 -0
- leancontext/reducers/__init__.py +36 -0
- leancontext/reducers/base.py +19 -0
- leancontext/reducers/diff.py +54 -0
- leancontext/reducers/html.py +64 -0
- leancontext/reducers/json_data.py +61 -0
- leancontext/reducers/logs.py +91 -0
- leancontext/reducers/stacktrace.py +59 -0
- leancontext/reducers/table.py +32 -0
- leancontext/tokens.py +79 -0
- leancontext-2.0.0.dist-info/METADATA +224 -0
- leancontext-2.0.0.dist-info/RECORD +32 -0
- leancontext-2.0.0.dist-info/WHEEL +4 -0
- leancontext-2.0.0.dist-info/entry_points.txt +2 -0
- leancontext-2.0.0.dist-info/licenses/LICENSE +190 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Run LeanContext's reduction alongside Anthropic's native context editing.
|
|
2
|
+
|
|
3
|
+
LeanContext reduces tool outputs by content on the way in; Anthropic's context
|
|
4
|
+
editing clears old tool results by age as the window grows. They're complementary,
|
|
5
|
+
and this module turns both on for one client:
|
|
6
|
+
|
|
7
|
+
from leancontext.integrations.anthropic_native import wrap_anthropic_native
|
|
8
|
+
client = wrap_anthropic_native(anthropic.Anthropic(),
|
|
9
|
+
trigger_input_tokens=30000, keep_tool_uses=3)
|
|
10
|
+
# every messages.create now: (1) LeanContext-reduces tool_result blocks,
|
|
11
|
+
# (2) enables clear_tool_uses_20250919,
|
|
12
|
+
# (3) sends the context-management beta header.
|
|
13
|
+
|
|
14
|
+
Schema verified against platform.claude.com/docs (context-editing), 2026-06.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from collections.abc import Iterable
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from ._common import wrap_messages_create
|
|
23
|
+
|
|
24
|
+
#: Beta header required to enable context management on the Messages API.
BETA_HEADER = "context-management-2025-06-27"

#: Tool-result clearing strategy identifier (verbatim from the API).
CLEAR_TOOL_USES = "clear_tool_uses_20250919"


def context_management(
    *,
    trigger_input_tokens: int | None = None,
    keep_tool_uses: int | None = None,
    clear_at_least_input_tokens: int | None = None,
    exclude_tools: Iterable[str] | str | None = None,
    clear_tool_inputs: bool | None = None,
) -> dict:
    """Build the ``context_management`` request param for tool-result clearing.

    Every argument is optional and keyword-only. An argument left as ``None``
    is omitted from the edit entirely. With no args this returns the minimal
    ``{"edits": [{"type": "clear_tool_uses_20250919"}]}``.

    Args:
        trigger_input_tokens: emitted as ``trigger`` (``input_tokens`` count).
        keep_tool_uses: emitted as ``keep`` (``tool_uses`` count).
        clear_at_least_input_tokens: emitted as ``clear_at_least``.
        exclude_tools: tool names emitted as ``exclude_tools``. A single
            ``str`` is treated as one tool name, not as an iterable of
            characters.
        clear_tool_inputs: emitted as ``clear_tool_inputs`` (coerced to bool).

    Returns:
        ``{"edits": [edit]}`` with exactly one edit.
    """
    edit: dict[str, Any] = {"type": CLEAR_TOOL_USES}
    if trigger_input_tokens is not None:
        edit["trigger"] = {"type": "input_tokens", "value": int(trigger_input_tokens)}
    if keep_tool_uses is not None:
        edit["keep"] = {"type": "tool_uses", "value": int(keep_tool_uses)}
    if clear_at_least_input_tokens is not None:
        edit["clear_at_least"] = {"type": "input_tokens", "value": int(clear_at_least_input_tokens)}
    if exclude_tools is not None:
        # A str is itself iterable: list("web_search") would yield single
        # characters, so wrap a bare name in a one-element list instead.
        if isinstance(exclude_tools, str):
            edit["exclude_tools"] = [exclude_tools]
        else:
            edit["exclude_tools"] = list(exclude_tools)
    if clear_tool_inputs is not None:
        edit["clear_tool_inputs"] = bool(clear_tool_inputs)
    return {"edits": [edit]}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def beta_headers(extra: dict | None = None) -> dict:
    """Return a copy of ``extra`` with the context-management beta enabled.

    ``extra`` itself is never mutated. If it has no ``anthropic-beta`` header
    (matched case-insensitively), one is added with ``BETA_HEADER``. If it
    already carries other betas, ``BETA_HEADER`` is appended to the
    comma-separated list so the caller's betas are kept and context
    management is still enabled. A value that already lists the beta, or
    that is not a string, is left untouched.
    """
    headers = dict(extra or {})
    existing = next(
        (k for k in headers if isinstance(k, str) and k.lower() == "anthropic-beta"),
        None,
    )
    if existing is None:
        headers["anthropic-beta"] = BETA_HEADER
        return headers

    current = headers[existing]
    if not isinstance(current, str):
        # Not a plain header value (could be a sentinel) -> leave as-is.
        return headers

    betas = [b.strip() for b in current.split(",") if b.strip()]
    if BETA_HEADER not in betas:
        betas.append(BETA_HEADER)
        headers[existing] = ",".join(betas)
    return headers
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def wrap_anthropic_native(client: Any, *, reduce: bool = True, send_beta: bool = True, **cm) -> Any:
    """Patch ``client.messages.create`` to combine reduction with native clearing.

    The ``cm`` keyword arguments go straight to :func:`context_management`;
    the resulting config is built once, up front. The replacement for
    ``messages.create`` is produced by ``wrap_messages_create`` with a
    ``before`` callback that fills in ``context_management`` (only when the
    caller did not pass one) and, if ``send_beta`` is true, rewrites
    ``extra_headers`` through :func:`beta_headers`.

    Fail-open: any exception while installing the wrapper is swallowed and
    the client is returned as it was. The same ``client`` object is always
    returned.
    """
    native_config = context_management(**cm)

    def _add_native_clearing(call_kwargs: dict) -> None:
        # NOTE(review): presumably invoked by wrap_messages_create with the
        # outgoing call's kwargs before each request — confirm in _common.
        if "context_management" not in call_kwargs:
            call_kwargs["context_management"] = native_config
        if not send_beta:
            return
        call_kwargs["extra_headers"] = beta_headers(call_kwargs.get("extra_headers"))

    try:
        patched_create = wrap_messages_create(
            client.messages.create,
            fmt="anthropic",
            opts={},
            reduce=reduce,
            before=_add_native_clearing,
        )
        client.messages.create = patched_create
    except Exception:
        pass  # fail open
    return client
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""SDK client wrappers for OpenAI, Anthropic, and Gemini.
|
|
2
|
+
|
|
3
|
+
Wraps the provider's call (OpenAI ``chat.completions.create``, Anthropic
|
|
4
|
+
``messages.create``, Gemini ``models.generate_content``) so tool outputs in the
|
|
5
|
+
outbound request are reduced before they're sent. Contract-preserving and
|
|
6
|
+
fail-open: anything unexpected leaves the original call untouched. Reductions are
|
|
7
|
+
deterministic, so the prompt-cache prefix stays stable.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from ._common import wrap_messages_create
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def wrap_openai(client: Any, **opts) -> Any:
    """Install a reducing wrapper on ``client.chat.completions.create``.

    ``opts`` are handed to ``wrap_messages_create`` unchanged. Fail-open: if
    the attribute chain is missing or the wrapper cannot be assigned, the
    error is swallowed. The same ``client`` object is always returned.
    """
    try:
        completions = client.chat.completions
        reducing_create = wrap_messages_create(completions.create, fmt="openai", opts=opts)
        completions.create = reducing_create
    except Exception:
        pass  # fail open
    return client
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def wrap_anthropic(client: Any, **opts) -> Any:
    """Install a reducing wrapper on ``client.messages.create``.

    ``opts`` are handed to ``wrap_messages_create`` unchanged. Fail-open: any
    exception while wrapping is swallowed. The same ``client`` object is
    always returned.
    """
    try:
        reducing_create = wrap_messages_create(
            client.messages.create, fmt="anthropic", opts=opts
        )
        client.messages.create = reducing_create
    except Exception:
        pass  # fail open
    return client
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def wrap_gemini(client: Any, **opts) -> Any:
    """Install a reducing wrapper on ``client.models.generate_content``.

    Same helper as the other clients, but with ``fmt="gemini"`` and
    ``key="contents"``. Fail-open: any exception while wrapping is swallowed.
    The same ``client`` object is always returned.
    """
    try:
        model_api = client.models
        reducing_call = wrap_messages_create(
            model_api.generate_content, fmt="gemini", opts=opts, key="contents"
        )
        model_api.generate_content = reducing_call
    except Exception:
        pass  # fail open
    return client
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def looks_like_openai(obj: Any) -> bool:
    """Return True when ``obj`` exposes ``.chat.completions`` (duck-typed check)."""
    if not hasattr(obj, "chat"):
        return False
    return hasattr(obj.chat, "completions")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def looks_like_anthropic(obj: Any) -> bool:
    """Return True when ``obj`` exposes ``.messages.create`` and is not OpenAI-shaped.

    The OpenAI check runs last, and only for objects that do have
    ``messages.create``.
    """
    if not hasattr(obj, "messages"):
        return False
    if not hasattr(obj.messages, "create"):
        return False
    return not looks_like_openai(obj)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def looks_like_gemini(obj: Any) -> bool:
    """Return True when ``obj`` exposes ``.models.generate_content`` (duck-typed check)."""
    if not hasattr(obj, "models"):
        return False
    return hasattr(obj.models, "generate_content")
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Framework-agnostic integration surfaces.
|
|
2
|
+
|
|
3
|
+
These never change a tool's contract: a tool that returns ``str`` still returns
|
|
4
|
+
``str``; anything non-string is passed through untouched. The agent cannot tell
|
|
5
|
+
LeanContext is present. See AGENTS.md §5B/§5D.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import functools
|
|
11
|
+
import inspect
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from ..core import reduce_text
|
|
16
|
+
from ._common import is_wrapped, mark
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _reduced(result: Any, opts: dict) -> Any:
|
|
20
|
+
return reduce_text(result, **opts).text if isinstance(result, str) else result
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def wrap_callable(fn: Callable, **opts) -> Callable:
    """Return a wrapper around ``fn`` whose string results are reduced.

    Coroutine functions get an ``async`` wrapper that awaits ``fn``; all other
    callables get a plain wrapper. Both copy ``fn``'s metadata via
    ``functools.wraps``, route the result through ``_reduced`` (non-string
    results pass through untouched), and are passed through ``mark`` before
    being returned.
    """
    if not inspect.iscoroutinefunction(fn):

        @functools.wraps(fn)
        def sync_tool(*args, **kwargs):
            raw = fn(*args, **kwargs)
            return _reduced(raw, opts)

        return mark(sync_tool)

    @functools.wraps(fn)
    async def async_tool(*args, **kwargs):
        raw = await fn(*args, **kwargs)
        return _reduced(raw, opts)

    return mark(async_tool)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def wrap(target: Any, **opts) -> Any:
    """Best-effort universal wrap.

    Accepts a plain callable, a list/tuple of tools, an OpenAI/Anthropic/Gemini
    SDK client, or a framework tool object exposing its callable on a known
    attribute. Anything it doesn't recognise is returned unchanged — fail open.

    Dispatch order:

    1. ``list``/``tuple`` (including namedtuples): each member is wrapped
       recursively and a container of the same type is rebuilt.
    2. LangChain / Agno tool objects: wrapped in place by their adapters.
    3. SDK clients: the request call is wrapped in place.
    4. Plain callables (not classes): returned wrapped, unless already marked.
    5. Other objects: the first matching attributes in a fixed list of known
       names are replaced in place with wrapped versions.

    ``opts`` are forwarded unchanged to whichever wrapper handles ``target``.
    """
    if isinstance(target, (list, tuple)):
        wrapped_items = [wrap(item, **opts) for item in target]
        try:
            # namedtuple classes take one positional arg per field, so
            # type(target)(iterable) raises TypeError; _make is their
            # documented "build from iterable" constructor.
            remake = getattr(type(target), "_make", None)
            if callable(remake):
                return remake(wrapped_items)
            return type(target)(wrapped_items)
        except Exception:
            return target  # unusual sequence subclass -> fail open

    # Framework tool objects first. Several are callable themselves, so they must
    # be wrapped in place (keeping their schema) before the plain-callable path.
    try:
        from .frameworks import (
            looks_like_agno_tool,
            looks_like_langchain_tool,
            wrap_agno,
            wrap_langchain,
        )

        if looks_like_langchain_tool(target):
            return wrap_langchain(target, **opts)
        if looks_like_agno_tool(target):
            return wrap_agno(target, **opts)
    except Exception:
        pass  # fail open

    # SDK clients (OpenAI / Anthropic / Gemini): reduce messages on the call.
    try:
        from .clients import (
            looks_like_anthropic,
            looks_like_gemini,
            looks_like_openai,
            wrap_anthropic,
            wrap_gemini,
            wrap_openai,
        )

        if looks_like_openai(target):
            return wrap_openai(target, **opts)
        if looks_like_anthropic(target):
            return wrap_anthropic(target, **opts)
        if looks_like_gemini(target):
            return wrap_gemini(target, **opts)
    except Exception:
        pass  # fail open

    # Plain callable tools.
    if callable(target) and not isinstance(target, type):
        return target if is_wrapped(target) else wrap_callable(target, **opts)

    # Fallback for other tool objects: wrap the user callable in place. These are
    # the attribute names frameworks expose their tool function on (LlamaIndex .fn,
    # Pydantic AI .function, OpenAI Agents SDK .on_invoke_tool, etc.).
    for attr in ("func", "coroutine", "entrypoint", "fn", "function", "on_invoke_tool"):
        inner = getattr(target, attr, None)
        if callable(inner) and not is_wrapped(inner):
            try:
                setattr(target, attr, wrap_callable(inner, **opts))
            except Exception:
                pass  # immutable attr -> leave as-is (fail open)
    return target
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Framework adapters: reduce a framework's tool outputs.
|
|
2
|
+
|
|
3
|
+
Each framework wraps a tool differently, and several tool objects are themselves
|
|
4
|
+
callable, so we wrap the underlying user function *in place* and return the same
|
|
5
|
+
object (keeping its name, schema, and metadata).
|
|
6
|
+
|
|
7
|
+
Covered:
|
|
8
|
+
- LangChain (``StructuredTool``/``Tool`` via ``.func`` / ``.coroutine``)
|
|
9
|
+
- LangGraph (uses LangChain tools, so the LangChain adapter applies)
|
|
10
|
+
- Agno (``Function`` via ``.entrypoint``)
|
|
11
|
+
|
|
12
|
+
Plain functions don't need an adapter — ``leancontext.wrap`` / ``@reduce`` handle them.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from ._common import is_wrapped
|
|
20
|
+
from .decorator import wrap_callable
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _module_root(obj: Any) -> str:
|
|
24
|
+
return type(obj).__module__.split(".")[0]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def looks_like_langchain_tool(obj: Any) -> bool:
    """Return True when ``obj``'s type comes from LangChain and it has a ``func`` attribute.

    "From LangChain" means the type's top-level package is ``langchain`` or
    ``langchain_core``.
    """
    if _module_root(obj) not in ("langchain", "langchain_core"):
        return False
    return hasattr(obj, "func")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def looks_like_agno_tool(obj: Any) -> bool:
    """Return True when ``obj``'s type comes from the ``agno`` package and it has ``entrypoint``."""
    if _module_root(obj) != "agno":
        return False
    return hasattr(obj, "entrypoint")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _wrap_attr_in_place(obj: Any, attr: str, opts: dict) -> None:
|
|
36
|
+
fn = getattr(obj, attr, None)
|
|
37
|
+
if callable(fn) and not is_wrapped(fn):
|
|
38
|
+
try:
|
|
39
|
+
setattr(obj, attr, wrap_callable(fn, **opts))
|
|
40
|
+
except Exception:
|
|
41
|
+
pass # immutable/validated field -> leave as-is (fail open)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def wrap_langchain(tool: Any, **opts) -> Any:
    """Reduce outputs of a LangChain or LangGraph tool, in place.

    A single tool has its ``func`` and ``coroutine`` attributes wrapped in
    place and is returned as the same object. A ``list``/``tuple`` of tools
    (including namedtuples) is processed member by member and a container of
    the same type is returned. If that container type cannot be rebuilt, the
    original sequence is returned.
    """
    if isinstance(tool, (list, tuple)):
        wrapped = [wrap_langchain(t, **opts) for t in tool]
        try:
            # namedtuple classes reject type(tool)(iterable); _make is their
            # documented "build from iterable" constructor.
            remake = getattr(type(tool), "_make", None)
            return remake(wrapped) if callable(remake) else type(tool)(wrapped)
        except Exception:
            return tool  # fail open
    _wrap_attr_in_place(tool, "func", opts)
    _wrap_attr_in_place(tool, "coroutine", opts)
    return tool
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def wrap_agno(tool: Any, **opts) -> Any:
    """Reduce outputs of an Agno tool (Function), in place.

    A single tool has its ``entrypoint`` attribute wrapped in place and is
    returned as the same object. A ``list``/``tuple`` of tools (including
    namedtuples) is processed member by member and a container of the same
    type is returned. If that container type cannot be rebuilt, the original
    sequence is returned.
    """
    if isinstance(tool, (list, tuple)):
        wrapped = [wrap_agno(t, **opts) for t in tool]
        try:
            # namedtuple classes reject type(tool)(iterable); _make is their
            # documented "build from iterable" constructor.
            remake = getattr(type(tool), "_make", None)
            return remake(wrapped) if callable(remake) else type(tool)(wrapped)
        except Exception:
            return tool  # fail open
    _wrap_attr_in_place(tool, "entrypoint", opts)
    return tool
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""LiteLLM integration — gateway/proxy and SDK.
|
|
2
|
+
|
|
3
|
+
Verified against LiteLLM docs (docs.litellm.ai/docs/proxy/call_hooks):
|
|
4
|
+
a proxy callback subclasses ``CustomLogger`` and implements
|
|
5
|
+
``async_pre_call_hook(self, user_api_key_dict, cache, data, call_type)``,
|
|
6
|
+
mutates ``data["messages"]``, and returns ``data``.
|
|
7
|
+
|
|
8
|
+
Nothing here is imported by ``leancontext`` at package load — ``litellm`` stays an
|
|
9
|
+
optional dependency. Import this module explicitly only when you use LiteLLM.
|
|
10
|
+
|
|
11
|
+
Proxy usage (config.yaml)::
|
|
12
|
+
|
|
13
|
+
litellm_settings:
|
|
14
|
+
callbacks: leancontext.integrations.litellm.proxy_handler_instance
|
|
15
|
+
|
|
16
|
+
SDK usage::
|
|
17
|
+
|
|
18
|
+
import leancontext.integrations.litellm as ll
|
|
19
|
+
ll.patch() # reduce messages on every litellm.completion call
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import functools
|
|
25
|
+
|
|
26
|
+
from ._common import mark, reduce_messages_in, wrap_messages_create
|
|
27
|
+
|
|
28
|
+
# Proxy call types whose request payload is passed through the reducer.
_REDUCIBLE_CALLS = ("completion", "text_completion")


def make_handler(**opts):
    """Create a LiteLLM proxy callback instance that reduces request messages.

    ``litellm`` is imported here, not at module load, so it stays optional;
    calling this without it installed raises the import error. The returned
    object subclasses LiteLLM's ``CustomLogger``. Its ``async_pre_call_hook``
    runs ``reduce_messages_in`` on ``data`` for the call types listed in
    ``_REDUCIBLE_CALLS`` and always returns ``data``.
    """
    from litellm.integrations.custom_logger import CustomLogger  # optional dependency

    class LeanContextHandler(CustomLogger):
        async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
            if call_type not in _REDUCIBLE_CALLS:
                return data
            reduce_messages_in(data, "auto", opts)  # fail-open in-place
            return data

    handler = LeanContextHandler()
    return handler
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def patch(**opts) -> None:
    """Replace ``litellm.completion`` / ``acompletion`` with reducing wrappers.

    Idempotent: returns at once when ``litellm._leancontext_patched`` is
    truthy, and sets that flag after patching. ``completion`` is wrapped via
    ``wrap_messages_create``. ``acompletion``, when the module has it, gets an
    async wrapper that runs ``reduce_messages_in`` on the call's keyword
    arguments before awaiting the original.
    """
    import litellm

    already_patched = getattr(litellm, "_leancontext_patched", False)
    if already_patched:
        return

    litellm.completion = wrap_messages_create(litellm.completion, fmt="auto", opts=opts)

    if hasattr(litellm, "acompletion"):
        original_async = litellm.acompletion

        @functools.wraps(original_async)
        async def reducing_acompletion(*args, **kwargs):
            # Only keyword arguments are inspected here.
            reduce_messages_in(kwargs, "auto", opts)
            return await original_async(*args, **kwargs)

        litellm.acompletion = mark(reducing_acompletion)

    litellm._leancontext_patched = True
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def unpatch() -> None:
    """Undo :func:`patch`, restoring the original LiteLLM entry points.

    Does nothing unless ``litellm._leancontext_patched`` is truthy. Without
    that guard, a repeated or unpaired call would follow ``__wrapped__`` on a
    function this module never wrapped and strip a decorator the library (or
    the application) applied itself.

    When patched, ``completion`` and ``acompletion`` are each replaced by
    their ``__wrapped__`` target if they have one, and the flag is cleared.
    """
    import litellm

    if not getattr(litellm, "_leancontext_patched", False):
        return

    for name in ("completion", "acompletion"):
        fn = getattr(litellm, name, None)
        orig = getattr(fn, "__wrapped__", None)
        if orig is not None:
            setattr(litellm, name, orig)
    litellm._leancontext_patched = False
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Convenience instance for config.yaml. Stays None when the handler cannot be
# built (for example, litellm is not installed).
proxy_handler_instance = None
try:
    proxy_handler_instance = make_handler()
except Exception:  # pragma: no cover - litellm not installed
    pass
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""MCP server: expose LeanContext as tools any MCP client can call.
|
|
2
|
+
|
|
3
|
+
Three tools:
|
|
4
|
+
- ``reduce`` : shrink a tool-output payload to its signal, return the text.
|
|
5
|
+
- ``expand`` : fetch the original content behind a paging reference (lc://<id>).
|
|
6
|
+
- ``stats`` : report what a reduction would save, without changing anything.
|
|
7
|
+
|
|
8
|
+
The handlers below are plain functions (easy to test). ``mcp`` is imported lazily
|
|
9
|
+
inside ``create_server`` so this module stays import-safe without the ``mcp`` extra.
|
|
10
|
+
|
|
11
|
+
Run it::
|
|
12
|
+
|
|
13
|
+
pip install "leancontext[mcp]"
|
|
14
|
+
python -m leancontext.integrations.mcp_server # serves over stdio
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
import leancontext
|
|
22
|
+
from leancontext import paging
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def reduce(text: str, kind: str = "auto") -> str:
    """Reduce a tool-output payload (log, json, diff, stack trace, html, table)."""
    # Docstring kept verbatim: this function is registered as an MCP tool and
    # the server may surface the docstring as its description.
    outcome = leancontext.reduce(text, kind=kind)
    return outcome.text
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def expand(ref: str) -> str:
    """Return the original content for a LeanContext reference like 'lc://a1b2c3d4'."""
    # Docstring kept verbatim (may be surfaced as the MCP tool description).
    # A lookup that returns None becomes a readable message, not an error.
    original = paging.expand(ref)
    if original is None:
        return f"No content found for ref {ref!r}."
    return original
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def stats(text: str, kind: str = "auto") -> dict[str, Any]:
    """Report what reducing ``text`` would save, without changing it."""
    # Docstring kept verbatim (may be surfaced as the MCP tool description).
    outcome = leancontext.reduce(text, kind=kind)
    report: dict[str, Any] = {}
    report["kind"] = outcome.kind
    report["tokens_before"] = outcome.tokens_before
    report["tokens_after"] = outcome.tokens_after
    # Ratio and fidelity are rounded to four decimal places for the report.
    report["ratio"] = round(outcome.ratio, 4)
    report["fidelity"] = round(outcome.fidelity, 4)
    return report
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def create_server(name: str = "leancontext"):
    """Create a ``FastMCP`` server with ``reduce``, ``expand`` and ``stats`` registered.

    ``mcp`` is imported here, not at module load, so the module can be
    imported without the ``mcp`` extra; calling this without it installed
    raises the import error.
    """
    from mcp.server.fastmcp import FastMCP

    server = FastMCP(name)
    # Registration order matches the order the tools are defined in this module.
    for handler in (reduce, expand, stats):
        server.tool()(handler)
    return server
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def main() -> None:
    """Build the default server and run it."""
    server = create_server()
    server.run()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""OpenTelemetry integration — emit reduction savings as standard telemetry.
|
|
2
|
+
|
|
3
|
+
Follows the OpenTelemetry GenAI semantic-conventions posture (converged industry
|
|
4
|
+
standard as of early 2026): emit **metrics** for token usage/savings, and attach a
|
|
5
|
+
**content-free span event** to the active span if one is recording. We never put
|
|
6
|
+
payload content in attributes (that is the documented anti-pattern — size/PII).
|
|
7
|
+
|
|
8
|
+
Import-safe: ``opentelemetry`` is imported lazily inside ``instrument`` only, so it
|
|
9
|
+
stays an optional dependency (``pip install leancontext[otel]``).
|
|
10
|
+
|
|
11
|
+
Usage::
|
|
12
|
+
|
|
13
|
+
import leancontext.integrations.otel as lc_otel
|
|
14
|
+
lc_otel.instrument() # uses the global MeterProvider/TracerProvider
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from ..core import on_reduction, remove_reduction_hook
|
|
22
|
+
|
|
23
|
+
# Holds the currently registered hook under the key "hook"; empty when not instrumented.
_INSTALLED: dict[str, Any] = {}


def instrument(meter_provider: Any = None) -> Any:
    """Register a reduction hook that records OTel metrics and a span event.

    Idempotent: if a hook is already installed it is returned unchanged and
    ``meter_provider`` is ignored. Otherwise ``opentelemetry`` is imported,
    four counters and two histograms are created on the ``"leancontext"``
    meter, and a hook is registered through ``on_reduction``.

    For each reduction result the hook updates all six instruments, tagged
    with the result's ``kind``. If the current span is recording, it also
    adds a ``leancontext.reduction`` event carrying metadata only.

    Returns:
        The hook callable (also stored for :func:`uninstrument`).
    """
    installed = _INSTALLED.get("hook") if "hook" in _INSTALLED else None
    if "hook" in _INSTALLED:
        return installed

    from opentelemetry import metrics, trace

    meter = metrics.get_meter("leancontext", meter_provider=meter_provider)

    tokens_before_counter = meter.create_counter(
        "leancontext.tokens.before",
        unit="token",
        description="Input tokens before reduction",
    )
    tokens_after_counter = meter.create_counter(
        "leancontext.tokens.after",
        unit="token",
        description="Input tokens after reduction",
    )
    tokens_saved_counter = meter.create_counter(
        "leancontext.tokens.saved",
        unit="token",
        description="Input tokens saved by reduction",
    )
    reductions_counter = meter.create_counter(
        "leancontext.reductions",
        unit="1",
        description="Number of applied reductions",
    )
    ratio_histogram = meter.create_histogram(
        "leancontext.reduction.ratio",
        unit="1",
        description="Fraction of tokens saved (0..1)",
    )
    fidelity_histogram = meter.create_histogram(
        "leancontext.reduction.fidelity",
        unit="1",
        description="Signal preserved (0..1)",
    )

    def _hook(r) -> None:
        metric_attrs = {"leancontext.kind": r.kind}
        saved = r.tokens_saved
        # NOTE(review): counters expect non-negative increments — confirm
        # tokens_saved can never be negative for a reported reduction.
        tokens_before_counter.add(r.tokens_before, metric_attrs)
        tokens_after_counter.add(r.tokens_after, metric_attrs)
        tokens_saved_counter.add(saved, metric_attrs)
        reductions_counter.add(1, metric_attrs)
        ratio_histogram.record(r.ratio, metric_attrs)
        fidelity_histogram.record(r.fidelity, metric_attrs)

        span = trace.get_current_span()
        if span is None or not span.is_recording():
            return
        # Metadata only — never the payload (GenAI semconv: no content in attributes).
        event_attrs = {
            "leancontext.kind": r.kind,
            "gen_ai.usage.input_tokens.before": r.tokens_before,
            "gen_ai.usage.input_tokens.after": r.tokens_after,
            "leancontext.tokens.saved": saved,
            "leancontext.reduction.ratio": r.ratio,
            "leancontext.reduction.fidelity": r.fidelity,
        }
        span.add_event("leancontext.reduction", event_attrs)

    on_reduction(_hook)
    _INSTALLED["hook"] = _hook
    return _hook
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def uninstrument() -> None:
    """Remove the hook installed by :func:`instrument`; a no-op when none is installed."""
    if "hook" not in _INSTALLED:
        return
    hook = _INSTALLED.pop("hook")
    if hook is None:
        return
    remove_reduction_hook(hook)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Standalone OpenAI-compatible reducing proxy (FastAPI/ASGI).
|
|
2
|
+
|
|
3
|
+
Point any client's ``base_url`` at this proxy and tool outputs in ``messages`` are
|
|
4
|
+
reduced before being forwarded upstream. Any language, any framework, no code change.
|
|
5
|
+
|
|
6
|
+
It forwards the caller's auth headers, supports streaming responses, and turns
|
|
7
|
+
upstream failures into a clean 502 instead of crashing. FastAPI is imported lazily
|
|
8
|
+
inside ``create_app`` so this module stays import-safe without the proxy extra.
|
|
9
|
+
|
|
10
|
+
from leancontext.integrations.proxy import create_app
|
|
11
|
+
app = create_app() # forwards to $LEANCONTEXT_UPSTREAM
|
|
12
|
+
# or serve the factory directly: uvicorn --factory leancontext.integrations.proxy:create_app
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import inspect
|
|
18
|
+
import os
|
|
19
|
+
from collections.abc import Callable
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from ._common import reduce_messages_in
|
|
23
|
+
|
|
24
|
+
# Headers we pass through to the upstream provider (auth + provider version flags).
|
|
25
|
+
_FORWARD = ("authorization", "api-key", "x-api-key", "anthropic-version", "anthropic-beta")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _forward_headers(request: Any) -> dict:
|
|
29
|
+
"""Carry the caller's auth/version headers upstream; fall back to OPENAI_API_KEY."""
|
|
30
|
+
headers: dict[str, str] = {"content-type": "application/json"}
|
|
31
|
+
if request is not None:
|
|
32
|
+
for name in _FORWARD:
|
|
33
|
+
value = request.headers.get(name)
|
|
34
|
+
if value:
|
|
35
|
+
headers[name] = value
|
|
36
|
+
if not any(k.lower() == "authorization" for k in headers):
|
|
37
|
+
key = os.environ.get("OPENAI_API_KEY")
|
|
38
|
+
if key:
|
|
39
|
+
headers["Authorization"] = f"Bearer {key}"
|
|
40
|
+
return headers
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def create_app(forwarder: Callable[[dict, dict], Any] | None = None,
               upstream: str | None = None):
    """Build the FastAPI app. Pass a custom ``forwarder(payload, headers)`` for tests.

    Args:
        forwarder: optional callable taking the reduced payload and the
            upstream headers. It may be sync or async, and may return a
            ``JSONResponse``/``StreamingResponse`` or a JSON-serialisable
            value. Defaults to an httpx-based forwarder.
        upstream: base URL of the upstream provider. Falls back to
            ``$LEANCONTEXT_UPSTREAM``, then ``https://api.openai.com``. An
            empty value is treated as unset.

    Routes:
        ``GET /healthz`` returns ``{"ok": True}``.
        ``POST /v1/chat/completions`` reduces ``messages`` in place, forwards
        the request, and answers 502 with an ``upstream_error`` body if
        forwarding raises.

    FastAPI and httpx are imported lazily, so importing this module does not
    require them.
    """
    from fastapi import Body, FastAPI, Request
    from fastapi.responses import JSONResponse, StreamingResponse

    # Make the string annotation `Request` resolvable under `from __future__ import annotations`.
    globals()["Request"] = Request

    app = FastAPI(title="LeanContext proxy")
    # `or` chaining so an empty LEANCONTEXT_UPSTREAM doesn't produce a relative URL.
    base = upstream or os.environ.get("LEANCONTEXT_UPSTREAM") or "https://api.openai.com"
    url = base.rstrip("/") + "/v1/chat/completions"

    async def _httpx_forward(payload: dict, headers: dict) -> Any:
        import httpx

        if payload.get("stream"):
            def body():
                # The upstream request is only opened once the response body
                # is iterated, so failures here happen after the 502 guard.
                with httpx.stream("POST", url, json=payload, headers=headers, timeout=120) as resp:
                    # iter_bytes() applies content decoding. We don't forward
                    # the upstream Content-Encoding header, so relaying raw
                    # (possibly compressed) bytes would corrupt the stream.
                    yield from resp.iter_bytes()
            return StreamingResponse(body(), media_type="text/event-stream")

        # Async client: a blocking httpx.Client.post() inside the async route
        # would stall the event loop for the whole upstream call.
        async with httpx.AsyncClient(timeout=120) as client:
            resp = await client.post(url, json=payload, headers=headers)
        return JSONResponse(resp.json(), status_code=resp.status_code)

    forward = forwarder or _httpx_forward

    @app.get("/healthz")
    async def healthz():
        return {"ok": True}

    @app.post("/v1/chat/completions")
    async def chat_completions(request: Request, payload: dict = Body(...)):
        reduce_messages_in(payload, "openai", {})  # fail-open, in-place
        try:
            result = forward(payload, _forward_headers(request))
            if inspect.isawaitable(result):
                result = await result
        except Exception as exc:
            return JSONResponse(
                {"error": {"message": str(exc), "type": "upstream_error"}}, status_code=502
            )
        if isinstance(result, (JSONResponse, StreamingResponse)):
            return result
        return JSONResponse(result)

    return app
|