leancontext 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- leancontext/__init__.py +104 -0
- leancontext/cli.py +36 -0
- leancontext/core.py +214 -0
- leancontext/cost.py +104 -0
- leancontext/fidelity.py +108 -0
- leancontext/integrations/__init__.py +8 -0
- leancontext/integrations/_common.py +62 -0
- leancontext/integrations/anthropic_native.py +83 -0
- leancontext/integrations/clients.py +58 -0
- leancontext/integrations/decorator.py +103 -0
- leancontext/integrations/frameworks.py +58 -0
- leancontext/integrations/litellm.py +80 -0
- leancontext/integrations/mcp_server.py +64 -0
- leancontext/integrations/otel.py +78 -0
- leancontext/integrations/proxy.py +90 -0
- leancontext/messages.py +152 -0
- leancontext/paging.py +104 -0
- leancontext/py.typed +0 -0
- leancontext/reducers/__init__.py +36 -0
- leancontext/reducers/base.py +19 -0
- leancontext/reducers/diff.py +54 -0
- leancontext/reducers/html.py +64 -0
- leancontext/reducers/json_data.py +61 -0
- leancontext/reducers/logs.py +91 -0
- leancontext/reducers/stacktrace.py +59 -0
- leancontext/reducers/table.py +32 -0
- leancontext/tokens.py +79 -0
- leancontext-2.0.0.dist-info/METADATA +224 -0
- leancontext-2.0.0.dist-info/RECORD +32 -0
- leancontext-2.0.0.dist-info/WHEEL +4 -0
- leancontext-2.0.0.dist-info/entry_points.txt +2 -0
- leancontext-2.0.0.dist-info/licenses/LICENSE +190 -0
leancontext/__init__.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""LeanContext — keep agent context lean (drop redundant boilerplate, keep the signal).
|
|
2
|
+
|
|
3
|
+
Deterministic, type-aware reduction of agent tool outputs *at the source*.
|
|
4
|
+
Cut LLM token cost without making the agent do less.
|
|
5
|
+
|
|
6
|
+
Quick start::
|
|
7
|
+
|
|
8
|
+
from leancontext import reduce
|
|
9
|
+
|
|
10
|
+
# 1) manual
|
|
11
|
+
payload = reduce(tool_output).text
|
|
12
|
+
|
|
13
|
+
# 2) decorator (one line per tool)
|
|
14
|
+
@reduce
|
|
15
|
+
def search_logs(q: str) -> str:
|
|
16
|
+
...
|
|
17
|
+
|
|
18
|
+
# 3) bulk wrap (one line for all tools, any framework)
|
|
19
|
+
tools = reduce(tools) # or leancontext.wrap(tools)
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
from .core import (
|
|
27
|
+
CONFIG,
|
|
28
|
+
Reduction,
|
|
29
|
+
clear_cache,
|
|
30
|
+
clear_reduction_hooks,
|
|
31
|
+
detect_kind,
|
|
32
|
+
disable,
|
|
33
|
+
enable,
|
|
34
|
+
is_disabled,
|
|
35
|
+
on_reduction,
|
|
36
|
+
reduce_text,
|
|
37
|
+
remove_reduction_hook,
|
|
38
|
+
)
|
|
39
|
+
from .cost import CostTracker, estimate_savings, set_price
|
|
40
|
+
from .integrations import (
|
|
41
|
+
wrap,
|
|
42
|
+
wrap_agno,
|
|
43
|
+
wrap_anthropic,
|
|
44
|
+
wrap_callable,
|
|
45
|
+
wrap_gemini,
|
|
46
|
+
wrap_langchain,
|
|
47
|
+
wrap_openai,
|
|
48
|
+
)
|
|
49
|
+
from .messages import detect_format, reduce_messages
|
|
50
|
+
from .tokens import active_tokenizer, count_tokens, set_token_counter, use_tiktoken
|
|
51
|
+
|
|
52
|
+
# Keep in sync with the distribution metadata (this module ships in the 2.0.0 wheel).
__version__ = "2.0.0"
|
|
53
|
+
|
|
54
|
+
__all__ = [
|
|
55
|
+
"reduce",
|
|
56
|
+
"reduce_text",
|
|
57
|
+
"reduce_messages",
|
|
58
|
+
"detect_format",
|
|
59
|
+
"Reduction",
|
|
60
|
+
"wrap",
|
|
61
|
+
"wrap_callable",
|
|
62
|
+
"wrap_openai",
|
|
63
|
+
"wrap_anthropic",
|
|
64
|
+
"wrap_gemini",
|
|
65
|
+
"wrap_agno",
|
|
66
|
+
"wrap_langchain",
|
|
67
|
+
"detect_kind",
|
|
68
|
+
"disable",
|
|
69
|
+
"enable",
|
|
70
|
+
"is_disabled",
|
|
71
|
+
"on_reduction",
|
|
72
|
+
"remove_reduction_hook",
|
|
73
|
+
"clear_reduction_hooks",
|
|
74
|
+
"clear_cache",
|
|
75
|
+
"count_tokens",
|
|
76
|
+
"set_token_counter",
|
|
77
|
+
"use_tiktoken",
|
|
78
|
+
"active_tokenizer",
|
|
79
|
+
"estimate_savings",
|
|
80
|
+
"CostTracker",
|
|
81
|
+
"set_price",
|
|
82
|
+
"CONFIG",
|
|
83
|
+
"__version__",
|
|
84
|
+
]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def reduce(content: Any = None, /, **opts) -> Any:
    """Single front door that dispatches on what ``content`` is.

    - ``reduce(text)``: a ``str`` is handed to :func:`reduce_text`, which
      returns a :class:`Reduction`.
    - ``reduce(callable)`` / ``reduce(tool_obj)`` / ``reduce(list_of_tools)``:
      anything else that is not ``None`` is handed to :func:`wrap`
      (this is also the bare ``@reduce`` decorator form).
    - ``reduce(**opts)``: with no positional argument, returns a one-argument
      callable that re-enters ``reduce`` carrying those options.
    """
    if isinstance(content, str):
        return reduce_text(content, **opts)

    if content is not None:
        # callable, tool, list of tools, or SDK client -> best-effort wrap (fail open)
        return wrap(content, **opts)

    # Only options were supplied: hand back a decorator/partial bound to them.
    def deferred(target: Any) -> Any:
        return reduce(target, **opts)

    return deferred
|
leancontext/cli.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""`leancontext reduce <file>` — see the saving on any payload from the terminal."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from .core import reduce_text
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the ``leancontext`` command line.

    ``leancontext reduce [file]`` reads the payload from ``file`` (UTF-8) or,
    when no file is given, from stdin, reduces it with :func:`reduce_text`,
    and prints a summary (kind, token counts, saving, fidelity, notes) to
    stderr. With ``--show`` the reduced payload is printed to stdout.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        The process exit status (always ``0`` on normal completion).
    """
    parser = argparse.ArgumentParser(prog="leancontext", description="Reduce a tool-output payload.")
    sub = parser.add_subparsers(dest="cmd", required=True)

    p = sub.add_parser("reduce", help="reduce a file (or stdin) and report the saving")
    p.add_argument("file", nargs="?", help="path to read; omit to read stdin")
    p.add_argument("--kind", default="auto", help="force a content kind (default: auto)")
    p.add_argument("--show", action="store_true", help="print the reduced payload")

    args = parser.parse_args(argv)
    if args.file:
        # Context manager so the handle is closed deterministically, even on a read error.
        with open(args.file, encoding="utf-8") as handle:
            text = handle.read()
    else:
        text = sys.stdin.read()

    r = reduce_text(text, kind=args.kind)
    # The summary goes to stderr so that stdout carries only the payload (--show).
    print(f"kind      : {r.kind}", file=sys.stderr)
    print(f"tokens    : {r.tokens_before} -> {r.tokens_after}", file=sys.stderr)
    print(f"saved     : {r.ratio:.0%}", file=sys.stderr)
    print(f"fidelity  : {r.fidelity:.0%}", file=sys.stderr)
    for note in r.notes:
        print(f"note      : {note}", file=sys.stderr)
    if args.show:
        print(r.text)
    return 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
if __name__ == "__main__":
|
|
36
|
+
raise SystemExit(main())
|
leancontext/core.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Dispatch, type detection, the Reduction result, and the fail-open guard.
|
|
2
|
+
|
|
3
|
+
LeanContext never breaks the caller. If the content type is unknown, a reducer
|
|
4
|
+
raises, or the saving or fidelity falls below the configured threshold, we return
|
|
5
|
+
the original text unchanged.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
from collections import OrderedDict
|
|
13
|
+
from collections.abc import Callable
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
|
|
16
|
+
from .fidelity import fidelity_score
|
|
17
|
+
from .reducers import REGISTRY
|
|
18
|
+
from .tokens import content_ref, count_tokens
|
|
19
|
+
|
|
20
|
+
# --- configuration -----------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class _Config:
    """Mutable settings consulted by the reduction pipeline in this module."""

    min_saving: float = 0.10      # require at least this fractional saving to apply
    min_fidelity: float = 0.85    # require at least this signal preservation to apply
    min_tokens: int = 50          # below this, not worth touching
    max_input_chars: int = 0      # if >0, payloads larger than this pass through untouched
    disabled: bool = False        # global off switch, read by is_disabled()
    cache_size: int = 2048        # max cached reductions; 0 disables the cache
    hooks: list = field(default_factory=list)  # telemetry callbacks invoked by _emit()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
CONFIG = _Config()
|
|
35
|
+
|
|
36
|
+
# A tool output is re-sent on every turn, so we reduce each unique payload once and
|
|
37
|
+
# reuse the result. Keyed by content hash + options; deterministic, so this is safe.
|
|
38
|
+
_CACHE: OrderedDict[tuple, Reduction] = OrderedDict()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def clear_cache() -> None:
    """Drop all cached reductions by emptying the module-level ``_CACHE``."""
    _CACHE.clear()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def disable() -> None:
    """Turn reduction off globally by setting ``CONFIG.disabled`` to ``True``."""
    CONFIG.disabled = True
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def enable() -> None:
    """Clear the ``CONFIG.disabled`` flag.

    The ``LEANCONTEXT_DISABLED`` environment variable is not touched, so
    :func:`is_disabled` may still report ``True`` afterwards.
    """
    CONFIG.disabled = False
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def is_disabled() -> bool:
    """Report whether reduction is switched off.

    True when ``CONFIG.disabled`` is set, or when the environment variable
    ``LEANCONTEXT_DISABLED`` equals ``"1"``.
    """
    flag = CONFIG.disabled
    if flag:
        return flag
    env_value = os.environ.get("LEANCONTEXT_DISABLED", "")
    return env_value == "1"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def on_reduction(callback: Callable[[Reduction], None]) -> Callable[[Reduction], None]:
    """Add ``callback`` to the telemetry hooks run after each *applied* reduction.

    Hooks accumulate: registering several is fine. The same callback is handed
    back so it can be kept and later given to :func:`remove_reduction_hook`.
    """
    registry = CONFIG.hooks
    registry.append(callback)
    return callback
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def remove_reduction_hook(callback: Callable[[Reduction], None]) -> None:
    """Unregister one occurrence of ``callback``; a callback that is not registered is ignored."""
    registry = CONFIG.hooks
    if callback in registry:
        registry.remove(callback)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def clear_reduction_hooks() -> None:
    """Remove every registered telemetry hook from ``CONFIG.hooks``."""
    CONFIG.hooks.clear()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _emit(reduction: Reduction) -> None:
    """Call every registered hook with ``reduction``, ignoring hook failures."""
    # Iterate over a snapshot so a hook that (un)registers hooks cannot disturb the loop.
    for hook in tuple(CONFIG.hooks):
        try:
            hook(reduction)
        except Exception:
            continue  # telemetry must never break the agent
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# --- result ------------------------------------------------------------------
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class Reduction:
    """Outcome of one reduction attempt, whether applied or passed through."""

    text: str            # the string to send to the model
    kind: str            # detected/used content kind ("log", "json", "passthrough", ...)
    tokens_before: int   # token count of the original
    tokens_after: int    # token count of ``text``
    fidelity: float      # 0..1 signal preserved
    ref: str             # content hash of the ORIGINAL (handle for paging/restore)
    original: str        # the untouched input text
    notes: list[str] = field(default_factory=list)

    @property
    def ratio(self) -> float:
        """Fraction of tokens removed; ``0.0`` when the original had no tokens."""
        before = self.tokens_before
        return (1.0 - self.tokens_after / before) if before != 0 else 0.0

    @property
    def tokens_saved(self) -> int:
        """Tokens removed, never negative."""
        delta = self.tokens_before - self.tokens_after
        return delta if delta > 0 else 0

    @property
    def applied(self) -> bool:
        """Whether a reducer's output was used (anything but the "passthrough" kind)."""
        return not self.kind == "passthrough"

    def __str__(self) -> str:
        # `str(reduction)` and f-strings yield the payload itself.
        return self.text
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# --- detection & dispatch ----------------------------------------------------
|
|
120
|
+
|
|
121
|
+
REDUCERS: dict[str, Callable[[str], tuple[str, list[str]]]] = {r.kind: r.reduce for r in REGISTRY}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def detect_kind(text: str) -> str:
    """Classify ``text`` using the registered reducers' detectors.

    Reducers in ``REGISTRY`` are consulted in order and the first whose
    ``detect`` accepts the text supplies the kind; if none does, the kind is
    ``"text"``.
    """
    matches = (candidate.kind for candidate in REGISTRY if candidate.detect(text))
    return next(matches, "text")
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _to_text(content: object) -> str:
    """Coerce arbitrary tool output into the string that will be reduced.

    Strings are returned unchanged. Dicts and lists are serialised as JSON
    (non-ASCII kept as-is). Anything else goes through ``str()``.

    A dict or list that JSON cannot serialise (e.g. it holds a set, a
    non-string-like key, or a circular reference) falls back to ``str()``
    instead of raising, so callers relying on the fail-open contract are not
    broken by an unusual payload.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (dict, list)):
        try:
            return json.dumps(content, ensure_ascii=False)
        except (TypeError, ValueError):
            # Not JSON-serialisable: use the plain repr-style text rather than crash.
            return str(content)
    return str(content)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _passthrough(original: str, before: int, ref: str, notes: list[str]) -> Reduction:
    """Build the no-op result: the original text, unchanged token count, full fidelity."""
    return Reduction(
        text=original,
        kind="passthrough",
        tokens_before=before,
        tokens_after=before,
        fidelity=1.0,
        ref=ref,
        original=original,
        notes=notes,
    )
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def reduce_text(
    content: object,
    *,
    kind: str = "auto",
    min_saving: float | None = None,
    min_fidelity: float | None = None,
) -> Reduction:
    """Reduce a single piece of content. Always safe: worst case is a no-op.

    Deterministic results are cached by content hash, so a tool output that is
    re-sent across turns is computed only once. Telemetry still fires on every
    call (cache hit or miss), so per-turn savings are recorded as before.

    Args:
        content: The payload; converted to text with ``_to_text``.
        kind: Content kind to force, or ``"auto"`` to detect it.
        min_saving: Override for ``CONFIG.min_saving`` (``None`` uses the config value).
        min_fidelity: Override for ``CONFIG.min_fidelity`` (``None`` uses the config value).

    Returns:
        A :class:`Reduction`; its kind is ``"passthrough"`` when nothing was applied.
    """
    min_saving = CONFIG.min_saving if min_saving is None else min_saving
    min_fidelity = CONFIG.min_fidelity if min_fidelity is None else min_fidelity

    original = _to_text(content)
    before = count_tokens(original)
    ref = content_ref(original)

    if is_disabled():  # global toggle; never cached so re-enabling takes effect at once
        return _passthrough(original, before, ref, ["disabled"])

    # Every setting that influences the outcome is part of the key, so a config
    # change cannot serve a result computed under different thresholds.
    key = (ref, kind, min_saving, min_fidelity, CONFIG.min_tokens, CONFIG.max_input_chars)
    use_cache = CONFIG.cache_size > 0

    if use_cache and key in _CACHE:
        result = _CACHE[key]
        _CACHE.move_to_end(key)  # mark as most recently used
    else:
        result = _compute(original, before, ref, kind, min_saving, min_fidelity)
        if use_cache:
            _CACHE[key] = result
            # Loop rather than pop once: if cache_size was lowered at runtime,
            # trim all the way down to the new bound instead of staying oversized.
            while len(_CACHE) > CONFIG.cache_size:
                _CACHE.popitem(last=False)  # evict least-recently-used

    if result.applied:
        _emit(result)
    return result
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _compute(original: str, before: int, ref: str, kind: str,
             min_saving: float, min_fidelity: float) -> Reduction:
    """Run detection + the typed reducer. Fail-open: any problem returns the original.

    The guard covers every pluggable step: the detector, the reducer (including
    a malformed return value), and the token/fidelity scoring of its output.

    Args:
        original: Text to reduce.
        before: Token count of ``original``.
        ref: Content hash of ``original``.
        kind: Kind to force, or ``"auto"`` to detect.
        min_saving: Minimum fractional token saving required to apply the reduction.
        min_fidelity: Minimum fidelity score required to apply the reduction.

    Returns:
        The applied :class:`Reduction`, or a passthrough one with explanatory notes.
    """
    def passthrough(notes: list[str]) -> Reduction:
        return _passthrough(original, before, ref, notes)

    if CONFIG.max_input_chars and len(original) > CONFIG.max_input_chars:
        return passthrough(["above max_input_chars"])
    if before < CONFIG.min_tokens:
        return passthrough(["below min_tokens"])

    try:
        detected = detect_kind(original) if kind == "auto" else kind
    except Exception as exc:  # fail open on any detector bug
        return passthrough([f"detector error: {exc!r}"])

    reducer = REDUCERS.get(detected)
    if reducer is None:
        return passthrough([f"no reducer for kind={detected!r}"])

    try:
        text, reducer_notes = reducer(original)
        # Copy so the note appended below never mutates a list the reducer still owns.
        notes = list(reducer_notes)
    except Exception as exc:  # fail open on any reducer bug
        return passthrough([f"reducer error: {exc!r}"])

    try:
        after = count_tokens(text)
        saving = 0.0 if before == 0 else 1.0 - after / before
        fid = fidelity_score(original, text, detected)
    except Exception as exc:  # fail open if the reducer's output cannot be scored
        return passthrough([*notes, f"scoring error: {exc!r}"])

    if saving < min_saving or fid < min_fidelity:
        notes.append(f"reverted: saving={saving:.0%}, fidelity={fid:.0%} (below threshold)")
        return passthrough(notes)

    return Reduction(text, detected, before, after, fid, ref, original, notes)
|
leancontext/cost.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Cost accounting: turn token savings into dollars.
|
|
2
|
+
|
|
3
|
+
Reports two things:
|
|
4
|
+
1. Dollars saved, from the input-token price (input dominates agent cost).
|
|
5
|
+
2. ``cache_safe = True``: reductions are deterministic and content-addressed, so a
|
|
6
|
+
reduced block serialises to the same bytes every turn and the provider's
|
|
7
|
+
prompt-cache prefix stays intact.
|
|
8
|
+
|
|
9
|
+
Prices change often, so the built-in table is small and overridable: pass an
|
|
10
|
+
explicit ``input_price_per_mtok`` or register prices with :func:`set_price`. With
|
|
11
|
+
no known price, token savings are still reported and ``usd_saved`` is ``None``.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from collections.abc import Callable
|
|
17
|
+
|
|
18
|
+
#: USD per 1M tokens (input, output). Indicative — override via set_price().
|
|
19
|
+
PRICING: dict[str, tuple[float, float | None]] = {
|
|
20
|
+
"claude-sonnet-4-6": (3.0, 15.0), # verified 2026-06; others: register your own
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def set_price(model: str, input_per_mtok: float, output_per_mtok: float | None = None) -> None:
    """Register or overwrite the ``PRICING`` entry for ``model``.

    Prices are USD per 1M tokens, stored as an ``(input, output)`` tuple; the
    output price may be left as ``None``.
    """
    PRICING[model] = (input_per_mtok, output_per_mtok)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _input_price(model: str | None, override: float | None) -> float | None:
    """Resolve the input-token price (USD per 1M tokens) to use.

    Resolution order:
      1. ``override`` when given (even ``0.0``).
      2. An exact ``PRICING`` entry for ``model``.
      3. The ``PRICING`` key that is the *longest* prefix of ``model``, so a
         more specific registration wins over a generic one regardless of
         the order in which they were registered.

    Returns:
        The price, or ``None`` when no model is given or nothing matches.
    """
    if override is not None:
        return override
    if not model:
        return None

    exact = PRICING.get(model)
    if exact is not None:
        return exact[0]

    prefixes = [key for key in PRICING if model.startswith(key)]
    if not prefixes:
        return None
    # The most specific (longest) prefix is the best available match.
    return PRICING[max(prefixes, key=len)][0]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def estimate_savings(reduction, model: str | None = None,
                     input_price_per_mtok: float | None = None) -> dict:
    """Summarise one reduction as token counts plus an optional dollar figure.

    The USD amount is the saved tokens priced at the input rate resolved by
    ``_input_price``; it is ``None`` when no price is known.
    """
    tokens_saved = reduction.tokens_saved
    unit_price = _input_price(model, input_price_per_mtok)

    if unit_price is None:
        usd_saved = None
    else:
        usd_saved = round(tokens_saved / 1_000_000 * unit_price, 6)

    summary = {
        "kind": reduction.kind,
        "tokens_before": reduction.tokens_before,
        "tokens_after": reduction.tokens_after,
        "tokens_saved": tokens_saved,
        "usd_saved": usd_saved,
        "cache_safe": True,  # deterministic + content-addressed → prefix preserved
    }
    return summary
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class CostTracker:
    """Accumulate savings across many reductions. Install as a reduction hook.

        tracker = CostTracker(model="claude-sonnet-4-6").install()
        ... run your agent ...
        print(tracker.report())
    """

    def __init__(self, model: str | None = None, input_price_per_mtok: float | None = None):
        """Start with zeroed counters.

        Args:
            model: Model name used to look up the input price.
            input_price_per_mtok: Explicit USD price per 1M input tokens; takes
                precedence over the model lookup.
        """
        self.model = model
        self.price = input_price_per_mtok
        self.reductions = 0
        self.tokens_before = 0
        self.tokens_after = 0
        self.tokens_saved = 0
        self.usd_saved = 0.0
        # Resolved once here: a price registered after construction is not picked up.
        self.has_price = _input_price(model, input_price_per_mtok) is not None
        self._hook: Callable | None = None

    def _on(self, r) -> None:
        """Hook body: fold one reduction's token counts (and USD, if priced) into the totals."""
        self.reductions += 1
        self.tokens_before += r.tokens_before
        self.tokens_after += r.tokens_after
        self.tokens_saved += r.tokens_saved
        if self.has_price:
            usd = estimate_savings(r, self.model, self.price)["usd_saved"]
            if usd is not None:  # price may no longer resolve; keep the token totals intact
                self.usd_saved += usd

    def install(self) -> CostTracker:
        """Register this tracker as a reduction hook and return ``self``.

        Idempotent: calling it again while installed does nothing, so each
        reduction is counted once and a single ``uninstall()`` fully detaches.
        """
        from .core import on_reduction
        if self._hook is None:
            self._hook = on_reduction(self._on)
        return self

    def uninstall(self) -> None:
        """Unregister the hook if it is installed; otherwise do nothing."""
        from .core import remove_reduction_hook
        if self._hook is not None:
            remove_reduction_hook(self._hook)
            self._hook = None

    def report(self) -> dict:
        """Return the accumulated totals.

        ``ratio`` is the overall fraction of tokens removed (rounded to 4
        places); ``usd_saved`` is ``None`` when no price was known at construction.
        """
        ratio = 0.0 if self.tokens_before == 0 else 1.0 - self.tokens_after / self.tokens_before
        return {
            "reductions": self.reductions,
            "tokens_before": self.tokens_before,
            "tokens_after": self.tokens_after,
            "tokens_saved": self.tokens_saved,
            "ratio": round(ratio, 4),
            "usd_saved": round(self.usd_saved, 4) if self.has_price else None,
            "cache_safe": True,
        }
|
leancontext/fidelity.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Fidelity scoring: did the signal survive the reduction?
|
|
2
|
+
|
|
3
|
+
The score is per content type. For logs and text we check that error/anomaly
|
|
4
|
+
lines and the values on them are kept. For JSON, diff, and stack traces we check
|
|
5
|
+
the type-specific invariants that make those reductions safe (all values, all
|
|
6
|
+
change lines, the exception). If the score falls below the threshold, the core
|
|
7
|
+
reverts to the original.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import re
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
_SEVERITY = re.compile(r"(?i)\b(error|fatal|critical|exception|panic|traceback|warn|warning)\b")
|
|
17
|
+
_UUID = re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b")
|
|
18
|
+
_HEX = re.compile(r"0x[0-9a-fA-F]+")
|
|
19
|
+
_PATH = re.compile(r"(?:/[\w.\-]+){2,}")
|
|
20
|
+
_NUM = re.compile(r"\d+(?:\.\d+)?")
|
|
21
|
+
_QUOTE = re.compile(r'"[^"]*"')
|
|
22
|
+
|
|
23
|
+
_VALUE_PATTERNS = (_UUID, _HEX, _PATH, _NUM, _QUOTE)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _norm(line: str) -> str:
    """Collapse every whitespace run in ``line`` to one space and trim both ends."""
    words = line.split()
    return " ".join(words)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def salient_items(text: str) -> set[str]:
    """Collect what a log/text reduction must not lose.

    For every line that matches the severity pattern, the result holds the
    whitespace-normalised line itself plus every value on it matched by the
    value patterns (UUIDs, hex literals, paths, numbers, quoted strings).
    """
    found: set[str] = set()
    for line in text.splitlines():
        if not _SEVERITY.search(line):
            continue  # only severity-bearing lines contribute
        found.add(_norm(line))
        for pattern in _VALUE_PATTERNS:
            found.update(pattern.findall(line))
    return found
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _signal_score(original: str, reduced: str) -> float:
    """Share of the original's salient items that survive in ``reduced`` (logs/text/html).

    An item survives if it equals a whitespace-normalised line of the reduced
    text or occurs anywhere in it as a substring. With nothing salient in the
    original the score is ``1.0``.
    """
    wanted = salient_items(original)
    if not wanted:
        return 1.0
    normalised_lines = {_norm(line) for line in reduced.splitlines()}
    survivors = [
        item for item in wanted
        if item in normalised_lines or item in reduced
    ]
    return len(survivors) / len(wanted)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _iter_scalars(data: Any):
    """Yield the string and number leaves of a parsed JSON value, depth first.

    Dict values and list items are descended into; booleans and ``None`` are skipped.
    """
    if isinstance(data, dict):
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        # Leaf: bool is an int subclass, so it has to be excluded explicitly.
        if isinstance(data, (str, int, float)) and not isinstance(data, bool):
            yield data
        return

    for child in children:
        yield from _iter_scalars(child)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _json_fidelity(original: str, reduced: str) -> float:
    """Share of the original's JSON scalars (strings and numbers) found in ``reduced``.

    Each scalar is rendered with ``str()`` and looked up as a substring; scalars
    that render empty are ignored. When the original does not parse as JSON, or
    has no scalars to check, the score is ``1.0``.
    """
    try:
        parsed = json.loads(original)
    except Exception:
        return 1.0

    rendered = [text for text in map(str, _iter_scalars(parsed)) if text]
    if not rendered:
        return 1.0

    present = [text for text in rendered if text in reduced]
    return len(present) / len(rendered)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _diff_fidelity(original: str, reduced: str) -> float:
    """Fraction of changed (+/-) lines preserved verbatim.

    A change line starts with ``+`` or ``-`` but is not a ``+++``/``---`` file
    header. Blank lines are not change lines. With no change lines in the
    original the score is ``1.0``.
    """
    changes = [
        ln for ln in original.splitlines()
        # startswith, not `ln[:1] in "+-"`: the empty string is "in" every
        # string, which would count blank lines as changes.
        if ln.startswith(("+", "-")) and not ln.startswith(("+++", "---"))
    ]
    if not changes:
        return 1.0
    reduced_lines = set(reduced.splitlines())
    kept = sum(1 for ln in changes if ln in reduced_lines)
    return kept / len(changes)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _stacktrace_fidelity(original: str, reduced: str) -> float:
    """Score 1.0 if the last non-empty line of the original (the exception line) is in ``reduced``.

    Otherwise the score is 0.0; an original with no non-empty lines scores 1.0.
    """
    non_blank_from_end = (ln for ln in reversed(original.splitlines()) if ln.strip())
    exception_line = next(non_blank_from_end, None)
    if exception_line is None:
        return 1.0
    if exception_line in reduced:
        return 1.0
    return 0.0
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
_TYPED = {
|
|
97
|
+
"json": _json_fidelity,
|
|
98
|
+
"diff": _diff_fidelity,
|
|
99
|
+
"stacktrace": _stacktrace_fidelity,
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def fidelity_score(original: str, reduced: str, kind: str = "text") -> float:
    """Rate how much of the original's signal the reduction kept, per content kind.

    Kinds with a dedicated check in ``_TYPED`` (json, diff, stacktrace) use it;
    every other kind uses the generic salient-item score.
    """
    scorer = _TYPED.get(kind)
    if scorer is None:
        return _signal_score(original, reduced)
    return scorer(original, reduced)
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from .clients import wrap_anthropic, wrap_gemini, wrap_openai
|
|
2
|
+
from .decorator import wrap, wrap_callable
|
|
3
|
+
from .frameworks import wrap_agno, wrap_langchain
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"wrap", "wrap_callable", "wrap_openai", "wrap_anthropic", "wrap_gemini",
|
|
7
|
+
"wrap_agno", "wrap_langchain",
|
|
8
|
+
]
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Shared plumbing for integration wrappers.
|
|
2
|
+
|
|
3
|
+
One place for the fail-open "wrap a ``create(**kwargs)`` callable, reduce its
|
|
4
|
+
``messages``, and mark it so we don't double-wrap" pattern, reused by the OpenAI/
|
|
5
|
+
Anthropic client wrappers, the Anthropic-native wrapper, and LiteLLM.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import functools
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from ..messages import reduce_messages
|
|
15
|
+
|
|
16
|
+
_MARKER = "__leancontext_wrapped__"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def is_wrapped(fn: Any) -> bool:
    """Return ``True`` only if ``fn`` carries the marker set by :func:`mark`.

    The marker must be exactly ``True``. Objects that fabricate a value for any
    attribute name (mocks, dynamic proxies) would otherwise look "already
    wrapped" and be silently skipped, and the result would not be a real bool.
    """
    return getattr(fn, _MARKER, False) is True
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def mark(fn: Callable) -> Callable:
    """Set the wrapped-marker attribute on ``fn`` to ``True`` and return ``fn``."""
    setattr(fn, _MARKER, True)
    return fn
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def reduce_messages_in(mapping: Any, fmt: str, opts: dict, key: str = "messages") -> None:
    """Replace ``mapping[key]`` with its reduced form, in place and fail-open.

    Nothing happens unless ``mapping`` is a dict whose ``key`` entry is a list.
    If the reduction raises, the entry is left as it was.

    ``key`` is ``messages`` for OpenAI/Anthropic, ``contents`` for Gemini.
    """
    if not isinstance(mapping, dict):
        return
    if not isinstance(mapping.get(key), list):
        return
    try:
        mapping[key] = reduce_messages(mapping[key], fmt=fmt, **opts)
    except Exception:
        return  # fail open
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def wrap_messages_create(create: Callable, *, fmt: str, opts: dict, key: str = "messages",
                         reduce: bool = True,
                         before: Callable[[dict], None] | None = None) -> Callable:
    """Return a drop-in replacement for ``create(**kwargs)`` that reduces its messages first.

    On each call the replacement reduces ``kwargs[key]`` (when ``reduce`` is
    true), then runs ``before(kwargs)`` if given (e.g. to inject provider
    params/headers), then calls through to ``create``. A failure in ``before``
    is ignored.

    Idempotent: an already-wrapped callable is returned unchanged.
    """
    if is_wrapped(create):
        return create

    def _run_before_hook(call_kwargs: dict) -> None:
        # Optional post-reduction hook; it must never block the real call.
        if before is None:
            return
        try:
            before(call_kwargs)
        except Exception:
            pass  # fail open

    @functools.wraps(create)
    def reducing_create(*args, **kwargs):
        if reduce:
            reduce_messages_in(kwargs, fmt, opts, key=key)
        _run_before_hook(kwargs)
        return create(*args, **kwargs)

    return mark(reducing_create)
|