leancontext 2.0.5__tar.gz → 2.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {leancontext-2.0.5 → leancontext-2.0.6}/CHANGELOG.md +28 -1
- {leancontext-2.0.5 → leancontext-2.0.6}/PKG-INFO +20 -9
- {leancontext-2.0.5 → leancontext-2.0.6}/README.md +19 -8
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/__init__.py +12 -1
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/cost.py +10 -6
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/fidelity.py +12 -2
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/_common.py +20 -9
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/litellm.py +4 -3
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/messages.py +27 -8
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/reducers/json_data.py +10 -8
- {leancontext-2.0.5 → leancontext-2.0.6}/pyproject.toml +1 -1
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_core.py +10 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_gateway.py +14 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_messages.py +22 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.editorconfig +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.github/CODEOWNERS +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.github/dependabot.yml +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.github/workflows/ci.yml +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.github/workflows/codeql.yml +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.github/workflows/publish.yml +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.gitignore +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/.pre-commit-config.yaml +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/AGENTS.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/CITATION.cff +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/CODE_OF_CONDUCT.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/CONTRIBUTING.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/LICENSE +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/RELEASING.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/SECURITY.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/SUPPORT.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/assets/logo.png +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/assets/logo.svg +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/bench.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/demo.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/docs/ARCHITECTURE.md +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/examples/basic_usage.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/examples/validate_caching.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/cli.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/core.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/__init__.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/anthropic_native.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/clients.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/decorator.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/frameworks.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/mcp_server.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/otel.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/integrations/proxy.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/paging.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/py.typed +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/reducers/__init__.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/reducers/base.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/reducers/diff.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/reducers/html.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/reducers/logs.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/reducers/stacktrace.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/reducers/table.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/leancontext/tokens.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_cache.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_concurrency.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_differentiators.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_fidelity.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_frameworks.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_gemini.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_limits.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_mcp.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_otel.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_paging.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_proxy.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_reducers.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_table.py +0 -0
- {leancontext-2.0.5 → leancontext-2.0.6}/tests/test_tokens.py +0 -0
|
@@ -5,6 +5,26 @@ All notable changes to this project are documented here. The format is based on
|
|
|
5
5
|
|
|
6
6
|
## [Unreleased]
|
|
7
7
|
|
|
8
|
+
## [2.0.6] - 2026-06-21
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- JSON reducer is now lossless on every value: rows are emitted as JSON arrays with
|
|
12
|
+
the field names factored into the header once, so values containing the column
|
|
13
|
+
delimiter, quotes, or newlines no longer corrupt the columnar layout. The JSON
|
|
14
|
+
fidelity check matches values in their encoded form, so it sees such corruption.
|
|
15
|
+
- Gateway paths (LiteLLM proxy + SDK patch) now reduce OpenAI Responses requests
|
|
16
|
+
(`input=`), not just chat (`messages=`).
|
|
17
|
+
- `reduce_messages` dispatches per item, so a list mixing message formats reduces
|
|
18
|
+
every tool output instead of only those matching the first format detected.
|
|
19
|
+
- OpenAI Responses tool outputs shaped as a list of content parts are now reduced.
|
|
20
|
+
- `__version__` is read from the installed package metadata (was a stale `0.0.1`).
|
|
21
|
+
- `CostTracker` running totals are guarded by a lock for multi-threaded agents.
|
|
22
|
+
|
|
23
|
+
### Docs
|
|
24
|
+
- README install commands use the published package (`pip install leancontext`),
|
|
25
|
+
document the `mcp` extra, note which tokenizer the benchmark uses, and state
|
|
26
|
+
which integrations are CI-verified vs best-effort.
|
|
27
|
+
|
|
8
28
|
## [2.0.5] - 2026-06-21
|
|
9
29
|
|
|
10
30
|
### Security
|
|
@@ -30,6 +50,11 @@ All notable changes to this project are documented here. The format is based on
|
|
|
30
50
|
- Lower the minimum Python from 3.14 to 3.10 so the package installs on current
|
|
31
51
|
interpreters (the code already supports 3.10+; CI runs 3.10 through 3.14).
|
|
32
52
|
|
|
53
|
+
## [2.0.1] - 2026-06-21
|
|
54
|
+
|
|
55
|
+
Intermediate release during the initial PyPI rollout (Python version metadata),
|
|
56
|
+
superseded by 2.0.2. Version 2.0.3 was never published.
|
|
57
|
+
|
|
33
58
|
## [2.0.0] - 2026-06-21
|
|
34
59
|
|
|
35
60
|
### Added
|
|
@@ -53,8 +78,10 @@ All notable changes to this project are documented here. The format is based on
|
|
|
53
78
|
- Targets Python 3.14; ruff, mypy, and coverage run in CI; examples, contributor, and
|
|
54
79
|
security docs included.
|
|
55
80
|
|
|
56
|
-
[Unreleased]: https://github.com/pankajniet/LeanContext/compare/v2.0.5...HEAD
|
|
81
|
+
[Unreleased]: https://github.com/pankajniet/LeanContext/compare/v2.0.6...HEAD
|
|
82
|
+
[2.0.6]: https://github.com/pankajniet/LeanContext/releases/tag/v2.0.6
|
|
57
83
|
[2.0.5]: https://github.com/pankajniet/LeanContext/releases/tag/v2.0.5
|
|
58
84
|
[2.0.4]: https://github.com/pankajniet/LeanContext/releases/tag/v2.0.4
|
|
59
85
|
[2.0.2]: https://github.com/pankajniet/LeanContext/releases/tag/v2.0.2
|
|
86
|
+
[2.0.1]: https://github.com/pankajniet/LeanContext/releases/tag/v2.0.1
|
|
60
87
|
[2.0.0]: https://github.com/pankajniet/LeanContext/releases/tag/v2.0.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: leancontext
|
|
3
|
-
Version: 2.0.5
|
|
3
|
+
Version: 2.0.6
|
|
4
4
|
Summary: Deterministic, type-aware reduction of agent tool outputs at the source. Cut LLM token cost without making the agent do less.
|
|
5
5
|
Project-URL: Homepage, https://github.com/pankajniet/LeanContext
|
|
6
6
|
Project-URL: Repository, https://github.com/pankajniet/LeanContext
|
|
@@ -84,14 +84,18 @@ $ python bench.py
|
|
|
84
84
|
sample kind before after saved fidelity
|
|
85
85
|
-----------------------------------------------------------------
|
|
86
86
|
log (incident) log 52642 100 100% 100%
|
|
87
|
-
json (RAG chunks) json 1862
|
|
87
|
+
json (RAG chunks) json 1862 1391 25% 100%
|
|
88
88
|
html (web fetch) html 1672 1093 35% 100%
|
|
89
89
|
diff (patch) diff 639 81 87% 100%
|
|
90
90
|
stacktrace stacktrace 896 94 90% 100%
|
|
91
91
|
-----------------------------------------------------------------
|
|
92
|
-
TOTAL 57711
|
|
92
|
+
TOTAL 57711 2759 95%
|
|
93
93
|
```
|
|
94
94
|
|
|
95
|
+
Counts above use the built-in heuristic tokenizer (≈4 chars/token). Install the
|
|
96
|
+
`tiktoken` extra for exact model token counts — the ratios are similar (~92% on
|
|
97
|
+
this sample). The reduced text is identical either way.
|
|
98
|
+
|
|
95
99
|
A real incident log, before and after:
|
|
96
100
|
|
|
97
101
|
```text
|
|
@@ -128,10 +132,11 @@ errors, anomalies, and identifiers, and collapses the rest.
|
|
|
128
132
|
## Install
|
|
129
133
|
|
|
130
134
|
```bash
|
|
131
|
-
pip install
|
|
132
|
-
pip install
|
|
133
|
-
pip install
|
|
134
|
-
pip install
|
|
135
|
+
pip install leancontext # core, standard library only
|
|
136
|
+
pip install "leancontext[integrations]" # openai, anthropic, litellm, fastapi adapters
|
|
137
|
+
pip install "leancontext[otel]" # OpenTelemetry metrics
|
|
138
|
+
pip install "leancontext[mcp]" # MCP server
|
|
139
|
+
pip install "leancontext[tiktoken]" # exact token counts (used automatically when present)
|
|
135
140
|
```
|
|
136
141
|
|
|
137
142
|
## Use it
|
|
@@ -176,6 +181,11 @@ r.fidelity # 0..1 signal preserved
|
|
|
176
181
|
| Frameworks | LangChain, LangGraph, Agno via `wrap(tools)`; any framework via `@reduce` on tool functions (sync or async) |
|
|
177
182
|
| MCP server | `python -m leancontext.integrations.mcp_server` — reduce / expand / stats over stdio |
|
|
178
183
|
|
|
184
|
+
CI exercises OpenAI (chat + Responses), Anthropic, LiteLLM, the standalone proxy, OpenTelemetry,
|
|
185
|
+
and the MCP server against the real packages. Message reduction for all formats (including Gemini)
|
|
186
|
+
is unit-tested directly. The framework adapters (LangChain / LangGraph / Agno) and the SDK-level
|
|
187
|
+
Gemini client wrapper are provided best-effort and are not yet covered in CI against the live SDKs.
|
|
188
|
+
|
|
179
189
|
## Reducers
|
|
180
190
|
|
|
181
191
|
| Kind | What it does |
|
|
@@ -185,6 +195,7 @@ r.fidelity # 0..1 signal preserved
|
|
|
185
195
|
| `diff` | Keep all change, hunk, and header lines, collapse unchanged context |
|
|
186
196
|
| `stacktrace` | Keep the exception and boundary frames, collapse the deep middle |
|
|
187
197
|
| `html` | Strip tags, scripts, and styles, keep visible text and links |
|
|
198
|
+
| `table` | Collapse whitespace-aligned command-line tables, keep header and data |
|
|
188
199
|
|
|
189
200
|
Anything else, or any payload below the size, saving, or fidelity thresholds, passes through unchanged.
|
|
190
201
|
|
|
@@ -216,8 +227,8 @@ leancontext.use_tiktoken("gpt-4o") # force a specific model's tokeniz
|
|
|
216
227
|
|
|
217
228
|
## Roadmap
|
|
218
229
|
|
|
219
|
-
|
|
220
|
-
|
|
230
|
+
CI-verified LangChain / LlamaIndex / CrewAI / Agno adapters, accurate provider tokenizers by
|
|
231
|
+
default, and broader Anthropic native interop.
|
|
221
232
|
|
|
222
233
|
## Contributing
|
|
223
234
|
|
|
@@ -38,14 +38,18 @@ $ python bench.py
|
|
|
38
38
|
sample kind before after saved fidelity
|
|
39
39
|
-----------------------------------------------------------------
|
|
40
40
|
log (incident) log 52642 100 100% 100%
|
|
41
|
-
json (RAG chunks) json 1862
|
|
41
|
+
json (RAG chunks) json 1862 1391 25% 100%
|
|
42
42
|
html (web fetch) html 1672 1093 35% 100%
|
|
43
43
|
diff (patch) diff 639 81 87% 100%
|
|
44
44
|
stacktrace stacktrace 896 94 90% 100%
|
|
45
45
|
-----------------------------------------------------------------
|
|
46
|
-
TOTAL 57711
|
|
46
|
+
TOTAL 57711 2759 95%
|
|
47
47
|
```
|
|
48
48
|
|
|
49
|
+
Counts above use the built-in heuristic tokenizer (≈4 chars/token). Install the
|
|
50
|
+
`tiktoken` extra for exact model token counts — the ratios are similar (~92% on
|
|
51
|
+
this sample). The reduced text is identical either way.
|
|
52
|
+
|
|
49
53
|
A real incident log, before and after:
|
|
50
54
|
|
|
51
55
|
```text
|
|
@@ -82,10 +86,11 @@ errors, anomalies, and identifiers, and collapses the rest.
|
|
|
82
86
|
## Install
|
|
83
87
|
|
|
84
88
|
```bash
|
|
85
|
-
pip install
|
|
86
|
-
pip install
|
|
87
|
-
pip install
|
|
88
|
-
pip install
|
|
89
|
+
pip install leancontext # core, standard library only
|
|
90
|
+
pip install "leancontext[integrations]" # openai, anthropic, litellm, fastapi adapters
|
|
91
|
+
pip install "leancontext[otel]" # OpenTelemetry metrics
|
|
92
|
+
pip install "leancontext[mcp]" # MCP server
|
|
93
|
+
pip install "leancontext[tiktoken]" # exact token counts (used automatically when present)
|
|
89
94
|
```
|
|
90
95
|
|
|
91
96
|
## Use it
|
|
@@ -130,6 +135,11 @@ r.fidelity # 0..1 signal preserved
|
|
|
130
135
|
| Frameworks | LangChain, LangGraph, Agno via `wrap(tools)`; any framework via `@reduce` on tool functions (sync or async) |
|
|
131
136
|
| MCP server | `python -m leancontext.integrations.mcp_server` — reduce / expand / stats over stdio |
|
|
132
137
|
|
|
138
|
+
CI exercises OpenAI (chat + Responses), Anthropic, LiteLLM, the standalone proxy, OpenTelemetry,
|
|
139
|
+
and the MCP server against the real packages. Message reduction for all formats (including Gemini)
|
|
140
|
+
is unit-tested directly. The framework adapters (LangChain / LangGraph / Agno) and the SDK-level
|
|
141
|
+
Gemini client wrapper are provided best-effort and are not yet covered in CI against the live SDKs.
|
|
142
|
+
|
|
133
143
|
## Reducers
|
|
134
144
|
|
|
135
145
|
| Kind | What it does |
|
|
@@ -139,6 +149,7 @@ r.fidelity # 0..1 signal preserved
|
|
|
139
149
|
| `diff` | Keep all change, hunk, and header lines, collapse unchanged context |
|
|
140
150
|
| `stacktrace` | Keep the exception and boundary frames, collapse the deep middle |
|
|
141
151
|
| `html` | Strip tags, scripts, and styles, keep visible text and links |
|
|
152
|
+
| `table` | Collapse whitespace-aligned command-line tables, keep header and data |
|
|
142
153
|
|
|
143
154
|
Anything else, or any payload below the size, saving, or fidelity thresholds, passes through unchanged.
|
|
144
155
|
|
|
@@ -170,8 +181,8 @@ leancontext.use_tiktoken("gpt-4o") # force a specific model's tokeniz
|
|
|
170
181
|
|
|
171
182
|
## Roadmap
|
|
172
183
|
|
|
173
|
-
|
|
174
|
-
|
|
184
|
+
CI-verified LangChain / LlamaIndex / CrewAI / Agno adapters, accurate provider tokenizers by
|
|
185
|
+
default, and broader Anthropic native interop.
|
|
175
186
|
|
|
176
187
|
## Contributing
|
|
177
188
|
|
|
@@ -49,7 +49,18 @@ from .integrations import (
|
|
|
49
49
|
from .messages import detect_format, reduce_messages
|
|
50
50
|
from .tokens import active_tokenizer, count_tokens, set_token_counter, use_tiktoken
|
|
51
51
|
|
|
52
|
-
|
|
52
|
+
# Single source of truth is the installed package metadata (pyproject version);
|
|
53
|
+
# the literal is only a fallback for running straight from a source tree.
|
|
54
|
+
try:
|
|
55
|
+
from importlib.metadata import PackageNotFoundError
|
|
56
|
+
from importlib.metadata import version as _pkg_version
|
|
57
|
+
|
|
58
|
+
try:
|
|
59
|
+
__version__ = _pkg_version("leancontext")
|
|
60
|
+
except PackageNotFoundError:
|
|
61
|
+
__version__ = "2.0.6"
|
|
62
|
+
except ImportError: # pragma: no cover - importlib.metadata is stdlib on 3.10+
|
|
63
|
+
__version__ = "2.0.6"
|
|
53
64
|
|
|
54
65
|
__all__ = [
|
|
55
66
|
"reduce",
|
|
@@ -13,6 +13,7 @@ no known price, token savings are still reported and ``usd_saved`` is ``None``.
|
|
|
13
13
|
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
|
+
import threading
|
|
16
17
|
from collections.abc import Callable
|
|
17
18
|
|
|
18
19
|
#: USD per 1M tokens (input, output). Indicative — override via set_price().
|
|
@@ -71,14 +72,17 @@ class CostTracker:
|
|
|
71
72
|
self.usd_saved = 0.0
|
|
72
73
|
self.has_price = _input_price(model, input_price_per_mtok) is not None
|
|
73
74
|
self._hook: Callable | None = None
|
|
75
|
+
self._lock = threading.Lock()
|
|
74
76
|
|
|
75
77
|
def _on(self, r) -> None:
|
|
76
|
-
|
|
77
|
-
self.
|
|
78
|
-
self.
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
self.
|
|
78
|
+
# The hook fires from every reducing thread, so guard the running totals.
|
|
79
|
+
usd = estimate_savings(r, self.model, self.price)["usd_saved"] if self.has_price else 0.0
|
|
80
|
+
with self._lock:
|
|
81
|
+
self.reductions += 1
|
|
82
|
+
self.tokens_before += r.tokens_before
|
|
83
|
+
self.tokens_after += r.tokens_after
|
|
84
|
+
self.tokens_saved += r.tokens_saved
|
|
85
|
+
self.usd_saved += usd
|
|
82
86
|
|
|
83
87
|
def install(self) -> CostTracker:
|
|
84
88
|
from .core import on_reduction
|
|
@@ -60,12 +60,22 @@ def _iter_scalars(data: Any):
|
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
def _json_fidelity(original: str, reduced: str) -> float:
|
|
63
|
-
"""Fraction of JSON scalar values (strings and numbers) preserved in the output.
|
|
63
|
+
"""Fraction of JSON scalar values (strings and numbers) preserved in the output.
|
|
64
|
+
|
|
65
|
+
Values are matched in their JSON-encoded form (the reducer emits them that way),
|
|
66
|
+
so a value containing a delimiter, quote, or newline only counts as preserved if
|
|
67
|
+
its exact escaped bytes survive — the check sees structural corruption, not just
|
|
68
|
+
whether the characters appear somewhere.
|
|
69
|
+
"""
|
|
64
70
|
try:
|
|
65
71
|
data = json.loads(original)
|
|
66
72
|
except Exception:
|
|
67
73
|
return 1.0
|
|
68
|
-
values = [
|
|
74
|
+
values = [
|
|
75
|
+
json.dumps(v, ensure_ascii=False).strip('"')
|
|
76
|
+
for v in _iter_scalars(data)
|
|
77
|
+
]
|
|
78
|
+
values = [v for v in values if v]
|
|
69
79
|
if not values:
|
|
70
80
|
return 1.0
|
|
71
81
|
kept = sum(1 for v in values if v in reduced)
|
|
@@ -25,19 +25,30 @@ def mark(fn: Callable) -> Callable:
|
|
|
25
25
|
return fn
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
#: Request keys that can carry a message/tool-output list across providers:
|
|
29
|
+
#: ``messages`` (OpenAI chat / Anthropic), ``input`` (OpenAI Responses API).
|
|
30
|
+
_LIST_KEYS = ("messages", "input")
|
|
30
31
|
|
|
31
|
-
|
|
32
|
+
|
|
33
|
+
def reduce_messages_in(mapping: Any, fmt: str, opts: dict, key: str | None = "messages") -> None:
|
|
34
|
+
"""Fail-open, in-place reduction of the message list(s) in ``mapping`` (dict-like).
|
|
35
|
+
|
|
36
|
+
``key`` names the field to reduce (``messages`` for OpenAI/Anthropic). Pass
|
|
37
|
+
``key=None`` to reduce whichever known list keys are present — used on gateway
|
|
38
|
+
paths (LiteLLM) where a request may be chat (``messages``) or Responses (``input``).
|
|
32
39
|
"""
|
|
33
|
-
if isinstance(mapping, dict)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
40
|
+
if not isinstance(mapping, dict):
|
|
41
|
+
return
|
|
42
|
+
keys = _LIST_KEYS if key is None else (key,)
|
|
43
|
+
for k in keys:
|
|
44
|
+
if isinstance(mapping.get(k), list):
|
|
45
|
+
try:
|
|
46
|
+
mapping[k] = reduce_messages(mapping[k], fmt=fmt, **opts)
|
|
47
|
+
except Exception:
|
|
48
|
+
pass # fail open
|
|
38
49
|
|
|
39
50
|
|
|
40
|
-
def wrap_messages_create(create: Callable, *, fmt: str, opts: dict, key: str = "messages",
|
|
51
|
+
def wrap_messages_create(create: Callable, *, fmt: str, opts: dict, key: str | None = "messages",
|
|
41
52
|
reduce: bool = True,
|
|
42
53
|
before: Callable[[dict], None] | None = None) -> Callable:
|
|
43
54
|
"""Wrap a ``create(**kwargs)`` callable to reduce its messages before calling through.
|
|
@@ -35,7 +35,8 @@ def make_handler(**opts):
|
|
|
35
35
|
class LeanContextHandler(CustomLogger):
|
|
36
36
|
async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
|
|
37
37
|
if call_type in _REDUCIBLE_CALLS:
|
|
38
|
-
|
|
38
|
+
# key=None: reduce chat (messages) or Responses (input) payloads alike
|
|
39
|
+
reduce_messages_in(data, "auto", opts, key=None) # fail-open in-place
|
|
39
40
|
return data
|
|
40
41
|
|
|
41
42
|
return LeanContextHandler()
|
|
@@ -48,14 +49,14 @@ def patch(**opts) -> None:
|
|
|
48
49
|
if getattr(litellm, "_leancontext_patched", False):
|
|
49
50
|
return
|
|
50
51
|
|
|
51
|
-
litellm.completion = wrap_messages_create(litellm.completion, fmt="auto", opts=opts)
|
|
52
|
+
litellm.completion = wrap_messages_create(litellm.completion, fmt="auto", opts=opts, key=None)
|
|
52
53
|
|
|
53
54
|
if hasattr(litellm, "acompletion"):
|
|
54
55
|
_orig_acompletion = litellm.acompletion
|
|
55
56
|
|
|
56
57
|
@functools.wraps(_orig_acompletion)
|
|
57
58
|
async def acompletion(*args, **kwargs):
|
|
58
|
-
reduce_messages_in(kwargs, "auto", opts)
|
|
59
|
+
reduce_messages_in(kwargs, "auto", opts, key=None)
|
|
59
60
|
return await _orig_acompletion(*args, **kwargs)
|
|
60
61
|
|
|
61
62
|
litellm.acompletion = mark(acompletion)
|
|
@@ -125,13 +125,18 @@ def _reduce_gemini_message(content: Any, opts: dict) -> Any:
|
|
|
125
125
|
# type "function_call_output" whose `output` is a string.
|
|
126
126
|
|
|
127
127
|
def _reduce_responses_message(item: Any, opts: dict) -> Any:
|
|
128
|
-
if (
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
128
|
+
if not isinstance(item, dict) or item.get("type") != "function_call_output":
|
|
129
|
+
return item
|
|
130
|
+
output = item.get("output")
|
|
131
|
+
if isinstance(output, str):
|
|
132
|
+
new_item = dict(item)
|
|
133
|
+
new_item["output"] = _reduce_str(output, opts)
|
|
134
|
+
return new_item
|
|
135
|
+
# The Responses API also allows a list of content parts (e.g. output_text);
|
|
136
|
+
# reduce those the same way as chat parts. Anything else passes through.
|
|
137
|
+
if isinstance(output, list):
|
|
133
138
|
new_item = dict(item)
|
|
134
|
-
new_item["output"] =
|
|
139
|
+
new_item["output"] = [_reduce_openai_part(p, opts) for p in output]
|
|
135
140
|
return new_item
|
|
136
141
|
return item
|
|
137
142
|
|
|
@@ -180,6 +185,15 @@ _REDUCE_BY_NAME = {f.name: f.reduce for f in _FORMATS}
|
|
|
180
185
|
|
|
181
186
|
# --- public ------------------------------------------------------------------
|
|
182
187
|
|
|
188
|
+
def _format_for(m: Any) -> str:
|
|
189
|
+
"""The format a single message belongs to (priority order); defaults to ``openai``."""
|
|
190
|
+
if isinstance(m, dict):
|
|
191
|
+
for fmt in _FORMATS:
|
|
192
|
+
if fmt.detect(m):
|
|
193
|
+
return fmt.name
|
|
194
|
+
return "openai"
|
|
195
|
+
|
|
196
|
+
|
|
183
197
|
def detect_format(messages: list) -> str:
|
|
184
198
|
"""Best-effort detection of the message protocol; defaults to ``openai``."""
|
|
185
199
|
for m in messages:
|
|
@@ -197,9 +211,14 @@ def reduce_messages(messages: Any, *, fmt: str = "auto", **opts) -> Any:
|
|
|
197
211
|
Handles OpenAI (chat + Responses), Anthropic, and Gemini formats. Only tool-result
|
|
198
212
|
content is touched; instructions are never altered. Anything unrecognised passes
|
|
199
213
|
through unchanged (fail open).
|
|
214
|
+
|
|
215
|
+
With ``fmt="auto"`` each message is dispatched by its own format, so a list mixing
|
|
216
|
+
shapes (e.g. a chat tool message alongside a Responses ``function_call_output``)
|
|
217
|
+
reduces every item — not just the ones matching the first format seen.
|
|
200
218
|
"""
|
|
201
219
|
if not isinstance(messages, list):
|
|
202
220
|
return messages
|
|
203
|
-
|
|
204
|
-
|
|
221
|
+
if fmt == "auto":
|
|
222
|
+
return [_REDUCE_BY_NAME.get(_format_for(m), _reduce_openai_message)(m, opts) for m in messages]
|
|
223
|
+
reducer = _REDUCE_BY_NAME.get(fmt, _reduce_openai_message)
|
|
205
224
|
return [reducer(m, opts) for m in messages]
|
|
@@ -27,20 +27,22 @@ def _find_records(data: Any) -> list[dict] | None:
|
|
|
27
27
|
return None
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
def _fmt(value: Any) -> str:
|
|
31
|
-
if isinstance(value, str):
|
|
32
|
-
return value
|
|
33
|
-
return json.dumps(value, separators=(",", ":"), ensure_ascii=False)
|
|
34
|
-
|
|
35
|
-
|
|
36
30
|
def reduce_json(text: str) -> tuple[str, list[str]]:
|
|
37
31
|
data = json.loads(text)
|
|
38
32
|
records = _find_records(data)
|
|
39
33
|
|
|
40
34
|
if records is not None and len(records) >= 3:
|
|
41
35
|
keys = list(dict.fromkeys(k for row in records for k in row.keys()))
|
|
42
|
-
|
|
43
|
-
|
|
36
|
+
# Each row is a JSON array of values in `keys` order, with the field names
|
|
37
|
+
# factored out into the header once (a missing field becomes null, keeping
|
|
38
|
+
# every row positional). JSON-encoding every cell keeps values that contain
|
|
39
|
+
# the delimiter, quotes, or newlines unambiguous and lossless — a plain
|
|
40
|
+
# " | " join would corrupt those, and the fidelity check wouldn't catch it.
|
|
41
|
+
header = "fields: " + json.dumps(keys, separators=(",", ":"), ensure_ascii=False)
|
|
42
|
+
rows = [
|
|
43
|
+
json.dumps([row.get(k) for k in keys], separators=(",", ":"), ensure_ascii=False)
|
|
44
|
+
for row in records
|
|
45
|
+
]
|
|
44
46
|
notes = [f"columnar: {len(records)} records × {len(keys)} fields, keys factored out once"]
|
|
45
47
|
return header + "\n" + "\n".join(rows), notes
|
|
46
48
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "leancontext"
|
|
3
|
-
version = "2.0.5"
|
|
3
|
+
version = "2.0.6"
|
|
4
4
|
description = "Deterministic, type-aware reduction of agent tool outputs at the source. Cut LLM token cost without making the agent do less."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
@@ -41,6 +41,16 @@ def test_json_columnar_is_lossless_on_values():
|
|
|
41
41
|
for i in range(20):
|
|
42
42
|
assert f"n{i}" in r.text # every value preserved
|
|
43
43
|
|
|
44
|
+
|
|
45
|
+
def test_json_columnar_handles_delimiter_and_newline_values():
|
|
46
|
+
# Values containing the column delimiter or a newline must not corrupt rows:
|
|
47
|
+
# each row must parse back to exactly its original fields (regression test).
|
|
48
|
+
records = [{"id": i, "text": f"row {i} | part A\nrow {i} part B", "n": i} for i in range(10)]
|
|
49
|
+
r = leancontext.reduce(json.dumps(records))
|
|
50
|
+
assert r.kind == "json" # reduction applied, not reverted
|
|
51
|
+
rows = [json.loads(line) for line in r.text.splitlines()[1:]] # skip the fields header
|
|
52
|
+
assert rows == [[i, f"row {i} | part A\nrow {i} part B", i] for i in range(10)]
|
|
53
|
+
|
|
44
54
|
def test_decorator_preserves_contract():
|
|
45
55
|
@leancontext.reduce
|
|
46
56
|
def tool(_: str) -> str:
|
|
@@ -127,6 +127,20 @@ def test_proxy_reduces_before_forwarding():
|
|
|
127
127
|
assert len(sent) < len(_big_log()) and "root cause" in sent
|
|
128
128
|
|
|
129
129
|
|
|
130
|
+
# --- gateway helper: chat (messages) vs Responses (input) --------------------
|
|
131
|
+
|
|
132
|
+
def test_reduce_messages_in_handles_responses_input_key():
|
|
133
|
+
# Gateway paths use key=None so a Responses request (input=) reduces too, not
|
|
134
|
+
# just chat (messages=). No third-party dependency needed for this logic.
|
|
135
|
+
from leancontext.integrations._common import reduce_messages_in
|
|
136
|
+
|
|
137
|
+
data = {"model": "gpt-4o",
|
|
138
|
+
"input": [{"type": "function_call_output", "call_id": "c", "output": _big_log()}]}
|
|
139
|
+
reduce_messages_in(data, "auto", {}, key=None)
|
|
140
|
+
sent = data["input"][0]["output"]
|
|
141
|
+
assert len(sent) < len(_big_log()) and "root cause" in sent
|
|
142
|
+
|
|
143
|
+
|
|
130
144
|
# --- LiteLLM (real CustomLogger) ---------------------------------------------
|
|
131
145
|
|
|
132
146
|
def test_litellm_pre_call_hook_reduces():
|
|
@@ -63,6 +63,28 @@ def test_responses_format_reduced():
|
|
|
63
63
|
assert len(reduced) < len(_log()) and "root cause" in reduced
|
|
64
64
|
|
|
65
65
|
|
|
66
|
+
def test_mixed_format_list_reduces_every_item():
|
|
67
|
+
# A chat tool message AND a Responses function_call_output in one list: auto
|
|
68
|
+
# dispatch must reduce both, not just the format of the first message seen.
|
|
69
|
+
items = [
|
|
70
|
+
{"role": "tool", "tool_call_id": "c1", "content": _log()},
|
|
71
|
+
{"type": "function_call_output", "call_id": "c2", "output": _log()},
|
|
72
|
+
]
|
|
73
|
+
out = reduce_messages(items)
|
|
74
|
+
assert len(out[0]["content"]) < len(_log()) and "root cause" in out[0]["content"]
|
|
75
|
+
assert len(out[1]["output"]) < len(_log()) and "root cause" in out[1]["output"]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_responses_list_shaped_output_reduced():
|
|
79
|
+
items = [
|
|
80
|
+
{"type": "function_call_output", "call_id": "c1",
|
|
81
|
+
"output": [{"type": "output_text", "text": _log()}]},
|
|
82
|
+
]
|
|
83
|
+
out = reduce_messages(items)
|
|
84
|
+
reduced = out[0]["output"][0]["text"]
|
|
85
|
+
assert len(reduced) < len(_log()) and "root cause" in reduced
|
|
86
|
+
|
|
87
|
+
|
|
66
88
|
def test_non_list_passthrough():
|
|
67
89
|
assert reduce_messages("not a list") == "not a list"
|
|
68
90
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|