langchain-babeltele 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_babeltele-0.1.0/.gitignore +88 -0
- langchain_babeltele-0.1.0/LICENSE +21 -0
- langchain_babeltele-0.1.0/PKG-INFO +238 -0
- langchain_babeltele-0.1.0/README.md +208 -0
- langchain_babeltele-0.1.0/examples/README.md +63 -0
- langchain_babeltele-0.1.0/examples/compress_history.py +126 -0
- langchain_babeltele-0.1.0/examples/default.env +26 -0
- langchain_babeltele-0.1.0/examples/guardrail.py +88 -0
- langchain_babeltele-0.1.0/examples/hello.py +64 -0
- langchain_babeltele-0.1.0/examples/long_term_memory.py +136 -0
- langchain_babeltele-0.1.0/examples/strategies.py +91 -0
- langchain_babeltele-0.1.0/langchain_babeltele/__init__.py +39 -0
- langchain_babeltele-0.1.0/langchain_babeltele/_internal.py +49 -0
- langchain_babeltele-0.1.0/langchain_babeltele/compressor.py +72 -0
- langchain_babeltele-0.1.0/langchain_babeltele/core.py +163 -0
- langchain_babeltele-0.1.0/langchain_babeltele/memory.py +56 -0
- langchain_babeltele-0.1.0/langchain_babeltele/middleware.py +137 -0
- langchain_babeltele-0.1.0/langchain_babeltele/prompts.py +355 -0
- langchain_babeltele-0.1.0/langchain_babeltele/py.typed +0 -0
- langchain_babeltele-0.1.0/langchain_babeltele/result.py +34 -0
- langchain_babeltele-0.1.0/langchain_babeltele/verify.py +108 -0
- langchain_babeltele-0.1.0/paper.txt +2384 -0
- langchain_babeltele-0.1.0/pyproject.toml +53 -0
- langchain_babeltele-0.1.0/tests/__init__.py +0 -0
- langchain_babeltele-0.1.0/tests/conftest.py +17 -0
- langchain_babeltele-0.1.0/tests/test_compressor.py +39 -0
- langchain_babeltele-0.1.0/tests/test_core.py +61 -0
- langchain_babeltele-0.1.0/tests/test_memory.py +38 -0
- langchain_babeltele-0.1.0/tests/test_middleware.py +83 -0
- langchain_babeltele-0.1.0/tests/test_verify.py +48 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
*.manifest
|
|
31
|
+
*.spec
|
|
32
|
+
|
|
33
|
+
# Installer logs
|
|
34
|
+
pip-log.txt
|
|
35
|
+
pip-delete-this-directory.txt
|
|
36
|
+
|
|
37
|
+
# Unit test / coverage reports
|
|
38
|
+
htmlcov/
|
|
39
|
+
.tox/
|
|
40
|
+
.nox/
|
|
41
|
+
.coverage
|
|
42
|
+
.coverage.*
|
|
43
|
+
.cache
|
|
44
|
+
nosetests.xml
|
|
45
|
+
coverage.xml
|
|
46
|
+
*.cover
|
|
47
|
+
*.py,cover
|
|
48
|
+
.hypothesis/
|
|
49
|
+
.pytest_cache/
|
|
50
|
+
cover/
|
|
51
|
+
|
|
52
|
+
# Type checkers
|
|
53
|
+
.mypy_cache/
|
|
54
|
+
.dmypy.json
|
|
55
|
+
dmypy.json
|
|
56
|
+
.pyre/
|
|
57
|
+
.pytype/
|
|
58
|
+
|
|
59
|
+
# Ruff
|
|
60
|
+
.ruff_cache/
|
|
61
|
+
|
|
62
|
+
# Environments
|
|
63
|
+
.env
|
|
64
|
+
.venv
|
|
65
|
+
env/
|
|
66
|
+
venv/
|
|
67
|
+
ENV/
|
|
68
|
+
env.bak/
|
|
69
|
+
venv.bak/
|
|
70
|
+
|
|
71
|
+
# Jupyter Notebook
|
|
72
|
+
.ipynb_checkpoints
|
|
73
|
+
|
|
74
|
+
# IDEs and editors
|
|
75
|
+
.idea/
|
|
76
|
+
.vscode/
|
|
77
|
+
*.swp
|
|
78
|
+
*.swo
|
|
79
|
+
*~
|
|
80
|
+
|
|
81
|
+
# OS files
|
|
82
|
+
.DS_Store
|
|
83
|
+
Thumbs.db
|
|
84
|
+
|
|
85
|
+
# Publishing credentials (never commit PyPI tokens)
|
|
86
|
+
.pypirc
|
|
87
|
+
|
|
88
|
+
.claude/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ali Ashraf
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: langchain-babeltele
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: BabelTele: model-native, high-density text compression for LangChain pipelines and agents.
|
|
5
|
+
Project-URL: Homepage, https://github.com/AliFlux/langchain-babeltele
|
|
6
|
+
Project-URL: Repository, https://github.com/AliFlux/langchain-babeltele
|
|
7
|
+
Project-URL: Issues, https://github.com/AliFlux/langchain-babeltele/issues
|
|
8
|
+
Project-URL: Paper, https://arxiv.org/abs/2606.19857
|
|
9
|
+
Author: Ali Ashraf
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agents,compression,context,langchain,llm,prompt
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.11
|
|
23
|
+
Requires-Dist: langchain-core>=1.0
|
|
24
|
+
Requires-Dist: langchain-text-splitters>=0.3
|
|
25
|
+
Requires-Dist: langchain>=1.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# langchain-babeltele
|
|
32
|
+
|
|
33
|
+
Model-native, high-density text compression for LangChain pipelines and agents.
|
|
34
|
+
|
|
35
|
+
BabelTele compresses verbose text into a dense form that people can't read but
|
|
36
|
+
LLMs can recover. It relaxes the readability prior, using omnilingual word choice
|
|
37
|
+
and symbolic collapse, then lets downstream models consume the dense text directly
|
|
38
|
+
with no decompression step. The paper reports ~28% of the original token length at
|
|
39
|
+
~99.5% downstream QA fidelity, with no fine-tuning and pure black-box API access.
|
|
40
|
+
|
|
41
|
+
```text
|
|
42
|
+
"Q3 revenue is projected to rise ~30% YoY; if it lands, the Berlin team
|
|
43
|
+
ships the mobile app in October."
|
|
44
|
+
→ Q3rev📈~30%YoY ∧ ?✅⇒Berlin🚀📱@Oct
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Based on *"Large Language Models Do Not Always Need Readable Language"*
|
|
48
|
+
([arXiv:2606.19857](https://arxiv.org/abs/2606.19857)).
|
|
49
|
+
|
|
50
|
+
## Install
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install langchain-babeltele
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
You also need a chat-model provider, e.g. `pip install langchain-anthropic`.
|
|
57
|
+
|
|
58
|
+
## Examples
|
|
59
|
+
|
|
60
|
+
If you learn faster from running code, start in [examples/](examples/). The
|
|
61
|
+
[hello.py](examples/hello.py) script is the smallest end-to-end demo: it compresses one paragraph and prints the before/after with token counts.
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
cp examples/default.env examples/.env # and add your API key
|
|
65
|
+
python examples/hello.py
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
From there, [examples/](examples/) builds up through prompt strategies, the fidelity
|
|
69
|
+
guardrail, agent-history compression, and long-term memory. See the
|
|
70
|
+
[examples README](examples/README.md) for the full list.
|
|
71
|
+
|
|
72
|
+
## The core engine
|
|
73
|
+
|
|
74
|
+
Everything composes from one primitive. Pass any chat model or a model string
|
|
75
|
+
(resolved via `init_chat_model`).
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from langchain_babeltele import BabelTeleCompressor
|
|
79
|
+
|
|
80
|
+
compressor = BabelTeleCompressor("anthropic:claude-sonnet-4-6")
|
|
81
|
+
result = compressor.compress(long_text)
|
|
82
|
+
|
|
83
|
+
print(result.text) # the dense BabelTele representation
|
|
84
|
+
print(result.retention_ratio) # e.g. 0.28
|
|
85
|
+
|
|
86
|
+
# Use it anywhere in an LCEL chain:
|
|
87
|
+
chain = compressor.as_runnable() | some_reader_model
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Long inputs that exceed the compressor's own context window are chunked
|
|
91
|
+
automatically:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
BabelTeleCompressor(model, chunk_tokens=200_000)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Where it plugs in
|
|
98
|
+
|
|
99
|
+
### RAG: compress retrieved documents
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from langchain_classic.retrievers import ContextualCompressionRetriever  # pip install langchain-classic
|
|
103
|
+
from langchain_babeltele import BabelTeleDocumentCompressor
|
|
104
|
+
|
|
105
|
+
retriever = ContextualCompressionRetriever(
|
|
106
|
+
base_compressor=BabelTeleDocumentCompressor(compressor=compressor),
|
|
107
|
+
base_retriever=base_retriever,
|
|
108
|
+
)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Agents: compress history and tool outputs
|
|
112
|
+
|
|
113
|
+
A denser drop-in alternative to `SummarizationMiddleware`. Folds overflowing
|
|
114
|
+
history into one dense message and compresses large tool outputs before they
|
|
115
|
+
re-enter context.
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from langchain.agents import create_agent
|
|
119
|
+
from langchain_babeltele import BabelTeleCompressionMiddleware
|
|
120
|
+
|
|
121
|
+
agent = create_agent(
|
|
122
|
+
model="anthropic:claude-sonnet-4-6",
|
|
123
|
+
tools=tools,
|
|
124
|
+
middleware=[
|
|
125
|
+
BabelTeleCompressionMiddleware(
|
|
126
|
+
compressor,
|
|
127
|
+
token_budget=4000,
|
|
128
|
+
keep_last_n=2,
|
|
129
|
+
tool_output_threshold=2000,
|
|
130
|
+
)
|
|
131
|
+
],
|
|
132
|
+
)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Long-term memory
|
|
136
|
+
|
|
137
|
+
The paper's LoCoMo recipe: compress each session, embed, retrieve top-k.
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from langchain_babeltele import BabelTeleMemoryStore
|
|
141
|
+
|
|
142
|
+
memory = BabelTeleMemoryStore(vector_store, compressor)
|
|
143
|
+
memory.add_session(conversation_text)
|
|
144
|
+
relevant = memory.retrieve("what did we decide about pricing?", k=4)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Choosing a prompt strategy
|
|
148
|
+
|
|
149
|
+
BabelTele offers several prompt strategies rather than one fixed prompt. Select a
|
|
150
|
+
built-in strategy or pass your own:
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from langchain_babeltele import BabelTeleStrategy
|
|
154
|
+
|
|
155
|
+
BabelTeleCompressor(model, strategy=BabelTeleStrategy.BT_P8) # fixed symbolic rules
|
|
156
|
+
BabelTeleCompressor(model, strategy="my custom compression prompt: ")
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Fidelity guardrail
|
|
160
|
+
|
|
161
|
+
Because BabelTele abandons readability, a faulty compression can silently drop
|
|
162
|
+
information. The guardrail scores recoverability with an LLM judge and retries
|
|
163
|
+
with milder structured strategies. If it can't ensure fidelity, it falls back to
|
|
164
|
+
the original text.
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
from langchain_babeltele import FidelityGuardrail
|
|
168
|
+
|
|
169
|
+
compressor = BabelTeleCompressor(
|
|
170
|
+
model,
|
|
171
|
+
guardrail=FidelityGuardrail("anthropic:claude-sonnet-4-6", threshold=0.8),
|
|
172
|
+
)
|
|
173
|
+
result = compressor.compress(text)
|
|
174
|
+
print(result.verified) # True / False
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Benchmarks
|
|
178
|
+
|
|
179
|
+
**On document QA, BabelTele kept 99.5% semantic fidelity while
|
|
180
|
+
compressing text to 27.9% of its original length.**
|
|
181
|
+
|
|
182
|
+
**Agent memory (LoCoMo).** Compressing each session before storing it retains most
|
|
183
|
+
of the full-text accuracy at roughly half the tokens, and edges out plain
|
|
184
|
+
summarization. This is what `BabelTeleMemoryStore` does.
|
|
185
|
+
|
|
186
|
+
| Method | Tokens / query | Accuracy | vs. original |
|
|
187
|
+
| --------- | -------------: | -------: | -----------: |
|
|
188
|
+
| Original | 2819.5 | 64.81 | 100.0% |
|
|
189
|
+
| Summary | 1365.6 | 61.05 | 94.2% |
|
|
190
|
+
| BabelTele | 1382.2 | 62.53 | 96.5% |
|
|
191
|
+
|
|
192
|
+
Absolute scores reflect LoCoMo's difficulty (even full context scores only 64.81);
|
|
193
|
+
the point is relative retention. BabelTele preserves 96.5% of baseline accuracy at
|
|
194
|
+
roughly half the tokens, and beats summarization while doing it.
|
|
195
|
+
|
|
196
|
+
**Multi-agent communication.** Compressing inter-agent messages cut tokens sharply
|
|
197
|
+
with little score loss.
|
|
198
|
+
|
|
199
|
+
| Setting | Token reduction | Score (vs. uncompressed) |
|
|
200
|
+
| -------------------------------- | --------------: | -----------------------: |
|
|
201
|
+
| Homogeneous (Gemini with Gemini) | 38.96% | 96.6% |
|
|
202
|
+
| Heterogeneous (Gemini with GPT) | 44.21% | 99.7% |
|
|
203
|
+
|
|
204
|
+
**Beyond the context window.** When the input exceeds the window, chunked BabelTele
|
|
205
|
+
compression beat naive truncation on LongBench v2 Code Repo QA (Long). This is the
|
|
206
|
+
`chunk_tokens` path.
|
|
207
|
+
|
|
208
|
+
| Reader | Truncation | BabelTele |
|
|
209
|
+
| ----------- | ---------: | --------: |
|
|
210
|
+
| Qwen3.6-Max | 55.17 | 62.07 |
|
|
211
|
+
| GLM-5.1 | 62.07 | 72.41 |
|
|
212
|
+
| Kimi 2.5 | 44.82 | 48.28 |
|
|
213
|
+
|
|
214
|
+
**Compression strength varies by model.** On LongBench v2, Gemini 3.1 Pro was the
|
|
215
|
+
most aggressive at over 95% compression (about 4% retention), while GPT-5.4 was the
|
|
216
|
+
most conservative at roughly 75% (about 27% retention); other models landed in
|
|
217
|
+
between. Portability is a separate axis: in cross-model tests, GPT- and
|
|
218
|
+
Claude-compressed inputs were the most portable for other readers to decode, while
|
|
219
|
+
Qwen- and Kimi-compressed inputs caused larger accuracy drops.
|
|
220
|
+
|
|
221
|
+
All numbers are from the paper ([arXiv:2606.19857](https://arxiv.org/abs/2606.19857)). Pull requests are welcome to develop evals for this project and generate more results.
|
|
222
|
+
|
|
223
|
+
## Pro Tip
|
|
224
|
+
|
|
225
|
+
Figure out which strategy works best for your model and use-case. Different combinations may give surprisingly different answers.
|
|
226
|
+
|
|
227
|
+
No single configuration works for every model and use case.
|
|
228
|
+
|
|
229
|
+
## Development
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
pip install -e ".[dev]"
|
|
233
|
+
pytest
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## License
|
|
237
|
+
|
|
238
|
+
MIT
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# langchain-babeltele
|
|
2
|
+
|
|
3
|
+
Model-native, high-density text compression for LangChain pipelines and agents.
|
|
4
|
+
|
|
5
|
+
BabelTele compresses verbose text into a dense form that people can't read but
|
|
6
|
+
LLMs can recover. It relaxes the readability prior, using omnilingual word choice
|
|
7
|
+
and symbolic collapse, then lets downstream models consume the dense text directly
|
|
8
|
+
with no decompression step. The paper reports ~28% of the original token length at
|
|
9
|
+
~99.5% downstream QA fidelity, with no fine-tuning and pure black-box API access.
|
|
10
|
+
|
|
11
|
+
```text
|
|
12
|
+
"Q3 revenue is projected to rise ~30% YoY; if it lands, the Berlin team
|
|
13
|
+
ships the mobile app in October."
|
|
14
|
+
→ Q3rev📈~30%YoY ∧ ?✅⇒Berlin🚀📱@Oct
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Based on *"Large Language Models Do Not Always Need Readable Language"*
|
|
18
|
+
([arXiv:2606.19857](https://arxiv.org/abs/2606.19857)).
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install langchain-babeltele
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
You also need a chat-model provider, e.g. `pip install langchain-anthropic`.
|
|
27
|
+
|
|
28
|
+
## Examples
|
|
29
|
+
|
|
30
|
+
If you learn faster from running code, start in [examples/](examples/). The
|
|
31
|
+
[hello.py](examples/hello.py) script is the smallest end-to-end demo: it compresses one paragraph and prints the before/after with token counts.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
cp examples/default.env examples/.env # and add your API key
|
|
35
|
+
python examples/hello.py
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
From there, [examples/](examples/) builds up through prompt strategies, the fidelity
|
|
39
|
+
guardrail, agent-history compression, and long-term memory. See the
|
|
40
|
+
[examples README](examples/README.md) for the full list.
|
|
41
|
+
|
|
42
|
+
## The core engine
|
|
43
|
+
|
|
44
|
+
Everything composes from one primitive. Pass any chat model or a model string
|
|
45
|
+
(resolved via `init_chat_model`).
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from langchain_babeltele import BabelTeleCompressor
|
|
49
|
+
|
|
50
|
+
compressor = BabelTeleCompressor("anthropic:claude-sonnet-4-6")
|
|
51
|
+
result = compressor.compress(long_text)
|
|
52
|
+
|
|
53
|
+
print(result.text) # the dense BabelTele representation
|
|
54
|
+
print(result.retention_ratio) # e.g. 0.28
|
|
55
|
+
|
|
56
|
+
# Use it anywhere in an LCEL chain:
|
|
57
|
+
chain = compressor.as_runnable() | some_reader_model
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Long inputs that exceed the compressor's own context window are chunked
|
|
61
|
+
automatically:
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
BabelTeleCompressor(model, chunk_tokens=200_000)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Where it plugs in
|
|
68
|
+
|
|
69
|
+
### RAG: compress retrieved documents
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from langchain_classic.retrievers import ContextualCompressionRetriever  # pip install langchain-classic
|
|
73
|
+
from langchain_babeltele import BabelTeleDocumentCompressor
|
|
74
|
+
|
|
75
|
+
retriever = ContextualCompressionRetriever(
|
|
76
|
+
base_compressor=BabelTeleDocumentCompressor(compressor=compressor),
|
|
77
|
+
base_retriever=base_retriever,
|
|
78
|
+
)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Agents: compress history and tool outputs
|
|
82
|
+
|
|
83
|
+
A denser drop-in alternative to `SummarizationMiddleware`. Folds overflowing
|
|
84
|
+
history into one dense message and compresses large tool outputs before they
|
|
85
|
+
re-enter context.
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from langchain.agents import create_agent
|
|
89
|
+
from langchain_babeltele import BabelTeleCompressionMiddleware
|
|
90
|
+
|
|
91
|
+
agent = create_agent(
|
|
92
|
+
model="anthropic:claude-sonnet-4-6",
|
|
93
|
+
tools=tools,
|
|
94
|
+
middleware=[
|
|
95
|
+
BabelTeleCompressionMiddleware(
|
|
96
|
+
compressor,
|
|
97
|
+
token_budget=4000,
|
|
98
|
+
keep_last_n=2,
|
|
99
|
+
tool_output_threshold=2000,
|
|
100
|
+
)
|
|
101
|
+
],
|
|
102
|
+
)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Long-term memory
|
|
106
|
+
|
|
107
|
+
The paper's LoCoMo recipe: compress each session, embed, retrieve top-k.
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from langchain_babeltele import BabelTeleMemoryStore
|
|
111
|
+
|
|
112
|
+
memory = BabelTeleMemoryStore(vector_store, compressor)
|
|
113
|
+
memory.add_session(conversation_text)
|
|
114
|
+
relevant = memory.retrieve("what did we decide about pricing?", k=4)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Choosing a prompt strategy
|
|
118
|
+
|
|
119
|
+
BabelTele offers several prompt strategies rather than one fixed prompt. Select a
|
|
120
|
+
built-in strategy or pass your own:
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from langchain_babeltele import BabelTeleStrategy
|
|
124
|
+
|
|
125
|
+
BabelTeleCompressor(model, strategy=BabelTeleStrategy.BT_P8) # fixed symbolic rules
|
|
126
|
+
BabelTeleCompressor(model, strategy="my custom compression prompt: ")
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Fidelity guardrail
|
|
130
|
+
|
|
131
|
+
Because BabelTele abandons readability, a faulty compression can silently drop
|
|
132
|
+
information. The guardrail scores recoverability with an LLM judge and retries
|
|
133
|
+
with milder structured strategies. If it can't ensure fidelity, it falls back to
|
|
134
|
+
the original text.
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from langchain_babeltele import FidelityGuardrail
|
|
138
|
+
|
|
139
|
+
compressor = BabelTeleCompressor(
|
|
140
|
+
model,
|
|
141
|
+
guardrail=FidelityGuardrail("anthropic:claude-sonnet-4-6", threshold=0.8),
|
|
142
|
+
)
|
|
143
|
+
result = compressor.compress(text)
|
|
144
|
+
print(result.verified) # True / False
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Benchmarks
|
|
148
|
+
|
|
149
|
+
**On document QA, BabelTele kept 99.5% semantic fidelity while
|
|
150
|
+
compressing text to 27.9% of its original length.**
|
|
151
|
+
|
|
152
|
+
**Agent memory (LoCoMo).** Compressing each session before storing it retains most
|
|
153
|
+
of the full-text accuracy at roughly half the tokens, and edges out plain
|
|
154
|
+
summarization. This is what `BabelTeleMemoryStore` does.
|
|
155
|
+
|
|
156
|
+
| Method | Tokens / query | Accuracy | vs. original |
|
|
157
|
+
| --------- | -------------: | -------: | -----------: |
|
|
158
|
+
| Original | 2819.5 | 64.81 | 100.0% |
|
|
159
|
+
| Summary | 1365.6 | 61.05 | 94.2% |
|
|
160
|
+
| BabelTele | 1382.2 | 62.53 | 96.5% |
|
|
161
|
+
|
|
162
|
+
Absolute scores reflect LoCoMo's difficulty (even full context scores only 64.81);
|
|
163
|
+
the point is relative retention. BabelTele preserves 96.5% of baseline accuracy at
|
|
164
|
+
roughly half the tokens, and beats summarization while doing it.
|
|
165
|
+
|
|
166
|
+
**Multi-agent communication.** Compressing inter-agent messages cut tokens sharply
|
|
167
|
+
with little score loss.
|
|
168
|
+
|
|
169
|
+
| Setting | Token reduction | Score (vs. uncompressed) |
|
|
170
|
+
| -------------------------------- | --------------: | -----------------------: |
|
|
171
|
+
| Homogeneous (Gemini with Gemini) | 38.96% | 96.6% |
|
|
172
|
+
| Heterogeneous (Gemini with GPT) | 44.21% | 99.7% |
|
|
173
|
+
|
|
174
|
+
**Beyond the context window.** When the input exceeds the window, chunked BabelTele
|
|
175
|
+
compression beat naive truncation on LongBench v2 Code Repo QA (Long). This is the
|
|
176
|
+
`chunk_tokens` path.
|
|
177
|
+
|
|
178
|
+
| Reader | Truncation | BabelTele |
|
|
179
|
+
| ----------- | ---------: | --------: |
|
|
180
|
+
| Qwen3.6-Max | 55.17 | 62.07 |
|
|
181
|
+
| GLM-5.1 | 62.07 | 72.41 |
|
|
182
|
+
| Kimi 2.5 | 44.82 | 48.28 |
|
|
183
|
+
|
|
184
|
+
**Compression strength varies by model.** On LongBench v2, Gemini 3.1 Pro was the
|
|
185
|
+
most aggressive at over 95% compression (about 4% retention), while GPT-5.4 was the
|
|
186
|
+
most conservative at roughly 75% (about 27% retention); other models landed in
|
|
187
|
+
between. Portability is a separate axis: in cross-model tests, GPT- and
|
|
188
|
+
Claude-compressed inputs were the most portable for other readers to decode, while
|
|
189
|
+
Qwen- and Kimi-compressed inputs caused larger accuracy drops.
|
|
190
|
+
|
|
191
|
+
All numbers are from the paper ([arXiv:2606.19857](https://arxiv.org/abs/2606.19857)). Pull requests are welcome to develop evals for this project and generate more results.
|
|
192
|
+
|
|
193
|
+
## Pro Tip
|
|
194
|
+
|
|
195
|
+
Figure out which strategy works best for your model and use-case. Different combinations may give surprisingly different answers.
|
|
196
|
+
|
|
197
|
+
No single configuration works for every model and use case.
|
|
198
|
+
|
|
199
|
+
## Development
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
pip install -e ".[dev]"
|
|
203
|
+
pytest
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
MIT
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Examples
|
|
2
|
+
|
|
3
|
+
Runnable scripts showing where BabelTele plugs in.
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install langchain-babeltele python-dotenv
|
|
9
|
+
pip install langchain-anthropic # or your provider's package
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Edit `default.env` and add your API key, or copy it to `.env` (which overrides
|
|
13
|
+
`default.env` and is git-ignored):
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
cp examples/default.env examples/.env  # run from the repository root, like the script commands below
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Scripts
|
|
20
|
+
|
|
21
|
+
Start with `hello.py`, then work down.
|
|
22
|
+
|
|
23
|
+
- [hello.py](hello.py) — the smallest example: compress one paragraph and print the
|
|
24
|
+
before/after with token counts.
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
python examples/hello.py
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
- [strategies.py](strategies.py) — run the same fact-dense text through several
|
|
31
|
+
`BT_P*` prompt strategies and print a side-by-side density comparison.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
python examples/strategies.py
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
- [guardrail.py](guardrail.py) — wrap compression in a `FidelityGuardrail` that
|
|
38
|
+
scores recoverability with an LLM judge, retries with milder strategies, and falls
|
|
39
|
+
back to the original text if it can't ensure fidelity. Prints the `verified` flag.
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
python examples/guardrail.py
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
- [compress_history.py](compress_history.py) — compress a multi-turn agent
|
|
46
|
+
conversation into a dense BabelTele digest, printing the before/after text and
|
|
47
|
+
token savings. This is the operation `BabelTeleCompressionMiddleware` performs
|
|
48
|
+
automatically inside `create_agent`.
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
python examples/compress_history.py
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
- [long_term_memory.py](long_term_memory.py) — store several past sessions as
|
|
55
|
+
compressed, embedded memories, then retrieve only the relevant one for a later
|
|
56
|
+
question. Prints each session's compression and the dense text that grounds the
|
|
57
|
+
answer. Needs both a chat model (`BABELTELE_MODEL`) and an embedding model
|
|
58
|
+
(`BABELTELE_EMBEDDINGS`), so install your embedding provider too:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install langchain-openai # for the default openai embeddings
|
|
62
|
+
python examples/long_term_memory.py
|
|
63
|
+
```
|