claw-compactor 7.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claw_compactor-7.0.0/LICENSE +21 -0
- claw_compactor-7.0.0/PKG-INFO +378 -0
- claw_compactor-7.0.0/README.md +342 -0
- claw_compactor-7.0.0/benchmark/__init__.py +1 -0
- claw_compactor-7.0.0/benchmark/compressors.py +520 -0
- claw_compactor-7.0.0/benchmark/evaluate.py +278 -0
- claw_compactor-7.0.0/benchmark/report.py +241 -0
- claw_compactor-7.0.0/benchmark/run_benchmark.py +238 -0
- claw_compactor-7.0.0/claw_compactor.egg-info/PKG-INFO +378 -0
- claw_compactor-7.0.0/claw_compactor.egg-info/SOURCES.txt +118 -0
- claw_compactor-7.0.0/claw_compactor.egg-info/dependency_links.txt +1 -0
- claw_compactor-7.0.0/claw_compactor.egg-info/entry_points.txt +2 -0
- claw_compactor-7.0.0/claw_compactor.egg-info/requires.txt +7 -0
- claw_compactor-7.0.0/claw_compactor.egg-info/top_level.txt +4 -0
- claw_compactor-7.0.0/pyproject.toml +67 -0
- claw_compactor-7.0.0/scripts/__init__.py +0 -0
- claw_compactor-7.0.0/scripts/audit_memory.py +216 -0
- claw_compactor-7.0.0/scripts/benchmark_fusion.py +1021 -0
- claw_compactor-7.0.0/scripts/cli.py +28 -0
- claw_compactor-7.0.0/scripts/compress_memory.py +236 -0
- claw_compactor-7.0.0/scripts/compressed_context.py +280 -0
- claw_compactor-7.0.0/scripts/dedup_memory.py +147 -0
- claw_compactor-7.0.0/scripts/dictionary_compress.py +170 -0
- claw_compactor-7.0.0/scripts/engram_auto.py +1083 -0
- claw_compactor-7.0.0/scripts/engram_cli.py +494 -0
- claw_compactor-7.0.0/scripts/estimate_tokens.py +131 -0
- claw_compactor-7.0.0/scripts/generate_summary_tiers.py +292 -0
- claw_compactor-7.0.0/scripts/lib/__init__.py +34 -0
- claw_compactor-7.0.0/scripts/lib/config.py +289 -0
- claw_compactor-7.0.0/scripts/lib/crunch_bench.py +283 -0
- claw_compactor-7.0.0/scripts/lib/dedup.py +119 -0
- claw_compactor-7.0.0/scripts/lib/dictionary.py +281 -0
- claw_compactor-7.0.0/scripts/lib/engram.py +793 -0
- claw_compactor-7.0.0/scripts/lib/engram_learner.py +439 -0
- claw_compactor-7.0.0/scripts/lib/engram_prompts.py +175 -0
- claw_compactor-7.0.0/scripts/lib/engram_storage.py +254 -0
- claw_compactor-7.0.0/scripts/lib/exceptions.py +24 -0
- claw_compactor-7.0.0/scripts/lib/feedback.py +147 -0
- claw_compactor-7.0.0/scripts/lib/fusion/__init__.py +37 -0
- claw_compactor-7.0.0/scripts/lib/fusion/base.py +97 -0
- claw_compactor-7.0.0/scripts/lib/fusion/content_detector.py +307 -0
- claw_compactor-7.0.0/scripts/lib/fusion/cortex.py +48 -0
- claw_compactor-7.0.0/scripts/lib/fusion/diff_crunch.py +222 -0
- claw_compactor-7.0.0/scripts/lib/fusion/engine.py +602 -0
- claw_compactor-7.0.0/scripts/lib/fusion/ionizer.py +255 -0
- claw_compactor-7.0.0/scripts/lib/fusion/log_crunch.py +256 -0
- claw_compactor-7.0.0/scripts/lib/fusion/neurosyntax.py +339 -0
- claw_compactor-7.0.0/scripts/lib/fusion/nexus.py +288 -0
- claw_compactor-7.0.0/scripts/lib/fusion/nexus_model.py +180 -0
- claw_compactor-7.0.0/scripts/lib/fusion/photon.py +497 -0
- claw_compactor-7.0.0/scripts/lib/fusion/pipeline.py +94 -0
- claw_compactor-7.0.0/scripts/lib/fusion/quantum_lock.py +240 -0
- claw_compactor-7.0.0/scripts/lib/fusion/search_crunch.py +253 -0
- claw_compactor-7.0.0/scripts/lib/fusion/semantic_dedup.py +434 -0
- claw_compactor-7.0.0/scripts/lib/fusion/structural_collapse.py +452 -0
- claw_compactor-7.0.0/scripts/lib/markdown.py +305 -0
- claw_compactor-7.0.0/scripts/lib/rewind/__init__.py +16 -0
- claw_compactor-7.0.0/scripts/lib/rewind/marker.py +50 -0
- claw_compactor-7.0.0/scripts/lib/rewind/retriever.py +75 -0
- claw_compactor-7.0.0/scripts/lib/rewind/store.py +89 -0
- claw_compactor-7.0.0/scripts/lib/rle.py +165 -0
- claw_compactor-7.0.0/scripts/lib/tokenizer_optimizer.py +183 -0
- claw_compactor-7.0.0/scripts/lib/tokens.py +78 -0
- claw_compactor-7.0.0/scripts/lib/unicode_maps.py +78 -0
- claw_compactor-7.0.0/scripts/mem_compress.py +791 -0
- claw_compactor-7.0.0/scripts/observation_compressor.py +402 -0
- claw_compactor-7.0.0/setup.cfg +4 -0
- claw_compactor-7.0.0/tests/__init__.py +0 -0
- claw_compactor-7.0.0/tests/conftest.py +94 -0
- claw_compactor-7.0.0/tests/test_audit_comprehensive.py +123 -0
- claw_compactor-7.0.0/tests/test_audit_memory.py +67 -0
- claw_compactor-7.0.0/tests/test_benchmark.py +122 -0
- claw_compactor-7.0.0/tests/test_cli_commands.py +228 -0
- claw_compactor-7.0.0/tests/test_compress_memory.py +109 -0
- claw_compactor-7.0.0/tests/test_compress_memory_comprehensive.py +191 -0
- claw_compactor-7.0.0/tests/test_compressed_context.py +145 -0
- claw_compactor-7.0.0/tests/test_config.py +54 -0
- claw_compactor-7.0.0/tests/test_cortex.py +241 -0
- claw_compactor-7.0.0/tests/test_crunch_bench.py +468 -0
- claw_compactor-7.0.0/tests/test_dedup_memory.py +124 -0
- claw_compactor-7.0.0/tests/test_dictionary.py +327 -0
- claw_compactor-7.0.0/tests/test_dictionary_comprehensive.py +241 -0
- claw_compactor-7.0.0/tests/test_engram.py +1091 -0
- claw_compactor-7.0.0/tests/test_engram_auto.py +947 -0
- claw_compactor-7.0.0/tests/test_engram_learner.py +388 -0
- claw_compactor-7.0.0/tests/test_error_handling.py +194 -0
- claw_compactor-7.0.0/tests/test_estimate_tokens.py +99 -0
- claw_compactor-7.0.0/tests/test_feedback.py +359 -0
- claw_compactor-7.0.0/tests/test_fusion_engine.py +898 -0
- claw_compactor-7.0.0/tests/test_fusion_pipeline.py +511 -0
- claw_compactor-7.0.0/tests/test_generate_summary_tiers.py +105 -0
- claw_compactor-7.0.0/tests/test_integration.py +249 -0
- claw_compactor-7.0.0/tests/test_lib_dedup.py +135 -0
- claw_compactor-7.0.0/tests/test_lib_markdown.py +388 -0
- claw_compactor-7.0.0/tests/test_lib_tokens.py +104 -0
- claw_compactor-7.0.0/tests/test_main_entry.py +176 -0
- claw_compactor-7.0.0/tests/test_markdown_advanced.py +284 -0
- claw_compactor-7.0.0/tests/test_neurosyntax.py +379 -0
- claw_compactor-7.0.0/tests/test_new_features.py +77 -0
- claw_compactor-7.0.0/tests/test_nexus.py +540 -0
- claw_compactor-7.0.0/tests/test_observation_comprehensive.py +261 -0
- claw_compactor-7.0.0/tests/test_observation_compressor.py +128 -0
- claw_compactor-7.0.0/tests/test_performance.py +172 -0
- claw_compactor-7.0.0/tests/test_phase3_structured.py +662 -0
- claw_compactor-7.0.0/tests/test_photon.py +431 -0
- claw_compactor-7.0.0/tests/test_pipeline.py +109 -0
- claw_compactor-7.0.0/tests/test_quantum_lock.py +374 -0
- claw_compactor-7.0.0/tests/test_real_workspace.py +161 -0
- claw_compactor-7.0.0/tests/test_rewind.py +560 -0
- claw_compactor-7.0.0/tests/test_rle.py +114 -0
- claw_compactor-7.0.0/tests/test_rle_comprehensive.py +151 -0
- claw_compactor-7.0.0/tests/test_roundtrip.py +253 -0
- claw_compactor-7.0.0/tests/test_roundtrip_comprehensive.py +192 -0
- claw_compactor-7.0.0/tests/test_semantic_dedup.py +693 -0
- claw_compactor-7.0.0/tests/test_structural_collapse.py +847 -0
- claw_compactor-7.0.0/tests/test_tiers_comprehensive.py +171 -0
- claw_compactor-7.0.0/tests/test_token_economics.py +237 -0
- claw_compactor-7.0.0/tests/test_tokenizer_optimizer.py +175 -0
- claw_compactor-7.0.0/tests/test_tokenizer_optimizer_comprehensive.py +241 -0
- claw_compactor-7.0.0/tests/test_tokens.py +27 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 OpenClaw Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: claw-compactor
|
|
3
|
+
Version: 7.0.0
|
|
4
|
+
Summary: 14-stage Fusion Pipeline for LLM token compression — 15-82% reduction depending on content, zero LLM inference cost, reversible compression, AST-aware code analysis
|
|
5
|
+
Author: Bot777, OpenClaw Contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/open-compress/claw-compactor
|
|
8
|
+
Project-URL: Documentation, https://docs.openclaw.ai
|
|
9
|
+
Project-URL: Repository, https://github.com/open-compress/claw-compactor
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/open-compress/claw-compactor/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/open-compress/claw-compactor/releases
|
|
12
|
+
Project-URL: Community, https://discord.com/invite/clawd
|
|
13
|
+
Keywords: token-compression,llm,prompt-compression,context-compression,ai-agent,token-optimization,token-reduction,cost-reduction,context-window,workspace-compression,memory-compression,openclaw,llm-tools,ai-cost-saving,context-pruning,tree-sitter
|
|
14
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Text Processing :: General
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Classifier: Topic :: Software Development :: Pre-processors
|
|
25
|
+
Classifier: Operating System :: OS Independent
|
|
26
|
+
Classifier: Typing :: Typed
|
|
27
|
+
Requires-Python: >=3.9
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Provides-Extra: accurate
|
|
31
|
+
Requires-Dist: tiktoken>=0.5.0; extra == "accurate"
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pyyaml>=6.0; extra == "dev"
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
|
|
37
|
+
<!--
|
|
38
|
+
<script type="application/ld+json">
|
|
39
|
+
{
|
|
40
|
+
"@context": "https://schema.org",
|
|
41
|
+
"@type": "SoftwareApplication",
|
|
42
|
+
"name": "Claw Compactor",
|
|
43
|
+
"description": "14-stage Fusion Pipeline for LLM token compression with reversible compression, AST-aware code analysis, and intelligent content routing",
|
|
44
|
+
"applicationCategory": "DeveloperApplication",
|
|
45
|
+
"operatingSystem": "Cross-platform",
|
|
46
|
+
"softwareVersion": "7.0.0",
|
|
47
|
+
"license": "https://opensource.org/licenses/MIT",
|
|
48
|
+
"url": "https://github.com/open-compress/claw-compactor",
|
|
49
|
+
"downloadUrl": "https://github.com/open-compress/claw-compactor",
|
|
50
|
+
"author": {
|
|
51
|
+
"@type": "Organization",
|
|
52
|
+
"name": "OpenClaw",
|
|
53
|
+
"url": "https://openclaw.ai"
|
|
54
|
+
},
|
|
55
|
+
"offers": {
|
|
56
|
+
"@type": "Offer",
|
|
57
|
+
"price": "0",
|
|
58
|
+
"priceCurrency": "USD"
|
|
59
|
+
},
|
|
60
|
+
"keywords": "token compression, LLM, AI agent, fusion pipeline, reversible compression, AST code analysis, context window optimization"
|
|
61
|
+
}
|
|
62
|
+
</script>
|
|
63
|
+
-->
|
|
64
|
+
|
|
65
|
+
<div align="center">
|
|
66
|
+
|
|
67
|
+
# Claw Compactor
|
|
68
|
+
|
|
69
|
+
### 14-Stage Fusion Pipeline for LLM Token Compression
|
|
70
|
+
|
|
71
|
+

|
|
72
|
+
|
|
73
|
+
[CI](https://github.com/open-compress/claw-compactor/actions)
|
|
74
|
+
[](https://github.com/open-compress/claw-compactor)
|
|
75
|
+
[Python 3.9+](https://python.org)
|
|
76
|
+
[MIT License](LICENSE)
|
|
77
|
+
[](https://github.com/open-compress/claw-compactor)
|
|
78
|
+
|
|
79
|
+
**15–82% compression depending on content · Zero LLM inference cost · Reversible · 1,676 tests**
|
|
80
|
+
|
|
81
|
+
[Architecture](ARCHITECTURE.md) · [Benchmarks](#benchmarks) · [Quick Start](#quick-start) · [API](#api)
|
|
82
|
+
|
|
83
|
+
</div>
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## What is Claw Compactor?
|
|
88
|
+
|
|
89
|
+
Claw Compactor is an open-source **LLM token compression engine** built around a 14-stage **Fusion Pipeline**. Each stage is a specialized compressor — from AST-aware code analysis to JSON statistical sampling to simhash-based deduplication — chained through an immutable data flow architecture where each stage's output feeds the next.
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
Input
|
|
93
|
+
|
|
|
94
|
+
v
|
|
95
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
96
|
+
│ FUSION PIPELINE │
|
|
97
|
+
│ │
|
|
98
|
+
│ QuantumLock ─> Cortex ─> Photon ─> RLE ─> SemanticDedup ─> Ionizer │
|
|
99
|
+
│ | | | | | | │
|
|
100
|
+
│ KV-cache auto-detect base64 path simhash JSON │
|
|
101
|
+
│ alignment 16 languages strip shorten dedup sampling │
|
|
102
|
+
│ │
|
|
103
|
+
│ ─> LogCrunch ─> SearchCrunch ─> DiffCrunch ─> StructuralCollapse │
|
|
104
|
+
│ | | | | │
|
|
105
|
+
│ log folding result dedup context fold import merge │
|
|
106
|
+
│ │
|
|
107
|
+
│ ─> Neurosyntax ─> Nexus ─> TokenOpt ─> Abbrev ─────────> Output │
|
|
108
|
+
│ | | | | │
|
|
109
|
+
│ AST compress ML token format NL shorten │
|
|
110
|
+
│ (tree-sitter) classify optimize (text only) │
|
|
111
|
+
│ │
|
|
112
|
+
│ [ RewindStore ] ── hash-addressed LRU for reversible retrieval │
|
|
113
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Key design principles:
|
|
117
|
+
|
|
118
|
+
- **Immutable data flow** — `FusionContext` is a frozen dataclass. Every stage produces a new `FusionResult`; nothing is mutated in-place.
|
|
119
|
+
- **Gate-before-compress** — Each stage has `should_apply()` that inspects context type, language, and role before doing any work. Stages that don't apply are skipped at zero cost.
|
|
120
|
+
- **Content-aware routing** — Cortex auto-detects content type (code, JSON, logs, diffs, search results) and language (Python, Go, Rust, TypeScript, etc.), then downstream stages make type-aware compression decisions.
|
|
121
|
+
- **Reversible compression** — Ionizer stores originals in a hash-addressed `RewindStore`. The LLM can call a tool to retrieve any compressed section by its marker ID.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Benchmarks
|
|
126
|
+
|
|
127
|
+
### Real-World Compression (FusionEngine v7 vs Legacy Regex)
|
|
128
|
+
|
|
129
|
+
| Content Type | Legacy | FusionEngine | Improvement |
|
|
130
|
+
|:-------------|-------:|-------------:|:-----------:|
|
|
131
|
+
| Python source | 7.3% | **25.0%** | 3.4x |
|
|
132
|
+
| JSON (100 items) | 12.6% | **81.9%** | 6.5x |
|
|
133
|
+
| Build logs | 5.5% | **24.1%** | 4.4x |
|
|
134
|
+
| Agent conversation | 5.7% | **31.0%** | 5.4x |
|
|
135
|
+
| Git diff | 6.2% | **15.0%** | 2.4x |
|
|
136
|
+
| Search results | 5.3% | **40.7%** | 7.7x |
|
|
137
|
+
| **Weighted average** | **9.2%** | **36.3%** | **3.9x** |
|
|
138
|
+
|
|
139
|
+
### SWE-bench Real Tasks
|
|
140
|
+
|
|
141
|
+
Tested on real SWE-bench instances with actual repository code:
|
|
142
|
+
|
|
143
|
+
| Instance | Size | Compression |
|
|
144
|
+
|:---------|-----:|------------:|
|
|
145
|
+
| django__django-11620 | 4.5K | **14.5%** |
|
|
146
|
+
| sympy__sympy-14396 | 5.5K | **19.1%** |
|
|
147
|
+
| scikit-learn-25747 | 11.8K | **15.9%** |
|
|
148
|
+
| scikit-learn-13554 | 73K | **11.8%** |
|
|
149
|
+
| scikit-learn-25308 | 81K | **14.4%** |
|
|
150
|
+
|
|
151
|
+
### vs LLMLingua-2 (ROUGE-L Fidelity)
|
|
152
|
+
|
|
153
|
+
| Compression Rate | Claw Compactor | LLMLingua-2 | Delta |
|
|
154
|
+
|:-----------------|---------------:|------------:|------:|
|
|
155
|
+
| 0.3 (aggressive) | **0.653** | 0.346 | +88.7% |
|
|
156
|
+
| 0.5 (balanced) | **0.723** | 0.570 | +26.8% |
|
|
157
|
+
|
|
158
|
+
Claw Compactor preserves more semantic content at the same compression ratio, with zero LLM inference cost.
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## Quick Start
|
|
163
|
+
|
|
164
|
+
### Install from PyPI
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
pip install claw-compactor
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Or clone from source
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
git clone https://github.com/open-compress/claw-compactor.git
|
|
174
|
+
cd claw-compactor
|
|
175
|
+
pip install -e .
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Run
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
# Benchmark your workspace (non-destructive)
|
|
182
|
+
claw-compactor benchmark /path/to/workspace
|
|
183
|
+
|
|
184
|
+
# Rule-based compression (use the `full` command for the complete pipeline)
|
|
185
|
+
claw-compactor compress /path/to/workspace
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
**Requirements:** Python 3.9+. Optional: `pip install "claw-compactor[accurate]"` for exact token counts via tiktoken.
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## API
|
|
193
|
+
|
|
194
|
+
### FusionEngine — Single Text
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
from scripts.lib.fusion.engine import FusionEngine
|
|
198
|
+
|
|
199
|
+
engine = FusionEngine()
|
|
200
|
+
|
|
201
|
+
result = engine.compress(
|
|
202
|
+
text="def hello():\n # greeting function\n print('hello')",
|
|
203
|
+
content_type="code", # or let Cortex auto-detect
|
|
204
|
+
language="python", # optional hint
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
print(result["compressed"]) # compressed output
|
|
208
|
+
print(result["stats"]) # per-stage timing + token counts
|
|
209
|
+
print(result["markers"]) # Rewind markers for reversibility
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### FusionEngine — Chat Messages
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
messages = [
|
|
216
|
+
{"role": "system", "content": "You are a coding assistant..."},
|
|
217
|
+
{"role": "user", "content": "Fix the auth bug in login.py"},
|
|
218
|
+
{"role": "assistant", "content": "I found the issue. Here's the fix:\n```python\n..."},
|
|
219
|
+
{"role": "tool", "content": '{"results": [{"file": "login.py", ...}, ...]}'},
|
|
220
|
+
]
|
|
221
|
+
|
|
222
|
+
result = engine.compress_messages(messages)
|
|
223
|
+
|
|
224
|
+
# Cross-message dedup runs first, then per-message pipeline
|
|
225
|
+
print(result["stats"]["reduction_pct"]) # aggregate compression %
|
|
226
|
+
print(result["per_message"]) # per-message breakdown
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### Rewind — Reversible Retrieval
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
engine = FusionEngine(enable_rewind=True)
|
|
233
|
+
result = engine.compress(large_json, content_type="json")
|
|
234
|
+
|
|
235
|
+
# LLM sees compressed output with markers like [rewind:abc123...]
|
|
236
|
+
# When the LLM needs the original, it calls the Rewind tool:
|
|
237
|
+
original = engine.rewind_store.retrieve("abc123def456...")
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### Custom Stage
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
from scripts.lib.fusion.base import FusionStage, FusionContext, FusionResult
|
|
244
|
+
|
|
245
|
+
class MyStage(FusionStage):
|
|
246
|
+
name = "my_compressor"
|
|
247
|
+
order = 22 # runs between StructuralCollapse (20) and Neurosyntax (25)
|
|
248
|
+
|
|
249
|
+
def should_apply(self, ctx: FusionContext) -> bool:
|
|
250
|
+
return ctx.content_type == "log"
|
|
251
|
+
|
|
252
|
+
def apply(self, ctx: FusionContext) -> FusionResult:
|
|
253
|
+
compressed = my_compression_logic(ctx.content)
|
|
254
|
+
return FusionResult(
|
|
255
|
+
content=compressed,
|
|
256
|
+
original_tokens=estimate_tokens(ctx.content),
|
|
257
|
+
compressed_tokens=estimate_tokens(compressed),
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
# Add to pipeline
|
|
261
|
+
pipeline = engine.pipeline.add(MyStage())
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
---
|
|
265
|
+
|
|
266
|
+
## The 14 Stages
|
|
267
|
+
|
|
268
|
+
| # | Stage | Order | Purpose | Applies To |
|
|
269
|
+
|:-:|:------|:-----:|:--------|:-----------|
|
|
270
|
+
| 1 | **QuantumLock** | 3 | Isolates dynamic content in system prompts to maximize KV-cache hit rate | system messages |
|
|
271
|
+
| 2 | **Cortex** | 5 | Auto-detects content type and programming language (16 languages) | untyped content |
|
|
272
|
+
| 3 | **Photon** | 8 | Detects and compresses base64-encoded images | all |
|
|
273
|
+
| 4 | **RLE** | 10 | Path shorthand (`$WS`), IP prefix compression, enum compaction | all |
|
|
274
|
+
| 5 | **SemanticDedup** | 12 | SimHash fingerprint deduplication across content blocks | all |
|
|
275
|
+
| 6 | **Ionizer** | 15 | JSON array statistical sampling with schema discovery + error preservation | json |
|
|
276
|
+
| 7 | **LogCrunch** | 16 | Folds repeated log lines with occurrence counts | log |
|
|
277
|
+
| 8 | **SearchCrunch** | 17 | Deduplicates search/grep results | search |
|
|
278
|
+
| 9 | **DiffCrunch** | 18 | Folds unchanged context lines in git diffs | diff |
|
|
279
|
+
| 10 | **StructuralCollapse** | 20 | Merges import blocks, collapses repeated assertions/patterns | code |
|
|
280
|
+
| 11 | **Neurosyntax** | 25 | AST-aware code compression via tree-sitter (safe regex fallback). Never shortens identifiers. | code |
|
|
281
|
+
| 12 | **Nexus** | 35 | ML token-level compression (stopword removal fallback without model) | text |
|
|
282
|
+
| 13 | **TokenOpt** | 40 | Tokenizer format optimization — strips bold/italic markers, normalizes whitespace | all |
|
|
283
|
+
| 14 | **Abbrev** | 45 | Natural language abbreviation. Only fires on text — never touches code, JSON, or structured data. | text |
|
|
284
|
+
|
|
285
|
+
Each stage is independent and stateless. Stages communicate only through the immutable `FusionContext` that flows forward through the pipeline.
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## Workspace Commands
|
|
290
|
+
|
|
291
|
+
```bash
|
|
292
|
+
python3 scripts/mem_compress.py <workspace> <command> [options]
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
| Command | Description |
|
|
296
|
+
|:--------|:-----------|
|
|
297
|
+
| `full` | Run complete compression pipeline |
|
|
298
|
+
| `benchmark` | Dry-run compression report |
|
|
299
|
+
| `compress` | Rule-based compression only |
|
|
300
|
+
| `dict` | Dictionary encoding with auto-learned codebook |
|
|
301
|
+
| `observe` | Session transcript JSONL to structured observations |
|
|
302
|
+
| `tiers` | Generate L0/L1/L2 tiered summaries |
|
|
303
|
+
| `dedup` | Cross-file duplicate detection |
|
|
304
|
+
| `estimate` | Token count report |
|
|
305
|
+
| `audit` | Workspace health check |
|
|
306
|
+
| `optimize` | Tokenizer-level format optimization |
|
|
307
|
+
| `auto` | Watch mode — compress on file changes |
|
|
308
|
+
|
|
309
|
+
Options: `--json`, `--dry-run`, `--since YYYY-MM-DD`, `--quiet`
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
## Architecture
|
|
314
|
+
|
|
315
|
+
See [ARCHITECTURE.md](ARCHITECTURE.md) for the full technical deep-dive:
|
|
316
|
+
- Immutable data flow design
|
|
317
|
+
- Stage execution model and gating
|
|
318
|
+
- Rewind reversible compression protocol
|
|
319
|
+
- Cross-message semantic deduplication
|
|
320
|
+
- How to extend the pipeline
|
|
321
|
+
|
|
322
|
+
```
|
|
323
|
+
12,000+ lines Python · 1,676 tests · 14 fusion stages · 0 external ML dependencies
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
|
|
328
|
+
## Installation
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
# Clone
|
|
332
|
+
git clone https://github.com/open-compress/claw-compactor.git
|
|
333
|
+
cd claw-compactor
|
|
334
|
+
|
|
335
|
+
# Optional: exact token counting
|
|
336
|
+
pip install tiktoken
|
|
337
|
+
|
|
338
|
+
# Optional: AST-aware code compression (Neurosyntax)
|
|
339
|
+
pip install tree-sitter-language-pack
|
|
340
|
+
|
|
341
|
+
# Development
|
|
342
|
+
pip install -e ".[dev,accurate]"
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
**Zero required dependencies.** tiktoken and tree-sitter are optional enhancements — the pipeline runs with built-in heuristic fallbacks for both.
|
|
346
|
+
|
|
347
|
+
---
|
|
348
|
+
|
|
349
|
+
## Project Stats
|
|
350
|
+
|
|
351
|
+
| Metric | Value |
|
|
352
|
+
|:-------|:------|
|
|
353
|
+
| Tests | 1,676 passed |
|
|
354
|
+
| Python source | 12,000+ lines |
|
|
355
|
+
| Fusion stages | 14 |
|
|
356
|
+
| Languages detected | 16 |
|
|
357
|
+
| Required dependencies | 0 |
|
|
358
|
+
| Compression (code) | 15–25% |
|
|
359
|
+
| Compression (JSON peak) | 81.9% |
|
|
360
|
+
| ROUGE-L @ 0.3 rate | 0.653 |
|
|
361
|
+
| License | MIT |
|
|
362
|
+
|
|
363
|
+
---
|
|
364
|
+
|
|
365
|
+
## Related
|
|
366
|
+
|
|
367
|
+
- [OpenClaw](https://openclaw.ai) — AI agent platform
|
|
368
|
+
- [ClawhubAI](https://clawhub.com) — Agent skills marketplace
|
|
369
|
+
- [OpenClaw Discord](https://discord.com/invite/clawd) — Community
|
|
370
|
+
- [OpenClaw Docs](https://docs.openclaw.ai) — Documentation
|
|
371
|
+
|
|
372
|
+
---
|
|
373
|
+
|
|
374
|
+
`token-compression` `llm-tools` `fusion-pipeline` `reversible-compression` `ast-code-analysis` `context-compression` `ai-agent` `openclaw` `python` `developer-tools`
|
|
375
|
+
|
|
376
|
+
## License
|
|
377
|
+
|
|
378
|
+
[MIT](LICENSE)
|