claw-compactor 7.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. claw_compactor-7.0.0/LICENSE +21 -0
  2. claw_compactor-7.0.0/PKG-INFO +378 -0
  3. claw_compactor-7.0.0/README.md +342 -0
  4. claw_compactor-7.0.0/benchmark/__init__.py +1 -0
  5. claw_compactor-7.0.0/benchmark/compressors.py +520 -0
  6. claw_compactor-7.0.0/benchmark/evaluate.py +278 -0
  7. claw_compactor-7.0.0/benchmark/report.py +241 -0
  8. claw_compactor-7.0.0/benchmark/run_benchmark.py +238 -0
  9. claw_compactor-7.0.0/claw_compactor.egg-info/PKG-INFO +378 -0
  10. claw_compactor-7.0.0/claw_compactor.egg-info/SOURCES.txt +118 -0
  11. claw_compactor-7.0.0/claw_compactor.egg-info/dependency_links.txt +1 -0
  12. claw_compactor-7.0.0/claw_compactor.egg-info/entry_points.txt +2 -0
  13. claw_compactor-7.0.0/claw_compactor.egg-info/requires.txt +7 -0
  14. claw_compactor-7.0.0/claw_compactor.egg-info/top_level.txt +4 -0
  15. claw_compactor-7.0.0/pyproject.toml +67 -0
  16. claw_compactor-7.0.0/scripts/__init__.py +0 -0
  17. claw_compactor-7.0.0/scripts/audit_memory.py +216 -0
  18. claw_compactor-7.0.0/scripts/benchmark_fusion.py +1021 -0
  19. claw_compactor-7.0.0/scripts/cli.py +28 -0
  20. claw_compactor-7.0.0/scripts/compress_memory.py +236 -0
  21. claw_compactor-7.0.0/scripts/compressed_context.py +280 -0
  22. claw_compactor-7.0.0/scripts/dedup_memory.py +147 -0
  23. claw_compactor-7.0.0/scripts/dictionary_compress.py +170 -0
  24. claw_compactor-7.0.0/scripts/engram_auto.py +1083 -0
  25. claw_compactor-7.0.0/scripts/engram_cli.py +494 -0
  26. claw_compactor-7.0.0/scripts/estimate_tokens.py +131 -0
  27. claw_compactor-7.0.0/scripts/generate_summary_tiers.py +292 -0
  28. claw_compactor-7.0.0/scripts/lib/__init__.py +34 -0
  29. claw_compactor-7.0.0/scripts/lib/config.py +289 -0
  30. claw_compactor-7.0.0/scripts/lib/crunch_bench.py +283 -0
  31. claw_compactor-7.0.0/scripts/lib/dedup.py +119 -0
  32. claw_compactor-7.0.0/scripts/lib/dictionary.py +281 -0
  33. claw_compactor-7.0.0/scripts/lib/engram.py +793 -0
  34. claw_compactor-7.0.0/scripts/lib/engram_learner.py +439 -0
  35. claw_compactor-7.0.0/scripts/lib/engram_prompts.py +175 -0
  36. claw_compactor-7.0.0/scripts/lib/engram_storage.py +254 -0
  37. claw_compactor-7.0.0/scripts/lib/exceptions.py +24 -0
  38. claw_compactor-7.0.0/scripts/lib/feedback.py +147 -0
  39. claw_compactor-7.0.0/scripts/lib/fusion/__init__.py +37 -0
  40. claw_compactor-7.0.0/scripts/lib/fusion/base.py +97 -0
  41. claw_compactor-7.0.0/scripts/lib/fusion/content_detector.py +307 -0
  42. claw_compactor-7.0.0/scripts/lib/fusion/cortex.py +48 -0
  43. claw_compactor-7.0.0/scripts/lib/fusion/diff_crunch.py +222 -0
  44. claw_compactor-7.0.0/scripts/lib/fusion/engine.py +602 -0
  45. claw_compactor-7.0.0/scripts/lib/fusion/ionizer.py +255 -0
  46. claw_compactor-7.0.0/scripts/lib/fusion/log_crunch.py +256 -0
  47. claw_compactor-7.0.0/scripts/lib/fusion/neurosyntax.py +339 -0
  48. claw_compactor-7.0.0/scripts/lib/fusion/nexus.py +288 -0
  49. claw_compactor-7.0.0/scripts/lib/fusion/nexus_model.py +180 -0
  50. claw_compactor-7.0.0/scripts/lib/fusion/photon.py +497 -0
  51. claw_compactor-7.0.0/scripts/lib/fusion/pipeline.py +94 -0
  52. claw_compactor-7.0.0/scripts/lib/fusion/quantum_lock.py +240 -0
  53. claw_compactor-7.0.0/scripts/lib/fusion/search_crunch.py +253 -0
  54. claw_compactor-7.0.0/scripts/lib/fusion/semantic_dedup.py +434 -0
  55. claw_compactor-7.0.0/scripts/lib/fusion/structural_collapse.py +452 -0
  56. claw_compactor-7.0.0/scripts/lib/markdown.py +305 -0
  57. claw_compactor-7.0.0/scripts/lib/rewind/__init__.py +16 -0
  58. claw_compactor-7.0.0/scripts/lib/rewind/marker.py +50 -0
  59. claw_compactor-7.0.0/scripts/lib/rewind/retriever.py +75 -0
  60. claw_compactor-7.0.0/scripts/lib/rewind/store.py +89 -0
  61. claw_compactor-7.0.0/scripts/lib/rle.py +165 -0
  62. claw_compactor-7.0.0/scripts/lib/tokenizer_optimizer.py +183 -0
  63. claw_compactor-7.0.0/scripts/lib/tokens.py +78 -0
  64. claw_compactor-7.0.0/scripts/lib/unicode_maps.py +78 -0
  65. claw_compactor-7.0.0/scripts/mem_compress.py +791 -0
  66. claw_compactor-7.0.0/scripts/observation_compressor.py +402 -0
  67. claw_compactor-7.0.0/setup.cfg +4 -0
  68. claw_compactor-7.0.0/tests/__init__.py +0 -0
  69. claw_compactor-7.0.0/tests/conftest.py +94 -0
  70. claw_compactor-7.0.0/tests/test_audit_comprehensive.py +123 -0
  71. claw_compactor-7.0.0/tests/test_audit_memory.py +67 -0
  72. claw_compactor-7.0.0/tests/test_benchmark.py +122 -0
  73. claw_compactor-7.0.0/tests/test_cli_commands.py +228 -0
  74. claw_compactor-7.0.0/tests/test_compress_memory.py +109 -0
  75. claw_compactor-7.0.0/tests/test_compress_memory_comprehensive.py +191 -0
  76. claw_compactor-7.0.0/tests/test_compressed_context.py +145 -0
  77. claw_compactor-7.0.0/tests/test_config.py +54 -0
  78. claw_compactor-7.0.0/tests/test_cortex.py +241 -0
  79. claw_compactor-7.0.0/tests/test_crunch_bench.py +468 -0
  80. claw_compactor-7.0.0/tests/test_dedup_memory.py +124 -0
  81. claw_compactor-7.0.0/tests/test_dictionary.py +327 -0
  82. claw_compactor-7.0.0/tests/test_dictionary_comprehensive.py +241 -0
  83. claw_compactor-7.0.0/tests/test_engram.py +1091 -0
  84. claw_compactor-7.0.0/tests/test_engram_auto.py +947 -0
  85. claw_compactor-7.0.0/tests/test_engram_learner.py +388 -0
  86. claw_compactor-7.0.0/tests/test_error_handling.py +194 -0
  87. claw_compactor-7.0.0/tests/test_estimate_tokens.py +99 -0
  88. claw_compactor-7.0.0/tests/test_feedback.py +359 -0
  89. claw_compactor-7.0.0/tests/test_fusion_engine.py +898 -0
  90. claw_compactor-7.0.0/tests/test_fusion_pipeline.py +511 -0
  91. claw_compactor-7.0.0/tests/test_generate_summary_tiers.py +105 -0
  92. claw_compactor-7.0.0/tests/test_integration.py +249 -0
  93. claw_compactor-7.0.0/tests/test_lib_dedup.py +135 -0
  94. claw_compactor-7.0.0/tests/test_lib_markdown.py +388 -0
  95. claw_compactor-7.0.0/tests/test_lib_tokens.py +104 -0
  96. claw_compactor-7.0.0/tests/test_main_entry.py +176 -0
  97. claw_compactor-7.0.0/tests/test_markdown_advanced.py +284 -0
  98. claw_compactor-7.0.0/tests/test_neurosyntax.py +379 -0
  99. claw_compactor-7.0.0/tests/test_new_features.py +77 -0
  100. claw_compactor-7.0.0/tests/test_nexus.py +540 -0
  101. claw_compactor-7.0.0/tests/test_observation_comprehensive.py +261 -0
  102. claw_compactor-7.0.0/tests/test_observation_compressor.py +128 -0
  103. claw_compactor-7.0.0/tests/test_performance.py +172 -0
  104. claw_compactor-7.0.0/tests/test_phase3_structured.py +662 -0
  105. claw_compactor-7.0.0/tests/test_photon.py +431 -0
  106. claw_compactor-7.0.0/tests/test_pipeline.py +109 -0
  107. claw_compactor-7.0.0/tests/test_quantum_lock.py +374 -0
  108. claw_compactor-7.0.0/tests/test_real_workspace.py +161 -0
  109. claw_compactor-7.0.0/tests/test_rewind.py +560 -0
  110. claw_compactor-7.0.0/tests/test_rle.py +114 -0
  111. claw_compactor-7.0.0/tests/test_rle_comprehensive.py +151 -0
  112. claw_compactor-7.0.0/tests/test_roundtrip.py +253 -0
  113. claw_compactor-7.0.0/tests/test_roundtrip_comprehensive.py +192 -0
  114. claw_compactor-7.0.0/tests/test_semantic_dedup.py +693 -0
  115. claw_compactor-7.0.0/tests/test_structural_collapse.py +847 -0
  116. claw_compactor-7.0.0/tests/test_tiers_comprehensive.py +171 -0
  117. claw_compactor-7.0.0/tests/test_token_economics.py +237 -0
  118. claw_compactor-7.0.0/tests/test_tokenizer_optimizer.py +175 -0
  119. claw_compactor-7.0.0/tests/test_tokenizer_optimizer_comprehensive.py +241 -0
  120. claw_compactor-7.0.0/tests/test_tokens.py +27 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 OpenClaw Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,378 @@
1
+ Metadata-Version: 2.4
2
+ Name: claw-compactor
3
+ Version: 7.0.0
4
+ Summary: 14-stage Fusion Pipeline for LLM token compression — 15-82% reduction depending on content, zero LLM inference cost, reversible compression, AST-aware code analysis
5
+ Author: Bot777, OpenClaw Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/open-compress/claw-compactor
8
+ Project-URL: Documentation, https://docs.openclaw.ai
9
+ Project-URL: Repository, https://github.com/open-compress/claw-compactor
10
+ Project-URL: Bug Tracker, https://github.com/open-compress/claw-compactor/issues
11
+ Project-URL: Changelog, https://github.com/open-compress/claw-compactor/releases
12
+ Project-URL: Community, https://discord.com/invite/clawd
13
+ Keywords: token-compression,llm,prompt-compression,context-compression,ai-agent,token-optimization,token-reduction,cost-reduction,context-window,workspace-compression,memory-compression,openclaw,llm-tools,ai-cost-saving,context-pruning,tree-sitter
14
+ Classifier: Development Status :: 5 - Production/Stable
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Text Processing :: General
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Classifier: Topic :: Software Development :: Pre-processors
25
+ Classifier: Operating System :: OS Independent
26
+ Classifier: Typing :: Typed
27
+ Requires-Python: >=3.9
28
+ Description-Content-Type: text/markdown
29
+ License-File: LICENSE
30
+ Provides-Extra: accurate
31
+ Requires-Dist: tiktoken>=0.5.0; extra == "accurate"
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=7.0; extra == "dev"
34
+ Requires-Dist: pyyaml>=6.0; extra == "dev"
35
+ Dynamic: license-file
36
+
37
+ <!--
38
+ <script type="application/ld+json">
39
+ {
40
+ "@context": "https://schema.org",
41
+ "@type": "SoftwareApplication",
42
+ "name": "Claw Compactor",
43
+ "description": "14-stage Fusion Pipeline for LLM token compression with reversible compression, AST-aware code analysis, and intelligent content routing",
44
+ "applicationCategory": "DeveloperApplication",
45
+ "operatingSystem": "Cross-platform",
46
+ "softwareVersion": "7.0.0",
47
+ "license": "https://opensource.org/licenses/MIT",
48
+ "url": "https://github.com/open-compress/claw-compactor",
49
+ "downloadUrl": "https://github.com/open-compress/claw-compactor",
50
+ "author": {
51
+ "@type": "Organization",
52
+ "name": "OpenClaw",
53
+ "url": "https://openclaw.ai"
54
+ },
55
+ "offers": {
56
+ "@type": "Offer",
57
+ "price": "0",
58
+ "priceCurrency": "USD"
59
+ },
60
+ "keywords": "token compression, LLM, AI agent, fusion pipeline, reversible compression, AST code analysis, context window optimization"
61
+ }
62
+ </script>
63
+ -->
64
+
65
+ <div align="center">
66
+
67
+ # Claw Compactor
68
+
69
+ ### 14-Stage Fusion Pipeline for LLM Token Compression
70
+
71
+ ![Claw Compactor Banner](assets/banner.png)
72
+
73
+ [![CI](https://github.com/open-compress/claw-compactor/actions/workflows/ci.yml/badge.svg)](https://github.com/open-compress/claw-compactor/actions)
74
+ [![Tests](https://img.shields.io/badge/tests-1676%20passed-brightgreen)](https://github.com/open-compress/claw-compactor)
75
+ [![Python](https://img.shields.io/badge/python-3.9%2B-blue)](https://python.org)
76
+ [![License](https://img.shields.io/badge/license-MIT-purple)](LICENSE)
77
+ [![Stars](https://img.shields.io/github/stars/open-compress/claw-compactor?style=social)](https://github.com/open-compress/claw-compactor)
78
+
79
+ **15–82% compression depending on content &middot; Zero LLM inference cost &middot; Reversible &middot; 1,676 tests**
80
+
81
+ [Architecture](ARCHITECTURE.md) &middot; [Benchmarks](#benchmarks) &middot; [Quick Start](#quick-start) &middot; [API](#api)
82
+
83
+ </div>
84
+
85
+ ---
86
+
87
+ ## What is Claw Compactor?
88
+
89
+ Claw Compactor is an open-source **LLM token compression engine** built around a 14-stage **Fusion Pipeline**. Each stage is a specialized compressor — from AST-aware code analysis to JSON statistical sampling to simhash-based deduplication — chained through an immutable data flow architecture where each stage's output feeds the next.
90
+
91
+ ```
92
+ Input
93
+ |
94
+ v
95
+ ┌─────────────────────────────────────────────────────────────────────────┐
96
+ │ FUSION PIPELINE │
97
+ │ │
98
+ │ QuantumLock ─> Cortex ─> Photon ─> RLE ─> SemanticDedup ─> Ionizer │
99
+ │ | | | | | | │
100
+ │ KV-cache auto-detect base64 path simhash JSON │
101
+ │ alignment 16 languages strip shorten dedup sampling │
102
+ │ │
103
+ │ ─> LogCrunch ─> SearchCrunch ─> DiffCrunch ─> StructuralCollapse │
104
+ │ | | | | │
105
+ │ log folding result dedup context fold import merge │
106
+ │ │
107
+ │ ─> Neurosyntax ─> Nexus ─> TokenOpt ─> Abbrev ─────────> Output │
108
+ │ | | | | │
109
+ │ AST compress ML token format NL shorten │
110
+ │ (tree-sitter) classify optimize (text only) │
111
+ │ │
112
+ │ [ RewindStore ] ── hash-addressed LRU for reversible retrieval │
113
+ └─────────────────────────────────────────────────────────────────────────┘
114
+ ```
115
+
116
+ Key design principles:
117
+
118
+ - **Immutable data flow** — `FusionContext` is a frozen dataclass. Every stage produces a new `FusionResult`; nothing is mutated in-place.
119
+ - **Gate-before-compress** — Each stage has `should_apply()` that inspects context type, language, and role before doing any work. Stages that don't apply are skipped at zero cost.
120
+ - **Content-aware routing** — Cortex auto-detects content type (code, JSON, logs, diffs, search results) and language (Python, Go, Rust, TypeScript, etc.), then downstream stages make type-aware compression decisions.
121
+ - **Reversible compression** — Ionizer stores originals in a hash-addressed `RewindStore`. The LLM can call a tool to retrieve any compressed section by its marker ID.
122
+
123
+ ---
124
+
125
+ ## Benchmarks
126
+
127
+ ### Real-World Compression (FusionEngine v7 vs Legacy Regex)
128
+
129
+ | Content Type | Legacy | FusionEngine | Improvement |
130
+ |:-------------|-------:|-------------:|:-----------:|
131
+ | Python source | 7.3% | **25.0%** | 3.4x |
132
+ | JSON (100 items) | 12.6% | **81.9%** | 6.5x |
133
+ | Build logs | 5.5% | **24.1%** | 4.4x |
134
+ | Agent conversation | 5.7% | **31.0%** | 5.4x |
135
+ | Git diff | 6.2% | **15.0%** | 2.4x |
136
+ | Search results | 5.3% | **40.7%** | 7.7x |
137
+ | **Weighted average** | **9.2%** | **36.3%** | **3.9x** |
138
+
139
+ ### SWE-bench Real Tasks
140
+
141
+ Tested on real SWE-bench instances with actual repository code:
142
+
143
+ | Instance | Size | Compression |
144
+ |:---------|-----:|------------:|
145
+ | django__django-11620 | 4.5K | **14.5%** |
146
+ | sympy__sympy-14396 | 5.5K | **19.1%** |
147
+ | scikit-learn-25747 | 11.8K | **15.9%** |
148
+ | scikit-learn-13554 | 73K | **11.8%** |
149
+ | scikit-learn-25308 | 81K | **14.4%** |
150
+
151
+ ### vs LLMLingua-2 (ROUGE-L Fidelity)
152
+
153
+ | Compression Rate | Claw Compactor | LLMLingua-2 | Delta |
154
+ |:-----------------|---------------:|------------:|------:|
155
+ | 0.3 (aggressive) | **0.653** | 0.346 | +88.2% |
156
+ | 0.5 (balanced) | **0.723** | 0.570 | +26.8% |
157
+
158
+ Claw Compactor preserves more semantic content at the same compression ratio, with zero LLM inference cost.
159
+
160
+ ---
161
+
162
+ ## Quick Start
163
+
164
+ ### Install from PyPI
165
+
166
+ ```bash
167
+ pip install claw-compactor
168
+ ```
169
+
170
+ ### Or clone from source
171
+
172
+ ```bash
173
+ git clone https://github.com/open-compress/claw-compactor.git
174
+ cd claw-compactor
175
+ pip install -e .
176
+ ```
177
+
178
+ ### Run
179
+
180
+ ```bash
181
+ # Benchmark your workspace (non-destructive)
182
+ claw-compactor benchmark /path/to/workspace
183
+
184
+ # Full compression pipeline
185
+ claw-compactor compress /path/to/workspace
186
+ ```
187
+
188
+ **Requirements:** Python 3.9+. Optional: `pip install claw-compactor[accurate]` for exact token counts via tiktoken.
189
+
190
+ ---
191
+
192
+ ## API
193
+
194
+ ### FusionEngine — Single Text
195
+
196
+ ```python
197
+ from scripts.lib.fusion.engine import FusionEngine
198
+
199
+ engine = FusionEngine()
200
+
201
+ result = engine.compress(
202
+ text="def hello():\n # greeting function\n print('hello')",
203
+ content_type="code", # or let Cortex auto-detect
204
+ language="python", # optional hint
205
+ )
206
+
207
+ print(result["compressed"]) # compressed output
208
+ print(result["stats"]) # per-stage timing + token counts
209
+ print(result["markers"]) # Rewind markers for reversibility
210
+ ```
211
+
212
+ ### FusionEngine — Chat Messages
213
+
214
+ ```python
215
+ messages = [
216
+ {"role": "system", "content": "You are a coding assistant..."},
217
+ {"role": "user", "content": "Fix the auth bug in login.py"},
218
+ {"role": "assistant", "content": "I found the issue. Here's the fix:\n```python\n..."},
219
+ {"role": "tool", "content": '{"results": [{"file": "login.py", ...}, ...]}'},
220
+ ]
221
+
222
+ result = engine.compress_messages(messages)
223
+
224
+ # Cross-message dedup runs first, then per-message pipeline
225
+ print(result["stats"]["reduction_pct"]) # aggregate compression %
226
+ print(result["per_message"]) # per-message breakdown
227
+ ```
228
+
229
+ ### Rewind — Reversible Retrieval
230
+
231
+ ```python
232
+ engine = FusionEngine(enable_rewind=True)
233
+ result = engine.compress(large_json, content_type="json")
234
+
235
+ # LLM sees compressed output with markers like [rewind:abc123...]
236
+ # When the LLM needs the original, it calls the Rewind tool:
237
+ original = engine.rewind_store.retrieve("abc123def456...")
238
+ ```
239
+
240
+ ### Custom Stage
241
+
242
+ ```python
243
+ from scripts.lib.fusion.base import FusionStage, FusionContext, FusionResult
244
+
245
+ class MyStage(FusionStage):
246
+ name = "my_compressor"
247
+ order = 22 # runs between StructuralCollapse (20) and Neurosyntax (25)
248
+
249
+ def should_apply(self, ctx: FusionContext) -> bool:
250
+ return ctx.content_type == "log"
251
+
252
+ def apply(self, ctx: FusionContext) -> FusionResult:
253
+ compressed = my_compression_logic(ctx.content)
254
+ return FusionResult(
255
+ content=compressed,
256
+ original_tokens=estimate_tokens(ctx.content),
257
+ compressed_tokens=estimate_tokens(compressed),
258
+ )
259
+
260
+ # Add to pipeline
261
+ pipeline = engine.pipeline.add(MyStage())
262
+ ```
263
+
264
+ ---
265
+
266
+ ## The 14 Stages
267
+
268
+ | # | Stage | Order | Purpose | Applies To |
269
+ |:-:|:------|:-----:|:--------|:-----------|
270
+ | 1 | **QuantumLock** | 3 | Isolates dynamic content in system prompts to maximize KV-cache hit rate | system messages |
271
+ | 2 | **Cortex** | 5 | Auto-detects content type and programming language (16 languages) | untyped content |
272
+ | 3 | **Photon** | 8 | Detects and compresses base64-encoded images | all |
273
+ | 4 | **RLE** | 10 | Path shorthand (`$WS`), IP prefix compression, enum compaction | all |
274
+ | 5 | **SemanticDedup** | 12 | SimHash fingerprint deduplication across content blocks | all |
275
+ | 6 | **Ionizer** | 15 | JSON array statistical sampling with schema discovery + error preservation | json |
276
+ | 7 | **LogCrunch** | 16 | Folds repeated log lines with occurrence counts | log |
277
+ | 8 | **SearchCrunch** | 17 | Deduplicates search/grep results | search |
278
+ | 9 | **DiffCrunch** | 18 | Folds unchanged context lines in git diffs | diff |
279
+ | 10 | **StructuralCollapse** | 20 | Merges import blocks, collapses repeated assertions/patterns | code |
280
+ | 11 | **Neurosyntax** | 25 | AST-aware code compression via tree-sitter (safe regex fallback). Never shortens identifiers. | code |
281
+ | 12 | **Nexus** | 35 | ML token-level compression (stopword removal fallback without model) | text |
282
+ | 13 | **TokenOpt** | 40 | Tokenizer format optimization — strips bold/italic markers, normalizes whitespace | all |
283
+ | 14 | **Abbrev** | 45 | Natural language abbreviation. Only fires on text — never touches code, JSON, or structured data. | text |
284
+
285
+ Each stage is independent and stateless. Stages communicate only through the immutable `FusionContext` that flows forward through the pipeline.
286
+
287
+ ---
288
+
289
+ ## Workspace Commands
290
+
291
+ ```bash
292
+ python3 scripts/mem_compress.py <workspace> <command> [options]
293
+ ```
294
+
295
+ | Command | Description |
296
+ |:--------|:-----------|
297
+ | `full` | Run complete compression pipeline |
298
+ | `benchmark` | Dry-run compression report |
299
+ | `compress` | Rule-based compression only |
300
+ | `dict` | Dictionary encoding with auto-learned codebook |
301
+ | `observe` | Convert session transcript JSONL into structured observations |
302
+ | `tiers` | Generate L0/L1/L2 tiered summaries |
303
+ | `dedup` | Cross-file duplicate detection |
304
+ | `estimate` | Token count report |
305
+ | `audit` | Workspace health check |
306
+ | `optimize` | Tokenizer-level format optimization |
307
+ | `auto` | Watch mode — compress on file changes |
308
+
309
+ Options: `--json`, `--dry-run`, `--since YYYY-MM-DD`, `--quiet`
310
+
311
+ ---
312
+
313
+ ## Architecture
314
+
315
+ See [ARCHITECTURE.md](ARCHITECTURE.md) for the full technical deep-dive:
316
+ - Immutable data flow design
317
+ - Stage execution model and gating
318
+ - Rewind reversible compression protocol
319
+ - Cross-message semantic deduplication
320
+ - How to extend the pipeline
321
+
322
+ ```
323
+ 12,000+ lines Python · 1,676 tests · 14 fusion stages · 0 external ML dependencies
324
+ ```
325
+
326
+ ---
327
+
328
+ ## Installation
329
+
330
+ ```bash
331
+ # Clone
332
+ git clone https://github.com/open-compress/claw-compactor.git
333
+ cd claw-compactor
334
+
335
+ # Optional: exact token counting
336
+ pip install tiktoken
337
+
338
+ # Optional: AST-aware code compression (Neurosyntax)
339
+ pip install tree-sitter-language-pack
340
+
341
+ # Development
342
+ pip install -e ".[dev,accurate]"
343
+ ```
344
+
345
+ **Zero required dependencies.** tiktoken and tree-sitter are optional enhancements — the pipeline runs with built-in heuristic fallbacks for both.
346
+
347
+ ---
348
+
349
+ ## Project Stats
350
+
351
+ | Metric | Value |
352
+ |:-------|:------|
353
+ | Tests | 1,676 passed |
354
+ | Python source | 12,000+ lines |
355
+ | Fusion stages | 14 |
356
+ | Languages detected | 16 |
357
+ | Required dependencies | 0 |
358
+ | Compression (code) | 15–25% |
359
+ | Compression (JSON peak) | 81.9% |
360
+ | ROUGE-L @ 0.3 rate | 0.653 |
361
+ | License | MIT |
362
+
363
+ ---
364
+
365
+ ## Related
366
+
367
+ - [OpenClaw](https://openclaw.ai) — AI agent platform
368
+ - [ClawhubAI](https://clawhub.com) — Agent skills marketplace
369
+ - [OpenClaw Discord](https://discord.com/invite/clawd) — Community
370
+ - [OpenClaw Docs](https://docs.openclaw.ai) — Documentation
371
+
372
+ ---
373
+
374
+ `token-compression` `llm-tools` `fusion-pipeline` `reversible-compression` `ast-code-analysis` `context-compression` `ai-agent` `openclaw` `python` `developer-tools`
375
+
376
+ ## License
377
+
378
+ [MIT](LICENSE)