kompact 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. kompact-0.1.0/.github/workflows/ci.yml +32 -0
  2. kompact-0.1.0/.github/workflows/publish.yml +28 -0
  3. kompact-0.1.0/.gitignore +43 -0
  4. kompact-0.1.0/AGENTS.md +68 -0
  5. kompact-0.1.0/LICENSE +21 -0
  6. kompact-0.1.0/PKG-INFO +170 -0
  7. kompact-0.1.0/README.md +132 -0
  8. kompact-0.1.0/benchmarks/README.md +68 -0
  9. kompact-0.1.0/benchmarks/accuracy_preservation.py +120 -0
  10. kompact-0.1.0/benchmarks/compression_ratio.py +171 -0
  11. kompact-0.1.0/benchmarks/run_comparison.py +181 -0
  12. kompact-0.1.0/benchmarks/run_dataset_eval.py +162 -0
  13. kompact-0.1.0/benchmarks/suite/__init__.py +1 -0
  14. kompact-0.1.0/benchmarks/suite/baselines.py +152 -0
  15. kompact-0.1.0/benchmarks/suite/custom_metrics.py +57 -0
  16. kompact-0.1.0/benchmarks/suite/datasets.py +255 -0
  17. kompact-0.1.0/benchmarks/suite/evaluators.py +35 -0
  18. kompact-0.1.0/benchmarks/suite/fixture_generators.py +681 -0
  19. kompact-0.1.0/benchmarks/suite/metrics.py +79 -0
  20. kompact-0.1.0/benchmarks/suite/systems.py +143 -0
  21. kompact-0.1.0/docs/architecture.md +75 -0
  22. kompact-0.1.0/docs/benchmarks.md +50 -0
  23. kompact-0.1.0/docs/harness-engineering-learnings.md +43 -0
  24. kompact-0.1.0/docs/prd.md +58 -0
  25. kompact-0.1.0/docs/quality.md +38 -0
  26. kompact-0.1.0/docs/research/competitive-landscape.md +32 -0
  27. kompact-0.1.0/docs/research/compression-techniques.md +52 -0
  28. kompact-0.1.0/docs/research/economics.md +47 -0
  29. kompact-0.1.0/docs/sdd.md +100 -0
  30. kompact-0.1.0/pyproject.toml +64 -0
  31. kompact-0.1.0/src/kompact/__init__.py +3 -0
  32. kompact-0.1.0/src/kompact/__main__.py +73 -0
  33. kompact-0.1.0/src/kompact/cache/__init__.py +0 -0
  34. kompact-0.1.0/src/kompact/cache/store.py +224 -0
  35. kompact-0.1.0/src/kompact/config.py +110 -0
  36. kompact-0.1.0/src/kompact/metrics/__init__.py +0 -0
  37. kompact-0.1.0/src/kompact/metrics/tracker.py +138 -0
  38. kompact-0.1.0/src/kompact/parser/__init__.py +0 -0
  39. kompact-0.1.0/src/kompact/parser/messages.py +311 -0
  40. kompact-0.1.0/src/kompact/proxy/__init__.py +0 -0
  41. kompact-0.1.0/src/kompact/proxy/server.py +290 -0
  42. kompact-0.1.0/src/kompact/transforms/__init__.py +0 -0
  43. kompact-0.1.0/src/kompact/transforms/cache_aligner.py +125 -0
  44. kompact-0.1.0/src/kompact/transforms/code_compressor.py +204 -0
  45. kompact-0.1.0/src/kompact/transforms/content_compressor.py +255 -0
  46. kompact-0.1.0/src/kompact/transforms/json_crusher.py +200 -0
  47. kompact-0.1.0/src/kompact/transforms/log_compressor.py +155 -0
  48. kompact-0.1.0/src/kompact/transforms/observation_masker.py +100 -0
  49. kompact-0.1.0/src/kompact/transforms/pipeline.py +139 -0
  50. kompact-0.1.0/src/kompact/transforms/schema_optimizer.py +200 -0
  51. kompact-0.1.0/src/kompact/transforms/toon.py +482 -0
  52. kompact-0.1.0/src/kompact/types.py +119 -0
  53. kompact-0.1.0/tests/__init__.py +0 -0
  54. kompact-0.1.0/tests/fixtures/api_responses.json +247 -0
  55. kompact-0.1.0/tests/fixtures/code_files.py +492 -0
  56. kompact-0.1.0/tests/fixtures/log_outputs.txt +71 -0
  57. kompact-0.1.0/tests/fixtures/search_results.json +213 -0
  58. kompact-0.1.0/tests/test_artifact_index.py +61 -0
  59. kompact-0.1.0/tests/test_cache_aligner.py +123 -0
  60. kompact-0.1.0/tests/test_content_compressor.py +138 -0
  61. kompact-0.1.0/tests/test_json_crusher.py +103 -0
  62. kompact-0.1.0/tests/test_observation_masker.py +97 -0
  63. kompact-0.1.0/tests/test_pipeline.py +125 -0
  64. kompact-0.1.0/tests/test_schema_optimizer.py +122 -0
  65. kompact-0.1.0/tests/test_toon.py +141 -0
@@ -0,0 +1,32 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v4
21
+
22
+ - name: Set up Python ${{ matrix.python-version }}
23
+ run: uv python install ${{ matrix.python-version }}
24
+
25
+ - name: Install dependencies
26
+ run: uv sync --extra dev
27
+
28
+ - name: Lint
29
+ run: uv run ruff check src/ tests/
30
+
31
+ - name: Test
32
+ run: uv run pytest -v
@@ -0,0 +1,28 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment: pypi
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v4
20
+
21
+ - name: Set up Python
22
+ run: uv python install 3.12
23
+
24
+ - name: Build package
25
+ run: uv build
26
+
27
+ - name: Publish to PyPI
28
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,43 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ dist/
8
+ build/
9
+ .eggs/
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ ENV/
15
+
16
+ # IDE
17
+ .idea/
18
+ .vscode/
19
+ *.swp
20
+ *.swo
21
+ *~
22
+ .DS_Store
23
+
24
+ # Testing
25
+ .pytest_cache/
26
+ .coverage
27
+ htmlcov/
28
+ .mypy_cache/
29
+ .ruff_cache/
30
+
31
+ # Benchmark reports (generated)
32
+ benchmarks/reports/
33
+
34
+ # HuggingFace cache (downloaded datasets)
35
+ .cache/
36
+ hub/
37
+
38
+ # Environment
39
+ .env
40
+ .env.local
41
+
42
+ # uv
43
+ uv.lock
@@ -0,0 +1,68 @@
1
+ # AGENTS.md — Kompact Context Optimization Proxy
2
+
3
+ ## What is Kompact?
4
+
5
+ A transparent proxy that optimizes LLM context through multi-layer transforms.
6
+ Sits between agents (Claude Code, Cursor, etc.) and providers (Anthropic, OpenAI).
7
+
8
+ ## Architecture
9
+
10
+ ```
11
+ Request → Proxy → [Layer 1: Schema] → [Layer 2: Content] → [Layer 3: History] → [Layer 4: Cache] → Provider
12
+ ```
13
+
14
+ ## Entry Points
15
+
16
+ | What | Where | Notes |
17
+ |------|-------|-------|
18
+ | CLI | `src/kompact/__main__.py` | `kompact proxy --port 7878` |
19
+ | Proxy server | `src/kompact/proxy/server.py` | FastAPI, intercepts API requests |
20
+ | Transform pipeline | `src/kompact/transforms/pipeline.py` | Orchestrates all transforms |
21
+ | Configuration | `src/kompact/config.py` | Pydantic settings |
22
+ | Core types | `src/kompact/types.py` | Message, ToolOutput, TransformResult |
23
+
24
+ ## Transforms (each is independent, pure function)
25
+
26
+ | Transform | File | Layer | Typical Savings |
27
+ |-----------|------|-------|-----------------|
28
+ | TOON format | `src/kompact/transforms/toon.py` | 2 (Content) | 30-60% on JSON arrays |
29
+ | Observation masker | `src/kompact/transforms/observation_masker.py` | 3 (History) | 50% on old tool outputs |
30
+ | Cache aligner | `src/kompact/transforms/cache_aligner.py` | 4 (Cache) | Enables provider caching |
31
+ | JSON crusher | `src/kompact/transforms/json_crusher.py` | 2 (Content) | 40-80% on structured data |
32
+ | Schema optimizer | `src/kompact/transforms/schema_optimizer.py` | 1 (Schema) | 50-90% on tool defs |
33
+ | Code compressor | `src/kompact/transforms/code_compressor.py` | 2 (Content) | ~70% on code blocks |
34
+ | Log compressor | `src/kompact/transforms/log_compressor.py` | 2 (Content) | 60-90% on log output |
35
+
36
+ ## Key Invariants
37
+
38
+ 1. **All transforms are pure functions**: `list[Message] → TransformResult`
39
+ 2. **No transform modifies user messages** — only assistant/tool/system content
40
+ 3. **Every transform tracks `tokens_saved`** via `TransformResult`
41
+ 4. **Transforms are composable** — pipeline runs them in sequence
42
+
43
+ ## Documentation
44
+
45
+ | Doc | Path | Purpose |
46
+ |-----|------|---------|
47
+ | PRD | `docs/prd.md` | Product requirements |
48
+ | SDD | `docs/sdd.md` | System design |
49
+ | Architecture | `docs/architecture.md` | Layer details |
50
+ | Benchmarks | `docs/benchmarks.md` | Evaluation strategy |
51
+ | Quality | `docs/quality.md` | Quality grades per domain |
52
+ | Research | `docs/research/` | SOTA survey, competitors, economics |
53
+
54
+ ## Testing
55
+
56
+ ```bash
57
+ uv run pytest # All tests
58
+ uv run pytest tests/test_toon.py # Single transform
59
+ uv run python benchmarks/compression_ratio.py # Benchmarks
60
+ ```
61
+
62
+ ## Quick Start
63
+
64
+ ```bash
65
+ uv sync
66
+ uv run kompact proxy --port 7878
67
+ # Then: ANTHROPIC_BASE_URL=http://localhost:7878 claude
68
+ ```
kompact-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Kompact Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
kompact-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.4
2
+ Name: kompact
3
+ Version: 0.1.0
4
+ Summary: Multi-layer context optimization proxy for LLM agents
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: click>=8.1.0
18
+ Requires-Dist: fastapi>=0.115.0
19
+ Requires-Dist: httpx>=0.28.0
20
+ Requires-Dist: tiktoken>=0.8.0
21
+ Requires-Dist: uvicorn>=0.32.0
22
+ Provides-Extra: bench
23
+ Requires-Dist: context-bench>=0.1.0; extra == 'bench'
24
+ Requires-Dist: datasets>=4.5.0; extra == 'bench'
25
+ Requires-Dist: headroom-ai>=0.3.0; extra == 'bench'
26
+ Requires-Dist: llmlingua>=0.2.0; extra == 'bench'
27
+ Provides-Extra: code
28
+ Requires-Dist: tree-sitter-python>=0.23.0; extra == 'code'
29
+ Requires-Dist: tree-sitter>=0.23.0; extra == 'code'
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
32
+ Requires-Dist: pytest-httpx>=0.34.0; extra == 'dev'
33
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
34
+ Requires-Dist: ruff>=0.8.0; extra == 'dev'
35
+ Provides-Extra: schema
36
+ Requires-Dist: sentence-transformers>=3.0.0; extra == 'schema'
37
+ Description-Content-Type: text/markdown
38
+
39
+ # Kompact
40
+
41
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
42
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
43
+ [![Tests](https://img.shields.io/badge/tests-37%20passed-brightgreen.svg)](#development)
44
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
45
+
46
+ Multi-layer context optimization proxy for LLM agents. Reduces token usage by 40-70% with minimal information loss, verified via needle-in-a-haystack preservation tests.
47
+
48
+ ```
49
+ Agent ──> Kompact Proxy (localhost:7878) ──> LLM Provider
50
+
51
+ ├─ Layer 1: Schema Optimizer (TF-IDF tool selection)
52
+ ├─ Layer 2: Content Compressors (TOON, JSON, code, logs)
53
+ ├─ Layer 2b: Extractive Compressor (query-aware sentence selection)
54
+ ├─ Layer 3: Observation Masker (history management)
55
+ └─ Layer 4: Cache Aligner (prefix cache optimization)
56
+ ```
57
+
58
+ ## Quick Start
59
+
60
+ ```bash
61
+ # Install
62
+ uv sync
63
+
64
+ # Start proxy
65
+ uv run kompact proxy --port 7878
66
+
67
+ # Point your agent at it
68
+ export ANTHROPIC_BASE_URL=http://localhost:7878
69
+ claude # or any Anthropic/OpenAI-compatible agent
70
+ ```
71
+
72
+ ## How It Works
73
+
74
+ Kompact is a transparent HTTP proxy. No code changes needed — just change your base URL. It intercepts LLM API requests, applies a pipeline of transforms to compress the context, then forwards the optimized request to the provider.
75
+
76
+ | Transform | Target | Savings | Cost |
77
+ |-----------|--------|--------:|------|
78
+ | **TOON** | JSON arrays of objects | 30-60% | Zero (string manipulation) |
79
+ | **JSON Crusher** | Structured JSON data | 40-80% | Minimal (Counter stats) |
80
+ | **Code Compressor** | Code in tool results | ~70% | Regex parse |
81
+ | **Log Compressor** | Repetitive log output | 60-90% | Regex dedup |
82
+ | **Content Compressor** | Long prose/text | 25-55% | TF-IDF scoring |
83
+ | **Schema Optimizer** | Tool definitions | 50-90% | TF-IDF cosine similarity |
84
+ | **Observation Masker** | Old tool outputs | ~50% | Zero (placeholder swap) |
85
+ | **Cache Aligner** | System prompts | Provider cache discount | Regex substitution |
86
+
87
+ The pipeline adapts automatically — short contexts get light compression, long contexts get aggressive optimization.
88
+
89
+ ## Configuration
90
+
91
+ ```bash
92
+ # Disable specific transforms
93
+ uv run kompact proxy --port 7878 --disable toon --disable log_compressor
94
+
95
+ # Verbose mode
96
+ uv run kompact proxy --port 7878 --verbose
97
+
98
+ # View live dashboard
99
+ open http://localhost:7878/dashboard
100
+ ```
101
+
102
+ ## Benchmarks
103
+
104
+ Tested against Headroom and LLMLingua-2 on real datasets (BFCL, HotpotQA, Glaive, LongBench) using [context-bench](https://github.com/context-bench/context-bench).
105
+
106
+ **Search-heavy scenario (100 JSON results, 3 needles):**
107
+
108
+ | System | Compression | NIAH | Effective Ratio |
109
+ |--------|------------:|-----:|----------------:|
110
+ | Headroom | 0.0% | 100% | 0.0% |
111
+ | LLMLingua-2 | 55.4% | 0% | -44.6% |
112
+ | Truncation (50%) | 50.0% | 33% | -16.6% |
113
+ | **Kompact** | **47.7%** | **100%** | **47.7%** |
114
+
115
+ *Effective ratio* accounts for retry cost: if compression destroys information (NIAH miss), you pay for both the failed attempt and the retry with full context. Negative = worse than no compression.
116
+
117
+ ```bash
118
+ # Run on real datasets
119
+ uv run python benchmarks/run_dataset_eval.py --dataset bfcl -n 100
120
+
121
+ # Run synthetic scenarios
122
+ uv run python benchmarks/run_comparison.py --scenario search
123
+
124
+ # Exclude slow baselines
125
+ uv run python benchmarks/run_comparison.py --scenario search --exclude llmlingua headroom
126
+ ```
127
+
128
+ See [`benchmarks/README.md`](benchmarks/README.md) for full methodology.
129
+
130
+ ## Development
131
+
132
+ ```bash
133
+ # Install with dev deps
134
+ uv sync --extra dev
135
+
136
+ # Run tests
137
+ uv run pytest
138
+
139
+ # Lint
140
+ uv run ruff check src/ tests/
141
+
142
+ # Run single transform test
143
+ uv run pytest tests/test_toon.py -v
144
+ ```
145
+
146
+ ## Architecture
147
+
148
+ ```
149
+ src/kompact/
150
+ ├── proxy/server.py # FastAPI proxy (Anthropic + OpenAI)
151
+ ├── parser/messages.py # Provider format ↔ internal types
152
+ ├── transforms/
153
+ │ ├── pipeline.py # Orchestration + adaptive scaling
154
+ │ ├── toon.py # JSON array → tabular (TOON format)
155
+ │ ├── json_crusher.py # Statistical JSON compression
156
+ │ ├── code_compressor.py # Code → skeleton extraction
157
+ │ ├── log_compressor.py # Log deduplication
158
+ │ ├── content_compressor.py # Extractive text compression (TF-IDF)
159
+ │ ├── schema_optimizer.py # TF-IDF tool selection
160
+ │ ├── observation_masker.py # History management
161
+ │ └── cache_aligner.py # Prefix cache optimization
162
+ ├── cache/store.py # Compression store + artifact index
163
+ ├── config.py # Per-transform configuration
164
+ ├── types.py # Core data models
165
+ └── metrics/tracker.py # Per-request metrics
166
+ ```
167
+
168
+ ## License
169
+
170
+ MIT
@@ -0,0 +1,132 @@
1
+ # Kompact
2
+
3
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
4
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
5
+ [![Tests](https://img.shields.io/badge/tests-37%20passed-brightgreen.svg)](#development)
6
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
7
+
8
+ Multi-layer context optimization proxy for LLM agents. Reduces token usage by 40-70% with minimal information loss, verified via needle-in-a-haystack preservation tests.
9
+
10
+ ```
11
+ Agent ──> Kompact Proxy (localhost:7878) ──> LLM Provider
12
+
13
+ ├─ Layer 1: Schema Optimizer (TF-IDF tool selection)
14
+ ├─ Layer 2: Content Compressors (TOON, JSON, code, logs)
15
+ ├─ Layer 2b: Extractive Compressor (query-aware sentence selection)
16
+ ├─ Layer 3: Observation Masker (history management)
17
+ └─ Layer 4: Cache Aligner (prefix cache optimization)
18
+ ```
19
+
20
+ ## Quick Start
21
+
22
+ ```bash
23
+ # Install
24
+ uv sync
25
+
26
+ # Start proxy
27
+ uv run kompact proxy --port 7878
28
+
29
+ # Point your agent at it
30
+ export ANTHROPIC_BASE_URL=http://localhost:7878
31
+ claude # or any Anthropic/OpenAI-compatible agent
32
+ ```
33
+
34
+ ## How It Works
35
+
36
+ Kompact is a transparent HTTP proxy. No code changes needed — just change your base URL. It intercepts LLM API requests, applies a pipeline of transforms to compress the context, then forwards the optimized request to the provider.
37
+
38
+ | Transform | Target | Savings | Cost |
39
+ |-----------|--------|--------:|------|
40
+ | **TOON** | JSON arrays of objects | 30-60% | Zero (string manipulation) |
41
+ | **JSON Crusher** | Structured JSON data | 40-80% | Minimal (Counter stats) |
42
+ | **Code Compressor** | Code in tool results | ~70% | Regex parse |
43
+ | **Log Compressor** | Repetitive log output | 60-90% | Regex dedup |
44
+ | **Content Compressor** | Long prose/text | 25-55% | TF-IDF scoring |
45
+ | **Schema Optimizer** | Tool definitions | 50-90% | TF-IDF cosine similarity |
46
+ | **Observation Masker** | Old tool outputs | ~50% | Zero (placeholder swap) |
47
+ | **Cache Aligner** | System prompts | Provider cache discount | Regex substitution |
48
+
49
+ The pipeline adapts automatically — short contexts get light compression, long contexts get aggressive optimization.
50
+
51
+ ## Configuration
52
+
53
+ ```bash
54
+ # Disable specific transforms
55
+ uv run kompact proxy --port 7878 --disable toon --disable log_compressor
56
+
57
+ # Verbose mode
58
+ uv run kompact proxy --port 7878 --verbose
59
+
60
+ # View live dashboard
61
+ open http://localhost:7878/dashboard
62
+ ```
63
+
64
+ ## Benchmarks
65
+
66
+ Tested against Headroom and LLMLingua-2 on real datasets (BFCL, HotpotQA, Glaive, LongBench) using [context-bench](https://github.com/context-bench/context-bench).
67
+
68
+ **Search-heavy scenario (100 JSON results, 3 needles):**
69
+
70
+ | System | Compression | NIAH | Effective Ratio |
71
+ |--------|------------:|-----:|----------------:|
72
+ | Headroom | 0.0% | 100% | 0.0% |
73
+ | LLMLingua-2 | 55.4% | 0% | -44.6% |
74
+ | Truncation (50%) | 50.0% | 33% | -16.6% |
75
+ | **Kompact** | **47.7%** | **100%** | **47.7%** |
76
+
77
+ *Effective ratio* accounts for retry cost: if compression destroys information (NIAH miss), you pay for both the failed attempt and the retry with full context. Negative = worse than no compression.
78
+
79
+ ```bash
80
+ # Run on real datasets
81
+ uv run python benchmarks/run_dataset_eval.py --dataset bfcl -n 100
82
+
83
+ # Run synthetic scenarios
84
+ uv run python benchmarks/run_comparison.py --scenario search
85
+
86
+ # Exclude slow baselines
87
+ uv run python benchmarks/run_comparison.py --scenario search --exclude llmlingua headroom
88
+ ```
89
+
90
+ See [`benchmarks/README.md`](benchmarks/README.md) for full methodology.
91
+
92
+ ## Development
93
+
94
+ ```bash
95
+ # Install with dev deps
96
+ uv sync --extra dev
97
+
98
+ # Run tests
99
+ uv run pytest
100
+
101
+ # Lint
102
+ uv run ruff check src/ tests/
103
+
104
+ # Run single transform test
105
+ uv run pytest tests/test_toon.py -v
106
+ ```
107
+
108
+ ## Architecture
109
+
110
+ ```
111
+ src/kompact/
112
+ ├── proxy/server.py # FastAPI proxy (Anthropic + OpenAI)
113
+ ├── parser/messages.py # Provider format ↔ internal types
114
+ ├── transforms/
115
+ │ ├── pipeline.py # Orchestration + adaptive scaling
116
+ │ ├── toon.py # JSON array → tabular (TOON format)
117
+ │ ├── json_crusher.py # Statistical JSON compression
118
+ │ ├── code_compressor.py # Code → skeleton extraction
119
+ │ ├── log_compressor.py # Log deduplication
120
+ │ ├── content_compressor.py # Extractive text compression (TF-IDF)
121
+ │ ├── schema_optimizer.py # TF-IDF tool selection
122
+ │ ├── observation_masker.py # History management
123
+ │ └── cache_aligner.py # Prefix cache optimization
124
+ ├── cache/store.py # Compression store + artifact index
125
+ ├── config.py # Per-transform configuration
126
+ ├── types.py # Core data models
127
+ └── metrics/tracker.py # Per-request metrics
128
+ ```
129
+
130
+ ## License
131
+
132
+ MIT
@@ -0,0 +1,68 @@
1
+ # Benchmarks
2
+
3
+ ## Real Dataset Benchmarks (Primary)
4
+
5
+ Runs compression approaches against industry-standard datasets — the same ones
6
+ Headroom, LLMLingua, and other competitors publish numbers on.
7
+
8
+ ### Datasets
9
+
10
+ **Agentic / tool-calling (Kompact's target domain):**
11
+ - **BFCL** (Berkeley Function Calling Leaderboard) — real API schemas from the Gorilla project. The primary benchmark for tool-calling compression.
12
+ - **Glaive Function Calling v2** — 113K tool-calling conversations with JSON schemas in system prompts.
13
+
14
+ **QA / prose context (baseline comparison):**
15
+ - **HotpotQA** (distractor split) — multi-hop QA over Wikipedia paragraphs
16
+ - **LongBench v2** — long-context understanding across diverse domains
17
+
18
+ ### What's measured
19
+
20
+ - **Compression ratio** — tokens after / tokens before (lower = more compression)
21
+ - **Answer preservation** — does the answer string survive compression? (higher = better)
22
+ - **Latency** — wall-clock time per example
23
+
24
+ No LLM calls required. Measures compression quality, not downstream task accuracy.
25
+
26
+ ### Running
27
+
28
+ ```bash
29
+ # All 4 datasets (100 examples each)
30
+ uv run python benchmarks/run_dataset_eval.py
31
+
32
+ # Just the agentic datasets (BFCL + Glaive)
33
+ uv run python benchmarks/run_dataset_eval.py --dataset agentic
34
+
35
+ # Just the QA datasets (HotpotQA + LongBench)
36
+ uv run python benchmarks/run_dataset_eval.py --dataset qa
37
+
38
+ # Single dataset with custom size
39
+ uv run python benchmarks/run_dataset_eval.py --dataset bfcl -n 200
40
+ ```
41
+
42
+ Reports saved to `benchmarks/reports/dataset_eval_report.md`.
43
+
44
+ ## Synthetic Benchmarks (Secondary)
45
+
46
+ 6 synthetic agentic scenarios x 6 approaches. Useful for testing specific
47
+ transforms (TOON on JSON arrays, log compressor on logs, etc.).
48
+
49
+ ```bash
50
+ uv run python benchmarks/run_comparison.py
51
+ uv run python benchmarks/run_comparison.py --scenario search
52
+ ```
53
+
54
+ ## Approaches Compared
55
+
56
+ | # | Approach | Description |
57
+ |---|----------|-------------|
58
+ | 1 | No Compression | Pass-through baseline |
59
+ | 2 | JSON Minification | Re-serialize JSON compactly |
60
+ | 3 | Truncation (50%) | Keep first half of each content block |
61
+ | 4 | Headroom CCR | Replace large JSON arrays with schema marker + first item |
62
+ | 5 | LLMLingua-style | Word frequency pruning — remove low-importance words |
63
+ | 6 | **Kompact Pipeline** | Full multi-transform pipeline |
64
+
65
+ ## Legacy Benchmarks
66
+
67
+ - `compression_ratio.py` — per-transform compression ratios on test fixtures
68
+ - `accuracy_preservation.py` — NIAH test on synthetic data
@@ -0,0 +1,120 @@
1
+ """Benchmark: NIAH (Needle In A Haystack) accuracy preservation.
2
+
3
+ Verifies that critical items survive compression through the pipeline.
4
+
5
+ Usage:
6
+ uv run python benchmarks/accuracy_preservation.py
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import sys
13
+ from pathlib import Path
14
+
15
+ sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
16
+
17
+ from kompact.config import KompactConfig
18
+ from kompact.transforms.pipeline import run
19
+ from kompact.types import (
20
+ ContentBlock,
21
+ ContentType,
22
+ Message,
23
+ Provider,
24
+ Request,
25
+ Role,
26
+ )
27
+
28
+
29
def niah_test(needle: str, haystack_items: int = 100,
              needle_pos: int | None = None) -> dict:
    """Insert a needle into a haystack of similar items and verify that it
    survives the compression pipeline verbatim.

    Args:
        needle: Critical string that must be preserved exactly.
        haystack_items: Number of filler search-result items to generate.
        needle_pos: Index at which the needle is inserted. Defaults to one
            third of the way into the haystack (the original fixed position),
            kept deterministic so benchmark runs are reproducible.

    Returns:
        Dict with the needle, whether it survived (``found``), haystack size,
        character counts before/after, compression ratio, and the pipeline's
        reported ``tokens_saved``.
    """
    # Build haystack: many near-identical items that the compressor is
    # likely to aggregate or tabularize.
    haystack = [
        {"id": i, "type": "result", "title": f"Regular item {i}",
         "description": f"This is a normal search result number {i}",
         "url": f"https://example.com/{i}", "score": 0.5}
        for i in range(haystack_items)
    ]

    # Insert needle at a fixed, deterministic position (NOT random — the
    # original comment said "random", but reproducibility matters here).
    if needle_pos is None:
        needle_pos = haystack_items // 3
    haystack.insert(needle_pos, {
        "id": 9999,
        "type": "CRITICAL",
        "title": needle,
        "description": f"IMPORTANT: {needle}",
        "url": "https://critical.example.com/needle",
        "score": 1.0,
    })

    json_text = json.dumps(haystack)

    # Wrap the haystack as a tool-result block, the shape the pipeline
    # targets for JSON compression.
    messages = [
        Message(role=Role.USER, content=[
            ContentBlock(type=ContentType.TOOL_RESULT, text=json_text, tool_use_id="search"),
        ]),
    ]

    request = Request(
        provider=Provider.ANTHROPIC,
        messages=messages,
        model="benchmark",
    )

    # Run the full default transform pipeline over the request.
    config = KompactConfig()
    result = run(request, config)

    compressed_text = result.request.messages[0].content[0].text

    return {
        "needle": needle,
        "found": needle in compressed_text,
        "haystack_items": haystack_items,
        "original_chars": len(json_text),
        "compressed_chars": len(compressed_text),
        "ratio": len(compressed_text) / len(json_text),
        "tokens_saved": result.total_tokens_saved,
    }
78
+
79
+
80
def main():
    """Run the NIAH suite over a set of critical needles; exit non-zero
    (status 1) if any needle is destroyed by compression."""
    banner = "=" * 60
    print(banner)
    print("NIAH (Needle In A Haystack) Accuracy Test")
    print(banner)

    needles = [
        "The secret API key is sk-1234567890abcdef",
        "Deploy to production at 3pm PST",
        "Bug: users cannot login when password contains unicode",
        "Revenue increased 47% in Q3 2024",
        "CRITICAL: memory leak in worker process 7",
    ]

    # Record one boolean per needle instead of keeping two counters.
    outcomes = []
    for needle in needles:
        result = niah_test(needle, haystack_items=100)
        outcomes.append(result["found"])
        status = "PASS" if result["found"] else "FAIL"

        print(f"\n [{status}] Needle: \"{needle[:50]}...\"")
        print(f" Compression: {result['ratio']:.2%} "
              f"({result['tokens_saved']} tokens saved)")

    total = len(outcomes)
    found = sum(outcomes)

    print(f"\n{banner}")
    print(f"Results: {found}/{total} needles preserved ({found/total:.0%})")

    if found == total:
        print("ALL CRITICAL ITEMS SURVIVED COMPRESSION")
    else:
        print("WARNING: Some critical items were lost!")
        sys.exit(1)
117
+
118
+
119
# Script entry point: run the NIAH benchmark when executed directly
# (e.g. `uv run python benchmarks/accuracy_preservation.py`).
if __name__ == "__main__":
    main()