kompact 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. kompact-0.1.0/.github/workflows/ci.yml +32 -0
  2. kompact-0.1.0/.github/workflows/publish.yml +28 -0
  3. kompact-0.1.0/.gitignore +43 -0
  4. kompact-0.1.0/AGENTS.md +68 -0
  5. kompact-0.1.0/LICENSE +21 -0
  6. kompact-0.1.0/PKG-INFO +170 -0
  7. kompact-0.1.0/README.md +132 -0
  8. kompact-0.1.0/benchmarks/README.md +68 -0
  9. kompact-0.1.0/benchmarks/accuracy_preservation.py +120 -0
  10. kompact-0.1.0/benchmarks/compression_ratio.py +171 -0
  11. kompact-0.1.0/benchmarks/run_comparison.py +181 -0
  12. kompact-0.1.0/benchmarks/run_dataset_eval.py +162 -0
  13. kompact-0.1.0/benchmarks/suite/__init__.py +1 -0
  14. kompact-0.1.0/benchmarks/suite/baselines.py +152 -0
  15. kompact-0.1.0/benchmarks/suite/custom_metrics.py +57 -0
  16. kompact-0.1.0/benchmarks/suite/datasets.py +255 -0
  17. kompact-0.1.0/benchmarks/suite/evaluators.py +35 -0
  18. kompact-0.1.0/benchmarks/suite/fixture_generators.py +681 -0
  19. kompact-0.1.0/benchmarks/suite/metrics.py +79 -0
  20. kompact-0.1.0/benchmarks/suite/systems.py +143 -0
  21. kompact-0.1.0/docs/architecture.md +75 -0
  22. kompact-0.1.0/docs/benchmarks.md +50 -0
  23. kompact-0.1.0/docs/harness-engineering-learnings.md +43 -0
  24. kompact-0.1.0/docs/prd.md +58 -0
  25. kompact-0.1.0/docs/quality.md +38 -0
  26. kompact-0.1.0/docs/research/competitive-landscape.md +32 -0
  27. kompact-0.1.0/docs/research/compression-techniques.md +52 -0
  28. kompact-0.1.0/docs/research/economics.md +47 -0
  29. kompact-0.1.0/docs/sdd.md +100 -0
  30. kompact-0.1.0/pyproject.toml +64 -0
  31. kompact-0.1.0/src/kompact/__init__.py +3 -0
  32. kompact-0.1.0/src/kompact/__main__.py +73 -0
  33. kompact-0.1.0/src/kompact/cache/__init__.py +0 -0
  34. kompact-0.1.0/src/kompact/cache/store.py +224 -0
  35. kompact-0.1.0/src/kompact/config.py +110 -0
  36. kompact-0.1.0/src/kompact/metrics/__init__.py +0 -0
  37. kompact-0.1.0/src/kompact/metrics/tracker.py +138 -0
  38. kompact-0.1.0/src/kompact/parser/__init__.py +0 -0
  39. kompact-0.1.0/src/kompact/parser/messages.py +311 -0
  40. kompact-0.1.0/src/kompact/proxy/__init__.py +0 -0
  41. kompact-0.1.0/src/kompact/proxy/server.py +290 -0
  42. kompact-0.1.0/src/kompact/transforms/__init__.py +0 -0
  43. kompact-0.1.0/src/kompact/transforms/cache_aligner.py +125 -0
  44. kompact-0.1.0/src/kompact/transforms/code_compressor.py +204 -0
  45. kompact-0.1.0/src/kompact/transforms/content_compressor.py +255 -0
  46. kompact-0.1.0/src/kompact/transforms/json_crusher.py +200 -0
  47. kompact-0.1.0/src/kompact/transforms/log_compressor.py +155 -0
  48. kompact-0.1.0/src/kompact/transforms/observation_masker.py +100 -0
  49. kompact-0.1.0/src/kompact/transforms/pipeline.py +139 -0
  50. kompact-0.1.0/src/kompact/transforms/schema_optimizer.py +200 -0
  51. kompact-0.1.0/src/kompact/transforms/toon.py +482 -0
  52. kompact-0.1.0/src/kompact/types.py +119 -0
  53. kompact-0.1.0/tests/__init__.py +0 -0
  54. kompact-0.1.0/tests/fixtures/api_responses.json +247 -0
  55. kompact-0.1.0/tests/fixtures/code_files.py +492 -0
  56. kompact-0.1.0/tests/fixtures/log_outputs.txt +71 -0
  57. kompact-0.1.0/tests/fixtures/search_results.json +213 -0
  58. kompact-0.1.0/tests/test_artifact_index.py +61 -0
  59. kompact-0.1.0/tests/test_cache_aligner.py +123 -0
  60. kompact-0.1.0/tests/test_content_compressor.py +138 -0
  61. kompact-0.1.0/tests/test_json_crusher.py +103 -0
  62. kompact-0.1.0/tests/test_observation_masker.py +97 -0
  63. kompact-0.1.0/tests/test_pipeline.py +125 -0
  64. kompact-0.1.0/tests/test_schema_optimizer.py +122 -0
  65. kompact-0.1.0/tests/test_toon.py +141 -0
@@ -0,0 +1,32 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v4
21
+
22
+ - name: Set up Python ${{ matrix.python-version }}
23
+ run: uv python install ${{ matrix.python-version }}
24
+
25
+ - name: Install dependencies
26
+ run: uv sync --extra dev
27
+
28
+ - name: Lint
29
+ run: uv run ruff check src/ tests/
30
+
31
+ - name: Test
32
+ run: uv run pytest -v
@@ -0,0 +1,28 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment: pypi
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v4
20
+
21
+ - name: Set up Python
22
+ run: uv python install 3.12
23
+
24
+ - name: Build package
25
+ run: uv build
26
+
27
+ - name: Publish to PyPI
28
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,43 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ dist/
8
+ build/
9
+ .eggs/
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ ENV/
15
+
16
+ # IDE
17
+ .idea/
18
+ .vscode/
19
+ *.swp
20
+ *.swo
21
+ *~
22
+ .DS_Store
23
+
24
+ # Testing
25
+ .pytest_cache/
26
+ .coverage
27
+ htmlcov/
28
+ .mypy_cache/
29
+ .ruff_cache/
30
+
31
+ # Benchmark reports (generated)
32
+ benchmarks/reports/
33
+
34
+ # HuggingFace cache (downloaded datasets)
35
+ .cache/
36
+ hub/
37
+
38
+ # Environment
39
+ .env
40
+ .env.local
41
+
42
+ # uv
43
+ uv.lock
@@ -0,0 +1,68 @@
1
+ # AGENTS.md — Kompact Context Optimization Proxy
2
+
3
+ ## What is Kompact?
4
+
5
+ A transparent proxy that optimizes LLM context through multi-layer transforms.
6
+ Sits between agents (Claude Code, Cursor, etc.) and providers (Anthropic, OpenAI).
7
+
8
+ ## Architecture
9
+
10
+ ```
11
+ Request → Proxy → [Layer 1: Schema] → [Layer 2: Content] → [Layer 3: History] → [Layer 4: Cache] → Provider
12
+ ```
13
+
14
+ ## Entry Points
15
+
16
+ | What | Where | Notes |
17
+ |------|-------|-------|
18
+ | CLI | `src/kompact/__main__.py` | `kompact proxy --port 7878` |
19
+ | Proxy server | `src/kompact/proxy/server.py` | FastAPI, intercepts API requests |
20
+ | Transform pipeline | `src/kompact/transforms/pipeline.py` | Orchestrates all transforms |
21
+ | Configuration | `src/kompact/config.py` | Pydantic settings |
22
+ | Core types | `src/kompact/types.py` | Message, ToolOutput, TransformResult |
23
+
24
+ ## Transforms (each is independent, pure function)
25
+
26
+ | Transform | File | Layer | Typical Savings |
27
+ |-----------|------|-------|-----------------|
28
+ | TOON format | `src/kompact/transforms/toon.py` | 2 (Content) | 30-60% on JSON arrays |
29
+ | Observation masker | `src/kompact/transforms/observation_masker.py` | 3 (History) | 50% on old tool outputs |
30
+ | Cache aligner | `src/kompact/transforms/cache_aligner.py` | 4 (Cache) | Enables provider caching |
31
+ | JSON crusher | `src/kompact/transforms/json_crusher.py` | 2 (Content) | 40-80% on structured data |
32
+ | Schema optimizer | `src/kompact/transforms/schema_optimizer.py` | 1 (Schema) | 50-90% on tool defs |
33
+ | Code compressor | `src/kompact/transforms/code_compressor.py` | 2 (Content) | ~70% on code blocks |
34
+ | Log compressor | `src/kompact/transforms/log_compressor.py` | 2 (Content) | 60-90% on log output |
35
+
36
+ ## Key Invariants
37
+
38
+ 1. **All transforms are pure functions**: `list[Message] → TransformResult`
39
+ 2. **No transform modifies user messages** — only assistant/tool/system content
40
+ 3. **Every transform tracks `tokens_saved`** via `TransformResult`
41
+ 4. **Transforms are composable** — pipeline runs them in sequence
42
+
43
+ ## Documentation
44
+
45
+ | Doc | Path | Purpose |
46
+ |-----|------|---------|
47
+ | PRD | `docs/prd.md` | Product requirements |
48
+ | SDD | `docs/sdd.md` | System design |
49
+ | Architecture | `docs/architecture.md` | Layer details |
50
+ | Benchmarks | `docs/benchmarks.md` | Evaluation strategy |
51
+ | Quality | `docs/quality.md` | Quality grades per domain |
52
+ | Research | `docs/research/` | SOTA survey, competitors, economics |
53
+
54
+ ## Testing
55
+
56
+ ```bash
57
+ uv run pytest # All tests
58
+ uv run pytest tests/test_toon.py # Single transform
59
+ uv run python benchmarks/compression_ratio.py # Benchmarks
60
+ ```
61
+
62
+ ## Quick Start
63
+
64
+ ```bash
65
+ uv sync
66
+ uv run kompact proxy --port 7878
67
+ # Then: ANTHROPIC_BASE_URL=http://localhost:7878 claude
68
+ ```
kompact-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Kompact Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
kompact-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.4
2
+ Name: kompact
3
+ Version: 0.1.0
4
+ Summary: Multi-layer context optimization proxy for LLM agents
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: click>=8.1.0
18
+ Requires-Dist: fastapi>=0.115.0
19
+ Requires-Dist: httpx>=0.28.0
20
+ Requires-Dist: tiktoken>=0.8.0
21
+ Requires-Dist: uvicorn>=0.32.0
22
+ Provides-Extra: bench
23
+ Requires-Dist: context-bench>=0.1.0; extra == 'bench'
24
+ Requires-Dist: datasets>=4.5.0; extra == 'bench'
25
+ Requires-Dist: headroom-ai>=0.3.0; extra == 'bench'
26
+ Requires-Dist: llmlingua>=0.2.0; extra == 'bench'
27
+ Provides-Extra: code
28
+ Requires-Dist: tree-sitter-python>=0.23.0; extra == 'code'
29
+ Requires-Dist: tree-sitter>=0.23.0; extra == 'code'
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
32
+ Requires-Dist: pytest-httpx>=0.34.0; extra == 'dev'
33
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
34
+ Requires-Dist: ruff>=0.8.0; extra == 'dev'
35
+ Provides-Extra: schema
36
+ Requires-Dist: sentence-transformers>=3.0.0; extra == 'schema'
37
+ Description-Content-Type: text/markdown
38
+
39
+ # Kompact
40
+
41
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
42
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
43
+ [![Tests](https://img.shields.io/badge/tests-37%20passed-brightgreen.svg)](#development)
44
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
45
+
46
+ Multi-layer context optimization proxy for LLM agents. Reduces token usage by 40-70% with minimal information loss, verified via needle-in-a-haystack preservation tests.
47
+
48
+ ```
49
+ Agent ──> Kompact Proxy (localhost:7878) ──> LLM Provider
50
+
51
+ ├─ Layer 1: Schema Optimizer (TF-IDF tool selection)
52
+ ├─ Layer 2: Content Compressors (TOON, JSON, code, logs)
53
+ ├─ Layer 2b: Extractive Compressor (query-aware sentence selection)
54
+ ├─ Layer 3: Observation Masker (history management)
55
+ └─ Layer 4: Cache Aligner (prefix cache optimization)
56
+ ```
57
+
58
+ ## Quick Start
59
+
60
+ ```bash
61
+ # Install
62
+ uv sync
63
+
64
+ # Start proxy
65
+ uv run kompact proxy --port 7878
66
+
67
+ # Point your agent at it
68
+ export ANTHROPIC_BASE_URL=http://localhost:7878
69
+ claude # or any Anthropic/OpenAI-compatible agent
70
+ ```
71
+
72
+ ## How It Works
73
+
74
+ Kompact is a transparent HTTP proxy. No code changes needed — just change your base URL. It intercepts LLM API requests, applies a pipeline of transforms to compress the context, then forwards the optimized request to the provider.
75
+
76
+ | Transform | Target | Savings | Cost |
77
+ |-----------|--------|--------:|------|
78
+ | **TOON** | JSON arrays of objects | 30-60% | Zero (string manipulation) |
79
+ | **JSON Crusher** | Structured JSON data | 40-80% | Minimal (Counter stats) |
80
+ | **Code Compressor** | Code in tool results | ~70% | Regex parse |
81
+ | **Log Compressor** | Repetitive log output | 60-90% | Regex dedup |
82
+ | **Content Compressor** | Long prose/text | 25-55% | TF-IDF scoring |
83
+ | **Schema Optimizer** | Tool definitions | 50-90% | TF-IDF cosine similarity |
84
+ | **Observation Masker** | Old tool outputs | ~50% | Zero (placeholder swap) |
85
+ | **Cache Aligner** | System prompts | Provider cache discount | Regex substitution |
86
+
87
+ The pipeline adapts automatically — short contexts get light compression, long contexts get aggressive optimization.
88
+
89
+ ## Configuration
90
+
91
+ ```bash
92
+ # Disable specific transforms
93
+ uv run kompact proxy --port 7878 --disable toon --disable log_compressor
94
+
95
+ # Verbose mode
96
+ uv run kompact proxy --port 7878 --verbose
97
+
98
+ # View live dashboard
99
+ open http://localhost:7878/dashboard
100
+ ```
101
+
102
+ ## Benchmarks
103
+
104
+ Tested against Headroom and LLMLingua-2 on real datasets (BFCL, HotpotQA, Glaive, LongBench) using [context-bench](https://github.com/context-bench/context-bench).
105
+
106
+ **Search-heavy scenario (100 JSON results, 3 needles):**
107
+
108
+ | System | Compression | NIAH | Effective Ratio |
109
+ |--------|------------:|-----:|----------------:|
110
+ | Headroom | 0.0% | 100% | 0.0% |
111
+ | LLMLingua-2 | 55.4% | 0% | -44.6% |
112
+ | Truncation (50%) | 50.0% | 33% | -16.6% |
113
+ | **Kompact** | **47.7%** | **100%** | **47.7%** |
114
+
115
+ *Effective ratio* accounts for retry cost: if compression destroys information (NIAH miss), you pay for both the failed attempt and the retry with full context. Negative = worse than no compression.
116
+
117
+ ```bash
118
+ # Run on real datasets
119
+ uv run python benchmarks/run_dataset_eval.py --dataset bfcl -n 100
120
+
121
+ # Run synthetic scenarios
122
+ uv run python benchmarks/run_comparison.py --scenario search
123
+
124
+ # Exclude slow baselines
125
+ uv run python benchmarks/run_comparison.py --scenario search --exclude llmlingua headroom
126
+ ```
127
+
128
+ See [`benchmarks/README.md`](benchmarks/README.md) for full methodology.
129
+
130
+ ## Development
131
+
132
+ ```bash
133
+ # Install with dev deps
134
+ uv sync --extra dev
135
+
136
+ # Run tests
137
+ uv run pytest
138
+
139
+ # Lint
140
+ uv run ruff check src/ tests/
141
+
142
+ # Run single transform test
143
+ uv run pytest tests/test_toon.py -v
144
+ ```
145
+
146
+ ## Architecture
147
+
148
+ ```
149
+ src/kompact/
150
+ ├── proxy/server.py # FastAPI proxy (Anthropic + OpenAI)
151
+ ├── parser/messages.py # Provider format ↔ internal types
152
+ ├── transforms/
153
+ │ ├── pipeline.py # Orchestration + adaptive scaling
154
+ │ ├── toon.py # JSON array → tabular (TOON format)
155
+ │ ├── json_crusher.py # Statistical JSON compression
156
+ │ ├── code_compressor.py # Code → skeleton extraction
157
+ │ ├── log_compressor.py # Log deduplication
158
+ │ ├── content_compressor.py # Extractive text compression (TF-IDF)
159
+ │ ├── schema_optimizer.py # TF-IDF tool selection
160
+ │ ├── observation_masker.py # History management
161
+ │ └── cache_aligner.py # Prefix cache optimization
162
+ ├── cache/store.py # Compression store + artifact index
163
+ ├── config.py # Per-transform configuration
164
+ ├── types.py # Core data models
165
+ └── metrics/tracker.py # Per-request metrics
166
+ ```
167
+
168
+ ## License
169
+
170
+ MIT
@@ -0,0 +1,132 @@
1
+ # Kompact
2
+
3
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
4
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
5
+ [![Tests](https://img.shields.io/badge/tests-37%20passed-brightgreen.svg)](#development)
6
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
7
+
8
+ Multi-layer context optimization proxy for LLM agents. Reduces token usage by 40-70% with minimal information loss, verified via needle-in-a-haystack preservation tests.
9
+
10
+ ```
11
+ Agent ──> Kompact Proxy (localhost:7878) ──> LLM Provider
12
+
13
+ ├─ Layer 1: Schema Optimizer (TF-IDF tool selection)
14
+ ├─ Layer 2: Content Compressors (TOON, JSON, code, logs)
15
+ ├─ Layer 2b: Extractive Compressor (query-aware sentence selection)
16
+ ├─ Layer 3: Observation Masker (history management)
17
+ └─ Layer 4: Cache Aligner (prefix cache optimization)
18
+ ```
19
+
20
+ ## Quick Start
21
+
22
+ ```bash
23
+ # Install
24
+ uv sync
25
+
26
+ # Start proxy
27
+ uv run kompact proxy --port 7878
28
+
29
+ # Point your agent at it
30
+ export ANTHROPIC_BASE_URL=http://localhost:7878
31
+ claude # or any Anthropic/OpenAI-compatible agent
32
+ ```
33
+
34
+ ## How It Works
35
+
36
+ Kompact is a transparent HTTP proxy. No code changes needed — just change your base URL. It intercepts LLM API requests, applies a pipeline of transforms to compress the context, then forwards the optimized request to the provider.
37
+
38
+ | Transform | Target | Savings | Cost |
39
+ |-----------|--------|--------:|------|
40
+ | **TOON** | JSON arrays of objects | 30-60% | Zero (string manipulation) |
41
+ | **JSON Crusher** | Structured JSON data | 40-80% | Minimal (Counter stats) |
42
+ | **Code Compressor** | Code in tool results | ~70% | Regex parse |
43
+ | **Log Compressor** | Repetitive log output | 60-90% | Regex dedup |
44
+ | **Content Compressor** | Long prose/text | 25-55% | TF-IDF scoring |
45
+ | **Schema Optimizer** | Tool definitions | 50-90% | TF-IDF cosine similarity |
46
+ | **Observation Masker** | Old tool outputs | ~50% | Zero (placeholder swap) |
47
+ | **Cache Aligner** | System prompts | Provider cache discount | Regex substitution |
48
+
49
+ The pipeline adapts automatically — short contexts get light compression, long contexts get aggressive optimization.
50
+
51
+ ## Configuration
52
+
53
+ ```bash
54
+ # Disable specific transforms
55
+ uv run kompact proxy --port 7878 --disable toon --disable log_compressor
56
+
57
+ # Verbose mode
58
+ uv run kompact proxy --port 7878 --verbose
59
+
60
+ # View live dashboard
61
+ open http://localhost:7878/dashboard
62
+ ```
63
+
64
+ ## Benchmarks
65
+
66
+ Tested against Headroom and LLMLingua-2 on real datasets (BFCL, HotpotQA, Glaive, LongBench) using [context-bench](https://github.com/context-bench/context-bench).
67
+
68
+ **Search-heavy scenario (100 JSON results, 3 needles):**
69
+
70
+ | System | Compression | NIAH | Effective Ratio |
71
+ |--------|------------:|-----:|----------------:|
72
+ | Headroom | 0.0% | 100% | 0.0% |
73
+ | LLMLingua-2 | 55.4% | 0% | -44.6% |
74
+ | Truncation (50%) | 50.0% | 33% | -16.6% |
75
+ | **Kompact** | **47.7%** | **100%** | **47.7%** |
76
+
77
+ *Effective ratio* accounts for retry cost: if compression destroys information (NIAH miss), you pay for both the failed attempt and the retry with full context. Negative = worse than no compression.
78
+
79
+ ```bash
80
+ # Run on real datasets
81
+ uv run python benchmarks/run_dataset_eval.py --dataset bfcl -n 100
82
+
83
+ # Run synthetic scenarios
84
+ uv run python benchmarks/run_comparison.py --scenario search
85
+
86
+ # Exclude slow baselines
87
+ uv run python benchmarks/run_comparison.py --scenario search --exclude llmlingua headroom
88
+ ```
89
+
90
+ See [`benchmarks/README.md`](benchmarks/README.md) for full methodology.
91
+
92
+ ## Development
93
+
94
+ ```bash
95
+ # Install with dev deps
96
+ uv sync --extra dev
97
+
98
+ # Run tests
99
+ uv run pytest
100
+
101
+ # Lint
102
+ uv run ruff check src/ tests/
103
+
104
+ # Run single transform test
105
+ uv run pytest tests/test_toon.py -v
106
+ ```
107
+
108
+ ## Architecture
109
+
110
+ ```
111
+ src/kompact/
112
+ ├── proxy/server.py # FastAPI proxy (Anthropic + OpenAI)
113
+ ├── parser/messages.py # Provider format ↔ internal types
114
+ ├── transforms/
115
+ │ ├── pipeline.py # Orchestration + adaptive scaling
116
+ │ ├── toon.py # JSON array → tabular (TOON format)
117
+ │ ├── json_crusher.py # Statistical JSON compression
118
+ │ ├── code_compressor.py # Code → skeleton extraction
119
+ │ ├── log_compressor.py # Log deduplication
120
+ │ ├── content_compressor.py # Extractive text compression (TF-IDF)
121
+ │ ├── schema_optimizer.py # TF-IDF tool selection
122
+ │ ├── observation_masker.py # History management
123
+ │ └── cache_aligner.py # Prefix cache optimization
124
+ ├── cache/store.py # Compression store + artifact index
125
+ ├── config.py # Per-transform configuration
126
+ ├── types.py # Core data models
127
+ └── metrics/tracker.py # Per-request metrics
128
+ ```
129
+
130
+ ## License
131
+
132
+ MIT
@@ -0,0 +1,68 @@
1
+ # Benchmarks
2
+
3
+ ## Real Dataset Benchmarks (Primary)
4
+
5
+ Runs compression approaches against industry-standard datasets — the same ones
6
+ Headroom, LLMLingua, and other competitors publish numbers on.
7
+
8
+ ### Datasets
9
+
10
+ **Agentic / tool-calling (Kompact's target domain):**
11
+ - **BFCL** (Berkeley Function Calling Leaderboard) — real API schemas from the Gorilla project. The primary benchmark for tool-calling compression.
12
+ - **Glaive Function Calling v2** — 113K tool-calling conversations with JSON schemas in system prompts.
13
+
14
+ **QA / prose context (baseline comparison):**
15
+ - **HotpotQA** (distractor split) — multi-hop QA over Wikipedia paragraphs
16
+ - **LongBench v2** — long-context understanding across diverse domains
17
+
18
+ ### What's measured
19
+
20
+ - **Compression ratio** — tokens after / tokens before (lower = more compression)
21
+ - **Answer preservation** — does the answer string survive compression? (higher = better)
22
+ - **Latency** — wall-clock time per example
23
+
24
+ No LLM calls required. Measures compression quality, not downstream task accuracy.
25
+
26
+ ### Running
27
+
28
+ ```bash
29
+ # All 4 datasets (100 examples each)
30
+ uv run python benchmarks/run_dataset_eval.py
31
+
32
+ # Just the agentic datasets (BFCL + Glaive)
33
+ uv run python benchmarks/run_dataset_eval.py --dataset agentic
34
+
35
+ # Just the QA datasets (HotpotQA + LongBench)
36
+ uv run python benchmarks/run_dataset_eval.py --dataset qa
37
+
38
+ # Single dataset with custom size
39
+ uv run python benchmarks/run_dataset_eval.py --dataset bfcl -n 200
40
+ ```
41
+
42
+ Reports saved to `benchmarks/reports/dataset_eval_report.md`.
43
+
44
+ ## Synthetic Benchmarks (Secondary)
45
+
46
+ 6 synthetic agentic scenarios x 6 approaches. Useful for testing specific
47
+ transforms (TOON on JSON arrays, log compressor on logs, etc.).
48
+
49
+ ```bash
50
+ uv run python benchmarks/run_comparison.py
51
+ uv run python benchmarks/run_comparison.py --scenario search
52
+ ```
53
+
54
+ ## Approaches Compared
55
+
56
+ | # | Approach | Description |
57
+ |---|----------|-------------|
58
+ | 1 | No Compression | Pass-through baseline |
59
+ | 2 | JSON Minification | Re-serialize JSON compactly |
60
+ | 3 | Truncation (50%) | Keep first half of each content block |
61
+ | 4 | Headroom CCR | Replace large JSON arrays with schema marker + first item |
62
+ | 5 | LLMLingua-style | Word frequency pruning — remove low-importance words |
63
+ | 6 | **Kompact Pipeline** | Full multi-transform pipeline |
64
+
65
+ ## Legacy Benchmarks
66
+
67
+ - `compression_ratio.py` — per-transform compression ratios on test fixtures
68
+ - `accuracy_preservation.py` — NIAH test on synthetic data
@@ -0,0 +1,120 @@
1
+ """Benchmark: NIAH (Needle In A Haystack) accuracy preservation.
2
+
3
+ Verifies that critical items survive compression through the pipeline.
4
+
5
+ Usage:
6
+ uv run python benchmarks/accuracy_preservation.py
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import sys
13
+ from pathlib import Path
14
+
15
+ sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
16
+
17
+ from kompact.config import KompactConfig
18
+ from kompact.transforms.pipeline import run
19
+ from kompact.types import (
20
+ ContentBlock,
21
+ ContentType,
22
+ Message,
23
+ Provider,
24
+ Request,
25
+ Role,
26
+ )
27
+
28
+
29
def niah_test(needle: str, haystack_items: int = 100,
              needle_pos: int | None = None) -> dict:
    """Insert a needle into a haystack of similar items and verify that it
    survives the compression pipeline verbatim.

    Args:
        needle: Critical string that must be preserved exactly.
        haystack_items: Number of filler search-result items to generate.
        needle_pos: Index at which the needle is inserted. Defaults to one
            third of the way into the haystack (the original fixed position),
            kept deterministic so benchmark runs are reproducible.

    Returns:
        Dict with the needle, whether it survived (``found``), haystack size,
        character counts before/after, compression ratio, and the pipeline's
        reported ``tokens_saved``.
    """
    # Build haystack: many near-identical items that the compressor is
    # likely to aggregate or tabularize.
    haystack = [
        {"id": i, "type": "result", "title": f"Regular item {i}",
         "description": f"This is a normal search result number {i}",
         "url": f"https://example.com/{i}", "score": 0.5}
        for i in range(haystack_items)
    ]

    # Insert needle at a fixed, deterministic position (NOT random — the
    # original comment said "random", but reproducibility matters here).
    if needle_pos is None:
        needle_pos = haystack_items // 3
    haystack.insert(needle_pos, {
        "id": 9999,
        "type": "CRITICAL",
        "title": needle,
        "description": f"IMPORTANT: {needle}",
        "url": "https://critical.example.com/needle",
        "score": 1.0,
    })

    json_text = json.dumps(haystack)

    # Wrap the haystack as a tool-result block, the shape the pipeline
    # targets for JSON compression.
    messages = [
        Message(role=Role.USER, content=[
            ContentBlock(type=ContentType.TOOL_RESULT, text=json_text, tool_use_id="search"),
        ]),
    ]

    request = Request(
        provider=Provider.ANTHROPIC,
        messages=messages,
        model="benchmark",
    )

    # Run the full default transform pipeline over the request.
    config = KompactConfig()
    result = run(request, config)

    compressed_text = result.request.messages[0].content[0].text

    return {
        "needle": needle,
        "found": needle in compressed_text,
        "haystack_items": haystack_items,
        "original_chars": len(json_text),
        "compressed_chars": len(compressed_text),
        "ratio": len(compressed_text) / len(json_text),
        "tokens_saved": result.total_tokens_saved,
    }
78
+
79
+
80
def main():
    """Run the NIAH suite over a set of critical needles; exit non-zero
    (status 1) if any needle is destroyed by compression."""
    banner = "=" * 60
    print(banner)
    print("NIAH (Needle In A Haystack) Accuracy Test")
    print(banner)

    needles = [
        "The secret API key is sk-1234567890abcdef",
        "Deploy to production at 3pm PST",
        "Bug: users cannot login when password contains unicode",
        "Revenue increased 47% in Q3 2024",
        "CRITICAL: memory leak in worker process 7",
    ]

    # Record one boolean per needle instead of keeping two counters.
    outcomes = []
    for needle in needles:
        result = niah_test(needle, haystack_items=100)
        outcomes.append(result["found"])
        status = "PASS" if result["found"] else "FAIL"

        print(f"\n [{status}] Needle: \"{needle[:50]}...\"")
        print(f" Compression: {result['ratio']:.2%} "
              f"({result['tokens_saved']} tokens saved)")

    total = len(outcomes)
    found = sum(outcomes)

    print(f"\n{banner}")
    print(f"Results: {found}/{total} needles preserved ({found/total:.0%})")

    if found == total:
        print("ALL CRITICAL ITEMS SURVIVED COMPRESSION")
    else:
        print("WARNING: Some critical items were lost!")
        sys.exit(1)
117
+
118
+
119
# Script entry point: run the NIAH benchmark when executed directly
# (e.g. `uv run python benchmarks/accuracy_preservation.py`).
if __name__ == "__main__":
    main()