PyPI - fittok - Versions diffs - 0.3.0__tar.gz - Mend

fittok 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

fittok-0.3.0/.claude/settings.local.json +26 -0
fittok-0.3.0/.gitignore +18 -0
fittok-0.3.0/LICENSE +21 -0
fittok-0.3.0/PKG-INFO +153 -0
fittok-0.3.0/README.md +102 -0
fittok-0.3.0/RELEASE.md +54 -0
fittok-0.3.0/docs/RESULTS.md +158 -0
fittok-0.3.0/examples/usage.py +159 -0
fittok-0.3.0/lib/bindings/utils.js +189 -0
fittok-0.3.0/lib/tom-select/tom-select.complete.min.js +356 -0
fittok-0.3.0/lib/tom-select/tom-select.css +334 -0
fittok-0.3.0/lib/vis-9.1.2/vis-network.css +1 -0
fittok-0.3.0/lib/vis-9.1.2/vis-network.min.js +27 -0
fittok-0.3.0/plugin/.claude-plugin/plugin.json +14 -0
fittok-0.3.0/plugin/README.md +55 -0
fittok-0.3.0/plugin/hooks/fittok_hook.py +96 -0
fittok-0.3.0/plugin/hooks/hooks.json +12 -0
fittok-0.3.0/plugin/skills/fittok/SKILL.md +17 -0
fittok-0.3.0/pyproject.toml +79 -0
fittok-0.3.0/src/fittok/__init__.py +27 -0
fittok-0.3.0/src/fittok/__main__.py +5 -0
fittok-0.3.0/src/fittok/cache.py +223 -0
fittok-0.3.0/src/fittok/cli.py +81 -0
fittok-0.3.0/src/fittok/diff.py +110 -0
fittok-0.3.0/src/fittok/embeddings.py +115 -0
fittok-0.3.0/src/fittok/graphify.py +710 -0
fittok-0.3.0/src/fittok/indexer.py +85 -0
fittok-0.3.0/src/fittok/llmlingua_wrapper.py +175 -0
fittok-0.3.0/src/fittok/models.py +113 -0
fittok-0.3.0/src/fittok/pii_scrubber.py +150 -0
fittok-0.3.0/src/fittok/server.py +715 -0
fittok-0.3.0/src/fittok/slurp.py +497 -0
fittok-0.3.0/src/fittok/tokens.py +22 -0
fittok-0.3.0/src/fittok/ui.py +201 -0
fittok-0.3.0/src/fittok/watcher.py +145 -0
fittok-0.3.0/tests/__init__.py +0 -0
fittok-0.3.0/tests/test_cache.py +46 -0
fittok-0.3.0/tests/test_diff.py +91 -0
fittok-0.3.0/tests/test_graphify.py +170 -0
fittok-0.3.0/tests/test_llmlingua.py +87 -0
fittok-0.3.0/tests/test_pii_scrubber.py +106 -0
fittok-0.3.0/tests/test_robustness.py +82 -0
fittok-0.3.0/tests/test_server.py +88 -0
fittok-0.3.0/tests/test_server_v2.py +174 -0
fittok-0.3.0/tests/test_slurp.py +183 -0

fittok-0.3.0/.claude/settings.local.json ADDED Viewed

@@ -0,0 +1,26 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(.venv/bin/python -m pip install \"sentence-transformers>=2.2\")",
+      "Bash(echo \"ST_EXIT=$?\")",
+      "Read(//tmp/**)",
+      "Bash(.venv/bin/python -c \"import transformers, sentence_transformers as st; print\\('transformers', transformers.__version__, '| sentence-transformers', st.__version__\\)\")",
+      "Bash(.venv/bin/python -m pytest tests/ -q)",
+      "Bash(git add *)",
+      "Bash(git commit -q -m ' *)",
+      "Bash(awk '/def _is_trivial_callback/,/^def [a-z_]+\\\\\\(/' src/context_optimizer/graphify.py)",
+      "Bash(rm -rf ~/.cache/fittok)",
+      "Bash(.venv/bin/python /tmp/fittok_proof_table.py)",
+      "Bash(CONTEXT_OPTIMIZER_SHOW_SAVINGS=true .venv/bin/python -c ' *)",
+      "Bash(.venv/bin/python -c ' *)",
+      "Bash(.venv/bin/python -m pip install -e . --no-deps -q)",
+      "Bash(.venv/bin/python -c \"import fittok; print\\('import fittok OK, version', fittok.__version__\\)\")",
+      "Bash(.venv/bin/python -c \"from fittok.server import mcp; print\\('MCP server name:', mcp.name\\)\")",
+      "Bash(.venv/bin/fittok --help)",
+      "Bash(git remote *)",
+      "Bash(gh auth *)",
+      "Bash(gh repo *)",
+      "Bash(git push *)"
+    ]
+  }
+}

fittok-0.3.0/.gitignore ADDED Viewed

@@ -0,0 +1,18 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info/
+*.egg
+dist/
+build/
+.eggs/
+.pytest_cache/
+.mypy_cache/
+.tox/
+.coverage
+htmlcov/
+graph.json
+.venv/
+venv/
+.env
+*.so

fittok-0.3.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Context Optimizer Contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

fittok-0.3.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,153 @@
+Metadata-Version: 2.4
+Name: fittok
+Version: 0.3.0
+Summary: MCP server that retrieves the most relevant source code for a query, within a token budget
+Project-URL: Homepage, https://github.com/likhithreddy/fittok
+Project-URL: Repository, https://github.com/likhithreddy/fittok
+Project-URL: Issues, https://github.com/likhithreddy/fittok/issues
+Author: Fittok Contributors
+License: MIT
+License-File: LICENSE
+Keywords: code-analysis,context,embeddings,llm,mcp,retrieval
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.10
+Requires-Dist: diskcache>=5.0
+Requires-Dist: llmlingua>=0.0.5
+Requires-Dist: mcp[cli]>=1.0.0
+Requires-Dist: networkx>=3.0
+Requires-Dist: numpy>=1.24
+Requires-Dist: pydantic>=2.0
+Requires-Dist: python-dotenv>=1.0
+Requires-Dist: scikit-learn>=1.3
+Requires-Dist: sentence-transformers>=2.2
+Requires-Dist: tiktoken>=0.7.0
+Requires-Dist: transformers<5,>=4.41
+Requires-Dist: tree-sitter-go>=0.22.0
+Requires-Dist: tree-sitter-java>=0.22.0
+Requires-Dist: tree-sitter-javascript>=0.22.0
+Requires-Dist: tree-sitter-python>=0.22.0
+Requires-Dist: tree-sitter-rust>=0.22.0
+Requires-Dist: tree-sitter-typescript>=0.22.0
+Requires-Dist: tree-sitter>=0.22.0
+Requires-Dist: watchdog>=3.0
+Provides-Extra: dev
+Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
+Requires-Dist: pytest-cov>=4.0; extra == 'dev'
+Requires-Dist: pytest>=7.0; extra == 'dev'
+Provides-Extra: gpu
+Requires-Dist: torch>=2.0; extra == 'gpu'
+Provides-Extra: ui
+Requires-Dist: gradio>=4.0; extra == 'ui'
+Requires-Dist: pyvis>=0.3; extra == 'ui'
+Description-Content-Type: text/markdown
+# fittok
+**Retrieve only the relevant source code for a question — instead of the model
+reading whole files — so an LLM answers codebase questions on a small, focused
+slice of context.** Less input = fewer tokens, lower cost, faster answers.
+Works three ways from one install: an **MCP server**, a **CLI**, and a **Python
+library** — plus a **Claude Code plugin** that injects context automatically.
+---
+## How it works
+```
+codebase ──▶ graphify ──▶ slurp ──▶ readable slice ──▶ LLM answers
+             (parse)      (select)   (trim to budget)
+```
+1. **graphify** — parses the repo with tree-sitter into a knowledge graph of
+   functions / classes / methods (Python, JS, JSX, TS, TSX, Java, Go, Rust).
+2. **slurp** — scores every node against the question with **semantic embeddings
+   + TF-IDF + PageRank**, then selects *only* the genuinely relevant nodes via a
+   relevance cliff (no budget-padding with noise).
+3. **readable output** — returns the **actual source code** of those nodes,
+   top-ranked in full and the supporting tail as signatures, trimmed to a budget.
+   The model answers directly from it.
+> Note: an earlier design compressed the slice with LLMLingua, but that produced
+> unreadable token-salad the model ignored (then re-read the files). fittok
+> returns **real, readable code** instead. LLMLingua remains available only as the
+> standalone `compress_context` tool.
+Graphs and embeddings are cached on disk (`~/.cache/fittok`), keyed by content —
+so after a code change only the changed functions re-embed.
+---
+## Install & use
+### As an MCP server (recommended — for Claude Code / Cursor)
+Add one entry to your client's MCP config:
+```json
+{ "mcpServers": { "fittok": { "command": "uvx", "args": ["fittok"] } } }
+```
+Then ask codebase questions normally. To make it trigger **without mentioning it**,
+add one line to your client's `CLAUDE.md`:
+> *"For any codebase question, call fittok first and answer from its output."*
+### As a CLI (no MCP needed)
+```bash
+pip install fittok
+fittok index <repo>                       # optional one-time pre-warm
+fittok query <repo> "how does auth work"  # prints the relevant code slice
+```
+### As a library
+```python
+from fittok import optimize
+result = optimize("/path/to/repo", "how does authentication work")
+print(result["optimized_context"])
+```
+First query on a repo auto-indexes (~15s once, cached); after that it's instant.
+---
+## Token savings — honest numbers
+fittok cuts the **input/exploration cost** of a codebase question. On a real
+Next.js/TS repo (~5k functions) it returns a **~1.5–3.5k-token slice** instead of
+the model reading **15–20k+ tokens** of files — an **~80–90% reduction on input**,
+deterministic and reported in the tool's `savings` footer.
+**How to measure it honestly:**
+- ✅ Use the **`savings` footer** (e.g. `84% — 2,494 vs 15,631 tokens`) or your
+  **API bill** (total tokens — which counts the subagent crawls fittok avoids).
+- ⚠️ Do **not** judge by Claude Code's `/context` "Messages" number — it excludes
+  subagent tokens and is dominated by the model's own reasoning, which fittok
+  doesn't touch. On thorough models the real saving (e.g. ~84k → ~27k total
+  tokens, by avoiding an Explore subagent) is invisible there but clear on the bill.
+**Where it shines:** broad / multi-file questions, large files, unfamiliar repos,
+and thorough models that would otherwise explore heavily. On a tiny question a
+capable model can answer from one small file, so the win is marginal there.
+---
+## Configuration (env vars)
+| Variable | Default | Purpose |
+|---|---|---|
+| `FITTOK_SHOW_SAVINGS` | `false` | Append a `🪙 saved X%` footer to answers |
+| `CONTEXT_OPTIMIZER_EMBED_MODEL` | `all-MiniLM-L6-v2` | Embedding model |
+| `CONTEXT_OPTIMIZER_DEVICE` | `auto` | `auto` / `cuda` / `mps` / `cpu` |
+| `CONTEXT_OPTIMIZER_CACHE_DIR` | `~/.cache/fittok` | Cache location |
+## Requirements
+Python ≥ 3.10. First run downloads a ~90 MB embedding model. Optional extras:
+`pip install "fittok[ui]"` (graph visualizer), `"fittok[gpu]"` (torch/CUDA).
+## License
+MIT.

fittok-0.3.0/README.md ADDED Viewed

@@ -0,0 +1,102 @@
+# fittok
+**Retrieve only the relevant source code for a question — instead of the model
+reading whole files — so an LLM answers codebase questions on a small, focused
+slice of context.** Less input = fewer tokens, lower cost, faster answers.
+Works three ways from one install: an **MCP server**, a **CLI**, and a **Python
+library** — plus a **Claude Code plugin** that injects context automatically.
+---
+## How it works
+```
+codebase ──▶ graphify ──▶ slurp ──▶ readable slice ──▶ LLM answers
+             (parse)      (select)   (trim to budget)
+```
+1. **graphify** — parses the repo with tree-sitter into a knowledge graph of
+   functions / classes / methods (Python, JS, JSX, TS, TSX, Java, Go, Rust).
+2. **slurp** — scores every node against the question with **semantic embeddings
+   + TF-IDF + PageRank**, then selects *only* the genuinely relevant nodes via a
+   relevance cliff (no budget-padding with noise).
+3. **readable output** — returns the **actual source code** of those nodes,
+   top-ranked in full and the supporting tail as signatures, trimmed to a budget.
+   The model answers directly from it.
+> Note: an earlier design compressed the slice with LLMLingua, but that produced
+> unreadable token-salad the model ignored (then re-read the files). fittok
+> returns **real, readable code** instead. LLMLingua remains available only as the
+> standalone `compress_context` tool.
+Graphs and embeddings are cached on disk (`~/.cache/fittok`), keyed by content —
+so after a code change only the changed functions re-embed.
+---
+## Install & use
+### As an MCP server (recommended — for Claude Code / Cursor)
+Add one entry to your client's MCP config:
+```json
+{ "mcpServers": { "fittok": { "command": "uvx", "args": ["fittok"] } } }
+```
+Then ask codebase questions normally. To make it trigger **without mentioning it**,
+add one line to your client's `CLAUDE.md`:
+> *"For any codebase question, call fittok first and answer from its output."*
+### As a CLI (no MCP needed)
+```bash
+pip install fittok
+fittok index <repo>                       # optional one-time pre-warm
+fittok query <repo> "how does auth work"  # prints the relevant code slice
+```
+### As a library
+```python
+from fittok import optimize
+result = optimize("/path/to/repo", "how does authentication work")
+print(result["optimized_context"])
+```
+First query on a repo auto-indexes (~15s once, cached); after that it's instant.
+---
+## Token savings — honest numbers
+fittok cuts the **input/exploration cost** of a codebase question. On a real
+Next.js/TS repo (~5k functions) it returns a **~1.5–3.5k-token slice** instead of
+the model reading **15–20k+ tokens** of files — an **~80–90% reduction on input**,
+deterministic and reported in the tool's `savings` footer.
+**How to measure it honestly:**
+- ✅ Use the **`savings` footer** (e.g. `84% — 2,494 vs 15,631 tokens`) or your
+  **API bill** (total tokens — which counts the subagent crawls fittok avoids).
+- ⚠️ Do **not** judge by Claude Code's `/context` "Messages" number — it excludes
+  subagent tokens and is dominated by the model's own reasoning, which fittok
+  doesn't touch. On thorough models the real saving (e.g. ~84k → ~27k total
+  tokens, by avoiding an Explore subagent) is invisible there but clear on the bill.
+**Where it shines:** broad / multi-file questions, large files, unfamiliar repos,
+and thorough models that would otherwise explore heavily. On a tiny question a
+capable model can answer from one small file, so the win is marginal there.
+---
+## Configuration (env vars)
+| Variable | Default | Purpose |
+|---|---|---|
+| `FITTOK_SHOW_SAVINGS` | `false` | Append a `🪙 saved X%` footer to answers |
+| `CONTEXT_OPTIMIZER_EMBED_MODEL` | `all-MiniLM-L6-v2` | Embedding model |
+| `CONTEXT_OPTIMIZER_DEVICE` | `auto` | `auto` / `cuda` / `mps` / `cpu` |
+| `CONTEXT_OPTIMIZER_CACHE_DIR` | `~/.cache/fittok` | Cache location |
+## Requirements
+Python ≥ 3.10. First run downloads a ~90 MB embedding model. Optional extras:
+`pip install "fittok[ui]"` (graph visualizer), `"fittok[gpu]"` (torch/CUDA).
+## License
+MIT.

fittok-0.3.0/RELEASE.md ADDED Viewed

@@ -0,0 +1,54 @@
+# Releasing to PyPI
+The package is build-ready (`hatchling`, src layout, metadata + classifiers,
+dependency version bounds declared). These are the final steps — **you run the upload** with your own
+PyPI token.
+## 1. Verify version
+Bump `version` in `pyproject.toml` **and** `__version__` in
+`src/fittok/__init__.py` (keep them in sync). Current: `0.3.0`.
+## 2. Build
+```bash
+python -m pip install --upgrade build twine
+rm -rf dist
+python -m build            # creates dist/*.whl and dist/*.tar.gz
+python -m twine check dist/*
+```
+## 3. (Recommended) Test on TestPyPI first
+```bash
+python -m twine upload --repository testpypi dist/*
+# then in a clean venv:
+pip install -i https://test.pypi.org/simple/ \
+  --extra-index-url https://pypi.org/simple fittok
+```
+## 4. Upload to PyPI
+```bash
+python -m twine upload dist/*
+# username: __token__
+# password: pypi-<your-API-token>
+```
+## 5. After publish — how users consume it
+```bash
+pip install fittok          # core (retrieval + embeddings)
+pip install "fittok[ui]"    # + Gradio/pyvis graph visualizer
+```
+Register the MCP server (user scope, available in every repo):
+```bash
+claude mcp add fittok --scope user -- python -m fittok
+```
+Optional pre-warm so the first query is instant:
+```bash
+fittok index /path/to/repo
+```
+## Notes / gotchas baked into the package
+- `requires-python = ">=3.10"` (the `mcp` dep needs it; 3.9 fails).
+- `transformers` pinned `<5` (5.x breaks llmlingua model loading).
+- First use auto-indexes (graph + embeddings) and caches under `~/.cache/fittok`;
+  embeddings are content-keyed so changes only re-embed what changed.
+- The embedding model (`all-MiniLM-L6-v2`, ~90 MB) downloads from HuggingFace on
+  first run — document this for users behind firewalls.

fittok-0.3.0/docs/RESULTS.md ADDED Viewed

@@ -0,0 +1,158 @@
+# fittok — How it works & token-savings results
+> **fittok** retrieves only the *relevant* source code for a question — instead of
+> the model reading whole files — so an LLM answers codebase questions on a small,
+> focused slice of context. Less input = fewer tokens, lower cost, faster answers.
+---
+## TL;DR
+- On a real Next.js/TypeScript codebase (≈5k functions), fittok answers a focused
+  question from a **~1.2–3.5k-token slice instead of the ~9–20k tokens** the model
+  would otherwise read — a **60–91% reduction in input context**, same answer.
+- In a live Claude Code session, the same question consumed **~18.9k tokens without
+  fittok vs ~10.7k with it** (one tool call, zero file reads).
+- It works three ways from one install — **MCP server, CLI, and Python library** —
+  plus a **Claude Code plugin** that injects the context automatically.
+---
+## 1. The problem
+To answer "how does X work in this codebase?", an AI agent typically **reads many
+whole files** (Grep → Read → Read …). Most of those tokens are irrelevant — config,
+imports, unrelated functions. You pay for all of them on every question.
+## 2. How it works — a 3-stage pipeline
+```
+codebase ──▶ graphify ──▶ slurp ──▶ readable slice ──▶ LLM answers
+             (parse)      (select)   (trim to budget)
+```
+1. **graphify** — parses the repo with tree-sitter into a knowledge graph of
+   functions/classes/methods (Python, JS, JSX, TS, TSX, Java, Go, Rust).
+2. **slurp** — scores every node against the question with **semantic embeddings +
+   TF-IDF + PageRank**, and selects only the most relevant nodes within a token
+   budget (auto-sized by default).
+3. **readable output** — returns the *actual source code* of those nodes (trimmed to
+   budget), so the model can answer directly. (An earlier design compressed the text
+   with LLMLingua, but that produced unreadable token-salad the model ignored — so
+   fittok returns real, readable code instead.)
+Embeddings are cached on disk and keyed by content, so re-indexing after a code
+change only re-embeds what changed.
+## 3. Interfaces (one install, four front doors)
+| Interface | Who uses it | How |
+|---|---|---|
+| **MCP server** | AI clients (Claude Code, Cursor) | `uvx fittok` registered as an MCP server; the model calls `optimize_context` |
+| **Claude Code plugin** | Claude Code users who want it automatic | `UserPromptSubmit` hook auto-injects the relevant context for every codebase question |
+| **CLI** | scripts / CI / verification | `fittok query <repo> "<question>"` |
+| **Python library** | custom pipelines | `from fittok import optimize` |
+## 4. Install & usage
+**Recommended (MCP via uvx):** add to your client's MCP config —
+```json
+{ "mcpServers": { "fittok": { "command": "uvx", "args": ["fittok"] } } }
+```
+**Auto-trigger without mentioning it:** add one line to your client's `CLAUDE.md` —
+> *"For any codebase question, call fittok first and answer from its output."*
+**CLI / library:** `pip install fittok`, then `fittok query <repo> "<q>"` or
+`from fittok import optimize; optimize("<repo>", "<q>")`.
+---
+## 5. Token-savings results
+### 5a. Engine savings — deterministic (no model in the loop)
+Measured directly via `fittok query` on the `mira` repo (adaptive budget). "Baseline"
+= total tokens of the files the answer lives in (what the model would otherwise read):
+| Question | Baseline (files) | fittok sent | **Reduction** |
+|---|---:|---:|---:|
+| How does authentication & login work | 13,178 | 1,200 | **90.9%** |
+| How does silence detection end a turn | 9,782 | 3,500 | **64.2%** |
+| How does the AI gateway route & rotate keys | 14,041 | 3,500 | **75.1%** |
+| How are interview questions generated from the resume | 19,668 | 3,500 | **82.2%** |
+This number is **deterministic** — same every run, independent of the host model.
+### 5b. End-to-end session — with vs without (live `/context`)
+Same focused question, same repo, in Claude Code:
+| | Without fittok | With fittok |
+|---|---|---|
+| fittok calls | 0 | 1 |
+| Files the model read | several | 0 |
+| Context consumed (`Messages`) | **~18.9k** | **~10.7k** |
+> _Note: a fixed ~5.3k of "MCP tools" overhead is present in both and cancels out;
+> the comparison is the `Messages` delta._
+### 5c. Selectivity proof
+On a synthetic repo of **1,010 functions across 10 unrelated domains** (auth,
+payment, email, geometry, weather, …), the query *"how does authentication and login
+work"* selected **68 nodes, 100% from `auth.py`** — zero leakage from the other 9
+domains.
+---
+## 6. Live demo — screenshots (fill in from your run)
+> Replace each placeholder with your screenshot and the measured numbers.
+**Question used:** `How does silence detection end the candidate's turn?`
+### Without fittok  ✅ measured
+- `/context` before — Messages: **1.1k** (total 37.4k)
+- `/context` after — Messages: **11.5k** (total 46.9k)
+- Files read directly: **~5** (`silenceDetector.ts`, `useSpeechRecognition.ts`,
+  `speechRecognitionWrapper.ts`, + grep/cat of `submitAnswer.ts`, `constants.ts`)
+- **Context consumed (delta): ≈10.4k tokens**
+- _[screenshot ①: `/context` before] · [screenshot ②: `/context` after]_
+### With fittok  ⏳ pending
+- `/context` before — Messages: `____`
+- `/context` after — Messages: `____`
+- fittok calls: `____` (target 1) · files read: `____` (target 0)
+- fittok's own `savings`: `____`% (`____` vs `____` tokens)
+- _[screenshot ④: before] · [⑤: after] · [⑥: tool call + savings] · [⑦: answer + 🪙 footer]_
+### Result
+> Same focused question, same repo. **Without fittok** the model ran ~5 reads/greps
+> and consumed **≈10.4k tokens** of context. **With fittok** it answered from one
+> `~__k`-token slice — **`__`% less context, same answer.**
+---
+## 7. Reproduce it yourself
+```bash
+pip install fittok          # or: uvx fittok ...
+fittok index   <your-repo>  # one-time pre-warm (parse + embeddings, cached)
+fittok query   <your-repo> "how does <feature> work"   # prints the slice + savings on stderr
+```
+The stderr line shows `Sent X tokens instead of Y (Z% reduction)` — the deterministic proof.
+---
+## 8. Honest limitations
+- **Focused questions are where it shines** (tight, zero file reads). **Broad
+  "explain the entire flow" questions** can miss a pivotal connector function (it may
+  rank low on vocabulary, or sit across an HTTP boundary a code graph can't cross), so
+  the model may still read 1–2 files. The win is smaller there, not absent.
+- **The MCP server can't force the model** to use fittok or to stop reading files — it can
+  only make it the easy, obvious path. The **plugin hook** is the deterministic
+  guarantee (it injects the context before the model decides), at the cost of running
+  on every matched prompt.
+- Token savings depend on the model trusting the slice; readable output makes that
+  far more likely, but the host model always has final say.