fittok 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. fittok-0.3.0/.claude/settings.local.json +26 -0
  2. fittok-0.3.0/.gitignore +18 -0
  3. fittok-0.3.0/LICENSE +21 -0
  4. fittok-0.3.0/PKG-INFO +153 -0
  5. fittok-0.3.0/README.md +102 -0
  6. fittok-0.3.0/RELEASE.md +54 -0
  7. fittok-0.3.0/docs/RESULTS.md +158 -0
  8. fittok-0.3.0/examples/usage.py +159 -0
  9. fittok-0.3.0/lib/bindings/utils.js +189 -0
  10. fittok-0.3.0/lib/tom-select/tom-select.complete.min.js +356 -0
  11. fittok-0.3.0/lib/tom-select/tom-select.css +334 -0
  12. fittok-0.3.0/lib/vis-9.1.2/vis-network.css +1 -0
  13. fittok-0.3.0/lib/vis-9.1.2/vis-network.min.js +27 -0
  14. fittok-0.3.0/plugin/.claude-plugin/plugin.json +14 -0
  15. fittok-0.3.0/plugin/README.md +55 -0
  16. fittok-0.3.0/plugin/hooks/fittok_hook.py +96 -0
  17. fittok-0.3.0/plugin/hooks/hooks.json +12 -0
  18. fittok-0.3.0/plugin/skills/fittok/SKILL.md +17 -0
  19. fittok-0.3.0/pyproject.toml +79 -0
  20. fittok-0.3.0/src/fittok/__init__.py +27 -0
  21. fittok-0.3.0/src/fittok/__main__.py +5 -0
  22. fittok-0.3.0/src/fittok/cache.py +223 -0
  23. fittok-0.3.0/src/fittok/cli.py +81 -0
  24. fittok-0.3.0/src/fittok/diff.py +110 -0
  25. fittok-0.3.0/src/fittok/embeddings.py +115 -0
  26. fittok-0.3.0/src/fittok/graphify.py +710 -0
  27. fittok-0.3.0/src/fittok/indexer.py +85 -0
  28. fittok-0.3.0/src/fittok/llmlingua_wrapper.py +175 -0
  29. fittok-0.3.0/src/fittok/models.py +113 -0
  30. fittok-0.3.0/src/fittok/pii_scrubber.py +150 -0
  31. fittok-0.3.0/src/fittok/server.py +715 -0
  32. fittok-0.3.0/src/fittok/slurp.py +497 -0
  33. fittok-0.3.0/src/fittok/tokens.py +22 -0
  34. fittok-0.3.0/src/fittok/ui.py +201 -0
  35. fittok-0.3.0/src/fittok/watcher.py +145 -0
  36. fittok-0.3.0/tests/__init__.py +0 -0
  37. fittok-0.3.0/tests/test_cache.py +46 -0
  38. fittok-0.3.0/tests/test_diff.py +91 -0
  39. fittok-0.3.0/tests/test_graphify.py +170 -0
  40. fittok-0.3.0/tests/test_llmlingua.py +87 -0
  41. fittok-0.3.0/tests/test_pii_scrubber.py +106 -0
  42. fittok-0.3.0/tests/test_robustness.py +82 -0
  43. fittok-0.3.0/tests/test_server.py +88 -0
  44. fittok-0.3.0/tests/test_server_v2.py +174 -0
  45. fittok-0.3.0/tests/test_slurp.py +183 -0
@@ -0,0 +1,26 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(.venv/bin/python -m pip install \"sentence-transformers>=2.2\")",
5
+ "Bash(echo \"ST_EXIT=$?\")",
6
+ "Read(//tmp/**)",
7
+ "Bash(.venv/bin/python -c \"import transformers, sentence_transformers as st; print\\('transformers', transformers.__version__, '| sentence-transformers', st.__version__\\)\")",
8
+ "Bash(.venv/bin/python -m pytest tests/ -q)",
9
+ "Bash(git add *)",
10
+ "Bash(git commit -q -m ' *)",
11
+ "Bash(awk '/def _is_trivial_callback/,/^def [a-z_]+\\\\\\(/' src/context_optimizer/graphify.py)",
12
+ "Bash(rm -rf ~/.cache/fittok)",
13
+ "Bash(.venv/bin/python /tmp/fittok_proof_table.py)",
14
+ "Bash(CONTEXT_OPTIMIZER_SHOW_SAVINGS=true .venv/bin/python -c ' *)",
15
+ "Bash(.venv/bin/python -c ' *)",
16
+ "Bash(.venv/bin/python -m pip install -e . --no-deps -q)",
17
+ "Bash(.venv/bin/python -c \"import fittok; print\\('import fittok OK, version', fittok.__version__\\)\")",
18
+ "Bash(.venv/bin/python -c \"from fittok.server import mcp; print\\('MCP server name:', mcp.name\\)\")",
19
+ "Bash(.venv/bin/fittok --help)",
20
+ "Bash(git remote *)",
21
+ "Bash(gh auth *)",
22
+ "Bash(gh repo *)",
23
+ "Bash(git push *)"
24
+ ]
25
+ }
26
+ }
@@ -0,0 +1,18 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ *.egg
6
+ dist/
7
+ build/
8
+ .eggs/
9
+ .pytest_cache/
10
+ .mypy_cache/
11
+ .tox/
12
+ .coverage
13
+ htmlcov/
14
+ graph.json
15
+ .venv/
16
+ venv/
17
+ .env
18
+ *.so
fittok-0.3.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Context Optimizer Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
fittok-0.3.0/PKG-INFO ADDED
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: fittok
3
+ Version: 0.3.0
4
+ Summary: MCP server that retrieves the most relevant source code for a query, within a token budget
5
+ Project-URL: Homepage, https://github.com/likhithreddy/fittok
6
+ Project-URL: Repository, https://github.com/likhithreddy/fittok
7
+ Project-URL: Issues, https://github.com/likhithreddy/fittok/issues
8
+ Author: Fittok Contributors
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: code-analysis,context,embeddings,llm,mcp,retrieval
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: diskcache>=5.0
23
+ Requires-Dist: llmlingua>=0.0.5
24
+ Requires-Dist: mcp[cli]>=1.0.0
25
+ Requires-Dist: networkx>=3.0
26
+ Requires-Dist: numpy>=1.24
27
+ Requires-Dist: pydantic>=2.0
28
+ Requires-Dist: python-dotenv>=1.0
29
+ Requires-Dist: scikit-learn>=1.3
30
+ Requires-Dist: sentence-transformers>=2.2
31
+ Requires-Dist: tiktoken>=0.7.0
32
+ Requires-Dist: transformers<5,>=4.41
33
+ Requires-Dist: tree-sitter-go>=0.22.0
34
+ Requires-Dist: tree-sitter-java>=0.22.0
35
+ Requires-Dist: tree-sitter-javascript>=0.22.0
36
+ Requires-Dist: tree-sitter-python>=0.22.0
37
+ Requires-Dist: tree-sitter-rust>=0.22.0
38
+ Requires-Dist: tree-sitter-typescript>=0.22.0
39
+ Requires-Dist: tree-sitter>=0.22.0
40
+ Requires-Dist: watchdog>=3.0
41
+ Provides-Extra: dev
42
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
43
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
44
+ Requires-Dist: pytest>=7.0; extra == 'dev'
45
+ Provides-Extra: gpu
46
+ Requires-Dist: torch>=2.0; extra == 'gpu'
47
+ Provides-Extra: ui
48
+ Requires-Dist: gradio>=4.0; extra == 'ui'
49
+ Requires-Dist: pyvis>=0.3; extra == 'ui'
50
+ Description-Content-Type: text/markdown
51
+
52
+ # fittok
53
+
54
+ **Retrieve only the relevant source code for a question — instead of the model
55
+ reading whole files — so an LLM answers codebase questions on a small, focused
56
+ slice of context.** Less input = fewer tokens, lower cost, faster answers.
57
+
58
+ Works three ways from one install: an **MCP server**, a **CLI**, and a **Python
59
+ library** — plus a **Claude Code plugin** that injects context automatically.
60
+
61
+ ---
62
+
63
+ ## How it works
64
+
65
+ ```
66
+ codebase ──▶ graphify ──▶ slurp ──▶ readable slice ──▶ LLM answers
67
+ (parse) (select) (trim to budget)
68
+ ```
69
+
70
+ 1. **graphify** — parses the repo with tree-sitter into a knowledge graph of
71
+ functions / classes / methods (Python, JS, JSX, TS, TSX, Java, Go, Rust).
72
+ 2. **slurp** — scores every node against the question with **semantic embeddings
73
+ + TF-IDF + PageRank**, then selects *only* the genuinely relevant nodes via a
74
+ relevance cliff (no budget-padding with noise).
75
+ 3. **readable output** — returns the **actual source code** of those nodes,
76
+ top-ranked in full and the supporting tail as signatures, trimmed to a budget.
77
+ The model answers directly from it.
78
+
79
+ > Note: an earlier design compressed the slice with LLMLingua, but that produced
80
+ > unreadable token-salad the model ignored (then re-read the files). fittok
81
+ > returns **real, readable code** instead. LLMLingua remains available only as the
82
+ > standalone `compress_context` tool.
83
+
84
+ Graphs and embeddings are cached on disk (`~/.cache/fittok`), keyed by content —
85
+ so after a code change only the changed functions re-embed.
86
+
87
+ ---
88
+
89
+ ## Install & use
90
+
91
+ ### As an MCP server (recommended — for Claude Code / Cursor)
92
+ Add one entry to your client's MCP config:
93
+ ```json
94
+ { "mcpServers": { "fittok": { "command": "uvx", "args": ["fittok"] } } }
95
+ ```
96
+ Then ask codebase questions normally. To make it trigger **without mentioning it**,
97
+ add one line to your client's `CLAUDE.md`:
98
+ > *"For any codebase question, call fittok first and answer from its output."*
99
+
100
+ ### As a CLI (no MCP needed)
101
+ ```bash
102
+ pip install fittok
103
+ fittok index <repo> # optional one-time pre-warm
104
+ fittok query <repo> "how does auth work" # prints the relevant code slice
105
+ ```
106
+
107
+ ### As a library
108
+ ```python
109
+ from fittok import optimize
110
+ result = optimize("/path/to/repo", "how does authentication work")
111
+ print(result["optimized_context"])
112
+ ```
113
+
114
+ First query on a repo auto-indexes (~15s once, cached); after that it's instant.
115
+
116
+ ---
117
+
118
+ ## Token savings — honest numbers
119
+
120
+ fittok cuts the **input/exploration cost** of a codebase question. On a real
121
+ Next.js/TS repo (~5k functions) it returns a **~1.5–3.5k-token slice** instead of
122
+ the model reading **15–20k+ tokens** of files — an **~80–90% reduction on input**,
123
+ deterministic and reported in the tool's `savings` footer.
124
+
125
+ **How to measure it honestly:**
126
+ - ✅ Use the **`savings` footer** (e.g. `84% — 2,494 vs 15,631 tokens`) or your
127
+ **API bill** (total tokens — which counts the subagent crawls fittok avoids).
128
+ - ⚠️ Do **not** judge by Claude Code's `/context` "Messages" number — it excludes
129
+ subagent tokens and is dominated by the model's own reasoning, which fittok
130
+ doesn't touch. On thorough models the real saving (e.g. ~84k → ~27k total
131
+ tokens, by avoiding an Explore subagent) is invisible there but clear on the bill.
132
+
133
+ **Where it shines:** broad / multi-file questions, large files, unfamiliar repos,
134
+ and thorough models that would otherwise explore heavily. On a tiny question a
135
+ capable model can answer from one small file, so the win is marginal there.
136
+
137
+ ---
138
+
139
+ ## Configuration (env vars)
140
+
141
+ | Variable | Default | Purpose |
142
+ |---|---|---|
143
+ | `FITTOK_SHOW_SAVINGS` | `false` | Append a `🪙 saved X%` footer to answers |
144
+ | `CONTEXT_OPTIMIZER_EMBED_MODEL` | `all-MiniLM-L6-v2` | Embedding model |
145
+ | `CONTEXT_OPTIMIZER_DEVICE` | `auto` | `auto` / `cuda` / `mps` / `cpu` |
146
+ | `CONTEXT_OPTIMIZER_CACHE_DIR` | `~/.cache/fittok` | Cache location |
147
+
148
+ ## Requirements
149
+ Python ≥ 3.10. First run downloads a ~90 MB embedding model. Optional extras:
150
+ `pip install "fittok[ui]"` (graph visualizer), `"fittok[gpu]"` (torch/CUDA).
151
+
152
+ ## License
153
+ MIT.
fittok-0.3.0/README.md ADDED
@@ -0,0 +1,102 @@
1
+ # fittok
2
+
3
+ **Retrieve only the relevant source code for a question — instead of the model
4
+ reading whole files — so an LLM answers codebase questions on a small, focused
5
+ slice of context.** Less input = fewer tokens, lower cost, faster answers.
6
+
7
+ Works three ways from one install: an **MCP server**, a **CLI**, and a **Python
8
+ library** — plus a **Claude Code plugin** that injects context automatically.
9
+
10
+ ---
11
+
12
+ ## How it works
13
+
14
+ ```
15
+ codebase ──▶ graphify ──▶ slurp ──▶ readable slice ──▶ LLM answers
16
+ (parse) (select) (trim to budget)
17
+ ```
18
+
19
+ 1. **graphify** — parses the repo with tree-sitter into a knowledge graph of
20
+ functions / classes / methods (Python, JS, JSX, TS, TSX, Java, Go, Rust).
21
+ 2. **slurp** — scores every node against the question with **semantic embeddings
22
+ + TF-IDF + PageRank**, then selects *only* the genuinely relevant nodes via a
23
+ relevance cliff (no budget-padding with noise).
24
+ 3. **readable output** — returns the **actual source code** of those nodes,
25
+ top-ranked in full and the supporting tail as signatures, trimmed to a budget.
26
+ The model answers directly from it.
27
+
28
+ > Note: an earlier design compressed the slice with LLMLingua, but that produced
29
+ > unreadable token-salad the model ignored (then re-read the files). fittok
30
+ > returns **real, readable code** instead. LLMLingua remains available only as the
31
+ > standalone `compress_context` tool.
32
+
33
+ Graphs and embeddings are cached on disk (`~/.cache/fittok`), keyed by content —
34
+ so after a code change only the changed functions re-embed.
35
+
36
+ ---
37
+
38
+ ## Install & use
39
+
40
+ ### As an MCP server (recommended — for Claude Code / Cursor)
41
+ Add one entry to your client's MCP config:
42
+ ```json
43
+ { "mcpServers": { "fittok": { "command": "uvx", "args": ["fittok"] } } }
44
+ ```
45
+ Then ask codebase questions normally. To make it trigger **without mentioning it**,
46
+ add one line to your client's `CLAUDE.md`:
47
+ > *"For any codebase question, call fittok first and answer from its output."*
48
+
49
+ ### As a CLI (no MCP needed)
50
+ ```bash
51
+ pip install fittok
52
+ fittok index <repo> # optional one-time pre-warm
53
+ fittok query <repo> "how does auth work" # prints the relevant code slice
54
+ ```
55
+
56
+ ### As a library
57
+ ```python
58
+ from fittok import optimize
59
+ result = optimize("/path/to/repo", "how does authentication work")
60
+ print(result["optimized_context"])
61
+ ```
62
+
63
+ First query on a repo auto-indexes (~15s once, cached); after that it's instant.
64
+
65
+ ---
66
+
67
+ ## Token savings — honest numbers
68
+
69
+ fittok cuts the **input/exploration cost** of a codebase question. On a real
70
+ Next.js/TS repo (~5k functions) it returns a **~1.5–3.5k-token slice** instead of
71
+ the model reading **15–20k+ tokens** of files — an **~80–90% reduction on input**,
72
+ deterministic and reported in the tool's `savings` footer.
73
+
74
+ **How to measure it honestly:**
75
+ - ✅ Use the **`savings` footer** (e.g. `84% — 2,494 vs 15,631 tokens`) or your
76
+ **API bill** (total tokens — which counts the subagent crawls fittok avoids).
77
+ - ⚠️ Do **not** judge by Claude Code's `/context` "Messages" number — it excludes
78
+ subagent tokens and is dominated by the model's own reasoning, which fittok
79
+ doesn't touch. On thorough models the real saving (e.g. ~84k → ~27k total
80
+ tokens, by avoiding an Explore subagent) is invisible there but clear on the bill.
81
+
82
+ **Where it shines:** broad / multi-file questions, large files, unfamiliar repos,
83
+ and thorough models that would otherwise explore heavily. On a tiny question a
84
+ capable model can answer from one small file, so the win is marginal there.
85
+
86
+ ---
87
+
88
+ ## Configuration (env vars)
89
+
90
+ | Variable | Default | Purpose |
91
+ |---|---|---|
92
+ | `FITTOK_SHOW_SAVINGS` | `false` | Append a `🪙 saved X%` footer to answers |
93
+ | `CONTEXT_OPTIMIZER_EMBED_MODEL` | `all-MiniLM-L6-v2` | Embedding model |
94
+ | `CONTEXT_OPTIMIZER_DEVICE` | `auto` | `auto` / `cuda` / `mps` / `cpu` |
95
+ | `CONTEXT_OPTIMIZER_CACHE_DIR` | `~/.cache/fittok` | Cache location |
96
+
97
+ ## Requirements
98
+ Python ≥ 3.10. First run downloads a ~90 MB embedding model. Optional extras:
99
+ `pip install "fittok[ui]"` (graph visualizer), `"fittok[gpu]"` (torch/CUDA).
100
+
101
+ ## License
102
+ MIT.
@@ -0,0 +1,54 @@
1
+ # Releasing to PyPI
2
+
3
+ The package is build-ready (`hatchling`, src layout, metadata + classifiers,
4
+ deps version-bounded). These are the final steps — **you run the upload** with your own
5
+ PyPI token.
6
+
7
+ ## 1. Verify version
8
+ Bump `version` in `pyproject.toml` **and** `__version__` in
9
+ `src/fittok/__init__.py` (keep them in sync). Current: `0.3.0`.
10
+
11
+ ## 2. Build
12
+ ```bash
13
+ python -m pip install --upgrade build twine
14
+ rm -rf dist
15
+ python -m build # creates dist/*.whl and dist/*.tar.gz
16
+ python -m twine check dist/*
17
+ ```
18
+
19
+ ## 3. (Recommended) Test on TestPyPI first
20
+ ```bash
21
+ python -m twine upload --repository testpypi dist/*
22
+ # then in a clean venv:
23
+ pip install -i https://test.pypi.org/simple/ \
24
+ --extra-index-url https://pypi.org/simple fittok
25
+ ```
26
+
27
+ ## 4. Upload to PyPI
28
+ ```bash
29
+ python -m twine upload dist/*
30
+ # username: __token__
31
+ # password: pypi-<your-API-token>
32
+ ```
33
+
34
+ ## 5. After publish — how users consume it
35
+ ```bash
36
+ pip install fittok # core (retrieval + embeddings)
37
+ pip install "fittok[ui]" # + Gradio/pyvis graph visualizer
38
+ ```
39
+ Register the MCP server (user scope, available in every repo):
40
+ ```bash
41
+ claude mcp add fittok --scope user -- python -m fittok
42
+ ```
43
+ Optional pre-warm so the first query is instant:
44
+ ```bash
45
+ fittok index /path/to/repo
46
+ ```
47
+
48
+ ## Notes / gotchas baked into the package
49
+ - `requires-python = ">=3.10"` (the `mcp` dep needs it; 3.9 fails).
50
+ - `transformers` capped at `<5` (5.x breaks llmlingua model loading).
51
+ - First use auto-indexes (graph + embeddings) and caches under `~/.cache/fittok`;
52
+ embeddings are content-keyed so changes only re-embed what changed.
53
+ - The embedding model (`all-MiniLM-L6-v2`, ~90 MB) downloads from HuggingFace on
54
+ first run — document this for users behind firewalls.
@@ -0,0 +1,158 @@
1
+ # fittok — How it works & token-savings results
2
+
3
+ > **fittok** retrieves only the *relevant* source code for a question — instead of
4
+ > the model reading whole files — so an LLM answers codebase questions on a small,
5
+ > focused slice of context. Less input = fewer tokens, lower cost, faster answers.
6
+
7
+ ---
8
+
9
+ ## TL;DR
10
+
11
+ - On a real Next.js/TypeScript codebase (≈5k functions), fittok answers a focused
12
+ question from a **~1.2–3.5k-token slice instead of the ~9–20k tokens** the model
13
+ would otherwise read — a **60–91% reduction in input context**, same answer.
14
+ - In a live Claude Code session, the same question consumed **~18.9k tokens without
15
+ fittok vs ~10.7k with it** (one tool call, zero file reads).
16
+ - It works three ways from one install — **MCP server, CLI, and Python library** —
17
+ plus a **Claude Code plugin** that injects the context automatically.
18
+
19
+ ---
20
+
21
+ ## 1. The problem
22
+
23
+ To answer "how does X work in this codebase?", an AI agent typically **reads many
24
+ whole files** (Grep → Read → Read …). Most of those tokens are irrelevant — config,
25
+ imports, unrelated functions. You pay for all of them on every question.
26
+
27
+ ## 2. How it works — a 3-stage pipeline
28
+
29
+ ```
30
+ codebase ──▶ graphify ──▶ slurp ──▶ readable slice ──▶ LLM answers
31
+ (parse) (select) (trim to budget)
32
+ ```
33
+
34
+ 1. **graphify** — parses the repo with tree-sitter into a knowledge graph of
35
+ functions/classes/methods (Python, JS, JSX, TS, TSX, Java, Go, Rust).
36
+ 2. **slurp** — scores every node against the question with **semantic embeddings +
37
+ TF-IDF + PageRank**, and selects only the most relevant nodes within a token
38
+ budget (auto-sized by default).
39
+ 3. **readable output** — returns the *actual source code* of those nodes (trimmed to
40
+ budget), so the model can answer directly. (An earlier design compressed the text
41
+ with LLMLingua, but that produced unreadable token-salad the model ignored — so
42
+ fittok returns real, readable code instead.)
43
+
44
+ Embeddings are cached on disk and keyed by content, so re-indexing after a code
45
+ change only re-embeds what changed.
46
+
47
+ ## 3. Interfaces (one install, four front doors)
48
+
49
+ | Interface | Who uses it | How |
50
+ |---|---|---|
51
+ | **MCP server** | AI clients (Claude Code, Cursor) | `uvx fittok` registered as an MCP server; the model calls `optimize_context` |
52
+ | **Claude Code plugin** | Claude Code users who want it automatic | `UserPromptSubmit` hook auto-injects the relevant context on every codebase question |
53
+ | **CLI** | scripts / CI / verification | `fittok query <repo> "<question>"` |
54
+ | **Python library** | custom pipelines | `from fittok import optimize` |
55
+
56
+ ## 4. Install & usage
57
+
58
+ **Recommended (MCP via uvx):** add to your client's MCP config —
59
+ ```json
60
+ { "mcpServers": { "fittok": { "command": "uvx", "args": ["fittok"] } } }
61
+ ```
62
+ **Auto-trigger without mentioning it:** add one line to your client's `CLAUDE.md` —
63
+ > *"For any codebase question, call fittok first and answer from its output."*
64
+
65
+ **CLI / library:** `pip install fittok`, then `fittok query <repo> "<q>"` or
66
+ `from fittok import optimize; optimize("<repo>", "<q>")`.
67
+
68
+ ---
69
+
70
+ ## 5. Token-savings results
71
+
72
+ ### 5a. Engine savings — deterministic (no model in the loop)
73
+
74
+ Measured directly via `fittok query` on the `mira` repo (adaptive budget). "Baseline"
75
+ = total tokens of the files the answer lives in (what the model would otherwise read):
76
+
77
+ | Question | Baseline (files) | fittok sent | **Reduction** |
78
+ |---|---:|---:|---:|
79
+ | How does authentication & login work | 13,178 | 1,200 | **90.9%** |
80
+ | How does silence detection end a turn | 9,782 | 3,500 | **64.2%** |
81
+ | How does the AI gateway route & rotate keys | 14,041 | 3,500 | **75.1%** |
82
+ | How are interview questions generated from the resume | 19,668 | 3,500 | **82.2%** |
83
+
84
+ This number is **deterministic** — same every run, independent of the host model.
85
+
86
+ ### 5b. End-to-end session — with vs without (live `/context`)
87
+
88
+ Same focused question, same repo, in Claude Code:
89
+
90
+ | | Without fittok | With fittok |
91
+ |---|---|---|
92
+ | fittok calls | 0 | 1 |
93
+ | Files the model read | several | 0 |
94
+ | Context consumed (`Messages`) | **~18.9k** | **~10.7k** |
95
+
96
+ > _Note: a fixed ~5.3k of "MCP tools" overhead is present in both and cancels out;
97
+ > the comparison is the `Messages` delta._
98
+
99
+ ### 5c. Selectivity proof
100
+
101
+ On a synthetic repo of **1,010 functions across 10 unrelated domains** (auth,
102
+ payment, email, geometry, weather, …), the query *"how does authentication and login
103
+ work"* selected **68 nodes, 100% from `auth.py`** — zero leakage from the other 9
104
+ domains.
105
+
106
+ ---
107
+
108
+ ## 6. Live demo — screenshots (fill in from your run)
109
+
110
+ > Replace each placeholder with your screenshot and the measured numbers.
111
+
112
+ **Question used:** `How does silence detection end the candidate's turn?`
113
+
114
+ ### Without fittok ✅ measured
115
+ - `/context` before — Messages: **1.1k** (total 37.4k)
116
+ - `/context` after — Messages: **11.5k** (total 46.9k)
117
+ - Files read directly: **~5** (`silenceDetector.ts`, `useSpeechRecognition.ts`,
118
+ `speechRecognitionWrapper.ts`, + grep/cat of `submitAnswer.ts`, `constants.ts`)
119
+ - **Context consumed (delta): ≈10.4k tokens**
120
+ - _[screenshot ①: `/context` before] · [screenshot ②: `/context` after]_
121
+
122
+ ### With fittok ⏳ pending
123
+ - `/context` before — Messages: `____`
124
+ - `/context` after — Messages: `____`
125
+ - fittok calls: `____` (target 1) · files read: `____` (target 0)
126
+ - fittok's own `savings`: `____`% (`____` vs `____` tokens)
127
+ - _[screenshot ④: before] · [⑤: after] · [⑥: tool call + savings] · [⑦: answer + 🪙 footer]_
128
+
129
+ ### Result
130
+ > Same focused question, same repo. **Without fittok** the model ran ~5 reads/greps
131
+ > and consumed **≈10.4k tokens** of context. **With fittok** it answered from one
132
+ > `~__k`-token slice — **`__`% less context, same answer.**
133
+
134
+ ---
135
+
136
+ ## 7. Reproduce it yourself
137
+
138
+ ```bash
139
+ pip install fittok # or: uvx fittok ...
140
+ fittok index <your-repo> # one-time pre-warm (parse + embeddings, cached)
141
+ fittok query <your-repo> "how does <feature> work" # prints the slice; the savings line goes to stderr
142
+ ```
143
+ The stderr line shows `Sent X tokens instead of Y (Z% reduction)` — the deterministic proof.
144
+
145
+ ---
146
+
147
+ ## 8. Honest limitations
148
+
149
+ - **Focused questions are where it shines** (tight, zero file reads). **Broad
150
+ "explain the entire flow" questions** can miss a pivotal connector function (it may
151
+ rank low on vocabulary, or sit across an HTTP boundary a code graph can't cross), so
152
+ the model may still read 1–2 files. The win is smaller there, not absent.
153
+ - **The MCP server can't force the model** to use fittok or to stop reading files — it can
154
+ only make it the easy, obvious path. The **plugin hook** is the deterministic
155
+ guarantee (it injects the context before the model decides), at the cost of running
156
+ on every matched prompt.
157
+ - Token savings depend on the model trusting the slice; readable output makes that
158
+ far more likely, but the host model always has final say.