code-context-engine 0.4.19__tar.gz → 0.4.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. {code_context_engine-0.4.19/src/code_context_engine.egg-info → code_context_engine-0.4.21}/PKG-INFO +73 -11
  2. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/README.md +70 -9
  3. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/pyproject.toml +6 -2
  4. {code_context_engine-0.4.19 → code_context_engine-0.4.21/src/code_context_engine.egg-info}/PKG-INFO +73 -11
  5. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/code_context_engine.egg-info/SOURCES.txt +1 -0
  6. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/code_context_engine.egg-info/requires.txt +3 -1
  7. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/cli.py +373 -76
  8. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/compression/output_rules.py +23 -8
  9. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/config.py +7 -0
  10. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/editors.py +11 -0
  11. code_context_engine-0.4.21/src/context_engine/indexer/embedder.py +539 -0
  12. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/git_hooks.py +16 -2
  13. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/manifest.py +27 -0
  14. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/pipeline.py +45 -3
  15. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/watcher.py +35 -4
  16. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/integration/mcp_server.py +62 -15
  17. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/hook_installer.py +35 -0
  18. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/hook_server.py +25 -9
  19. code_context_engine-0.4.21/src/context_engine/pricing.py +148 -0
  20. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/project_commands.py +6 -0
  21. code_context_engine-0.4.21/tests/test_cli_init_agents.py +155 -0
  22. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_savings.py +4 -4
  23. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_savings_e2e.py +5 -5
  24. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_smoke.py +41 -7
  25. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_project_commands.py +15 -0
  26. code_context_engine-0.4.19/src/context_engine/indexer/embedder.py +0 -158
  27. code_context_engine-0.4.19/src/context_engine/pricing.py +0 -104
  28. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/LICENSE +0 -0
  29. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/setup.cfg +0 -0
  30. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/code_context_engine.egg-info/dependency_links.txt +0 -0
  31. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/code_context_engine.egg-info/entry_points.txt +0 -0
  32. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/code_context_engine.egg-info/top_level.txt +0 -0
  33. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/__init__.py +0 -0
  34. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/cli_style.py +0 -0
  35. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/compression/__init__.py +0 -0
  36. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/compression/compressor.py +0 -0
  37. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/compression/ollama_client.py +0 -0
  38. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/compression/prompts.py +0 -0
  39. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/compression/quality.py +0 -0
  40. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/dashboard/__init__.py +0 -0
  41. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/dashboard/_page.py +0 -0
  42. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/dashboard/server.py +0 -0
  43. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/event_bus.py +0 -0
  44. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/__init__.py +0 -0
  45. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/chunker.py +0 -0
  46. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/embedding_cache.py +0 -0
  47. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/git_indexer.py +0 -0
  48. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/ignorefile.py +0 -0
  49. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/indexer/secrets.py +0 -0
  50. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/integration/__init__.py +0 -0
  51. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/integration/bootstrap.py +0 -0
  52. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/integration/git_context.py +0 -0
  53. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/integration/session_capture.py +0 -0
  54. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/__init__.py +0 -0
  55. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/compressor.py +0 -0
  56. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/db.py +0 -0
  57. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/decision_extractor.py +0 -0
  58. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/extractive.py +0 -0
  59. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/grammar.py +0 -0
  60. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/hooks.py +0 -0
  61. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/memory/migrate.py +0 -0
  62. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/models.py +0 -0
  63. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/retrieval/__init__.py +0 -0
  64. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/retrieval/confidence.py +0 -0
  65. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/retrieval/query_parser.py +0 -0
  66. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/retrieval/retriever.py +0 -0
  67. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/serve_http.py +0 -0
  68. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/services.py +0 -0
  69. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/storage/__init__.py +0 -0
  70. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/storage/backend.py +0 -0
  71. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/storage/fts_store.py +0 -0
  72. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/storage/graph_store.py +0 -0
  73. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/storage/local_backend.py +0 -0
  74. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/storage/remote_backend.py +0 -0
  75. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/storage/vector_store.py +0 -0
  76. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/src/context_engine/utils.py +0 -0
  77. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_init_probe.py +0 -0
  78. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_mcp_config.py +0 -0
  79. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_safe_cwd.py +0 -0
  80. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_savings_buckets.py +0 -0
  81. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_serve.py +0 -0
  82. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_sessions_export.py +0 -0
  83. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_sessions_status.py +0 -0
  84. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_cli_uninstall.py +0 -0
  85. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_config.py +0 -0
  86. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_e2e.py +0 -0
  87. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_editors_codex.py +0 -0
  88. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_editors_opencode.py +0 -0
  89. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_event_bus.py +0 -0
  90. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_models.py +0 -0
  91. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_real_life.py +0 -0
  92. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_services.py +0 -0
  93. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_token_efficiency.py +0 -0
  94. {code_context_engine-0.4.19 → code_context_engine-0.4.21}/tests/test_token_packing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-context-engine
3
- Version: 0.4.19
3
+ Version: 0.4.21
4
4
  Summary: Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server.
5
5
  Author-email: Fazle Elahee <felahee@gmail.com>, Raj <rajkumar.sakti@gmail.com>
6
6
  License-Expression: MIT
@@ -21,7 +21,6 @@ License-File: LICENSE
21
21
  Requires-Dist: click>=8.1
22
22
  Requires-Dist: pyyaml>=6.0
23
23
  Requires-Dist: sqlite-vec>=0.1.6
24
- Requires-Dist: fastembed>=0.4
25
24
  Requires-Dist: numpy>=1.24
26
25
  Requires-Dist: tree-sitter>=0.22
27
26
  Requires-Dist: tree-sitter-python>=0.21
@@ -46,6 +45,8 @@ Requires-Dist: pytest-cov>=5.0; extra == "dev"
46
45
  Requires-Dist: pytest-xdist>=3.5; extra == "dev"
47
46
  Requires-Dist: ruff>=0.13; extra == "dev"
48
47
  Provides-Extra: http
48
+ Provides-Extra: local
49
+ Requires-Dist: fastembed>=0.4; extra == "local"
49
50
  Dynamic: license-file
50
51
 
51
52
  <p align="center">
@@ -119,10 +120,10 @@ Dynamic: license-file
119
120
  ```bash
120
121
  uv tool install code-context-engine
121
122
  cd /path/to/your/project
122
- cce init
123
+ cce init # or: cce init --agent all
123
124
  ```
124
125
 
125
- That's it. Claude now searches your index instead of reading entire files. No config needed.
126
+ That's it. Your AI coding agent now searches your index instead of reading entire files. No config needed.
126
127
 
127
128
  ---
128
129
 
@@ -148,23 +149,34 @@ cd /path/to/your/project
148
149
  cce init # index, install hooks, register MCP server
149
150
  ```
150
151
 
152
+ **Embedding backends:** CCE auto-detects the best available backend. If you have Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra:
153
+
154
+ ```bash
155
+ uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
156
+ ```
157
+
151
158
  Restart your editor. Done. Every question now hits the index instead of re-reading files.
152
159
 
153
- `cce init` auto-detects your editor and writes the right config:
160
+ `cce init` auto-detects your editor and writes the right config. To target a
161
+ specific agent, use `--agent claude`, `--agent codex`, `--agent copilot`, or
162
+ `--agent all`.
154
163
 
155
164
  | Editor | Config written | Instructions |
156
165
  |--------|---------------|--------------|
157
166
  | Claude Code | `.mcp.json` | `CLAUDE.md` |
158
- | VS Code / Copilot | `.vscode/mcp.json` | |
167
+ | VS Code / Copilot | `.vscode/mcp.json` | `.github/copilot-instructions.md` |
159
168
  | Cursor | `.cursor/mcp.json` | `.cursorrules` |
160
169
  | Gemini CLI | `.gemini/settings.json` | `GEMINI.md` |
161
- | OpenAI Codex | `~/.codex/config.toml` (user-global, per-project section) | |
170
+ | OpenAI Codex | `~/.codex/config.toml` (user-global, per-project section) | `AGENTS.md` |
162
171
  | OpenCode | `opencode.json` | |
163
172
  | Tabnine | `.tabnine/agent/settings.json` | `TABNINE.md` |
164
173
 
165
174
  Multiple editors in the same project? All get configured in one command.
166
175
 
167
- **Codex note:** Codex CLI reads MCP servers from `~/.codex/config.toml` only — it has no per-project config. `cce init` adds one `[mcp_servers.cce-<project>-<hash>]` section per project so multiple projects coexist; `cce uninstall` removes only the section for the current project.
176
+ **Codex note:** Codex CLI reads MCP servers from `~/.codex/config.toml` only —
177
+ it has no per-project config. `cce init` adds one `[mcp_servers.cce-<project>-<hash>]`
178
+ section per project so multiple projects coexist; `cce uninstall` removes only
179
+ the section for the current project.
168
180
 
169
181
  ```
170
182
  my-project · 38 queries
@@ -425,11 +437,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
425
437
 
426
438
  | Component | Size |
427
439
  |-----------|------|
428
- | Installed package | ~189 MB (ONNX Runtime is 66 MB of that) |
429
- | Embedding model (one-time download) | ~60 MB |
440
+ | Core install (Ollama backend) | ~17 MB |
441
+ | With `[local]` extra (fastembed + ONNX) | ~189 MB |
442
+ | Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
430
443
  | Index per project (small/medium/large) | 5-60 MB |
431
444
 
432
- No GPU required. Embedding model runs on CPU via ONNX Runtime.
445
+ No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
433
446
 
434
447
  ---
435
448
 
@@ -479,6 +492,55 @@ All other text files are chunked by line range. Binary files are skipped.
479
492
 
480
493
  ---
481
494
 
495
+ ## FAQ
496
+
497
+ ### Does CCE affect response quality?
498
+
499
+ No. Quality stays the same or slightly improves.
500
+
501
+ CCE replaces "dump the entire file" with "search for the relevant function." The model still gets the code it needs (0.90 Recall@10 in benchmarks). Less irrelevant context means less noise competing for attention, which can improve the model's focus on your actual question.
502
+
503
+ ### How do I increase output token savings?
504
+
505
+ Set the output compression level in your project config (`cce.yaml`):
506
+
507
+ ```yaml
508
+ compression:
509
+ output: max # off | lite | standard | max
510
+ ```
511
+
512
+ Or change it at runtime via the MCP tool:
513
+
514
+ ```
515
+ set_output_level output_level=max
516
+ ```
517
+
518
+ | Level | Savings | What it does |
519
+ |-------|---------|--------------|
520
+ | `off` | 0% | No compression |
521
+ | `lite` | ~25% | Removes filler/hedging/pleasantries + diff-only for code changes |
522
+ | `standard` | ~70% | Drops articles, allows sentence fragments, prefers shorter synonyms + diff-only for code |
523
+ | `max` | ~80% | Telegraphic style + diff-only for code |
524
+
525
+ Default is `standard`. Every level except `off` includes **code output rules** that instruct the model to show only changed lines (not full file rewrites), which is where most output tokens go in coding sessions. The `max` level produces very terse prose (similar to "caveman mode"). Code blocks, paths, and commands are never compressed regardless of level.
526
+
527
+ ### Where do the savings come from?
528
+
529
+ Most savings come from **input tokens** (what goes into the model):
530
+
531
+ | Layer | Type | Typical savings |
532
+ |-------|------|-----------------|
533
+ | Retrieval | Input | 94% (full files → relevant chunks) |
534
+ | Chunk compression | Input | 89% (chunks → signatures) |
535
+ | Grammar compression | Input | 13% (article/filler removal) |
536
+ | Turn summarization | Input | varies (session history) |
537
+ | Progressive disclosure | Input | varies (tool payloads) |
538
+ | Output compression | Output | 25-80% (depends on level) |
539
+
540
+ Output tokens cost 5x more per token than input tokens (e.g. Opus: $15/1M input vs $75/1M output), so even a small output reduction has outsized cost impact.
541
+
542
+ ---
543
+
482
544
  ## Roadmap
483
545
 
484
546
  - [x] Multi-repo benchmarks (FastAPI, chi, fiber)
@@ -69,10 +69,10 @@
69
69
  ```bash
70
70
  uv tool install code-context-engine
71
71
  cd /path/to/your/project
72
- cce init
72
+ cce init # or: cce init --agent all
73
73
  ```
74
74
 
75
- That's it. Claude now searches your index instead of reading entire files. No config needed.
75
+ That's it. Your AI coding agent now searches your index instead of reading entire files. No config needed.
76
76
 
77
77
  ---
78
78
 
@@ -98,23 +98,34 @@ cd /path/to/your/project
98
98
  cce init # index, install hooks, register MCP server
99
99
  ```
100
100
 
101
+ **Embedding backends:** CCE auto-detects the best available backend. If you have Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra:
102
+
103
+ ```bash
104
+ uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
105
+ ```
106
+
101
107
  Restart your editor. Done. Every question now hits the index instead of re-reading files.
102
108
 
103
- `cce init` auto-detects your editor and writes the right config:
109
+ `cce init` auto-detects your editor and writes the right config. To target a
110
+ specific agent, use `--agent claude`, `--agent codex`, `--agent copilot`, or
111
+ `--agent all`.
104
112
 
105
113
  | Editor | Config written | Instructions |
106
114
  |--------|---------------|--------------|
107
115
  | Claude Code | `.mcp.json` | `CLAUDE.md` |
108
- | VS Code / Copilot | `.vscode/mcp.json` | |
116
+ | VS Code / Copilot | `.vscode/mcp.json` | `.github/copilot-instructions.md` |
109
117
  | Cursor | `.cursor/mcp.json` | `.cursorrules` |
110
118
  | Gemini CLI | `.gemini/settings.json` | `GEMINI.md` |
111
- | OpenAI Codex | `~/.codex/config.toml` (user-global, per-project section) | |
119
+ | OpenAI Codex | `~/.codex/config.toml` (user-global, per-project section) | `AGENTS.md` |
112
120
  | OpenCode | `opencode.json` | |
113
121
  | Tabnine | `.tabnine/agent/settings.json` | `TABNINE.md` |
114
122
 
115
123
  Multiple editors in the same project? All get configured in one command.
116
124
 
117
- **Codex note:** Codex CLI reads MCP servers from `~/.codex/config.toml` only — it has no per-project config. `cce init` adds one `[mcp_servers.cce-<project>-<hash>]` section per project so multiple projects coexist; `cce uninstall` removes only the section for the current project.
125
+ **Codex note:** Codex CLI reads MCP servers from `~/.codex/config.toml` only —
126
+ it has no per-project config. `cce init` adds one `[mcp_servers.cce-<project>-<hash>]`
127
+ section per project so multiple projects coexist; `cce uninstall` removes only
128
+ the section for the current project.
118
129
 
119
130
  ```
120
131
  my-project · 38 queries
@@ -375,11 +386,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
375
386
 
376
387
  | Component | Size |
377
388
  |-----------|------|
378
- | Installed package | ~189 MB (ONNX Runtime is 66 MB of that) |
379
- | Embedding model (one-time download) | ~60 MB |
389
+ | Core install (Ollama backend) | ~17 MB |
390
+ | With `[local]` extra (fastembed + ONNX) | ~189 MB |
391
+ | Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
380
392
  | Index per project (small/medium/large) | 5-60 MB |
381
393
 
382
- No GPU required. Embedding model runs on CPU via ONNX Runtime.
394
+ No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
383
395
 
384
396
  ---
385
397
 
@@ -429,6 +441,55 @@ All other text files are chunked by line range. Binary files are skipped.
429
441
 
430
442
  ---
431
443
 
444
+ ## FAQ
445
+
446
+ ### Does CCE affect response quality?
447
+
448
+ No. Quality stays the same or slightly improves.
449
+
450
+ CCE replaces "dump the entire file" with "search for the relevant function." The model still gets the code it needs (0.90 Recall@10 in benchmarks). Less irrelevant context means less noise competing for attention, which can improve the model's focus on your actual question.
451
+
452
+ ### How do I increase output token savings?
453
+
454
+ Set the output compression level in your project config (`cce.yaml`):
455
+
456
+ ```yaml
457
+ compression:
458
+ output: max # off | lite | standard | max
459
+ ```
460
+
461
+ Or change it at runtime via the MCP tool:
462
+
463
+ ```
464
+ set_output_level output_level=max
465
+ ```
466
+
467
+ | Level | Savings | What it does |
468
+ |-------|---------|--------------|
469
+ | `off` | 0% | No compression |
470
+ | `lite` | ~25% | Removes filler/hedging/pleasantries + diff-only for code changes |
471
+ | `standard` | ~70% | Drops articles, allows sentence fragments, prefers shorter synonyms + diff-only for code |
472
+ | `max` | ~80% | Telegraphic style + diff-only for code |
473
+
474
+ Default is `standard`. Every level except `off` includes **code output rules** that instruct the model to show only changed lines (not full file rewrites), which is where most output tokens go in coding sessions. The `max` level produces very terse prose (similar to "caveman mode"). Code blocks, paths, and commands are never compressed regardless of level.
475
+
476
+ ### Where do the savings come from?
477
+
478
+ Most savings come from **input tokens** (what goes into the model):
479
+
480
+ | Layer | Type | Typical savings |
481
+ |-------|------|-----------------|
482
+ | Retrieval | Input | 94% (full files → relevant chunks) |
483
+ | Chunk compression | Input | 89% (chunks → signatures) |
484
+ | Grammar compression | Input | 13% (article/filler removal) |
485
+ | Turn summarization | Input | varies (session history) |
486
+ | Progressive disclosure | Input | varies (tool payloads) |
487
+ | Output compression | Output | 25-80% (depends on level) |
488
+
489
+ Output tokens cost 5x more per token than input tokens (e.g. Opus: $15/1M input vs $75/1M output), so even a small output reduction has outsized cost impact.
490
+
491
+ ---
492
+
432
493
  ## Roadmap
433
494
 
434
495
  - [x] Multi-repo benchmarks (FastAPI, chi, fiber)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "code-context-engine"
3
- version = "0.4.19"
3
+ version = "0.4.21"
4
4
  description = "Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server."
5
5
  readme = {file = "README.md", content-type = "text/markdown"}
6
6
  license = "MIT"
@@ -23,7 +23,6 @@ dependencies = [
23
23
  "click>=8.1",
24
24
  "pyyaml>=6.0",
25
25
  "sqlite-vec>=0.1.6",
26
- "fastembed>=0.4",
27
26
  "numpy>=1.24",
28
27
  "tree-sitter>=0.22",
29
28
  "tree-sitter-python>=0.21",
@@ -60,6 +59,11 @@ dev = [
60
59
  "ruff>=0.13",
61
60
  ]
62
61
  http = [] # back-compat: aiohttp is now a core dependency
62
+ # Local on-device embedding via fastembed (ONNX). ~172 MB install
63
+ # footprint; needed only if you don't have Ollama running. Without
64
+ # this extra, CCE auto-detects Ollama at localhost:11434 and uses
65
+ # nomic-embed-text via /api/embed.
66
+ local = ["fastembed>=0.4"]
63
67
 
64
68
  [project.scripts]
65
69
  cce = "context_engine.cli:main"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-context-engine
3
- Version: 0.4.19
3
+ Version: 0.4.21
4
4
  Summary: Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server.
5
5
  Author-email: Fazle Elahee <felahee@gmail.com>, Raj <rajkumar.sakti@gmail.com>
6
6
  License-Expression: MIT
@@ -21,7 +21,6 @@ License-File: LICENSE
21
21
  Requires-Dist: click>=8.1
22
22
  Requires-Dist: pyyaml>=6.0
23
23
  Requires-Dist: sqlite-vec>=0.1.6
24
- Requires-Dist: fastembed>=0.4
25
24
  Requires-Dist: numpy>=1.24
26
25
  Requires-Dist: tree-sitter>=0.22
27
26
  Requires-Dist: tree-sitter-python>=0.21
@@ -46,6 +45,8 @@ Requires-Dist: pytest-cov>=5.0; extra == "dev"
46
45
  Requires-Dist: pytest-xdist>=3.5; extra == "dev"
47
46
  Requires-Dist: ruff>=0.13; extra == "dev"
48
47
  Provides-Extra: http
48
+ Provides-Extra: local
49
+ Requires-Dist: fastembed>=0.4; extra == "local"
49
50
  Dynamic: license-file
50
51
 
51
52
  <p align="center">
@@ -119,10 +120,10 @@ Dynamic: license-file
119
120
  ```bash
120
121
  uv tool install code-context-engine
121
122
  cd /path/to/your/project
122
- cce init
123
+ cce init # or: cce init --agent all
123
124
  ```
124
125
 
125
- That's it. Claude now searches your index instead of reading entire files. No config needed.
126
+ That's it. Your AI coding agent now searches your index instead of reading entire files. No config needed.
126
127
 
127
128
  ---
128
129
 
@@ -148,23 +149,34 @@ cd /path/to/your/project
148
149
  cce init # index, install hooks, register MCP server
149
150
  ```
150
151
 
152
+ **Embedding backends:** CCE auto-detects the best available backend. If you have Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra:
153
+
154
+ ```bash
155
+ uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
156
+ ```
157
+
151
158
  Restart your editor. Done. Every question now hits the index instead of re-reading files.
152
159
 
153
- `cce init` auto-detects your editor and writes the right config:
160
+ `cce init` auto-detects your editor and writes the right config. To target a
161
+ specific agent, use `--agent claude`, `--agent codex`, `--agent copilot`, or
162
+ `--agent all`.
154
163
 
155
164
  | Editor | Config written | Instructions |
156
165
  |--------|---------------|--------------|
157
166
  | Claude Code | `.mcp.json` | `CLAUDE.md` |
158
- | VS Code / Copilot | `.vscode/mcp.json` | |
167
+ | VS Code / Copilot | `.vscode/mcp.json` | `.github/copilot-instructions.md` |
159
168
  | Cursor | `.cursor/mcp.json` | `.cursorrules` |
160
169
  | Gemini CLI | `.gemini/settings.json` | `GEMINI.md` |
161
- | OpenAI Codex | `~/.codex/config.toml` (user-global, per-project section) | |
170
+ | OpenAI Codex | `~/.codex/config.toml` (user-global, per-project section) | `AGENTS.md` |
162
171
  | OpenCode | `opencode.json` | |
163
172
  | Tabnine | `.tabnine/agent/settings.json` | `TABNINE.md` |
164
173
 
165
174
  Multiple editors in the same project? All get configured in one command.
166
175
 
167
- **Codex note:** Codex CLI reads MCP servers from `~/.codex/config.toml` only — it has no per-project config. `cce init` adds one `[mcp_servers.cce-<project>-<hash>]` section per project so multiple projects coexist; `cce uninstall` removes only the section for the current project.
176
+ **Codex note:** Codex CLI reads MCP servers from `~/.codex/config.toml` only —
177
+ it has no per-project config. `cce init` adds one `[mcp_servers.cce-<project>-<hash>]`
178
+ section per project so multiple projects coexist; `cce uninstall` removes only
179
+ the section for the current project.
168
180
 
169
181
  ```
170
182
  my-project · 38 queries
@@ -425,11 +437,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
425
437
 
426
438
  | Component | Size |
427
439
  |-----------|------|
428
- | Installed package | ~189 MB (ONNX Runtime is 66 MB of that) |
429
- | Embedding model (one-time download) | ~60 MB |
440
+ | Core install (Ollama backend) | ~17 MB |
441
+ | With `[local]` extra (fastembed + ONNX) | ~189 MB |
442
+ | Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
430
443
  | Index per project (small/medium/large) | 5-60 MB |
431
444
 
432
- No GPU required. Embedding model runs on CPU via ONNX Runtime.
445
+ No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
433
446
 
434
447
  ---
435
448
 
@@ -479,6 +492,55 @@ All other text files are chunked by line range. Binary files are skipped.
479
492
 
480
493
  ---
481
494
 
495
+ ## FAQ
496
+
497
+ ### Does CCE affect response quality?
498
+
499
+ No. Quality stays the same or slightly improves.
500
+
501
+ CCE replaces "dump the entire file" with "search for the relevant function." The model still gets the code it needs (0.90 Recall@10 in benchmarks). Less irrelevant context means less noise competing for attention, which can improve the model's focus on your actual question.
502
+
503
+ ### How do I increase output token savings?
504
+
505
+ Set the output compression level in your project config (`cce.yaml`):
506
+
507
+ ```yaml
508
+ compression:
509
+ output: max # off | lite | standard | max
510
+ ```
511
+
512
+ Or change it at runtime via the MCP tool:
513
+
514
+ ```
515
+ set_output_level output_level=max
516
+ ```
517
+
518
+ | Level | Savings | What it does |
519
+ |-------|---------|--------------|
520
+ | `off` | 0% | No compression |
521
+ | `lite` | ~25% | Removes filler/hedging/pleasantries + diff-only for code changes |
522
+ | `standard` | ~70% | Drops articles, allows sentence fragments, prefers shorter synonyms + diff-only for code |
523
+ | `max` | ~80% | Telegraphic style + diff-only for code |
524
+
525
+ Default is `standard`. Every level except `off` includes **code output rules** that instruct the model to show only changed lines (not full file rewrites), which is where most output tokens go in coding sessions. The `max` level produces very terse prose (similar to "caveman mode"). Code blocks, paths, and commands are never compressed regardless of level.
526
+
527
+ ### Where do the savings come from?
528
+
529
+ Most savings come from **input tokens** (what goes into the model):
530
+
531
+ | Layer | Type | Typical savings |
532
+ |-------|------|-----------------|
533
+ | Retrieval | Input | 94% (full files → relevant chunks) |
534
+ | Chunk compression | Input | 89% (chunks → signatures) |
535
+ | Grammar compression | Input | 13% (article/filler removal) |
536
+ | Turn summarization | Input | varies (session history) |
537
+ | Progressive disclosure | Input | varies (tool payloads) |
538
+ | Output compression | Output | 25-80% (depends on level) |
539
+
540
+ Output tokens cost 5x more per token than input tokens (e.g. Opus: $15/1M input vs $75/1M output), so even a small output reduction has outsized cost impact.
541
+
542
+ ---
543
+
482
544
  ## Roadmap
483
545
 
484
546
  - [x] Multi-repo benchmarks (FastAPI, chi, fiber)
@@ -65,6 +65,7 @@ src/context_engine/storage/graph_store.py
65
65
  src/context_engine/storage/local_backend.py
66
66
  src/context_engine/storage/remote_backend.py
67
67
  src/context_engine/storage/vector_store.py
68
+ tests/test_cli_init_agents.py
68
69
  tests/test_cli_init_probe.py
69
70
  tests/test_cli_mcp_config.py
70
71
  tests/test_cli_safe_cwd.py
@@ -1,7 +1,6 @@
1
1
  click>=8.1
2
2
  pyyaml>=6.0
3
3
  sqlite-vec>=0.1.6
4
- fastembed>=0.4
5
4
  numpy>=1.24
6
5
  tree-sitter>=0.22
7
6
  tree-sitter-python>=0.21
@@ -28,3 +27,6 @@ pytest-xdist>=3.5
28
27
  ruff>=0.13
29
28
 
30
29
  [http]
30
+
31
+ [local]
32
+ fastembed>=0.4