code-context-engine 0.4.20__py3-none-any.whl → 0.4.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_context_engine-0.4.20.dist-info → code_context_engine-0.4.22.dist-info}/METADATA +76 -16
- {code_context_engine-0.4.20.dist-info → code_context_engine-0.4.22.dist-info}/RECORD +13 -13
- context_engine/cli.py +326 -81
- context_engine/compression/output_rules.py +77 -8
- context_engine/editors.py +31 -4
- context_engine/indexer/embedder.py +58 -8
- context_engine/memory/db.py +8 -0
- context_engine/pricing.py +64 -20
- context_engine/storage/vector_store.py +17 -3
- {code_context_engine-0.4.20.dist-info → code_context_engine-0.4.22.dist-info}/WHEEL +0 -0
- {code_context_engine-0.4.20.dist-info → code_context_engine-0.4.22.dist-info}/entry_points.txt +0 -0
- {code_context_engine-0.4.20.dist-info → code_context_engine-0.4.22.dist-info}/licenses/LICENSE +0 -0
- {code_context_engine-0.4.20.dist-info → code_context_engine-0.4.22.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-context-engine
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.22
|
|
4
4
|
Summary: Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server.
|
|
5
5
|
Author-email: Fazle Elahee <felahee@gmail.com>, Raj <rajkumar.sakti@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -115,15 +115,17 @@ Dynamic: license-file
|
|
|
115
115
|
|
|
116
116
|
---
|
|
117
117
|
|
|
118
|
-
## Quick start
|
|
118
|
+
## Quick start
|
|
119
119
|
|
|
120
120
|
```bash
|
|
121
|
-
uv tool install code-context-engine
|
|
121
|
+
uv tool install "code-context-engine[local]" # or: pipx install "code-context-engine[local]"
|
|
122
122
|
cd /path/to/your/project
|
|
123
|
-
cce init
|
|
123
|
+
cce init # or: cce init --agent all
|
|
124
124
|
```
|
|
125
125
|
|
|
126
|
-
That's it.
|
|
126
|
+
That's it. Your AI coding agent now searches your index instead of reading entire files.
|
|
127
|
+
|
|
128
|
+
> **Already have Ollama?** You can skip `[local]` and use `uv tool install code-context-engine` instead. CCE auto-detects Ollama at localhost:11434 and uses `nomic-embed-text`.
|
|
127
129
|
|
|
128
130
|
---
|
|
129
131
|
|
|
@@ -143,35 +145,42 @@ Tested on all three platforms in CI (macOS, Linux, Windows × Python 3.11/3.12/3
|
|
|
143
145
|
|
|
144
146
|
## Install and see savings in 60 seconds
|
|
145
147
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
148
|
+
You need an embedding backend to index code. Pick one:
|
|
149
|
+
|
|
150
|
+
| Option | Install command | Size | Requires |
|
|
151
|
+
|--------|----------------|------|----------|
|
|
152
|
+
| **Local (recommended)** | `uv tool install "code-context-engine[local]"` | +60 MB | Nothing else |
|
|
153
|
+
| **Ollama** | `uv tool install code-context-engine` | Core only | Ollama running + `nomic-embed-text` pulled |
|
|
151
154
|
|
|
152
|
-
|
|
155
|
+
Then:
|
|
153
156
|
|
|
154
157
|
```bash
|
|
155
|
-
|
|
158
|
+
cd /path/to/your/project
|
|
159
|
+
cce init # index, install hooks, register MCP server
|
|
156
160
|
```
|
|
157
161
|
|
|
158
162
|
Restart your editor. Done. Every question now hits the index instead of re-reading files.
|
|
159
163
|
|
|
160
|
-
`cce init` auto-detects your editor and writes the right config
|
|
164
|
+
`cce init` auto-detects your editor and writes the right config. To target a
|
|
165
|
+
specific agent, use `--agent claude`, `--agent codex`, `--agent copilot`, or
|
|
166
|
+
`--agent all`.
|
|
161
167
|
|
|
162
168
|
| Editor | Config written | Instructions |
|
|
163
169
|
|--------|---------------|--------------|
|
|
164
170
|
| Claude Code | `.mcp.json` | `CLAUDE.md` |
|
|
165
|
-
| VS Code / Copilot | `.vscode/mcp.json` | |
|
|
171
|
+
| VS Code / Copilot | `.vscode/mcp.json` | `.github/copilot-instructions.md` |
|
|
166
172
|
| Cursor | `.cursor/mcp.json` | `.cursorrules` |
|
|
167
173
|
| Gemini CLI | `.gemini/settings.json` | `GEMINI.md` |
|
|
168
|
-
| OpenAI Codex | `~/.codex/config.toml` (user-global, per-project section) | |
|
|
174
|
+
| OpenAI Codex | `~/.codex/config.toml` (user-global, per-project section) | `AGENTS.md` |
|
|
169
175
|
| OpenCode | `opencode.json` | |
|
|
170
176
|
| Tabnine | `.tabnine/agent/settings.json` | `TABNINE.md` |
|
|
171
177
|
|
|
172
178
|
Multiple editors in the same project? All get configured in one command.
|
|
173
179
|
|
|
174
|
-
**Codex note:** Codex CLI reads MCP servers from `~/.codex/config.toml` only —
|
|
180
|
+
**Codex note:** Codex CLI reads MCP servers from `~/.codex/config.toml` only —
|
|
181
|
+
it has no per-project config. `cce init` adds one `[mcp_servers.cce-<project>-<hash>]`
|
|
182
|
+
section per project so multiple projects coexist; `cce uninstall` removes only
|
|
183
|
+
the section for the current project.
|
|
175
184
|
|
|
176
185
|
```
|
|
177
186
|
my-project · 38 queries
|
|
@@ -487,6 +496,57 @@ All other text files are chunked by line range. Binary files are skipped.
|
|
|
487
496
|
|
|
488
497
|
---
|
|
489
498
|
|
|
499
|
+
## FAQ
|
|
500
|
+
|
|
501
|
+
### Does CCE affect response quality?
|
|
502
|
+
|
|
503
|
+
No. Quality stays the same or slightly improves.
|
|
504
|
+
|
|
505
|
+
CCE replaces "dump the entire file" with "search for the relevant function." The model still gets the code it needs (0.90 Recall@10 in benchmarks). Less irrelevant context means less noise competing for attention, which can improve the model's focus on your actual question.
|
|
506
|
+
|
|
507
|
+
### How does output token savings work?
|
|
508
|
+
|
|
509
|
+
CCE writes output compression rules directly into your agent's instruction files (`CLAUDE.md`, `AGENTS.md`, `.cursorrules`, etc.) during `cce init`. These rules apply to the **entire session**, not just CCE tool responses, so every reply from the agent follows them.
|
|
510
|
+
|
|
511
|
+
Set the level in `cce.yaml`:
|
|
512
|
+
|
|
513
|
+
```yaml
|
|
514
|
+
compression:
|
|
515
|
+
output: max # off | lite | standard | max
|
|
516
|
+
```
|
|
517
|
+
|
|
518
|
+
Then re-run `cce init` to update instruction files. Or change at runtime:
|
|
519
|
+
|
|
520
|
+
```
|
|
521
|
+
set_output_level output_level=max
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
| Level | Savings | What it does |
|
|
525
|
+
|-------|---------|--------------|
|
|
526
|
+
| `off` | 0% | No compression |
|
|
527
|
+
| `lite` | ~25% | Removes filler/hedging/pleasantries + diff-only for code changes |
|
|
528
|
+
| `standard` | ~70% | Drops articles; uses sentence fragments and short synonyms + diff-only for code |
|
|
529
|
+
| `max` | ~80% | Telegraphic style + diff-only for code |
|
|
530
|
+
|
|
531
|
+
Default is `standard`. All levels include **code output rules** that tell the model to show only changed lines (not full file rewrites), which is where most output tokens go in coding sessions. The `max` level produces very terse prose (similar to "caveman mode"). Code blocks, paths, and commands are never compressed regardless of level.
|
|
532
|
+
|
|
533
|
+
### Where do the savings come from?
|
|
534
|
+
|
|
535
|
+
Most savings are **input tokens** (what goes into the model):
|
|
536
|
+
|
|
537
|
+
| Layer | Type | Typical savings |
|
|
538
|
+
|-------|------|-----------------|
|
|
539
|
+
| Retrieval | Input | 94% (full files → relevant chunks) |
|
|
540
|
+
| Chunk compression | Input | 89% (chunks → signatures) |
|
|
541
|
+
| Grammar compression | Input | 13% (article/filler removal) |
|
|
542
|
+
| Turn summarization | Input | varies (session history) |
|
|
543
|
+
| Progressive disclosure | Input | varies (tool payloads) |
|
|
544
|
+
| Output compression | Output | 25-80% (depends on level) |
|
|
545
|
+
|
|
546
|
+
Output tokens cost 5x more per token (e.g. Opus: $15/1M input vs $75/1M output), so even a small output reduction has outsized cost impact.
|
|
547
|
+
|
|
548
|
+
---
|
|
549
|
+
|
|
490
550
|
## Roadmap
|
|
491
551
|
|
|
492
552
|
- [x] Multi-repo benchmarks (FastAPI, chi, fiber)
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
code_context_engine-0.4.
|
|
1
|
+
code_context_engine-0.4.22.dist-info/licenses/LICENSE,sha256=vLbw0GGCVJSIRppMus7Oq0PyMDhDXz-dfvz2rPpWtjQ,1069
|
|
2
2
|
context_engine/__init__.py,sha256=qThGxB7xfZi5M9jDpUno0MKBp7KKrEOdH1hG4wHMuLc,193
|
|
3
|
-
context_engine/cli.py,sha256=
|
|
3
|
+
context_engine/cli.py,sha256=iZbxwA0O4zFD_WRVgPnh1WdhsmZpu6Me-9lJTeT28DE,130226
|
|
4
4
|
context_engine/cli_style.py,sha256=a3l3Smq1gIN2asbNalFUz0i_5x7Tmkp_wEhyGMoo8a4,2460
|
|
5
5
|
context_engine/config.py,sha256=UGbVuc8_wTMflzGh80AotMZXZHzzUpLI3QjMnCxTzRo,8370
|
|
6
|
-
context_engine/editors.py,sha256=
|
|
6
|
+
context_engine/editors.py,sha256=k9jrqzU5gvYkR5kMu3VcVKHdjxEODZNmxBIEhQUOszE,23986
|
|
7
7
|
context_engine/event_bus.py,sha256=7Jgw_2YvGQFrnYewXk6T6FJcvRHz0LVEMDgZym9YBCE,760
|
|
8
8
|
context_engine/models.py,sha256=XBbM0CUqNDQ5MOp6F3STST2qLqy2Zk0m050ZtWdXkrk,2048
|
|
9
|
-
context_engine/pricing.py,sha256=
|
|
9
|
+
context_engine/pricing.py,sha256=aT1bsQuZXPlCdTgtwesJLwlKc2tzh8rxL67sZlMbz4E,4684
|
|
10
10
|
context_engine/project_commands.py,sha256=ZePtRU48F1MS0LsVE-32kUA7kjy7yeSh0swL0L6irLA,10741
|
|
11
11
|
context_engine/serve_http.py,sha256=bWG4yyeSusz19qM3SzDINO7oYd6SpWKsVD7c_VniZi4,9563
|
|
12
12
|
context_engine/services.py,sha256=8WSVGS7jtqArIihIHKW4fN2ZgfBex9GSEBnjMWecUQM,9827
|
|
@@ -14,7 +14,7 @@ context_engine/utils.py,sha256=rytymcEY0tjG4uknJU3DXKz1_ZGjUjJRV3PhkjXoC8A,3192
|
|
|
14
14
|
context_engine/compression/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
context_engine/compression/compressor.py,sha256=JlNxZeM6-tXISWVOGiJAcLoixqAxwfEGcYtE0dj8FPw,6680
|
|
16
16
|
context_engine/compression/ollama_client.py,sha256=MKF1gii2BXMU-wxBRPyMCjo8t72v3dZ06Kv2JNfILgQ,1265
|
|
17
|
-
context_engine/compression/output_rules.py,sha256=
|
|
17
|
+
context_engine/compression/output_rules.py,sha256=kpLZ6r6Ng6PyAvA22wed5ecm8YTxHwwKI57PgsnX6ls,6655
|
|
18
18
|
context_engine/compression/prompts.py,sha256=jZnpqhr77uI9R3S0vm3Dj17JYy03AXq24E6HQTPXy-A,711
|
|
19
19
|
context_engine/compression/quality.py,sha256=F6fyxDdWjq-Hgtw4xFIaE4BqPoJw1W1EQSn3RXDgdHc,1676
|
|
20
20
|
context_engine/dashboard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -22,7 +22,7 @@ context_engine/dashboard/_page.py,sha256=2LOz6GxVFHdNyd6iGV-u6sbwCnTrw2p_cVUY-Ly
|
|
|
22
22
|
context_engine/dashboard/server.py,sha256=N-QVaDCUL1h70QUgKrIy6QhQIedasf0KYHcV5LACZ0U,17437
|
|
23
23
|
context_engine/indexer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
24
|
context_engine/indexer/chunker.py,sha256=f0n7gJughdHP1fmMd1sbHAxLmVlCnIq6scHOeGFmBS8,6503
|
|
25
|
-
context_engine/indexer/embedder.py,sha256=
|
|
25
|
+
context_engine/indexer/embedder.py,sha256=xznLoW8A9KfDRZWO2MYzCk6o_Kj5YLIMuQ2J-MIbo3g,22717
|
|
26
26
|
context_engine/indexer/embedding_cache.py,sha256=yp7zvjjbhDei1tEczdo25GB_a5SJt3XfO4TVGujjSA0,6454
|
|
27
27
|
context_engine/indexer/git_hooks.py,sha256=GjncsmFu2TZx_3TNQNSBSp15uDwOJ3AtUJxuePQCP24,3258
|
|
28
28
|
context_engine/indexer/git_indexer.py,sha256=3IbAHYKa-XzpEX4zUfdvU0EHj-qjyn8muK6yPuxy9kw,4154
|
|
@@ -38,7 +38,7 @@ context_engine/integration/mcp_server.py,sha256=hIvap8fnpbeAOjJ0oy0GZdgjnUln6b-D
|
|
|
38
38
|
context_engine/integration/session_capture.py,sha256=azc0I2PoQQ-0gsmTFy254na_Ez3ADHJ5IdOKU5oFIEU,12440
|
|
39
39
|
context_engine/memory/__init__.py,sha256=-mzH2HLbjF6mlyzlt0IZoezDPLHBTJmIXFlsn8cjeQA,299
|
|
40
40
|
context_engine/memory/compressor.py,sha256=TiHxFHRPS3TQxo2_YnnXv8QaQXwxehmH2iwe-azuxpw,15763
|
|
41
|
-
context_engine/memory/db.py,sha256=
|
|
41
|
+
context_engine/memory/db.py,sha256=C700MhsdzT8NhpTz_8q-XV4kO6i-Rp4h4GTRoDa8OC4,34936
|
|
42
42
|
context_engine/memory/decision_extractor.py,sha256=tAFcKVaX5Y1qax71MAR03eq6uyCBIfiEDlbsgiodHUw,3508
|
|
43
43
|
context_engine/memory/extractive.py,sha256=VJFBG8P6Wku0OaKBQmOr3eTk5XRS2ed3q-TYb432GLc,3227
|
|
44
44
|
context_engine/memory/grammar.py,sha256=1yrMky1MlmT9m4-_XW3Rq8ZAEE6fBp4miFiWNEcH8ao,16776
|
|
@@ -56,9 +56,9 @@ context_engine/storage/fts_store.py,sha256=GzsF-xUPInqovcK72ULgpYAtMAymx4BRrYmps
|
|
|
56
56
|
context_engine/storage/graph_store.py,sha256=EAJaDK1OzSabm6HY4h7ZdZcykzlqtdFosNTypW5VNpc,8991
|
|
57
57
|
context_engine/storage/local_backend.py,sha256=5MVoAn6Jkiltho-9BjClisLkyXMkSZZc2Z_h3N7Vfcg,4200
|
|
58
58
|
context_engine/storage/remote_backend.py,sha256=6AwEI9YQnmP1w0a7S0ei3YrU2h3z7wbrwv34k7g5YOU,5483
|
|
59
|
-
context_engine/storage/vector_store.py,sha256=
|
|
60
|
-
code_context_engine-0.4.
|
|
61
|
-
code_context_engine-0.4.
|
|
62
|
-
code_context_engine-0.4.
|
|
63
|
-
code_context_engine-0.4.
|
|
64
|
-
code_context_engine-0.4.
|
|
59
|
+
context_engine/storage/vector_store.py,sha256=GyXSTlcKpByjr2C9JUF_cUCvMbGAc1UVV8Apx5X82kw,15772
|
|
60
|
+
code_context_engine-0.4.22.dist-info/METADATA,sha256=UUastWJFLBpuSBE0fr-bWL857Jp06tyCq_5V1bj00CI,25756
|
|
61
|
+
code_context_engine-0.4.22.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
62
|
+
code_context_engine-0.4.22.dist-info/entry_points.txt,sha256=DQuRWUuVFM7nPcXtDmJzlem7QA0IboD_4N8AnTtDD9Q,144
|
|
63
|
+
code_context_engine-0.4.22.dist-info/top_level.txt,sha256=X1-RUqb61WXBjy3JjsW2oXwfvqk2ydXKDNidxmw4CZ4,15
|
|
64
|
+
code_context_engine-0.4.22.dist-info/RECORD,,
|
context_engine/cli.py
CHANGED
|
@@ -56,6 +56,88 @@ def _safe_cwd() -> Path:
|
|
|
56
56
|
) from exc
|
|
57
57
|
|
|
58
58
|
|
|
59
|
+
# ── Update check ─────────────────────────────────────────────────────
|
|
60
|
+
_CCE_HOME = Path.home() / ".cce"
|
|
61
|
+
_UPDATE_CACHE = _CCE_HOME / "update_check.json"
|
|
62
|
+
_UPDATE_CHECK_TTL = 24 * 3600 # 1 day
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _version_tuple(v: str) -> tuple[int, ...]:
|
|
66
|
+
"""Parse '0.4.21' into (0, 4, 21) for comparison."""
|
|
67
|
+
return tuple(int(x) for x in v.split(".") if x.isdigit())
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _check_for_update() -> str | None:
    """Report the newest PyPI release when it is newer than the installed one.

    A cache file (``_UPDATE_CACHE``) younger than ``_UPDATE_CHECK_TTL`` seconds
    is trusted without touching the network; otherwise PyPI is queried and the
    answer (or an empty string on failure) is written back to the cache.

    Returns:
        The latest version string if it compares greater than the installed
        version, else ``None``. Best-effort: lookup, cache, and network errors
        are all swallowed.
    """
    import time
    from importlib.metadata import version as pkg_version

    try:
        installed = pkg_version("code-context-engine")
    except Exception:
        return None

    # Fresh cache entry: answer from it and skip the network entirely.
    try:
        if _UPDATE_CACHE.exists():
            cached = json.loads(_UPDATE_CACHE.read_text())
            age = time.time() - cached.get("ts", 0)
            if age < _UPDATE_CHECK_TTL:
                candidate = cached.get("latest", "")
                if not candidate:
                    return None
                if _version_tuple(candidate) > _version_tuple(installed):
                    return candidate
                return None
    except Exception:
        pass

    # Cache missing, stale, or unreadable: ask PyPI.
    fetched = None
    try:
        import httpx

        response = httpx.get(
            "https://pypi.org/pypi/code-context-engine/json",
            timeout=3.0,
            follow_redirects=True,
        )
        if response.status_code == 200:
            fetched = response.json()["info"]["version"]
    except Exception:
        pass

    # Record the outcome (an empty string when the fetch failed) with a timestamp.
    try:
        _CCE_HOME.mkdir(parents=True, exist_ok=True)
        payload = {"ts": time.time(), "latest": fetched or ""}
        _UPDATE_CACHE.write_text(json.dumps(payload))
    except Exception:
        pass

    if not fetched:
        return None
    if _version_tuple(fetched) > _version_tuple(installed):
        return fetched
    return None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _show_update_notice() -> None:
    """Echo a single styled notice when `_check_for_update` reports a newer release.

    Any exception raised while checking or printing is suppressed.
    """
    from importlib.metadata import version as pkg_version

    try:
        newest = _check_for_update()
        if not newest:
            return
        installed = pkg_version("code-context-engine")
        # Assemble the notice piece by piece; joined, it is one line of output.
        segments = [
            f"\n {click.style('Update available', fg='yellow', bold=True)} ",
            f"{click.style(installed, dim=True)} → ",
            f"{click.style(newest, fg='green', bold=True)} ",
            f"{click.style('Run', dim=True)} ",
            f"{click.style('cce upgrade', fg='cyan')} ",
            f"{click.style('to update', dim=True)}",
        ]
        click.echo("".join(segments))
    except Exception:
        pass
|
|
139
|
+
|
|
140
|
+
|
|
59
141
|
def _configure_mcp(project_dir: Path) -> bool:
|
|
60
142
|
"""Write MCP server config to .mcp.json in the project directory.
|
|
61
143
|
|
|
@@ -100,12 +182,12 @@ _CCE_CLAUDE_MD_MARKER = "## Context Engine (CCE)"
|
|
|
100
182
|
# Version stamp embedded as an HTML comment so it doesn't render in the final
|
|
101
183
|
# Markdown but lets `_ensure_claude_md` detect when the installed block is
|
|
102
184
|
# stale and needs replacing. Bump whenever _CCE_CLAUDE_MD_BLOCK changes.
|
|
103
|
-
_CCE_CLAUDE_MD_VERSION = "
|
|
185
|
+
_CCE_CLAUDE_MD_VERSION = "4"
|
|
104
186
|
_CCE_CLAUDE_MD_VERSION_TAG = f"<!-- cce-block-version: {_CCE_CLAUDE_MD_VERSION} -->"
|
|
105
187
|
_CCE_CLAUDE_MD_VERSION_PREFIX = "<!-- cce-block-version: "
|
|
106
188
|
_CCE_CLAUDE_MD_END_MARKER = "<!-- /cce-block -->"
|
|
107
189
|
|
|
108
|
-
|
|
190
|
+
_CCE_CLAUDE_MD_BLOCK_TEMPLATE = f"""\
|
|
109
191
|
{_CCE_CLAUDE_MD_VERSION_TAG}
|
|
110
192
|
## Context Engine (CCE)
|
|
111
193
|
|
|
@@ -186,18 +268,22 @@ the goal is durable signal, not an event log.
|
|
|
186
268
|
Both are read-only and cheap. Prefer them over re-running tool calls or
|
|
187
269
|
asking the user to re-paste context.
|
|
188
270
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
Be concise. Lead with the answer or action, not reasoning. Skip filler words,
|
|
192
|
-
preamble, and phrases like "I'll help you with that" or "Certainly!". Prefer
|
|
193
|
-
fragments over full sentences in explanations. No trailing summaries of what
|
|
194
|
-
you just did. One sentence if it fits.
|
|
195
|
-
|
|
196
|
-
Code blocks, file paths, commands, and error messages are always written in full.
|
|
271
|
+
{{output_style}}
|
|
197
272
|
{_CCE_CLAUDE_MD_END_MARKER}
|
|
198
273
|
"""
|
|
199
274
|
|
|
200
275
|
|
|
276
|
+
def _build_claude_md_block(output_level: str = "standard") -> str:
    """Render the CCE block for CLAUDE.md with the output-style rules for *output_level*."""
    from context_engine.compression.output_rules import get_instruction_output_block

    # The template carries a literal "{output_style}" placeholder; swap it for
    # the rules text produced for the requested level.
    placeholder = "{output_style}"
    style_rules = get_instruction_output_block(output_level)
    return _CCE_CLAUDE_MD_BLOCK_TEMPLATE.replace(placeholder, style_rules)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
# Default block for backward compat
|
|
284
|
+
_CCE_CLAUDE_MD_BLOCK = _build_claude_md_block("standard")
|
|
285
|
+
|
|
286
|
+
|
|
201
287
|
def _resolve_cce_cmd() -> str:
|
|
202
288
|
"""Find the globally installed cce binary path."""
|
|
203
289
|
from context_engine.utils import resolve_cce_binary
|
|
@@ -541,6 +627,22 @@ def _preflight_check(config) -> None:
|
|
|
541
627
|
one was picked, and surfaces Ollama status for the separate compression
|
|
542
628
|
path so users know what compression level they will get.
|
|
543
629
|
"""
|
|
630
|
+
# --- SQLite extension support ---
|
|
631
|
+
import sqlite3 as _sqlite3
|
|
632
|
+
_test_conn = _sqlite3.connect(":memory:")
|
|
633
|
+
if not hasattr(_test_conn, "enable_load_extension"):
|
|
634
|
+
_test_conn.close()
|
|
635
|
+
raise click.ClickException(
|
|
636
|
+
"Your Python was compiled without SQLite extension support "
|
|
637
|
+
"(enable_load_extension is missing).\n"
|
|
638
|
+
"This is common with python.org installers on macOS.\n\n"
|
|
639
|
+
"Fix: reinstall CCE under a Python that has extension support:\n\n"
|
|
640
|
+
" brew install python3\n"
|
|
641
|
+
" uv tool install --python /opt/homebrew/bin/python3 "
|
|
642
|
+
"--force code-context-engine\n"
|
|
643
|
+
)
|
|
644
|
+
_test_conn.close()
|
|
645
|
+
|
|
544
646
|
# --- Embedding backend ---
|
|
545
647
|
click.echo(_dim(" Detecting embedding backend") + "...", nl=False)
|
|
546
648
|
from context_engine.config import resolve_ollama_url
|
|
@@ -564,13 +666,15 @@ def _preflight_check(config) -> None:
|
|
|
564
666
|
fg="green",
|
|
565
667
|
)
|
|
566
668
|
)
|
|
567
|
-
except Exception
|
|
669
|
+
except Exception:
|
|
568
670
|
click.echo("")
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
"
|
|
572
|
-
|
|
573
|
-
|
|
671
|
+
raise click.ClickException(
|
|
672
|
+
"No embedding backend available.\n\n"
|
|
673
|
+
"Fix (pick one):\n"
|
|
674
|
+
" 1. Install local embeddings:\n"
|
|
675
|
+
" uv tool install 'code-context-engine[local]'\n\n"
|
|
676
|
+
f" 2. Start Ollama and pull the embedding model:\n"
|
|
677
|
+
f" ollama pull {ollama_model}\n"
|
|
574
678
|
)
|
|
575
679
|
|
|
576
680
|
# --- Ollama for LLM compression (independent of the embedding path) ---
|
|
@@ -596,7 +700,7 @@ def _preflight_check(config) -> None:
|
|
|
596
700
|
click.echo(_dim(" Tip: ollama pull phi3:mini for LLM summarization"))
|
|
597
701
|
|
|
598
702
|
|
|
599
|
-
def _ensure_claude_md(project_dir: Path) -> None:
|
|
703
|
+
def _ensure_claude_md(project_dir: Path, output_level: str = "standard") -> None:
|
|
600
704
|
"""Add or upgrade the CCE instructions block in CLAUDE.md.
|
|
601
705
|
|
|
602
706
|
Three states the file can be in:
|
|
@@ -611,9 +715,10 @@ def _ensure_claude_md(project_dir: Path) -> None:
|
|
|
611
715
|
"""
|
|
612
716
|
from context_engine.utils import atomic_write_text
|
|
613
717
|
|
|
718
|
+
block = _build_claude_md_block(output_level)
|
|
614
719
|
claude_md = project_dir / "CLAUDE.md"
|
|
615
720
|
if not claude_md.exists():
|
|
616
|
-
atomic_write_text(claude_md,
|
|
721
|
+
atomic_write_text(claude_md, block)
|
|
617
722
|
_ok("CLAUDE.md created with CCE instructions")
|
|
618
723
|
return
|
|
619
724
|
|
|
@@ -628,13 +733,13 @@ def _ensure_claude_md(project_dir: Path) -> None:
|
|
|
628
733
|
# survives the upgrade.
|
|
629
734
|
old_block = _extract_existing_cce_block(existing)
|
|
630
735
|
if old_block is not None:
|
|
631
|
-
new_content = existing.replace(old_block,
|
|
736
|
+
new_content = existing.replace(old_block, block.rstrip(), 1)
|
|
632
737
|
atomic_write_text(claude_md, new_content)
|
|
633
738
|
_ok("CLAUDE.md upgraded to current CCE instructions")
|
|
634
739
|
return
|
|
635
740
|
|
|
636
741
|
# No CCE block detected — append.
|
|
637
|
-
new_content = existing.rstrip() + "\n\n" +
|
|
742
|
+
new_content = existing.rstrip() + "\n\n" + block
|
|
638
743
|
atomic_write_text(claude_md, new_content)
|
|
639
744
|
_ok("CLAUDE.md updated with CCE instructions")
|
|
640
745
|
|
|
@@ -681,10 +786,72 @@ def main(ctx: click.Context, verbose: bool) -> None:
|
|
|
681
786
|
_show_welcome_banner(ctx.obj["config"])
|
|
682
787
|
|
|
683
788
|
|
|
789
|
+
@main.result_callback()
@click.pass_context
def _after_command(ctx: click.Context, *_args, **_kwargs) -> None:
    """Result callback for the `main` group: show the update notice after a command.

    The notice is suppressed for `serve` (long-running MCP server) and
    `upgrade` (already handles it).
    """
    quiet_commands = ("serve", "upgrade")
    if ctx.invoked_subcommand not in quiet_commands:
        _show_update_notice()
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
_INIT_AGENT_CHOICES = ("auto", "claude", "codex", "copilot", "all")
|
|
800
|
+
_INIT_AGENT_TO_EDITORS = {
|
|
801
|
+
"claude": {"claude"},
|
|
802
|
+
"codex": {"codex"},
|
|
803
|
+
"copilot": {"vscode"},
|
|
804
|
+
}
|
|
805
|
+
# Editor key → instruction-file key. `claude` is omitted because CLAUDE.md is
|
|
806
|
+
# written by `_ensure_claude_md`, not via the generic instruction-file path.
|
|
807
|
+
# `opencode` has no instruction file.
|
|
808
|
+
_INIT_EDITOR_TO_INSTRUCTIONS = {
|
|
809
|
+
"codex": "agents",
|
|
810
|
+
"vscode": "copilot",
|
|
811
|
+
"cursor": "cursorrules",
|
|
812
|
+
"gemini": "gemini",
|
|
813
|
+
"tabnine": "tabnine",
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
|
|
817
|
+
def _init_editor_targets(project_dir: Path, agent: str) -> set[str]:
    """Resolve the `cce init --agent` value into the set of editor keys to configure.

    - `auto`: Claude plus whatever `detect_editors(project_dir)` reports.
    - `all`: every key in EDITORS, read at call time so newly added editors
      are picked up automatically.
    - anything else (`claude`/`codex`/`copilot`): the editors mapped to that
      agent in `_INIT_AGENT_TO_EDITORS`.
    """
    from context_engine.editors import EDITORS, detect_editors

    if agent == "auto":
        return {"claude", *detect_editors(project_dir)}
    if agent == "all":
        return set(EDITORS.keys())
    # Explicit agent: return a copy of the mapped set.
    return set(_INIT_AGENT_TO_EDITORS[agent])
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
def _init_instruction_targets(editor_targets: set[str]) -> set[str]:
    """Map the selected editor keys to their instruction-file keys.

    Editors with no entry in `_INIT_EDITOR_TO_INSTRUCTIONS` contribute nothing.
    """
    selected: set[str] = set()
    for editor_key, file_key in _INIT_EDITOR_TO_INSTRUCTIONS.items():
        if editor_key in editor_targets:
            selected.add(file_key)
    return selected
|
|
842
|
+
|
|
843
|
+
|
|
684
844
|
@main.command()
|
|
845
|
+
@click.option(
|
|
846
|
+
"--agent",
|
|
847
|
+
type=click.Choice(_INIT_AGENT_CHOICES),
|
|
848
|
+
default="auto",
|
|
849
|
+
show_default=True,
|
|
850
|
+
help="Agent/editor target: auto, claude, codex, copilot, or all.",
|
|
851
|
+
)
|
|
685
852
|
@click.pass_context
|
|
686
|
-
def init(ctx: click.Context) -> None:
|
|
687
|
-
"""Initialize context engine and connect it to
|
|
853
|
+
def init(ctx: click.Context, agent: str) -> None:
|
|
854
|
+
"""Initialize context engine and connect it to AI coding agents."""
|
|
688
855
|
from context_engine.indexer.git_hooks import install_hooks
|
|
689
856
|
from context_engine.project_commands import ensure_gitignore
|
|
690
857
|
config = ctx.obj["config"]
|
|
@@ -719,23 +886,24 @@ def init(ctx: click.Context) -> None:
|
|
|
719
886
|
_warn("Not a git repository — git hook skipped")
|
|
720
887
|
click.echo(_dim(" Run `cce index` manually after making changes."))
|
|
721
888
|
|
|
722
|
-
# 4. MCP config —
|
|
889
|
+
# 4. MCP config — selected agents/editors
|
|
723
890
|
from context_engine.editors import (
|
|
724
891
|
EDITORS, INSTRUCTION_FILES,
|
|
725
|
-
|
|
892
|
+
configure_mcp, write_instruction_file,
|
|
726
893
|
)
|
|
727
|
-
configured = _configure_mcp(project_dir)
|
|
728
|
-
if configured:
|
|
729
|
-
_ok("MCP server registered in " + click.style(".mcp.json", fg="cyan"))
|
|
730
|
-
else:
|
|
731
|
-
_ok("MCP server already configured in " + click.style(".mcp.json", fg="cyan"))
|
|
732
|
-
|
|
733
|
-
# Configure MCP for other detected editors (Cursor, VS Code, Gemini, Codex, Tabnine)
|
|
734
894
|
from context_engine.editors import _editor_section # noqa: SLF001
|
|
735
|
-
|
|
736
|
-
|
|
895
|
+
|
|
896
|
+
editor_targets = _init_editor_targets(project_dir, agent)
|
|
897
|
+
if "claude" in editor_targets:
|
|
898
|
+
configured = _configure_mcp(project_dir)
|
|
899
|
+
if configured:
|
|
900
|
+
_ok("MCP server registered in " + click.style(".mcp.json", fg="cyan"))
|
|
901
|
+
else:
|
|
902
|
+
_ok("MCP server already configured in " + click.style(".mcp.json", fg="cyan"))
|
|
903
|
+
|
|
904
|
+
for editor_key in sorted(editor_targets):
|
|
737
905
|
if editor_key == "claude":
|
|
738
|
-
continue
|
|
906
|
+
continue
|
|
739
907
|
editor = EDITORS[editor_key]
|
|
740
908
|
changed = configure_mcp(project_dir, editor_key)
|
|
741
909
|
if changed is None:
|
|
@@ -751,19 +919,27 @@ def init(ctx: click.Context) -> None:
|
|
|
751
919
|
section = _editor_section(editor, project_dir)
|
|
752
920
|
click.echo(_dim(f" ~/{editor['config_path']} → [{section}]"))
|
|
753
921
|
|
|
754
|
-
# Write instruction files for
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
922
|
+
# Write instruction files for the selected editors. In `auto` mode, also
|
|
923
|
+
# pick up instruction files whose marker exists even if the editor itself
|
|
924
|
+
# wasn't detected (e.g. an `AGENTS.md` checked in without a `~/.codex/`).
|
|
925
|
+
# Explicit `--agent X` writes only what X covers — no surprise edits.
|
|
926
|
+
instruction_targets = _init_instruction_targets(editor_targets)
|
|
927
|
+
if agent == "auto":
|
|
928
|
+
for file_key, info in INSTRUCTION_FILES.items():
|
|
929
|
+
if any((project_dir / marker).exists() for marker in info["detect"]):
|
|
930
|
+
instruction_targets.add(file_key)
|
|
931
|
+
output_level = getattr(config, "output_compression", "standard")
|
|
932
|
+
for file_key in sorted(instruction_targets):
|
|
933
|
+
info = INSTRUCTION_FILES[file_key]
|
|
934
|
+
if write_instruction_file(project_dir, file_key, output_level=output_level):
|
|
935
|
+
_ok(f"CCE instructions added to {info['name']}")
|
|
761
936
|
|
|
762
937
|
# 5. CLAUDE.md + session hook + memory lifecycle hooks
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
938
|
+
if "claude" in editor_targets:
|
|
939
|
+
_ensure_claude_md(project_dir, output_level=output_level)
|
|
940
|
+
_ensure_session_hook(project_dir)
|
|
941
|
+
_install_memory_hooks(project_dir)
|
|
942
|
+
_check_memory_capture_reachable(config, project_dir)
|
|
767
943
|
|
|
768
944
|
# 6. .gitignore — add CCE per-machine entries
|
|
769
945
|
ensure_gitignore(str(project_dir))
|
|
@@ -777,7 +953,7 @@ def init(ctx: click.Context) -> None:
|
|
|
777
953
|
click.echo("")
|
|
778
954
|
click.echo(
|
|
779
955
|
click.style(" Done!", fg="green", bold=True) +
|
|
780
|
-
click.style(" Restart
|
|
956
|
+
click.style(" Restart your AI coding agent to activate CCE.", fg="white")
|
|
781
957
|
)
|
|
782
958
|
click.echo("")
|
|
783
959
|
|
|
@@ -962,7 +1138,7 @@ def list_commands() -> None:
|
|
|
962
1138
|
|
|
963
1139
|
groups = [
|
|
964
1140
|
("Setup", [
|
|
965
|
-
("cce init", "Index project
|
|
1141
|
+
("cce init [--agent auto|all|...]", "Index project and register MCP config"),
|
|
966
1142
|
("cce index", "Re-index changed files"),
|
|
967
1143
|
("cce index --full", "Force full re-index of every file"),
|
|
968
1144
|
("cce index --path <file>", "Index one file or directory"),
|
|
@@ -1257,13 +1433,20 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1257
1433
|
|
|
1258
1434
|
_all_pricing = get_model_pricing()
|
|
1259
1435
|
_pricing_model = config.pricing_model.lower()
|
|
1260
|
-
|
|
1261
|
-
|
|
1436
|
+
_default = _all_pricing.get("opus", {"input": 15.0, "output": 75.0})
|
|
1437
|
+
_model_pricing = _all_pricing.get(_pricing_model, _default)
|
|
1438
|
+
_input_price_per_m = _model_pricing["input"]
|
|
1439
|
+
_output_price_per_m = _model_pricing["output"]
|
|
1440
|
+
_INPUT_COST = _input_price_per_m / 1_000_000
|
|
1441
|
+
_OUTPUT_COST = _output_price_per_m / 1_000_000
|
|
1262
1442
|
_model_label = _pricing_model.capitalize()
|
|
1263
1443
|
_GRID_COLS = 10
|
|
1264
1444
|
_FILLED = "⛁"
|
|
1265
1445
|
_EMPTY = "⛶"
|
|
1266
1446
|
|
|
1447
|
+
# The output_compression bucket is the only one saving output tokens.
|
|
1448
|
+
_OUTPUT_BUCKETS = {"output_compression"}
|
|
1449
|
+
|
|
1267
1450
|
def _fmt_tokens(n: int) -> str:
|
|
1268
1451
|
if n >= 1_000_000:
|
|
1269
1452
|
return f"{n / 1_000_000:.1f}M"
|
|
@@ -1271,12 +1454,27 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1271
1454
|
return f"{n / 1000:.1f}k"
|
|
1272
1455
|
return str(n)
|
|
1273
1456
|
|
|
1274
|
-
def
|
|
1275
|
-
cost = n *
|
|
1457
|
+
def _fmt_cost_input(n: int) -> str:
    """Render the dollar value of ``n`` tokens billed at the input rate.

    ``_INPUT_COST`` is the per-token input price computed by the enclosing
    report. Anything under one cent is shown as ``"<$0.01"``; larger amounts
    are formatted with two decimals.
    """
    dollars = n * _INPUT_COST
    return "<$0.01" if dollars < 0.01 else f"${dollars:.2f}"
|
|
1279
1462
|
|
|
1463
|
+
def _fmt_cost_output(n: int) -> str:
    """Render the dollar value of ``n`` tokens billed at the output rate.

    ``_OUTPUT_COST`` is the per-token output price computed by the enclosing
    report. Anything under one cent is shown as ``"<$0.01"``; larger amounts
    are formatted with two decimals.
    """
    dollars = n * _OUTPUT_COST
    return "<$0.01" if dollars < 0.01 else f"${dollars:.2f}"
|
|
1468
|
+
|
|
1469
|
+
def _bucket_cost(bucket: str, tokens: int) -> float:
    """Return the dollar value of ``tokens`` saved in the given bucket.

    Buckets listed in ``_OUTPUT_BUCKETS`` are priced at the output rate;
    every other bucket is priced at the input rate.
    """
    if bucket in _OUTPUT_BUCKETS:
        return tokens * _OUTPUT_COST
    return tokens * _INPUT_COST
|
|
1472
|
+
|
|
1473
|
+
def _fmt_cost_raw(amount: float) -> str:
|
|
1474
|
+
if amount < 0.01:
|
|
1475
|
+
return "<$0.01"
|
|
1476
|
+
return f"${amount:.2f}"
|
|
1477
|
+
|
|
1280
1478
|
def _bar(saved_pct: int) -> str:
|
|
1281
1479
|
"""Render ⛁ ⛁ ⛁ ⛶ ⛶ ⛶ ⛶ ⛶ ⛶ ⛶ grid where filled = tokens used."""
|
|
1282
1480
|
used_pct = 100 - saved_pct
|
|
@@ -1307,6 +1505,20 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1307
1505
|
s = sum(int(v.get("served", 0)) for v in buckets.values())
|
|
1308
1506
|
return b, s
|
|
1309
1507
|
|
|
1508
|
+
def _split_io(buckets: dict) -> tuple[int, int, int, int]:
    """Total bucket token counts separately for input and output buckets.

    Each bucket entry contributes its ``"baseline"`` and ``"served"`` values
    (missing keys count as 0). Buckets named in ``_OUTPUT_BUCKETS`` go to the
    output totals; all others go to the input totals.

    Returns:
        ``(input_baseline, input_served, output_baseline, output_served)``.
    """
    in_totals = [0, 0]   # [baseline, served]
    out_totals = [0, 0]  # [baseline, served]
    for name, entry in buckets.items():
        baseline = int(entry.get("baseline", 0))
        served = int(entry.get("served", 0))
        totals = out_totals if name in _OUTPUT_BUCKETS else in_totals
        totals[0] += baseline
        totals[1] += served
    return in_totals[0], in_totals[1], out_totals[0], out_totals[1]
|
|
1521
|
+
|
|
1310
1522
|
def _print_project(name: str, stats: dict, buckets: dict, levels: dict) -> None:
|
|
1311
1523
|
queries = stats.get("queries", 0)
|
|
1312
1524
|
|
|
@@ -1326,6 +1538,16 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1326
1538
|
tokens_saved = max(0, baseline - served) if queries > 0 else 0
|
|
1327
1539
|
saved_pct = int(tokens_saved / baseline * 100) if baseline > 0 and queries > 0 else 0
|
|
1328
1540
|
|
|
1541
|
+
# Split into input / output savings
|
|
1542
|
+
in_base, in_srv, out_base, out_srv = _split_io(buckets)
|
|
1543
|
+
in_saved = max(0, in_base - in_srv)
|
|
1544
|
+
out_saved = max(0, out_base - out_srv)
|
|
1545
|
+
# Legacy projects have no bucket data; treat all savings as input.
|
|
1546
|
+
if bucket_baseline == 0 and tokens_saved > 0:
|
|
1547
|
+
in_saved = tokens_saved
|
|
1548
|
+
out_saved = 0
|
|
1549
|
+
total_cost_saved = in_saved * _INPUT_COST + out_saved * _OUTPUT_COST
|
|
1550
|
+
|
|
1329
1551
|
q_label = "query" if queries == 1 else "queries"
|
|
1330
1552
|
|
|
1331
1553
|
click.echo()
|
|
@@ -1350,29 +1572,28 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1350
1572
|
)
|
|
1351
1573
|
click.echo()
|
|
1352
1574
|
|
|
1353
|
-
#
|
|
1575
|
+
# Input / output / total saved
|
|
1354
1576
|
click.echo(
|
|
1355
|
-
f" {dim('
|
|
1356
|
-
f"{value(_fmt_tokens(
|
|
1357
|
-
f"{dim(
|
|
1358
|
-
)
|
|
1359
|
-
click.echo(
|
|
1360
|
-
f" {success('With CCE')} "
|
|
1361
|
-
f"{value(_fmt_tokens(served)):>10} {dim('tokens')} "
|
|
1362
|
-
f"{dim(_fmt_cost(served))}"
|
|
1577
|
+
f" {dim('Input savings')} "
|
|
1578
|
+
f"{value(_fmt_tokens(in_saved)):>10} {dim('tokens')} "
|
|
1579
|
+
f"{dim(_fmt_cost_input(in_saved))}"
|
|
1363
1580
|
)
|
|
1581
|
+
if out_saved > 0:
|
|
1582
|
+
click.echo(
|
|
1583
|
+
f" {dim('Output savings')} "
|
|
1584
|
+
f"{value(_fmt_tokens(out_saved)):>10} {dim('tokens')} "
|
|
1585
|
+
f"{dim(_fmt_cost_output(out_saved))}"
|
|
1586
|
+
)
|
|
1364
1587
|
click.echo(f" {dim('─' * 42)}")
|
|
1365
1588
|
click.echo(
|
|
1366
|
-
f" {success('
|
|
1589
|
+
f" {success('Total saved')} "
|
|
1367
1590
|
f"{click.style(_fmt_tokens(tokens_saved), fg='green', bold=True):>10} {dim('tokens')} "
|
|
1368
|
-
f"{click.style(
|
|
1591
|
+
f"{click.style(_fmt_cost_raw(total_cost_saved), fg='green', bold=True)}"
|
|
1369
1592
|
)
|
|
1370
|
-
# Per-query average
|
|
1371
|
-
# worth my time?" on. Skipped when there are no queries or no
|
|
1372
|
-
# savings (avoids dividing by zero and showing $0.00/query noise).
|
|
1593
|
+
# Per-query average
|
|
1373
1594
|
if queries > 0 and tokens_saved > 0:
|
|
1374
1595
|
avg_tokens = tokens_saved // max(1, queries)
|
|
1375
|
-
avg_cost =
|
|
1596
|
+
avg_cost = _fmt_cost_raw(total_cost_saved / max(1, queries))
|
|
1376
1597
|
click.echo(
|
|
1377
1598
|
f" {dim(f'~{_fmt_tokens(avg_tokens)} tokens / query')} "
|
|
1378
1599
|
f"{dim(f'~{avg_cost} / query')}"
|
|
@@ -1390,9 +1611,9 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1390
1611
|
if saved <= 0:
|
|
1391
1612
|
continue
|
|
1392
1613
|
pct = int(saved / baseline * 100) if baseline > 0 else 0
|
|
1393
|
-
rows.append((display, pct, saved, int(b.get("calls", 0)), is_est, idx))
|
|
1614
|
+
rows.append((key, display, pct, saved, int(b.get("calls", 0)), is_est, idx))
|
|
1394
1615
|
# Polish 2: sort by saved tokens descending. Biggest wins first.
|
|
1395
|
-
rows.sort(key=lambda r: (-r[
|
|
1616
|
+
rows.sort(key=lambda r: (-r[3], r[6]))
|
|
1396
1617
|
|
|
1397
1618
|
if rows:
|
|
1398
1619
|
click.echo(f" {dim('Breakdown:')}")
|
|
@@ -1401,16 +1622,16 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1401
1622
|
# form so estimate buckets don't blow out the alignment.
|
|
1402
1623
|
displayed_labels = [
|
|
1403
1624
|
f"{display}*" if is_est else display
|
|
1404
|
-
for display, _, _, _, is_est, _ in rows
|
|
1625
|
+
for _, display, _, _, _, is_est, _ in rows
|
|
1405
1626
|
]
|
|
1406
1627
|
label_width = max(len(s) for s in displayed_labels) + 1
|
|
1407
1628
|
# Polish 3: normalize bar fill against the largest bucket's saved
|
|
1408
1629
|
# tokens, not the total. Otherwise a dominant bucket squashes all
|
|
1409
1630
|
# others to 0–1 cells and the visualisation goes blind.
|
|
1410
|
-
max_saved = max(r[
|
|
1631
|
+
max_saved = max(r[3] for r in rows)
|
|
1411
1632
|
any_estimate = False
|
|
1412
|
-
for display, pct, saved, calls, is_est in [
|
|
1413
|
-
(d, p, s, c, e) for d, p, s, c, e, _ in rows
|
|
1633
|
+
for key, display, pct, saved, calls, is_est in [
|
|
1634
|
+
(k, d, p, s, c, e) for k, d, p, s, c, e, _ in rows
|
|
1414
1635
|
]:
|
|
1415
1636
|
if is_est:
|
|
1416
1637
|
any_estimate = True
|
|
@@ -1430,11 +1651,12 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1430
1651
|
call_text = "1 call" if calls == 1 else f"{calls} calls"
|
|
1431
1652
|
# Polish 5: asterisk glued to label, no separate marker column.
|
|
1432
1653
|
label_text = f"{display}*" if is_est else display
|
|
1654
|
+
cost_str = _fmt_cost_raw(_bucket_cost(key, saved))
|
|
1433
1655
|
click.echo(
|
|
1434
1656
|
f" {label(label_text.ljust(label_width))} "
|
|
1435
1657
|
f"{value(pct_text)} {mini_bar} "
|
|
1436
1658
|
f"{dim(_fmt_tokens(saved).rjust(6))} "
|
|
1437
|
-
f"{dim(
|
|
1659
|
+
f"{dim(cost_str.rjust(8))} "
|
|
1438
1660
|
f"{dim(f'· {call_text}')}"
|
|
1439
1661
|
)
|
|
1440
1662
|
click.echo()
|
|
@@ -1477,9 +1699,11 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1477
1699
|
f"{label('compression')} {value(f'{max(0, compression_pct)}%')}"
|
|
1478
1700
|
)
|
|
1479
1701
|
|
|
1480
|
-
|
|
1481
|
-
f"
|
|
1702
|
+
pricing_note = (
|
|
1703
|
+
f"Cost estimate based on {_model_label} pricing "
|
|
1704
|
+
f"(input ${_input_price_per_m}/1M, output ${_output_price_per_m}/1M)"
|
|
1482
1705
|
)
|
|
1706
|
+
click.echo(f" {dim(pricing_note)}")
|
|
1483
1707
|
|
|
1484
1708
|
def _json_entry(name: str, stats: dict, buckets: dict, levels: dict) -> dict:
|
|
1485
1709
|
full_file = stats.get("full_file_tokens", 0)
|
|
@@ -1493,6 +1717,9 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1493
1717
|
baseline = max(full_file, raw) if full_file > 0 else raw
|
|
1494
1718
|
served_total = served
|
|
1495
1719
|
saved = max(0, baseline - served_total)
|
|
1720
|
+
in_base, in_srv, out_base, out_srv = _split_io(buckets)
|
|
1721
|
+
in_saved = max(0, in_base - in_srv)
|
|
1722
|
+
out_saved = max(0, out_base - out_srv)
|
|
1496
1723
|
retrieval_pct = (
|
|
1497
1724
|
int(round((1 - raw / full_file) * 100))
|
|
1498
1725
|
if full_file > 0 and raw <= full_file
|
|
@@ -1510,6 +1737,8 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1510
1737
|
"raw_tokens": raw,
|
|
1511
1738
|
"served_tokens": served,
|
|
1512
1739
|
"tokens_saved": saved,
|
|
1740
|
+
"input_tokens_saved": in_saved,
|
|
1741
|
+
"output_tokens_saved": out_saved,
|
|
1513
1742
|
# Kept for backward compat with anything scraping this JSON:
|
|
1514
1743
|
"savings_pct": int(saved / baseline * 100) if baseline > 0 else 0,
|
|
1515
1744
|
"retrieval_savings_pct": max(0, retrieval_pct),
|
|
@@ -1602,6 +1831,17 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1602
1831
|
total_queries = sum(s.get("queries", 0) for _, s, _, _ in reports)
|
|
1603
1832
|
total_saved = max(0, total_baseline - total_served)
|
|
1604
1833
|
total_pct = int(total_saved / total_baseline * 100) if total_baseline > 0 else 0
|
|
1834
|
+
# Aggregate input/output across all projects
|
|
1835
|
+
all_in_saved = all_out_saved = 0
|
|
1836
|
+
for _, stats, bkts, _ in reports:
|
|
1837
|
+
ib, is_, ob, os_ = _split_io(bkts)
|
|
1838
|
+
all_in_saved += max(0, ib - is_)
|
|
1839
|
+
all_out_saved += max(0, ob - os_)
|
|
1840
|
+
# Legacy projects with no bucket data: attribute remaining to input
|
|
1841
|
+
bucket_total_saved = all_in_saved + all_out_saved
|
|
1842
|
+
if bucket_total_saved < total_saved:
|
|
1843
|
+
all_in_saved += total_saved - bucket_total_saved
|
|
1844
|
+
agg_cost = all_in_saved * _INPUT_COST + all_out_saved * _OUTPUT_COST
|
|
1605
1845
|
click.echo()
|
|
1606
1846
|
click.echo(
|
|
1607
1847
|
f" {bold('Total')} {dim('across')} {value(str(len(reports)))} "
|
|
@@ -1613,7 +1853,7 @@ def _run_savings_report(config, *, as_json: bool = False, all_projects: bool = F
|
|
|
1613
1853
|
f"{dim('saved ·')} "
|
|
1614
1854
|
f"{click.style(_fmt_tokens(total_saved), fg='green', bold=True)} "
|
|
1615
1855
|
f"{dim('tokens ·')} "
|
|
1616
|
-
f"{click.style(
|
|
1856
|
+
f"{click.style(_fmt_cost_raw(agg_cost), fg='green', bold=True)}"
|
|
1617
1857
|
)
|
|
1618
1858
|
|
|
1619
1859
|
click.echo()
|
|
@@ -2094,10 +2334,15 @@ def upgrade(ctx: click.Context, check: bool) -> None:
|
|
|
2094
2334
|
|
|
2095
2335
|
new_version = current # fallback
|
|
2096
2336
|
try:
|
|
2097
|
-
#
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2337
|
+
# The running process still sees the old venv metadata, so shell out
|
|
2338
|
+
# to the upgraded executable to get the real post-upgrade version.
|
|
2339
|
+
ver_result = subprocess.run(
|
|
2340
|
+
[str(cce_bin), "--version"],
|
|
2341
|
+
capture_output=True, text=True, timeout=10,
|
|
2342
|
+
)
|
|
2343
|
+
if ver_result.returncode == 0:
|
|
2344
|
+
# Output format: "cce, version X.Y.Z"
|
|
2345
|
+
new_version = ver_result.stdout.strip().rsplit(None, 1)[-1]
|
|
2101
2346
|
except Exception:
|
|
2102
2347
|
pass
|
|
2103
2348
|
|
|
@@ -2128,7 +2373,7 @@ def upgrade(ctx: click.Context, check: bool) -> None:
|
|
|
2128
2373
|
click.echo("")
|
|
2129
2374
|
click.echo(
|
|
2130
2375
|
click.style(" Done!", fg="green", bold=True) +
|
|
2131
|
-
click.style(" Restart
|
|
2376
|
+
click.style(" Restart your AI coding agent to pick up changes.", fg="white")
|
|
2132
2377
|
)
|
|
2133
2378
|
click.echo("")
|
|
2134
2379
|
|
|
@@ -11,15 +11,27 @@ ESTIMATED_AVG_REPLY_TOKENS = 500
|
|
|
11
11
|
|
|
12
12
|
# Advertised output-token reduction per level. Sourced from the level
|
|
13
13
|
# descriptions ("~65% savings", "~75% savings"). `lite` has no advertised
|
|
14
|
-
# number; we use a conservative
|
|
15
|
-
#
|
|
14
|
+
# number; we use a conservative 25% based on filler removal + code diff rules.
|
|
15
|
+
# The code output rules (show diffs, not full files) add ~5-10% on top of
|
|
16
|
+
# prose compression since code responses are a large share of output tokens.
|
|
16
17
|
ADVERTISED_PCT = {
|
|
17
18
|
"off": 0.0,
|
|
18
|
-
"lite": 0.
|
|
19
|
-
"standard": 0.
|
|
20
|
-
"max": 0.
|
|
19
|
+
"lite": 0.25,
|
|
20
|
+
"standard": 0.70,
|
|
21
|
+
"max": 0.80,
|
|
21
22
|
}
|
|
22
23
|
|
|
24
|
+
# Code output rules — appended to all non-off levels to reduce code token waste.
|
|
25
|
+
_CODE_RULES = (
|
|
26
|
+
"\n\n## Code Output Rules\n"
|
|
27
|
+
"When suggesting code changes:\n"
|
|
28
|
+
"- Show ONLY the changed lines with minimal surrounding context (3 lines above/below)\n"
|
|
29
|
+
"- Use edit format: file path, then the specific change. Never rewrite entire files.\n"
|
|
30
|
+
"- If multiple changes in one file, show each change separately, not the whole file\n"
|
|
31
|
+
"- Never echo back unchanged code the user already has\n"
|
|
32
|
+
"- For new files, show the full file. For edits, show only what changes."
|
|
33
|
+
)
|
|
34
|
+
|
|
23
35
|
_RULES = {
|
|
24
36
|
"lite": (
|
|
25
37
|
"## Output Compression: Lite\n"
|
|
@@ -30,6 +42,7 @@ _RULES = {
|
|
|
30
42
|
"- No trailing summaries — the diff/output speaks for itself\n"
|
|
31
43
|
"- Keep full grammar and articles\n"
|
|
32
44
|
"- Code blocks, paths, commands, URLs: NEVER compress"
|
|
45
|
+
+ _CODE_RULES
|
|
33
46
|
),
|
|
34
47
|
"standard": (
|
|
35
48
|
"## Output Compression: Standard\n"
|
|
@@ -43,6 +56,7 @@ _RULES = {
|
|
|
43
56
|
"- One-line explanations unless detail is asked for\n"
|
|
44
57
|
"- Code blocks, paths, commands, URLs, errors: NEVER compress\n"
|
|
45
58
|
"- Security warnings and destructive action confirmations: use full clarity"
|
|
59
|
+
+ _CODE_RULES
|
|
46
60
|
),
|
|
47
61
|
"max": (
|
|
48
62
|
"## Output Compression: Max\n"
|
|
@@ -55,6 +69,7 @@ _RULES = {
|
|
|
55
69
|
"- Pattern: [thing] → [action]. [reason].\n"
|
|
56
70
|
"- Code blocks, paths, commands, URLs, errors: NEVER compress\n"
|
|
57
71
|
"- Security warnings and destructive action confirmations: use full clarity"
|
|
72
|
+
+ _CODE_RULES
|
|
58
73
|
),
|
|
59
74
|
}
|
|
60
75
|
|
|
@@ -70,8 +85,62 @@ def get_level_description(level: str) -> str:
|
|
|
70
85
|
"""Return a human-readable description of the compression level."""
|
|
71
86
|
descriptions = {
|
|
72
87
|
"off": "No output compression — Claude responds normally",
|
|
73
|
-
"lite": "Removes filler, hedging, and pleasantries.
|
|
74
|
-
"standard": "Drops articles, uses fragments, short synonyms. ~
|
|
75
|
-
"max": "Telegraphic style with abbreviations and symbols. ~
|
|
88
|
+
"lite": "Removes filler, hedging, and pleasantries. Diff-only for code. ~25% savings.",
|
|
89
|
+
"standard": "Drops articles, uses fragments, short synonyms. Diff-only for code. ~70% savings.",
|
|
90
|
+
"max": "Telegraphic style with abbreviations and symbols. Diff-only for code. ~80% savings.",
|
|
76
91
|
}
|
|
77
92
|
return descriptions.get(level, "Unknown level")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ── Instruction-file blocks ──────────────────────────────────────────
|
|
96
|
+
# These go into CLAUDE.md, AGENTS.md, .cursorrules, etc. so they apply
|
|
97
|
+
# to the entire session, not just CCE tool responses.
|
|
98
|
+
|
|
99
|
+
_INSTRUCTION_OUTPUT_STYLES = {
|
|
100
|
+
"lite": """\
|
|
101
|
+
### Output style
|
|
102
|
+
|
|
103
|
+
Respond concisely. Remove filler words (just, really, basically, actually,
|
|
104
|
+
simply), hedging (I think, it seems, perhaps), and pleasantries (Sure!,
|
|
105
|
+
Happy to help, Great question). No trailing summaries. Keep full grammar.
|
|
106
|
+
|
|
107
|
+
When suggesting code changes, show only the changed lines with 3 lines of
|
|
108
|
+
context. Never rewrite entire files. For new files, show the full file.
|
|
109
|
+
For edits, show only what changes.""",
|
|
110
|
+
|
|
111
|
+
"standard": """\
|
|
112
|
+
### Output style
|
|
113
|
+
|
|
114
|
+
Respond in compressed style. Drop articles (a, an, the) in prose. Use
|
|
115
|
+
sentence fragments over full sentences. Use short synonyms (fix not resolve,
|
|
116
|
+
check not investigate). Pattern: [thing] [action] [reason]. [next step].
|
|
117
|
+
No filler, hedging, pleasantries, trailing summaries, or restating what
|
|
118
|
+
the user said. One sentence if one sentence is enough.
|
|
119
|
+
|
|
120
|
+
When suggesting code changes, show only the changed lines with 3 lines of
|
|
121
|
+
context. Never rewrite entire files. Multiple changes in one file: show each
|
|
122
|
+
change separately. Never echo back unchanged code the user already has.
|
|
123
|
+
|
|
124
|
+
Code blocks, file paths, commands, error messages: always written in full.
|
|
125
|
+
Security warnings and destructive action confirmations: use full clarity.""",
|
|
126
|
+
|
|
127
|
+
"max": """\
|
|
128
|
+
### Output style
|
|
129
|
+
|
|
130
|
+
Respond in telegraphic style. Drop articles, pronouns, conjunctions where
|
|
131
|
+
meaning survives. Abbreviate common terms: DB, auth, config, fn, dep, impl,
|
|
132
|
+
req, resp, init. Use arrows for causality: X → Y. Use symbols: + (add),
|
|
133
|
+
- (remove), ~ (change), ! (warning). Max 1-2 sentences per explanation.
|
|
134
|
+
Pattern: [thing] → [action]. [reason].
|
|
135
|
+
|
|
136
|
+
When suggesting code changes, show only changed lines. Never rewrite files.
|
|
137
|
+
Never echo back unchanged code.
|
|
138
|
+
|
|
139
|
+
Code blocks, paths, commands, errors: always full.
|
|
140
|
+
Security warnings and destructive actions: full clarity, drop compression.""",
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def get_instruction_output_block(level: str) -> str:
    """Look up the instruction-file output-style block for ``level``.

    Returns the text registered in ``_INSTRUCTION_OUTPUT_STYLES``, or an
    empty string when the level has no entry there.
    """
    try:
        return _INSTRUCTION_OUTPUT_STYLES[level]
    except KeyError:
        return ""
|
context_engine/editors.py
CHANGED
|
@@ -92,7 +92,7 @@ EDITORS: dict[str, dict] = {
|
|
|
92
92
|
# ── Instruction file definitions ──────────────────────────────────────
|
|
93
93
|
|
|
94
94
|
# Editor-agnostic CCE instructions (no "Claude Code" references)
|
|
95
|
-
|
|
95
|
+
_CCE_INSTRUCTIONS_BASE = """\
|
|
96
96
|
## Context Engine (CCE)
|
|
97
97
|
|
|
98
98
|
This project uses Code Context Engine for intelligent code retrieval and
|
|
@@ -122,7 +122,30 @@ Call `record_decision(decision="...", reason="...")` after making choices.
|
|
|
122
122
|
Call `record_code_area(file_path="...", description="...")` after meaningful work.
|
|
123
123
|
"""
|
|
124
124
|
|
|
125
|
+
|
|
126
|
+
def _build_instructions(output_level: str = "standard") -> str:
    """Assemble the CCE instruction text for the given output-style level.

    The base instructions are always included. When the level has an
    output-style block, it is appended after a newline and followed by a
    trailing newline; otherwise the base text is returned unchanged.
    """
    from context_engine.compression.output_rules import get_instruction_output_block

    style_block = get_instruction_output_block(output_level)
    if not style_block:
        return _CCE_INSTRUCTIONS_BASE
    return _CCE_INSTRUCTIONS_BASE + "\n" + style_block + "\n"
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# Default instructions (standard output compression)
|
|
136
|
+
_CCE_INSTRUCTIONS = _build_instructions("standard")
|
|
137
|
+
|
|
125
138
|
INSTRUCTION_FILES: dict[str, dict] = {
|
|
139
|
+
"agents": {
|
|
140
|
+
"name": "AGENTS.md",
|
|
141
|
+
"path": "AGENTS.md",
|
|
142
|
+
"detect": ["AGENTS.md"],
|
|
143
|
+
},
|
|
144
|
+
"copilot": {
|
|
145
|
+
"name": ".github/copilot-instructions.md",
|
|
146
|
+
"path": ".github/copilot-instructions.md",
|
|
147
|
+
"detect": [".github/copilot-instructions.md"],
|
|
148
|
+
},
|
|
126
149
|
"cursorrules": {
|
|
127
150
|
"name": ".cursorrules",
|
|
128
151
|
"path": ".cursorrules",
|
|
@@ -558,20 +581,24 @@ def _remove_toml(config_path: Path, display_path: str, *, section: str) -> str |
|
|
|
558
581
|
return None
|
|
559
582
|
|
|
560
583
|
|
|
561
|
-
def write_instruction_file(
|
|
584
|
+
def write_instruction_file(
    project_dir: Path, file_key: str, output_level: str = "standard",
) -> bool:
    """Write CCE instructions to an editor's instruction file.

    If the file already exists, the instructions are appended after the
    existing content (separated by a blank line). Otherwise the file is
    created, along with any missing parent directories.

    Args:
        project_dir: Project root that the instruction file path is relative to.
        file_key: Key into ``INSTRUCTION_FILES`` selecting the target file.
        output_level: Output-style level passed to ``_build_instructions``.

    Returns:
        True if the instructions were written; False if the file already
        contains the CCE marker heading and was left untouched.

    Raises:
        KeyError: If ``file_key`` is not present in ``INSTRUCTION_FILES``.
    """
    info = INSTRUCTION_FILES[file_key]
    path = project_dir / info["path"]
    marker = "## Context Engine (CCE)"
    instructions = _build_instructions(output_level)

    # Always read and write as UTF-8. The instruction text can contain
    # non-ASCII characters (e.g. the arrows in the "max" output style), and
    # the platform default encoding cannot represent them on some systems.
    if path.exists():
        content = path.read_text(encoding="utf-8")
        if marker in content:
            return False  # already has CCE block
        # Append
        path.write_text(content.rstrip() + "\n\n" + instructions, encoding="utf-8")
    else:
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(instructions, encoding="utf-8")
    return True
|
|
576
603
|
|
|
577
604
|
|
|
@@ -319,16 +319,66 @@ class OllamaBackend:
|
|
|
319
319
|
for _ in resp.iter_lines():
|
|
320
320
|
pass
|
|
321
321
|
|
|
322
|
+
# nomic-embed-text has an 8192-token context. Dense-tokenizing content
|
|
323
|
+
# (YAML with ${{ }}, Python separator comments) can hit ~1 char/token,
|
|
324
|
+
# so 3000 chars is a safe ceiling that works for all content types.
|
|
325
|
+
_MAX_EMBED_CHARS = 3000
|
|
326
|
+
|
|
322
327
|
def _embed_batch(self, texts: list[str]) -> list[list[float]]:
|
|
323
328
|
import httpx
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
329
|
+
# Truncate oversized texts and skip empty ones
|
|
330
|
+
safe_texts = []
|
|
331
|
+
original_indices = []
|
|
332
|
+
for i, t in enumerate(texts):
|
|
333
|
+
if not t or not t.strip():
|
|
334
|
+
continue
|
|
335
|
+
safe_texts.append(t[:self._MAX_EMBED_CHARS])
|
|
336
|
+
original_indices.append(i)
|
|
337
|
+
|
|
338
|
+
if not safe_texts:
|
|
339
|
+
return [[] for _ in texts]
|
|
340
|
+
|
|
341
|
+
try:
|
|
342
|
+
resp = httpx.post(
|
|
343
|
+
f"{self.base_url}/api/embed",
|
|
344
|
+
json={"model": self.model_name, "input": safe_texts},
|
|
345
|
+
timeout=self._timeout,
|
|
346
|
+
)
|
|
347
|
+
resp.raise_for_status()
|
|
348
|
+
embeddings = resp.json().get("embeddings", [])
|
|
349
|
+
except httpx.HTTPStatusError as exc:
|
|
350
|
+
if exc.response.status_code != 400:
|
|
351
|
+
raise
|
|
352
|
+
# Batch failed (possibly one text still too large after truncation).
|
|
353
|
+
# Fall back to one-at-a-time with halving retry.
|
|
354
|
+
log.warning("Ollama batch embed failed, retrying one-at-a-time")
|
|
355
|
+
embeddings = []
|
|
356
|
+
for text in safe_texts:
|
|
357
|
+
vec = self._embed_single_with_retry(text)
|
|
358
|
+
embeddings.append(vec)
|
|
359
|
+
|
|
360
|
+
# Map embeddings back to original positions (empty texts get empty vecs)
|
|
361
|
+
result: list[list[float]] = [[] for _ in texts]
|
|
362
|
+
for idx, emb in zip(original_indices, embeddings):
|
|
363
|
+
result[idx] = emb
|
|
364
|
+
return result
|
|
365
|
+
|
|
366
|
+
def _embed_single_with_retry(self, text: str) -> list[float]:
    """Embed one text, shrinking it by half after each context-length error.

    A 400 response whose body mentions "context length" causes the text to
    be cut to its first half and retried. Any other error status is raised.
    Returns the first embedding in the response, or ``[]`` if the response
    has none or the text has been halved down to nothing.
    """
    import httpx

    remaining = text
    while remaining:
        resp = httpx.post(
            f"{self.base_url}/api/embed",
            json={"model": self.model_name, "input": [remaining]},
            timeout=self._timeout,
        )
        too_long = resp.status_code == 400 and "context length" in resp.text
        if not too_long:
            resp.raise_for_status()
            vecs = resp.json().get("embeddings", [[]])
            if vecs:
                return vecs[0]
            return []
        remaining = remaining[:len(remaining) // 2]
    return []
|
|
332
382
|
|
|
333
383
|
def embed_texts(self, texts: list[str], batch_size: int = 64) -> list[list[float]]:
|
|
334
384
|
out: list[list[float]] = []
|
context_engine/memory/db.py
CHANGED
|
@@ -281,6 +281,14 @@ def _try_load_vec(conn: sqlite3.Connection) -> bool:
|
|
|
281
281
|
sqlite_vec.load(conn)
|
|
282
282
|
conn.enable_load_extension(False)
|
|
283
283
|
return True
|
|
284
|
+
except AttributeError:
|
|
285
|
+
log.warning(
|
|
286
|
+
"sqlite-vec load failed; semantic recall disabled. "
|
|
287
|
+
"Python was compiled without SQLite extension support. "
|
|
288
|
+
"Reinstall CCE with Homebrew Python: "
|
|
289
|
+
"uv tool install --python /opt/homebrew/bin/python3 --force code-context-engine"
|
|
290
|
+
)
|
|
291
|
+
return False
|
|
284
292
|
except Exception as exc:
|
|
285
293
|
log.warning("sqlite-vec load failed; semantic recall disabled: %s", exc)
|
|
286
294
|
return False
|
context_engine/pricing.py
CHANGED
|
@@ -3,23 +3,38 @@ import json
|
|
|
3
3
|
import re
|
|
4
4
|
import time
|
|
5
5
|
from pathlib import Path
|
|
6
|
+
from typing import TypedDict
|
|
6
7
|
|
|
7
8
|
_CCE_HOME = Path.home() / ".cce"
|
|
8
9
|
_CACHE_PATH = _CCE_HOME / "pricing_cache.json"
|
|
9
10
|
_CACHE_TTL = 7 * 24 * 3600 # 7 days
|
|
10
11
|
_DOCS_URL = "https://docs.anthropic.com/en/docs/about-claude/models"
|
|
11
12
|
|
|
13
|
+
|
|
14
|
+
class ModelPricing(TypedDict):
    """Price pair for one model family, in dollars per 1M tokens.

    ``input`` is the price of input tokens; ``output`` the price of output
    tokens.
    """

    input: float
    output: float
|
|
17
|
+
|
|
18
|
+
|
|
12
19
|
# Used only when fetch fails and no cache exists
|
|
13
|
-
_FALLBACK: dict[str,
|
|
14
|
-
"opus":
|
|
20
|
+
_FALLBACK: dict[str, ModelPricing] = {
|
|
21
|
+
"opus": {"input": 15.0, "output": 75.0},
|
|
22
|
+
"sonnet": {"input": 3.0, "output": 15.0},
|
|
23
|
+
"haiku": {"input": 0.80, "output": 4.0},
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
# Flat input-only fallback kept for backward compat with existing cache files
|
|
27
|
+
_FALLBACK_INPUT: dict[str, float] = {
|
|
28
|
+
"opus": 15.0,
|
|
15
29
|
"sonnet": 3.0,
|
|
16
|
-
"haiku":
|
|
30
|
+
"haiku": 0.80,
|
|
17
31
|
}
|
|
18
32
|
|
|
19
33
|
|
|
20
|
-
def _parse_html(html: str) -> dict[str,
|
|
21
|
-
"""Parse per-family input pricing from Anthropic docs HTML table."""
|
|
22
|
-
|
|
34
|
+
def _parse_html(html: str) -> dict[str, ModelPricing] | None:
|
|
35
|
+
"""Parse per-family input + output pricing from Anthropic docs HTML table."""
|
|
36
|
+
input_pricing: dict[str, float] = {}
|
|
37
|
+
output_pricing: dict[str, float] = {}
|
|
23
38
|
|
|
24
39
|
rows = re.findall(r"<tr[^>]*>(.*?)</tr>", html, re.DOTALL | re.IGNORECASE)
|
|
25
40
|
col_families: list[str | None] = []
|
|
@@ -44,23 +59,42 @@ def _parse_html(html: str) -> dict[str, float] | None:
|
|
|
44
59
|
col_families = families_in_row
|
|
45
60
|
continue
|
|
46
61
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
62
|
+
if not col_families:
|
|
63
|
+
continue
|
|
64
|
+
|
|
65
|
+
# Detect whether this is an input or output pricing row
|
|
66
|
+
is_input = any("input" in c.lower() and "tok" in c.lower() for c in cells)
|
|
67
|
+
is_output = any("output" in c.lower() and "tok" in c.lower() for c in cells)
|
|
68
|
+
target = None
|
|
69
|
+
if is_input and not is_output:
|
|
70
|
+
target = input_pricing
|
|
71
|
+
elif is_output and not is_input:
|
|
72
|
+
target = output_pricing
|
|
73
|
+
|
|
74
|
+
if target is not None:
|
|
51
75
|
for i, cell in enumerate(cells):
|
|
52
76
|
if i < len(col_families) and col_families[i]:
|
|
53
77
|
m = re.search(r"\$(\d+(?:\.\d+)?)", cell)
|
|
54
78
|
if m:
|
|
55
79
|
family = col_families[i]
|
|
56
|
-
if family not in
|
|
57
|
-
|
|
58
|
-
|
|
80
|
+
if family not in target:
|
|
81
|
+
target[family] = float(m.group(1))
|
|
82
|
+
if target is output_pricing:
|
|
83
|
+
col_families = []
|
|
84
|
+
|
|
85
|
+
if not input_pricing:
|
|
86
|
+
return None
|
|
59
87
|
|
|
60
|
-
|
|
88
|
+
result: dict[str, ModelPricing] = {}
|
|
89
|
+
for family in input_pricing:
|
|
90
|
+
result[family] = {
|
|
91
|
+
"input": input_pricing[family],
|
|
92
|
+
"output": output_pricing.get(family, input_pricing[family] * 5),
|
|
93
|
+
}
|
|
94
|
+
return result
|
|
61
95
|
|
|
62
96
|
|
|
63
|
-
def _fetch() -> dict[str,
|
|
97
|
+
def _fetch() -> dict[str, ModelPricing] | None:
|
|
64
98
|
try:
|
|
65
99
|
import httpx
|
|
66
100
|
|
|
@@ -72,19 +106,29 @@ def _fetch() -> dict[str, float] | None:
|
|
|
72
106
|
return None
|
|
73
107
|
|
|
74
108
|
|
|
75
|
-
def _load_cache() -> dict[str,
|
|
109
|
+
def _load_cache() -> dict[str, ModelPricing] | None:
    """Load cached pricing from ``_CACHE_PATH`` if it is present and fresh.

    Legacy cache entries that store a bare input price are upgraded to the
    ``{"input", "output"}`` form, with the output price taken as five times
    the input price. Each entry is checked individually, so a cache mixing
    both formats is handled.

    Returns:
        The pricing mapping, or None when the cache is missing, older than
        ``_CACHE_TTL``, empty, unreadable, or contains an entry that is
        neither a number nor a dict with both ``"input"`` and ``"output"``.
    """
    try:
        if not _CACHE_PATH.exists():
            return None
        data = json.loads(_CACHE_PATH.read_text())
        if time.time() - data.get("ts", 0) >= _CACHE_TTL:
            return None
        raw = data.get("pricing")
        if not raw:
            return None

        pricing: dict[str, ModelPricing] = {}
        for family, entry in raw.items():
            if isinstance(entry, (int, float)):
                # Migrate flat input-only entry to ModelPricing format
                pricing[family] = {"input": entry, "output": entry * 5}
            elif isinstance(entry, dict) and "input" in entry and "output" in entry:
                pricing[family] = entry
            else:
                # Malformed entry: report a miss so callers fall back,
                # rather than failing later on a missing key.
                return None
        return pricing
    except Exception:
        # Deliberately best-effort: any unreadable or corrupt cache is
        # treated as a cache miss.
        return None
|
|
85
129
|
|
|
86
130
|
|
|
87
|
-
def _save_cache(pricing: dict[str,
|
|
131
|
+
def _save_cache(pricing: dict[str, ModelPricing]) -> None:
|
|
88
132
|
try:
|
|
89
133
|
_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
|
90
134
|
_CACHE_PATH.write_text(json.dumps({"ts": time.time(), "pricing": pricing}))
|
|
@@ -92,8 +136,8 @@ def _save_cache(pricing: dict[str, float]) -> None:
|
|
|
92
136
|
pass
|
|
93
137
|
|
|
94
138
|
|
|
95
|
-
def get_model_pricing() -> dict[str,
|
|
96
|
-
"""Return {family:
|
|
139
|
+
def get_model_pricing() -> dict[str, ModelPricing]:
|
|
140
|
+
"""Return {family: {input, output}} pricing per 1M tokens. Cached 7 days."""
|
|
97
141
|
cached = _load_cache()
|
|
98
142
|
if cached:
|
|
99
143
|
return cached
|
|
@@ -46,9 +46,23 @@ class VectorStore:
|
|
|
46
46
|
def _connect(self) -> sqlite3.Connection:
    """Open the SQLite database with the sqlite-vec extension loaded.

    Extension loading is enabled only for the duration of the
    ``sqlite_vec.load`` call. The connection is then switched to WAL
    journaling with ``synchronous=NORMAL``.

    Returns:
        The configured connection.

    Raises:
        RuntimeError: If the ``sqlite3`` build lacks extension-loading
            support (surfaced as ``AttributeError`` while loading).
    """
    import sqlite_vec
    conn = sqlite3.connect(self._db_file, check_same_thread=False)
    try:
        try:
            conn.enable_load_extension(True)
            sqlite_vec.load(conn)
            conn.enable_load_extension(False)
        except AttributeError:
            raise RuntimeError(
                "Your Python was compiled without SQLite extension support "
                "(enable_load_extension is missing). This is common with "
                "python.org installers on macOS.\n\n"
                "Fix: reinstall CCE under a Python that has extension support:\n"
                " uv tool install --python $(brew --prefix python3)/bin/python3 "
                "--force code-context-engine\n\n"
                "Or use Homebrew Python directly:\n"
                " brew install python3\n"
                " uv tool install --python /opt/homebrew/bin/python3 "
                "--force code-context-engine"
            ) from None
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
    except BaseException:
        # Do not leak the open connection when setup fails.
        conn.close()
        raise
    return conn
|
|
File without changes
|
{code_context_engine-0.4.20.dist-info → code_context_engine-0.4.22.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{code_context_engine-0.4.20.dist-info → code_context_engine-0.4.22.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|