code-context-engine 0.4.19__tar.gz → 0.4.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_context_engine-0.4.19/src/code_context_engine.egg-info → code_context_engine-0.4.20}/PKG-INFO +13 -5
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/README.md +10 -3
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/pyproject.toml +6 -2
- {code_context_engine-0.4.19 → code_context_engine-0.4.20/src/code_context_engine.egg-info}/PKG-INFO +13 -5
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/requires.txt +3 -1
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/cli.py +91 -15
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/config.py +7 -0
- code_context_engine-0.4.20/src/context_engine/indexer/embedder.py +539 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/git_hooks.py +16 -2
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/manifest.py +27 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/pipeline.py +45 -3
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/watcher.py +35 -4
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/mcp_server.py +62 -15
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/hook_installer.py +35 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/hook_server.py +25 -9
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/project_commands.py +6 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_project_commands.py +15 -0
- code_context_engine-0.4.19/src/context_engine/indexer/embedder.py +0 -158
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/LICENSE +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/setup.cfg +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/SOURCES.txt +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/dependency_links.txt +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/entry_points.txt +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/top_level.txt +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/__init__.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/cli_style.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/__init__.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/compressor.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/ollama_client.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/output_rules.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/prompts.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/quality.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/dashboard/__init__.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/dashboard/_page.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/dashboard/server.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/editors.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/event_bus.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/__init__.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/chunker.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/embedding_cache.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/git_indexer.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/ignorefile.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/secrets.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/__init__.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/bootstrap.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/git_context.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/session_capture.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/__init__.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/compressor.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/db.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/decision_extractor.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/extractive.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/grammar.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/hooks.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/migrate.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/models.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/pricing.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/__init__.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/confidence.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/query_parser.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/retriever.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/serve_http.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/services.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/__init__.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/backend.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/fts_store.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/graph_store.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/local_backend.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/remote_backend.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/vector_store.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/utils.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_init_probe.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_mcp_config.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_safe_cwd.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_savings.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_savings_buckets.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_savings_e2e.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_serve.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_sessions_export.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_sessions_status.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_smoke.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_uninstall.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_config.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_e2e.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_editors_codex.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_editors_opencode.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_event_bus.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_models.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_real_life.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_services.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_token_efficiency.py +0 -0
- {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_token_packing.py +0 -0
{code_context_engine-0.4.19/src/code_context_engine.egg-info → code_context_engine-0.4.20}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-context-engine
|
|
3
|
-
Version: 0.4.19
|
|
3
|
+
Version: 0.4.20
|
|
4
4
|
Summary: Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server.
|
|
5
5
|
Author-email: Fazle Elahee <felahee@gmail.com>, Raj <rajkumar.sakti@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -21,7 +21,6 @@ License-File: LICENSE
|
|
|
21
21
|
Requires-Dist: click>=8.1
|
|
22
22
|
Requires-Dist: pyyaml>=6.0
|
|
23
23
|
Requires-Dist: sqlite-vec>=0.1.6
|
|
24
|
-
Requires-Dist: fastembed>=0.4
|
|
25
24
|
Requires-Dist: numpy>=1.24
|
|
26
25
|
Requires-Dist: tree-sitter>=0.22
|
|
27
26
|
Requires-Dist: tree-sitter-python>=0.21
|
|
@@ -46,6 +45,8 @@ Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
|
46
45
|
Requires-Dist: pytest-xdist>=3.5; extra == "dev"
|
|
47
46
|
Requires-Dist: ruff>=0.13; extra == "dev"
|
|
48
47
|
Provides-Extra: http
|
|
48
|
+
Provides-Extra: local
|
|
49
|
+
Requires-Dist: fastembed>=0.4; extra == "local"
|
|
49
50
|
Dynamic: license-file
|
|
50
51
|
|
|
51
52
|
<p align="center">
|
|
@@ -148,6 +149,12 @@ cd /path/to/your/project
|
|
|
148
149
|
cce init # index, install hooks, register MCP server
|
|
149
150
|
```
|
|
150
151
|
|
|
152
|
+
**Embedding backends:** CCE auto-detects the best available backend. With the core install and Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra (when fastembed is installed it is preferred unless `CCE_EMBED_BACKEND=ollama` is set):
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
|
|
156
|
+
```
|
|
157
|
+
|
|
151
158
|
Restart your editor. Done. Every question now hits the index instead of re-reading files.
|
|
152
159
|
|
|
153
160
|
`cce init` auto-detects your editor and writes the right config:
|
|
@@ -425,11 +432,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
|
|
|
425
432
|
|
|
426
433
|
| Component | Size |
|
|
427
434
|
|-----------|------|
|
|
428
|
-
|
|
|
429
|
-
|
|
|
435
|
+
| Core install (Ollama backend) | ~17 MB |
|
|
436
|
+
| With `[local]` extra (fastembed + ONNX) | ~189 MB |
|
|
437
|
+
| Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
|
|
430
438
|
| Index per project (small/medium/large) | 5-60 MB |
|
|
431
439
|
|
|
432
|
-
No GPU required.
|
|
440
|
+
No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
|
|
433
441
|
|
|
434
442
|
---
|
|
435
443
|
|
|
@@ -98,6 +98,12 @@ cd /path/to/your/project
|
|
|
98
98
|
cce init # index, install hooks, register MCP server
|
|
99
99
|
```
|
|
100
100
|
|
|
101
|
+
**Embedding backends:** CCE auto-detects the best available backend. With the core install and Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra (when fastembed is installed it is preferred unless `CCE_EMBED_BACKEND=ollama` is set):
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
|
|
105
|
+
```
|
|
106
|
+
|
|
101
107
|
Restart your editor. Done. Every question now hits the index instead of re-reading files.
|
|
102
108
|
|
|
103
109
|
`cce init` auto-detects your editor and writes the right config:
|
|
@@ -375,11 +381,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
|
|
|
375
381
|
|
|
376
382
|
| Component | Size |
|
|
377
383
|
|-----------|------|
|
|
378
|
-
|
|
|
379
|
-
|
|
|
384
|
+
| Core install (Ollama backend) | ~17 MB |
|
|
385
|
+
| With `[local]` extra (fastembed + ONNX) | ~189 MB |
|
|
386
|
+
| Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
|
|
380
387
|
| Index per project (small/medium/large) | 5-60 MB |
|
|
381
388
|
|
|
382
|
-
No GPU required.
|
|
389
|
+
No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
|
|
383
390
|
|
|
384
391
|
---
|
|
385
392
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "code-context-engine"
|
|
3
|
-
version = "0.4.19"
|
|
3
|
+
version = "0.4.20"
|
|
4
4
|
description = "Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server."
|
|
5
5
|
readme = {file = "README.md", content-type = "text/markdown"}
|
|
6
6
|
license = "MIT"
|
|
@@ -23,7 +23,6 @@ dependencies = [
|
|
|
23
23
|
"click>=8.1",
|
|
24
24
|
"pyyaml>=6.0",
|
|
25
25
|
"sqlite-vec>=0.1.6",
|
|
26
|
-
"fastembed>=0.4",
|
|
27
26
|
"numpy>=1.24",
|
|
28
27
|
"tree-sitter>=0.22",
|
|
29
28
|
"tree-sitter-python>=0.21",
|
|
@@ -60,6 +59,11 @@ dev = [
|
|
|
60
59
|
"ruff>=0.13",
|
|
61
60
|
]
|
|
62
61
|
http = [] # back-compat: aiohttp is now a core dependency
|
|
62
|
+
# Local on-device embedding via fastembed (ONNX). Adds ~172 MB of install
|
|
63
|
+
# footprint; needed only if you don't have Ollama running. Without
|
|
64
|
+
# this extra, CCE auto-detects Ollama at localhost:11434 and uses
|
|
65
|
+
# nomic-embed-text via /api/embed.
|
|
66
|
+
local = ["fastembed>=0.4"]
|
|
63
67
|
|
|
64
68
|
[project.scripts]
|
|
65
69
|
cce = "context_engine.cli:main"
|
{code_context_engine-0.4.19 → code_context_engine-0.4.20/src/code_context_engine.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-context-engine
|
|
3
|
-
Version: 0.4.19
|
|
3
|
+
Version: 0.4.20
|
|
4
4
|
Summary: Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server.
|
|
5
5
|
Author-email: Fazle Elahee <felahee@gmail.com>, Raj <rajkumar.sakti@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -21,7 +21,6 @@ License-File: LICENSE
|
|
|
21
21
|
Requires-Dist: click>=8.1
|
|
22
22
|
Requires-Dist: pyyaml>=6.0
|
|
23
23
|
Requires-Dist: sqlite-vec>=0.1.6
|
|
24
|
-
Requires-Dist: fastembed>=0.4
|
|
25
24
|
Requires-Dist: numpy>=1.24
|
|
26
25
|
Requires-Dist: tree-sitter>=0.22
|
|
27
26
|
Requires-Dist: tree-sitter-python>=0.21
|
|
@@ -46,6 +45,8 @@ Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
|
46
45
|
Requires-Dist: pytest-xdist>=3.5; extra == "dev"
|
|
47
46
|
Requires-Dist: ruff>=0.13; extra == "dev"
|
|
48
47
|
Provides-Extra: http
|
|
48
|
+
Provides-Extra: local
|
|
49
|
+
Requires-Dist: fastembed>=0.4; extra == "local"
|
|
49
50
|
Dynamic: license-file
|
|
50
51
|
|
|
51
52
|
<p align="center">
|
|
@@ -148,6 +149,12 @@ cd /path/to/your/project
|
|
|
148
149
|
cce init # index, install hooks, register MCP server
|
|
149
150
|
```
|
|
150
151
|
|
|
152
|
+
**Embedding backends:** CCE auto-detects the best available backend. With the core install and Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra (when fastembed is installed it is preferred unless `CCE_EMBED_BACKEND=ollama` is set):
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
|
|
156
|
+
```
|
|
157
|
+
|
|
151
158
|
Restart your editor. Done. Every question now hits the index instead of re-reading files.
|
|
152
159
|
|
|
153
160
|
`cce init` auto-detects your editor and writes the right config:
|
|
@@ -425,11 +432,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
|
|
|
425
432
|
|
|
426
433
|
| Component | Size |
|
|
427
434
|
|-----------|------|
|
|
428
|
-
|
|
|
429
|
-
|
|
|
435
|
+
| Core install (Ollama backend) | ~17 MB |
|
|
436
|
+
| With `[local]` extra (fastembed + ONNX) | ~189 MB |
|
|
437
|
+
| Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
|
|
430
438
|
| Index per project (small/medium/large) | 5-60 MB |
|
|
431
439
|
|
|
432
|
-
No GPU required.
|
|
440
|
+
No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
|
|
433
441
|
|
|
434
442
|
---
|
|
435
443
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
"""CLI entry point for code-context-engine."""
|
|
3
3
|
import asyncio
|
|
4
4
|
import json
|
|
5
|
+
import os
|
|
5
6
|
import socket
|
|
6
7
|
import sys
|
|
7
8
|
from pathlib import Path
|
|
@@ -536,25 +537,43 @@ def _show_welcome_banner(config) -> None:
|
|
|
536
537
|
def _preflight_check(config) -> None:
|
|
537
538
|
"""Verify all required components are ready before indexing starts.
|
|
538
539
|
|
|
539
|
-
|
|
540
|
-
and
|
|
540
|
+
Auto-detects an embedding backend (fastembed → Ollama), reports which
|
|
541
|
+
one was picked, and surfaces Ollama status for the separate compression
|
|
542
|
+
path so users know what compression level they will get.
|
|
541
543
|
"""
|
|
542
|
-
# --- Embedding
|
|
543
|
-
click.echo(_dim("
|
|
544
|
+
# --- Embedding backend ---
|
|
545
|
+
click.echo(_dim(" Detecting embedding backend") + "...", nl=False)
|
|
546
|
+
from context_engine.config import resolve_ollama_url
|
|
547
|
+
ollama_model = getattr(config, "ollama_embed_model", "nomic-embed-text")
|
|
548
|
+
ollama_url = resolve_ollama_url(config)
|
|
544
549
|
try:
|
|
545
|
-
from
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
550
|
+
from context_engine.indexer.embedder import select_backend
|
|
551
|
+
# Don't echo a tentative "loading fastembed…" or "using Ollama…"
|
|
552
|
+
# banner before select_backend() picks. CCE_EMBED_BACKEND can
|
|
553
|
+
# force a different choice than the probe order suggests, and
|
|
554
|
+
# printing both messages produced contradictory output. Wait for
|
|
555
|
+
# the actual selection, then echo once with the truth.
|
|
556
|
+
backend = select_backend(
|
|
557
|
+
model_name=getattr(config, "embedding_model", "BAAI/bge-small-en-v1.5"),
|
|
558
|
+
ollama_model=ollama_model,
|
|
559
|
+
ollama_url=ollama_url,
|
|
560
|
+
)
|
|
561
|
+
click.echo(
|
|
562
|
+
" " + click.style(
|
|
563
|
+
f"ready ({backend.name}, {backend.dimension}-d, {backend.model_name})",
|
|
564
|
+
fg="green",
|
|
565
|
+
)
|
|
566
|
+
)
|
|
552
567
|
except Exception as exc:
|
|
553
568
|
click.echo("")
|
|
554
|
-
_warn(f"
|
|
555
|
-
_warn(
|
|
569
|
+
_warn(f"No embedding backend available: {exc}")
|
|
570
|
+
_warn(
|
|
571
|
+
"Install fastembed (`pip install code-context-engine[local]`) "
|
|
572
|
+
f"or start an Ollama server at {ollama_url} and pull "
|
|
573
|
+
f"{ollama_model}."
|
|
574
|
+
)
|
|
556
575
|
|
|
557
|
-
# --- Ollama (
|
|
576
|
+
# --- Ollama for LLM compression (independent of the embedding path) ---
|
|
558
577
|
try:
|
|
559
578
|
import httpx
|
|
560
579
|
resp = httpx.get("http://localhost:11434/api/tags", timeout=2.0)
|
|
@@ -2786,6 +2805,16 @@ async def _run_index(
|
|
|
2786
2805
|
async def _run_serve(config) -> None:
|
|
2787
2806
|
"""Start MCP server with live file watcher."""
|
|
2788
2807
|
import logging
|
|
2808
|
+
import signal
|
|
2809
|
+
# Force single-process embedding inside `cce serve` unless the user
|
|
2810
|
+
# explicitly overrode it. The reindex worker triggered by file changes
|
|
2811
|
+
# otherwise spawns a fastembed forkserver pool (~4 workers × ~1.6 GB on
|
|
2812
|
+
# Linux) that orphans on abnormal exit and leaks RSS across `cce index`
|
|
2813
|
+
# invocations (issue #66). Single-process embed is plenty for one-file
|
|
2814
|
+
# watcher reindexes; bulk `cce index` run from a separate shell still
|
|
2815
|
+
# gets the multiprocess path.
|
|
2816
|
+
os.environ.setdefault("CCE_EMBED_PARALLEL", "0")
|
|
2817
|
+
|
|
2789
2818
|
from context_engine.storage.local_backend import LocalBackend
|
|
2790
2819
|
from context_engine.indexer.embedder import Embedder
|
|
2791
2820
|
from context_engine.retrieval.retriever import HybridRetriever
|
|
@@ -2903,9 +2932,56 @@ async def _run_serve(config) -> None:
|
|
|
2903
2932
|
file=sys.stderr,
|
|
2904
2933
|
)
|
|
2905
2934
|
|
|
2935
|
+
# Install signal handlers so SIGINT (Ctrl-C), SIGTERM, and SIGHUP all
|
|
2936
|
+
# route through the same orderly shutdown path. Previously only SIGTERM
|
|
2937
|
+
# cancelled the MCP task — SIGINT was swallowed by stdio reads, leaving
|
|
2938
|
+
# `cce serve` unkillable except via SIGKILL, which orphans the embed
|
|
2939
|
+
# workers (#66).
|
|
2940
|
+
serve_loop = asyncio.get_running_loop()
|
|
2941
|
+
mcp_task = asyncio.create_task(mcp.run_stdio())
|
|
2942
|
+
|
|
2943
|
+
def _request_shutdown(signame: str) -> None:
|
|
2944
|
+
if not mcp_task.done():
|
|
2945
|
+
_log.info("Received %s, shutting down...", signame)
|
|
2946
|
+
mcp_task.cancel()
|
|
2947
|
+
|
|
2948
|
+
# Build the candidate list with getattr so we don't reference
|
|
2949
|
+
# `signal.SIGHUP` at the tuple-construction site — SIGHUP is
|
|
2950
|
+
# undefined on Windows and that AttributeError would fire *before*
|
|
2951
|
+
# the try/except below could swallow it, crashing `cce serve` on
|
|
2952
|
+
# Windows entirely (Copilot review on #69).
|
|
2953
|
+
installed_signals: list[int] = []
|
|
2954
|
+
candidate_sigs = [
|
|
2955
|
+
s for s in (
|
|
2956
|
+
getattr(signal, "SIGINT", None),
|
|
2957
|
+
getattr(signal, "SIGTERM", None),
|
|
2958
|
+
getattr(signal, "SIGHUP", None),
|
|
2959
|
+
) if s is not None
|
|
2960
|
+
]
|
|
2961
|
+
for _sig in candidate_sigs:
|
|
2962
|
+
try:
|
|
2963
|
+
serve_loop.add_signal_handler(
|
|
2964
|
+
_sig, _request_shutdown, _sig.name,
|
|
2965
|
+
)
|
|
2966
|
+
installed_signals.append(_sig)
|
|
2967
|
+
except (NotImplementedError, RuntimeError):
|
|
2968
|
+
# Windows's ProactorEventLoop refuses add_signal_handler;
|
|
2969
|
+
# asyncio also raises NotImplementedError outside the main
|
|
2970
|
+
# thread. SIGTERM still arrives via the default Python
|
|
2971
|
+
# handler in those environments.
|
|
2972
|
+
pass
|
|
2973
|
+
|
|
2906
2974
|
try:
|
|
2907
|
-
|
|
2975
|
+
try:
|
|
2976
|
+
await mcp_task
|
|
2977
|
+
except asyncio.CancelledError:
|
|
2978
|
+
pass
|
|
2908
2979
|
finally:
|
|
2980
|
+
for _sig in installed_signals:
|
|
2981
|
+
try:
|
|
2982
|
+
serve_loop.remove_signal_handler(_sig)
|
|
2983
|
+
except (NotImplementedError, RuntimeError):
|
|
2984
|
+
pass
|
|
2909
2985
|
if watcher:
|
|
2910
2986
|
watcher.stop()
|
|
2911
2987
|
if worker_task:
|
|
@@ -59,6 +59,11 @@ class Config:
|
|
|
59
59
|
|
|
60
60
|
# Embedding
|
|
61
61
|
embedding_model: str = "BAAI/bge-small-en-v1.5"
|
|
62
|
+
# Model used when the Ollama embedding backend is selected. Only
|
|
63
|
+
# consulted if fastembed isn't installed or `CCE_EMBED_BACKEND=ollama`
|
|
64
|
+
# forces the Ollama path. 768-dim default; switching this triggers a
|
|
65
|
+
# full reindex because the vector store rejects dimension mismatches.
|
|
66
|
+
ollama_embed_model: str = "nomic-embed-text"
|
|
62
67
|
|
|
63
68
|
# Retrieval
|
|
64
69
|
retrieval_confidence_threshold: float = 0.2
|
|
@@ -120,6 +125,7 @@ _EXPECTED_TYPES: dict[str, type | tuple[type, ...]] = {
|
|
|
120
125
|
"ollama_url": str,
|
|
121
126
|
"output_compression": str,
|
|
122
127
|
"embedding_model": str,
|
|
128
|
+
"ollama_embed_model": str,
|
|
123
129
|
"retrieval_confidence_threshold": (int, float),
|
|
124
130
|
"retrieval_top_k": int,
|
|
125
131
|
"bootstrap_max_tokens": int,
|
|
@@ -141,6 +147,7 @@ def _apply_dict_to_config(config: Config, data: dict) -> None:
|
|
|
141
147
|
("compression", "ollama_url"): "ollama_url",
|
|
142
148
|
("compression", "output"): "output_compression",
|
|
143
149
|
("embedding", "model"): "embedding_model",
|
|
150
|
+
("embedding", "ollama_model"): "ollama_embed_model",
|
|
144
151
|
("retrieval", "confidence_threshold"): "retrieval_confidence_threshold",
|
|
145
152
|
("retrieval", "top_k"): "retrieval_top_k",
|
|
146
153
|
("retrieval", "bootstrap_max_tokens"): "bootstrap_max_tokens",
|