code-context-engine 0.4.19__tar.gz → 0.4.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {code_context_engine-0.4.19/src/code_context_engine.egg-info → code_context_engine-0.4.20}/PKG-INFO +13 -5
  2. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/README.md +10 -3
  3. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/pyproject.toml +6 -2
  4. {code_context_engine-0.4.19 → code_context_engine-0.4.20/src/code_context_engine.egg-info}/PKG-INFO +13 -5
  5. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/requires.txt +3 -1
  6. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/cli.py +91 -15
  7. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/config.py +7 -0
  8. code_context_engine-0.4.20/src/context_engine/indexer/embedder.py +539 -0
  9. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/git_hooks.py +16 -2
  10. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/manifest.py +27 -0
  11. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/pipeline.py +45 -3
  12. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/watcher.py +35 -4
  13. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/mcp_server.py +62 -15
  14. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/hook_installer.py +35 -0
  15. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/hook_server.py +25 -9
  16. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/project_commands.py +6 -0
  17. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_project_commands.py +15 -0
  18. code_context_engine-0.4.19/src/context_engine/indexer/embedder.py +0 -158
  19. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/LICENSE +0 -0
  20. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/setup.cfg +0 -0
  21. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/SOURCES.txt +0 -0
  22. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/dependency_links.txt +0 -0
  23. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/entry_points.txt +0 -0
  24. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/code_context_engine.egg-info/top_level.txt +0 -0
  25. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/__init__.py +0 -0
  26. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/cli_style.py +0 -0
  27. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/__init__.py +0 -0
  28. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/compressor.py +0 -0
  29. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/ollama_client.py +0 -0
  30. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/output_rules.py +0 -0
  31. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/prompts.py +0 -0
  32. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/compression/quality.py +0 -0
  33. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/dashboard/__init__.py +0 -0
  34. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/dashboard/_page.py +0 -0
  35. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/dashboard/server.py +0 -0
  36. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/editors.py +0 -0
  37. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/event_bus.py +0 -0
  38. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/__init__.py +0 -0
  39. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/chunker.py +0 -0
  40. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/embedding_cache.py +0 -0
  41. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/git_indexer.py +0 -0
  42. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/ignorefile.py +0 -0
  43. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/indexer/secrets.py +0 -0
  44. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/__init__.py +0 -0
  45. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/bootstrap.py +0 -0
  46. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/git_context.py +0 -0
  47. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/integration/session_capture.py +0 -0
  48. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/__init__.py +0 -0
  49. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/compressor.py +0 -0
  50. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/db.py +0 -0
  51. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/decision_extractor.py +0 -0
  52. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/extractive.py +0 -0
  53. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/grammar.py +0 -0
  54. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/hooks.py +0 -0
  55. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/memory/migrate.py +0 -0
  56. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/models.py +0 -0
  57. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/pricing.py +0 -0
  58. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/__init__.py +0 -0
  59. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/confidence.py +0 -0
  60. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/query_parser.py +0 -0
  61. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/retrieval/retriever.py +0 -0
  62. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/serve_http.py +0 -0
  63. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/services.py +0 -0
  64. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/__init__.py +0 -0
  65. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/backend.py +0 -0
  66. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/fts_store.py +0 -0
  67. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/graph_store.py +0 -0
  68. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/local_backend.py +0 -0
  69. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/remote_backend.py +0 -0
  70. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/storage/vector_store.py +0 -0
  71. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/src/context_engine/utils.py +0 -0
  72. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_init_probe.py +0 -0
  73. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_mcp_config.py +0 -0
  74. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_safe_cwd.py +0 -0
  75. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_savings.py +0 -0
  76. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_savings_buckets.py +0 -0
  77. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_savings_e2e.py +0 -0
  78. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_serve.py +0 -0
  79. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_sessions_export.py +0 -0
  80. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_sessions_status.py +0 -0
  81. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_smoke.py +0 -0
  82. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_cli_uninstall.py +0 -0
  83. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_config.py +0 -0
  84. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_e2e.py +0 -0
  85. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_editors_codex.py +0 -0
  86. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_editors_opencode.py +0 -0
  87. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_event_bus.py +0 -0
  88. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_models.py +0 -0
  89. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_real_life.py +0 -0
  90. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_services.py +0 -0
  91. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_token_efficiency.py +0 -0
  92. {code_context_engine-0.4.19 → code_context_engine-0.4.20}/tests/test_token_packing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-context-engine
3
- Version: 0.4.19
3
+ Version: 0.4.20
4
4
  Summary: Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server.
5
5
  Author-email: Fazle Elahee <felahee@gmail.com>, Raj <rajkumar.sakti@gmail.com>
6
6
  License-Expression: MIT
@@ -21,7 +21,6 @@ License-File: LICENSE
21
21
  Requires-Dist: click>=8.1
22
22
  Requires-Dist: pyyaml>=6.0
23
23
  Requires-Dist: sqlite-vec>=0.1.6
24
- Requires-Dist: fastembed>=0.4
25
24
  Requires-Dist: numpy>=1.24
26
25
  Requires-Dist: tree-sitter>=0.22
27
26
  Requires-Dist: tree-sitter-python>=0.21
@@ -46,6 +45,8 @@ Requires-Dist: pytest-cov>=5.0; extra == "dev"
46
45
  Requires-Dist: pytest-xdist>=3.5; extra == "dev"
47
46
  Requires-Dist: ruff>=0.13; extra == "dev"
48
47
  Provides-Extra: http
48
+ Provides-Extra: local
49
+ Requires-Dist: fastembed>=0.4; extra == "local"
49
50
  Dynamic: license-file
50
51
 
51
52
  <p align="center">
@@ -148,6 +149,12 @@ cd /path/to/your/project
148
149
  cce init # index, install hooks, register MCP server
149
150
  ```
150
151
 
152
+ **Embedding backends:** CCE auto-detects the best available backend. If you have Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra:
153
+
154
+ ```bash
155
+ uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
156
+ ```
157
+
151
158
  Restart your editor. Done. Every question now hits the index instead of re-reading files.
152
159
 
153
160
  `cce init` auto-detects your editor and writes the right config:
@@ -425,11 +432,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
425
432
 
426
433
  | Component | Size |
427
434
  |-----------|------|
428
- | Installed package | ~189 MB (ONNX Runtime is 66 MB of that) |
429
- | Embedding model (one-time download) | ~60 MB |
435
+ | Core install (Ollama backend) | ~17 MB |
436
+ | With `[local]` extra (fastembed + ONNX) | ~189 MB |
437
+ | Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
430
438
  | Index per project (small/medium/large) | 5-60 MB |
431
439
 
432
- No GPU required. Embedding model runs on CPU via ONNX Runtime.
440
+ No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
433
441
 
434
442
  ---
435
443
 
@@ -98,6 +98,12 @@ cd /path/to/your/project
98
98
  cce init # index, install hooks, register MCP server
99
99
  ```
100
100
 
101
+ **Embedding backends:** CCE auto-detects the best available backend. If you have Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra:
102
+
103
+ ```bash
104
+ uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
105
+ ```
106
+
101
107
  Restart your editor. Done. Every question now hits the index instead of re-reading files.
102
108
 
103
109
  `cce init` auto-detects your editor and writes the right config:
@@ -375,11 +381,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
375
381
 
376
382
  | Component | Size |
377
383
  |-----------|------|
378
- | Installed package | ~189 MB (ONNX Runtime is 66 MB of that) |
379
- | Embedding model (one-time download) | ~60 MB |
384
+ | Core install (Ollama backend) | ~17 MB |
385
+ | With `[local]` extra (fastembed + ONNX) | ~189 MB |
386
+ | Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
380
387
  | Index per project (small/medium/large) | 5-60 MB |
381
388
 
382
- No GPU required. Embedding model runs on CPU via ONNX Runtime.
389
+ No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
383
390
 
384
391
  ---
385
392
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "code-context-engine"
3
- version = "0.4.19"
3
+ version = "0.4.20"
4
4
  description = "Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server."
5
5
  readme = {file = "README.md", content-type = "text/markdown"}
6
6
  license = "MIT"
@@ -23,7 +23,6 @@ dependencies = [
23
23
  "click>=8.1",
24
24
  "pyyaml>=6.0",
25
25
  "sqlite-vec>=0.1.6",
26
- "fastembed>=0.4",
27
26
  "numpy>=1.24",
28
27
  "tree-sitter>=0.22",
29
28
  "tree-sitter-python>=0.21",
@@ -60,6 +59,11 @@ dev = [
60
59
  "ruff>=0.13",
61
60
  ]
62
61
  http = [] # back-compat: aiohttp is now a core dependency
62
+ # Local on-device embedding via fastembed (ONNX). ~172 MB install
63
+ # footprint; needed only if you don't have Ollama running. Without
64
+ # this extra, CCE auto-detects Ollama at localhost:11434 and uses
65
+ # nomic-embed-text via /api/embed.
66
+ local = ["fastembed>=0.4"]
63
67
 
64
68
  [project.scripts]
65
69
  cce = "context_engine.cli:main"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code-context-engine
3
- Version: 0.4.19
3
+ Version: 0.4.20
4
4
  Summary: Save 94% on Claude Code tokens. Index your codebase locally, AI agents search instead of reading files. Reduce Claude API costs, save tokens on Cursor, VS Code, Gemini CLI. Free, open source MCP server.
5
5
  Author-email: Fazle Elahee <felahee@gmail.com>, Raj <rajkumar.sakti@gmail.com>
6
6
  License-Expression: MIT
@@ -21,7 +21,6 @@ License-File: LICENSE
21
21
  Requires-Dist: click>=8.1
22
22
  Requires-Dist: pyyaml>=6.0
23
23
  Requires-Dist: sqlite-vec>=0.1.6
24
- Requires-Dist: fastembed>=0.4
25
24
  Requires-Dist: numpy>=1.24
26
25
  Requires-Dist: tree-sitter>=0.22
27
26
  Requires-Dist: tree-sitter-python>=0.21
@@ -46,6 +45,8 @@ Requires-Dist: pytest-cov>=5.0; extra == "dev"
46
45
  Requires-Dist: pytest-xdist>=3.5; extra == "dev"
47
46
  Requires-Dist: ruff>=0.13; extra == "dev"
48
47
  Provides-Extra: http
48
+ Provides-Extra: local
49
+ Requires-Dist: fastembed>=0.4; extra == "local"
49
50
  Dynamic: license-file
50
51
 
51
52
  <p align="center">
@@ -148,6 +149,12 @@ cd /path/to/your/project
148
149
  cce init # index, install hooks, register MCP server
149
150
  ```
150
151
 
152
+ **Embedding backends:** CCE auto-detects the best available backend. If you have Ollama running, it uses `nomic-embed-text` with zero extra dependencies. For offline/local embedding without Ollama, install the `[local]` extra:
153
+
154
+ ```bash
155
+ uv tool install "code-context-engine[local]" # includes fastembed + ONNX Runtime
156
+ ```
157
+
151
158
  Restart your editor. Done. Every question now hits the index instead of re-reading files.
152
159
 
153
160
  `cce init` auto-detects your editor and writes the right config:
@@ -425,11 +432,12 @@ Tell Claude: "switch to max compression" or "turn off compression". Code blocks
425
432
 
426
433
  | Component | Size |
427
434
  |-----------|------|
428
- | Installed package | ~189 MB (ONNX Runtime is 66 MB of that) |
429
- | Embedding model (one-time download) | ~60 MB |
435
+ | Core install (Ollama backend) | ~17 MB |
436
+ | With `[local]` extra (fastembed + ONNX) | ~189 MB |
437
+ | Embedding model (one-time download) | ~60 MB (fastembed) or managed by Ollama |
430
438
  | Index per project (small/medium/large) | 5-60 MB |
431
439
 
432
- No GPU required. Embedding model runs on CPU via ONNX Runtime.
440
+ No GPU required. With Ollama, embeddings are handled by the Ollama server. With the `[local]` extra, the embedding model runs on CPU via ONNX Runtime.
433
441
 
434
442
  ---
435
443
 
@@ -1,7 +1,6 @@
1
1
  click>=8.1
2
2
  pyyaml>=6.0
3
3
  sqlite-vec>=0.1.6
4
- fastembed>=0.4
5
4
  numpy>=1.24
6
5
  tree-sitter>=0.22
7
6
  tree-sitter-python>=0.21
@@ -28,3 +27,6 @@ pytest-xdist>=3.5
28
27
  ruff>=0.13
29
28
 
30
29
  [http]
30
+
31
+ [local]
32
+ fastembed>=0.4
@@ -2,6 +2,7 @@
2
2
  """CLI entry point for code-context-engine."""
3
3
  import asyncio
4
4
  import json
5
+ import os
5
6
  import socket
6
7
  import sys
7
8
  from pathlib import Path
@@ -536,25 +537,43 @@ def _show_welcome_banner(config) -> None:
536
537
  def _preflight_check(config) -> None:
537
538
  """Verify all required components are ready before indexing starts.
538
539
 
539
- Downloads the embedding model on first use with a clear progress message,
540
- and reports Ollama status so users know what compression level they will get.
540
+ Auto-detects an embedding backend (fastembed → Ollama), reports which
541
+ one was picked, and surfaces Ollama status for the separate compression
542
+ path so users know what compression level they will get.
541
543
  """
542
- # --- Embedding model ---
543
- click.echo(_dim(" Checking embedding model") + "...", nl=False)
544
+ # --- Embedding backend ---
545
+ click.echo(_dim(" Detecting embedding backend") + "...", nl=False)
546
+ from context_engine.config import resolve_ollama_url
547
+ ollama_model = getattr(config, "ollama_embed_model", "nomic-embed-text")
548
+ ollama_url = resolve_ollama_url(config)
544
549
  try:
545
- from fastembed import TextEmbedding
546
- model_name = getattr(config, "embedding_model", "BAAI/bge-small-en-v1.5")
547
- if "/" not in model_name:
548
- model_name = f"sentence-transformers/{model_name}"
549
- click.echo(_dim(" downloading if needed (60 MB, first time only)") + "...", nl=False)
550
- TextEmbedding(model_name)
551
- click.echo(" " + click.style("ready", fg="green"))
550
+ from context_engine.indexer.embedder import select_backend
551
+ # Don't echo a tentative "loading fastembed…" or "using Ollama…"
552
+ # banner before select_backend() picks. CCE_EMBED_BACKEND can
553
+ # force a different choice than the probe order suggests, and
554
+ # printing both messages produced contradictory output. Wait for
555
+ # the actual selection, then echo once with the truth.
556
+ backend = select_backend(
557
+ model_name=getattr(config, "embedding_model", "BAAI/bge-small-en-v1.5"),
558
+ ollama_model=ollama_model,
559
+ ollama_url=ollama_url,
560
+ )
561
+ click.echo(
562
+ " " + click.style(
563
+ f"ready ({backend.name}, {backend.dimension}-d, {backend.model_name})",
564
+ fg="green",
565
+ )
566
+ )
552
567
  except Exception as exc:
553
568
  click.echo("")
554
- _warn(f"Could not load embedding model: {exc}")
555
- _warn("Indexing will attempt to continue but may fail.")
569
+ _warn(f"No embedding backend available: {exc}")
570
+ _warn(
571
+ "Install fastembed (`pip install code-context-engine[local]`) "
572
+ f"or start an Ollama server at {ollama_url} and pull "
573
+ f"{ollama_model}."
574
+ )
556
575
 
557
- # --- Ollama (optional) ---
576
+ # --- Ollama for LLM compression (independent of the embedding path) ---
558
577
  try:
559
578
  import httpx
560
579
  resp = httpx.get("http://localhost:11434/api/tags", timeout=2.0)
@@ -2786,6 +2805,16 @@ async def _run_index(
2786
2805
  async def _run_serve(config) -> None:
2787
2806
  """Start MCP server with live file watcher."""
2788
2807
  import logging
2808
+ import signal
2809
+ # Force single-process embedding inside `cce serve` unless the user
2810
+ # explicitly overrode it. The reindex worker triggered by file changes
2811
+ # otherwise spawns a fastembed forkserver pool (~4 workers × ~1.6 GB on
2812
+ # Linux) that orphans on abnormal exit and leaks RSS across `cce index`
2813
+ # invocations (issue #66). Single-process embed is plenty for one-file
2814
+ # watcher reindexes; bulk `cce index` run from a separate shell still
2815
+ # gets the multiprocess path.
2816
+ os.environ.setdefault("CCE_EMBED_PARALLEL", "0")
2817
+
2789
2818
  from context_engine.storage.local_backend import LocalBackend
2790
2819
  from context_engine.indexer.embedder import Embedder
2791
2820
  from context_engine.retrieval.retriever import HybridRetriever
@@ -2903,9 +2932,56 @@ async def _run_serve(config) -> None:
2903
2932
  file=sys.stderr,
2904
2933
  )
2905
2934
 
2935
+ # Install signal handlers so SIGINT (Ctrl-C), SIGTERM, and SIGHUP all
2936
+ # route through the same orderly shutdown path. Previously only SIGTERM
2937
+ # cancelled the MCP task — SIGINT was swallowed by stdio reads, leaving
2938
+ # `cce serve` unkillable except via SIGKILL, which orphans the embed
2939
+ # workers (#66).
2940
+ serve_loop = asyncio.get_running_loop()
2941
+ mcp_task = asyncio.create_task(mcp.run_stdio())
2942
+
2943
+ def _request_shutdown(signame: str) -> None:
2944
+ if not mcp_task.done():
2945
+ _log.info("Received %s, shutting down...", signame)
2946
+ mcp_task.cancel()
2947
+
2948
+ # Build the candidate list with getattr so we don't reference
2949
+ # `signal.SIGHUP` at the tuple-construction site — SIGHUP is
2950
+ # undefined on Windows and that AttributeError would fire *before*
2951
+ # the try/except below could swallow it, crashing `cce serve` on
2952
+ # Windows entirely (Copilot review on #69).
2953
+ installed_signals: list[int] = []
2954
+ candidate_sigs = [
2955
+ s for s in (
2956
+ getattr(signal, "SIGINT", None),
2957
+ getattr(signal, "SIGTERM", None),
2958
+ getattr(signal, "SIGHUP", None),
2959
+ ) if s is not None
2960
+ ]
2961
+ for _sig in candidate_sigs:
2962
+ try:
2963
+ serve_loop.add_signal_handler(
2964
+ _sig, _request_shutdown, _sig.name,
2965
+ )
2966
+ installed_signals.append(_sig)
2967
+ except (NotImplementedError, RuntimeError):
2968
+ # Windows's ProactorEventLoop refuses add_signal_handler;
2969
+ # asyncio also raises NotImplementedError outside the main
2970
+ # thread. SIGTERM still arrives via the default Python
2971
+ # handler in those environments.
2972
+ pass
2973
+
2906
2974
  try:
2907
- await mcp.run_stdio()
2975
+ try:
2976
+ await mcp_task
2977
+ except asyncio.CancelledError:
2978
+ pass
2908
2979
  finally:
2980
+ for _sig in installed_signals:
2981
+ try:
2982
+ serve_loop.remove_signal_handler(_sig)
2983
+ except (NotImplementedError, RuntimeError):
2984
+ pass
2909
2985
  if watcher:
2910
2986
  watcher.stop()
2911
2987
  if worker_task:
@@ -59,6 +59,11 @@ class Config:
59
59
 
60
60
  # Embedding
61
61
  embedding_model: str = "BAAI/bge-small-en-v1.5"
62
+ # Model used when the Ollama embedding backend is selected. Only
63
+ # consulted if fastembed isn't installed or `CCE_EMBED_BACKEND=ollama`
64
+ # forces the Ollama path. 768-dim default; switching this triggers a
65
+ # full reindex because the vector store rejects dimension mismatches.
66
+ ollama_embed_model: str = "nomic-embed-text"
62
67
 
63
68
  # Retrieval
64
69
  retrieval_confidence_threshold: float = 0.2
@@ -120,6 +125,7 @@ _EXPECTED_TYPES: dict[str, type | tuple[type, ...]] = {
120
125
  "ollama_url": str,
121
126
  "output_compression": str,
122
127
  "embedding_model": str,
128
+ "ollama_embed_model": str,
123
129
  "retrieval_confidence_threshold": (int, float),
124
130
  "retrieval_top_k": int,
125
131
  "bootstrap_max_tokens": int,
@@ -141,6 +147,7 @@ def _apply_dict_to_config(config: Config, data: dict) -> None:
141
147
  ("compression", "ollama_url"): "ollama_url",
142
148
  ("compression", "output"): "output_compression",
143
149
  ("embedding", "model"): "embedding_model",
150
+ ("embedding", "ollama_model"): "ollama_embed_model",
144
151
  ("retrieval", "confidence_threshold"): "retrieval_confidence_threshold",
145
152
  ("retrieval", "top_k"): "retrieval_top_k",
146
153
  ("retrieval", "bootstrap_max_tokens"): "bootstrap_max_tokens",