cocoindex-code 0.2.22__tar.gz → 0.2.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/.gitignore +4 -0
  2. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/PKG-INFO +105 -37
  3. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/README.md +98 -35
  4. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/pyproject.toml +13 -2
  5. cocoindex_code-0.2.24/src/cocoindex_code/_daemon_paths.py +60 -0
  6. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/_version.py +2 -2
  7. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/cli.py +207 -21
  8. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/client.py +40 -14
  9. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/daemon.py +63 -31
  10. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/protocol.py +1 -0
  11. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/settings.py +140 -40
  12. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/shared.py +37 -1
  13. cocoindex_code-0.2.22/src/cocoindex_code/_daemon_paths.py +0 -44
  14. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/LICENSE +0 -0
  15. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/__init__.py +0 -0
  16. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/__main__.py +0 -0
  17. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/chunking.py +0 -0
  18. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/indexer.py +0 -0
  19. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/litellm_embedder.py +0 -0
  20. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/project.py +0 -0
  21. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/query.py +0 -0
  22. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/schema.py +0 -0
  23. {cocoindex_code-0.2.22 → cocoindex_code-0.2.24}/src/cocoindex_code/server.py +0 -0
@@ -46,3 +46,7 @@ src/cocoindex_code/_version.py
46
46
 
47
47
  # CocoIndex Code (ccc)
48
48
  /.cocoindex_code/
49
+
50
+ # Docker E2E fixtures contain a `lib/` dir that the generic Python rule above
51
+ # would ignore — keep it tracked.
52
+ !tests/e2e_docker_fixtures/**
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex-code
3
- Version: 0.2.22
3
+ Version: 0.2.24
4
4
  Summary: MCP server for indexing and querying codebases using CocoIndex
5
5
  Project-URL: Homepage, https://github.com/cocoindex-io/cocoindex-code
6
6
  Project-URL: Repository, https://github.com/cocoindex-io/cocoindex-code
@@ -25,16 +25,21 @@ Requires-Dist: numpy>=1.24.0
25
25
  Requires-Dist: pathspec>=0.12.1
26
26
  Requires-Dist: pydantic>=2.0.0
27
27
  Requires-Dist: pyyaml>=6.0
28
- Requires-Dist: sentence-transformers>=2.2.0
28
+ Requires-Dist: questionary>=2.0.0
29
29
  Requires-Dist: sqlite-vec>=0.1.0
30
30
  Requires-Dist: typer>=0.9.0
31
+ Provides-Extra: default
32
+ Requires-Dist: cocoindex[sentence-transformers]==1.0.0a43; extra == 'default'
31
33
  Provides-Extra: dev
34
+ Requires-Dist: cocoindex[sentence-transformers]==1.0.0a43; extra == 'dev'
32
35
  Requires-Dist: mypy>=1.0.0; extra == 'dev'
33
36
  Requires-Dist: prek>=0.1.0; extra == 'dev'
34
37
  Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
35
38
  Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
36
39
  Requires-Dist: pytest>=7.0.0; extra == 'dev'
37
40
  Requires-Dist: ruff>=0.1.0; extra == 'dev'
41
+ Provides-Extra: embeddings-local
42
+ Requires-Dist: cocoindex[sentence-transformers]==1.0.0a43; extra == 'embeddings-local'
38
43
  Description-Content-Type: text/markdown
39
44
 
40
45
  <p align="center">
@@ -85,16 +90,18 @@ A lightweight, effective **(AST-based)** semantic code search tool for your code
85
90
 
86
91
  Using [pipx](https://pipx.pypa.io/stable/installation/):
87
92
  ```bash
88
- pipx install cocoindex-code # first install
89
- pipx upgrade cocoindex-code # upgrade
93
+ pipx install 'cocoindex-code[default]' # batteries included (local embeddings)
94
+ pipx upgrade cocoindex-code # upgrade
90
95
  ```
91
96
 
92
97
  Using [uv](https://docs.astral.sh/uv/getting-started/installation/):
93
98
  ```bash
94
- uv tool install --upgrade cocoindex-code --prerelease explicit --with "cocoindex>=1.0.0a24"
99
+ uv tool install --upgrade 'cocoindex-code[default]' --prerelease explicit --with "cocoindex>=1.0.0a24"
95
100
  ```
96
101
 
97
- The default embedding model runs locally ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) — no API key required, completely free.
102
+ Two install styles:
103
+ - `cocoindex-code[default]` — batteries-included. Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs).
104
+ - `cocoindex-code` — slim. LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers).
98
105
 
99
106
  Next, set up your [coding agent integration](#coding-agent-integration) — or jump to [Manual CLI Usage](#manual-cli-usage) if you prefer direct control.
100
107
 
@@ -235,33 +242,79 @@ The recommended approach is a **persistent container**: start it once, and use
235
242
  `docker exec` to run CLI commands or connect MCP sessions to it. The daemon
236
243
  inside stays warm across sessions, so the embedding model is loaded only once.
237
244
 
238
- ### Step 1 — Start the container
245
+ ### Quick start — `docker compose up -d`
246
+
247
+ Grab [`docker/docker-compose.yml`](./docker/docker-compose.yml) from this repo and run:
248
+
249
+ ```bash
250
+ # macOS / Windows
251
+ docker compose up -d
252
+
253
+ # Linux (aligns file ownership on bind-mounted paths with your host user)
254
+ PUID=$(id -u) PGID=$(id -g) docker compose up -d
255
+ ```
256
+
257
+ By default your home directory is mounted into the container (set
258
+ `COCOINDEX_HOST_WORKSPACE` to narrow this to a specific code folder). Index
259
+ data and the embedding model cache persist in a Docker volume across
260
+ restarts. Your global settings file at `$HOME/.cocoindex_code/global_settings.yml`
261
+ is visible and editable on the host; edits take effect on your next `ccc` command.
262
+
263
+ > **GHCR:** to pull from GitHub Container Registry instead of Docker Hub,
264
+ > change the `image:` line in your copy of `docker-compose.yml` to
265
+ > `ghcr.io/cocoindex-io/cocoindex-code:latest`.
266
+
267
+ ### Or: `docker run`
268
+
269
+ <details>
270
+ <summary>Docker Desktop (macOS / Windows)</summary>
271
+
272
+ ```bash
273
+ docker run -d --name cocoindex-code \
274
+ --volume "$HOME:/workspace" \
275
+ --volume cocoindex-data:/var/cocoindex \
276
+ -e COCOINDEX_CODE_HOST_PATH_MAPPING="/workspace=$HOME" \
277
+ cocoindex/cocoindex-code:latest
278
+ ```
279
+ </details>
280
+
281
+ <details>
282
+ <summary>Linux (with <code>PUID</code>/<code>PGID</code>)</summary>
239
283
 
240
284
  ```bash
241
285
  docker run -d --name cocoindex-code \
242
- --volume "$(pwd):/workspace" \
243
- --volume cocoindex-db:/db \
244
- --volume cocoindex-model-cache:/root/.cache \
245
- ghcr.io/cocoindex-io/cocoindex-code:latest
286
+ -e PUID=$(id -u) -e PGID=$(id -g) \
287
+ --volume "$HOME:/workspace" \
288
+ --volume cocoindex-data:/var/cocoindex \
289
+ -e COCOINDEX_CODE_HOST_PATH_MAPPING="/workspace=$HOME" \
290
+ cocoindex/cocoindex-code:latest
246
291
  ```
292
+ </details>
247
293
 
248
- - `/workspace` — mount your project root here
249
- - `cocoindex-db` — index databases live inside the container (fast native I/O, no cross-OS volume issues)
250
- - `cocoindex-model-cache` — persists the embedding model across image upgrades
294
+ ### Shell wrapper for `ccc` commands
251
295
 
252
- ### Step 2 — Index your codebase
296
+ Paste this into `~/.bashrc` / `~/.zshrc` so `ccc` feels native on the host
297
+ and picks up the right project based on your current directory:
253
298
 
254
299
  ```bash
255
- docker exec -it cocoindex-code ccc index
300
+ ccc() {
301
+ docker exec -it -e COCOINDEX_CODE_HOST_CWD="$PWD" cocoindex-code ccc "$@"
302
+ }
256
303
  ```
257
304
 
258
- ### Step 3 — Connect your coding agent
305
+ Now `cd` into any project under your workspace and run `ccc init`, `ccc index`,
306
+ `ccc search ...`, `ccc status`, etc. — it just works.
307
+
308
+ ### Connect your coding agent
259
309
 
260
310
  <details>
261
311
  <summary>Claude Code</summary>
262
312
 
313
+ Register MCP from inside the target project so `$PWD` points there:
314
+
263
315
  ```bash
264
- claude mcp add cocoindex-code -- docker exec -i cocoindex-code ccc mcp
316
+ claude mcp add cocoindex-code -- docker exec -i \
317
+ -e COCOINDEX_CODE_HOST_CWD="$PWD" cocoindex-code ccc mcp
265
318
  ```
266
319
 
267
320
  Or via `.mcp.json`:
@@ -272,40 +325,50 @@ Or via `.mcp.json`:
272
325
  "cocoindex-code": {
273
326
  "type": "stdio",
274
327
  "command": "docker",
275
- "args": ["exec", "-i", "cocoindex-code", "ccc", "mcp"]
328
+ "args": [
329
+ "exec",
330
+ "-i",
331
+ "-e",
332
+ "COCOINDEX_CODE_HOST_CWD=${PWD}",
333
+ "cocoindex-code",
334
+ "ccc",
335
+ "mcp"
336
+ ]
276
337
  }
277
338
  }
278
339
  }
279
340
  ```
341
+
342
+ > Note: use `-i` (not `-it`). The `-t` flag allocates a terminal, which
343
+ > interferes with MCP's JSON messaging over stdin/stdout — only add it for
344
+ > interactive `ccc` commands like `ccc init`.
280
345
  </details>
281
346
 
282
347
  <details>
283
348
  <summary>Codex</summary>
284
349
 
285
350
  ```bash
286
- codex mcp add cocoindex-code -- docker exec -i cocoindex-code ccc mcp
351
+ codex mcp add cocoindex-code -- docker exec -i \
352
+ -e COCOINDEX_CODE_HOST_CWD="$PWD" cocoindex-code ccc mcp
287
353
  ```
288
354
  </details>
289
355
 
290
- ### CLI usage inside the container
291
-
292
- All `ccc` commands work via `docker exec`:
293
-
294
- ```bash
295
- docker exec -it cocoindex-code ccc index
296
- docker exec -it cocoindex-code ccc search "authentication logic"
297
- docker exec -it cocoindex-code ccc status
298
- ```
356
+ ### Upgrading from an older image
299
357
 
300
- Or set an alias on your host so it feels native:
358
+ Earlier images used separate `cocoindex-db` and `cocoindex-model-cache`
359
+ volumes; the current image consolidates them into a single `cocoindex-data`
360
+ volume. Before pulling the new image, drop the old container and volumes —
361
+ indexes rebuild on your next `ccc index`, and the embedding model is
362
+ re-populated automatically on first start:
301
363
 
302
364
  ```bash
303
- alias ccc='docker exec -it cocoindex-code ccc'
365
+ docker rm -f cocoindex-code
366
+ docker volume rm cocoindex-db cocoindex-model-cache
304
367
  ```
305
368
 
306
369
  ### Configuration via environment variables
307
370
 
308
- Pass configuration to `docker run` with `-e`:
371
+ Pass configuration to `docker run` with `-e` (for compose, set the same variables under `environment:` in your `docker-compose.yml`):
309
372
 
310
373
  ```bash
311
374
  # Extra extensions (e.g. Typesafe Config, SBT build files)
@@ -314,11 +377,14 @@ Pass configuration to `docker run` with `-e`:
314
377
  # Exclude build artefacts (Scala/SBT example)
315
378
  -e COCOINDEX_CODE_EXCLUDE_PATTERNS='["**/target/**","**/.bloop/**","**/.metals/**"]'
316
379
 
317
- # Swap in a code-optimised embedding model
318
- -e COCOINDEX_CODE_EMBEDDING_MODEL=voyage/voyage-code-3
380
+ # Set an API key
319
381
  -e VOYAGE_API_KEY=your-key
320
382
  ```
321
383
 
384
+ > **Security note:** mounting `$HOME` gives the container read/write access
385
+ > to everything under it. If that's too broad, bind-mount a narrower
386
+ > directory instead (`COCOINDEX_HOST_WORKSPACE=/path/to/code`).
387
+
322
388
  ### Build the image locally
323
389
 
324
390
  ```bash
@@ -330,7 +396,7 @@ docker build -t cocoindex-code:local -f docker/Dockerfile .
330
396
  - **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates.
331
397
  - **Multi-Language Support**: Python, JavaScript/TypeScript, Rust, Go, Java, C/C++, C#, SQL, Shell, and more.
332
398
  - **Embedded**: Portable and just works, no database setup required!
333
- - **Flexible Embeddings**: Local SentenceTransformers by default (free!) or 100+ cloud providers via LiteLLM.
399
+ - **Flexible Embeddings**: Local SentenceTransformers via the `[default]` extra (free, no API key!) or 100+ cloud providers via LiteLLM.
334
400
 
335
401
  ## Configuration
336
402
 
@@ -343,7 +409,7 @@ Shared across all projects. Controls the embedding model and environment variabl
343
409
  ```yaml
344
410
  embedding:
345
411
  provider: sentence-transformers # or "litellm"
346
- model: sentence-transformers/all-MiniLM-L6-v2
412
+ model: Snowflake/snowflake-arctic-embed-xs
347
413
  device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
348
414
  min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
349
415
 
@@ -353,6 +419,8 @@ envs: # extra environment variabl
353
419
 
354
420
  > **Note:** The daemon inherits your shell environment. If an API key (e.g. `OPENAI_API_KEY`) is already set as an environment variable, you don't need to duplicate it in `envs`. The `envs` field is only for values that aren't in your environment.
355
421
 
422
+ > **Custom location:** set `COCOINDEX_CODE_DIR` to place `global_settings.yml` somewhere other than `~/.cocoindex_code/` — useful if you want the file to live alongside your projects (e.g. on a synced folder).
423
+
356
424
  ### Project Settings (`<project>/.cocoindex_code/settings.yml`)
357
425
 
358
426
  Per-project. Controls which files to index.
@@ -415,7 +483,7 @@ See [`src/cocoindex_code/chunking.py`](./src/cocoindex_code/chunking.py) for the
415
483
 
416
484
  ## Embedding Models
417
485
 
418
- By default, a local SentenceTransformers model ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) is used — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
486
+ With the `[default]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
419
487
 
420
488
  > The `envs` entries below are only needed if the key isn't already in your shell environment — the daemon inherits your environment automatically.
421
489
 
@@ -46,16 +46,18 @@ A lightweight, effective **(AST-based)** semantic code search tool for your code
46
46
 
47
47
  Using [pipx](https://pipx.pypa.io/stable/installation/):
48
48
  ```bash
49
- pipx install cocoindex-code # first install
50
- pipx upgrade cocoindex-code # upgrade
49
+ pipx install 'cocoindex-code[default]' # batteries included (local embeddings)
50
+ pipx upgrade cocoindex-code # upgrade
51
51
  ```
52
52
 
53
53
  Using [uv](https://docs.astral.sh/uv/getting-started/installation/):
54
54
  ```bash
55
- uv tool install --upgrade cocoindex-code --prerelease explicit --with "cocoindex>=1.0.0a24"
55
+ uv tool install --upgrade 'cocoindex-code[default]' --prerelease explicit --with "cocoindex>=1.0.0a24"
56
56
  ```
57
57
 
58
- The default embedding model runs locally ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) — no API key required, completely free.
58
+ Two install styles:
59
+ - `cocoindex-code[default]` — batteries-included. Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs).
60
+ - `cocoindex-code` — slim. LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers).
59
61
 
60
62
  Next, set up your [coding agent integration](#coding-agent-integration) — or jump to [Manual CLI Usage](#manual-cli-usage) if you prefer direct control.
61
63
 
@@ -196,33 +198,79 @@ The recommended approach is a **persistent container**: start it once, and use
196
198
  `docker exec` to run CLI commands or connect MCP sessions to it. The daemon
197
199
  inside stays warm across sessions, so the embedding model is loaded only once.
198
200
 
199
- ### Step 1 — Start the container
201
+ ### Quick start — `docker compose up -d`
202
+
203
+ Grab [`docker/docker-compose.yml`](./docker/docker-compose.yml) from this repo and run:
204
+
205
+ ```bash
206
+ # macOS / Windows
207
+ docker compose up -d
208
+
209
+ # Linux (aligns file ownership on bind-mounted paths with your host user)
210
+ PUID=$(id -u) PGID=$(id -g) docker compose up -d
211
+ ```
212
+
213
+ By default your home directory is mounted into the container (set
214
+ `COCOINDEX_HOST_WORKSPACE` to narrow this to a specific code folder). Index
215
+ data and the embedding model cache persist in a Docker volume across
216
+ restarts. Your global settings file at `$HOME/.cocoindex_code/global_settings.yml`
217
+ is visible and editable on the host; edits take effect on your next `ccc` command.
218
+
219
+ > **GHCR:** to pull from GitHub Container Registry instead of Docker Hub,
220
+ > change the `image:` line in your copy of `docker-compose.yml` to
221
+ > `ghcr.io/cocoindex-io/cocoindex-code:latest`.
222
+
223
+ ### Or: `docker run`
224
+
225
+ <details>
226
+ <summary>Docker Desktop (macOS / Windows)</summary>
227
+
228
+ ```bash
229
+ docker run -d --name cocoindex-code \
230
+ --volume "$HOME:/workspace" \
231
+ --volume cocoindex-data:/var/cocoindex \
232
+ -e COCOINDEX_CODE_HOST_PATH_MAPPING="/workspace=$HOME" \
233
+ cocoindex/cocoindex-code:latest
234
+ ```
235
+ </details>
236
+
237
+ <details>
238
+ <summary>Linux (with <code>PUID</code>/<code>PGID</code>)</summary>
200
239
 
201
240
  ```bash
202
241
  docker run -d --name cocoindex-code \
203
- --volume "$(pwd):/workspace" \
204
- --volume cocoindex-db:/db \
205
- --volume cocoindex-model-cache:/root/.cache \
206
- ghcr.io/cocoindex-io/cocoindex-code:latest
242
+ -e PUID=$(id -u) -e PGID=$(id -g) \
243
+ --volume "$HOME:/workspace" \
244
+ --volume cocoindex-data:/var/cocoindex \
245
+ -e COCOINDEX_CODE_HOST_PATH_MAPPING="/workspace=$HOME" \
246
+ cocoindex/cocoindex-code:latest
207
247
  ```
248
+ </details>
208
249
 
209
- - `/workspace` — mount your project root here
210
- - `cocoindex-db` — index databases live inside the container (fast native I/O, no cross-OS volume issues)
211
- - `cocoindex-model-cache` — persists the embedding model across image upgrades
250
+ ### Shell wrapper for `ccc` commands
212
251
 
213
- ### Step 2 — Index your codebase
252
+ Paste this into `~/.bashrc` / `~/.zshrc` so `ccc` feels native on the host
253
+ and picks up the right project based on your current directory:
214
254
 
215
255
  ```bash
216
- docker exec -it cocoindex-code ccc index
256
+ ccc() {
257
+ docker exec -it -e COCOINDEX_CODE_HOST_CWD="$PWD" cocoindex-code ccc "$@"
258
+ }
217
259
  ```
218
260
 
219
- ### Step 3 — Connect your coding agent
261
+ Now `cd` into any project under your workspace and run `ccc init`, `ccc index`,
262
+ `ccc search ...`, `ccc status`, etc. — it just works.
263
+
264
+ ### Connect your coding agent
220
265
 
221
266
  <details>
222
267
  <summary>Claude Code</summary>
223
268
 
269
+ Register MCP from inside the target project so `$PWD` points there:
270
+
224
271
  ```bash
225
- claude mcp add cocoindex-code -- docker exec -i cocoindex-code ccc mcp
272
+ claude mcp add cocoindex-code -- docker exec -i \
273
+ -e COCOINDEX_CODE_HOST_CWD="$PWD" cocoindex-code ccc mcp
226
274
  ```
227
275
 
228
276
  Or via `.mcp.json`:
@@ -233,40 +281,50 @@ Or via `.mcp.json`:
233
281
  "cocoindex-code": {
234
282
  "type": "stdio",
235
283
  "command": "docker",
236
- "args": ["exec", "-i", "cocoindex-code", "ccc", "mcp"]
284
+ "args": [
285
+ "exec",
286
+ "-i",
287
+ "-e",
288
+ "COCOINDEX_CODE_HOST_CWD=${PWD}",
289
+ "cocoindex-code",
290
+ "ccc",
291
+ "mcp"
292
+ ]
237
293
  }
238
294
  }
239
295
  }
240
296
  ```
297
+
298
+ > Note: use `-i` (not `-it`). The `-t` flag allocates a terminal, which
299
+ > interferes with MCP's JSON messaging over stdin/stdout — only add it for
300
+ > interactive `ccc` commands like `ccc init`.
241
301
  </details>
242
302
 
243
303
  <details>
244
304
  <summary>Codex</summary>
245
305
 
246
306
  ```bash
247
- codex mcp add cocoindex-code -- docker exec -i cocoindex-code ccc mcp
307
+ codex mcp add cocoindex-code -- docker exec -i \
308
+ -e COCOINDEX_CODE_HOST_CWD="$PWD" cocoindex-code ccc mcp
248
309
  ```
249
310
  </details>
250
311
 
251
- ### CLI usage inside the container
252
-
253
- All `ccc` commands work via `docker exec`:
254
-
255
- ```bash
256
- docker exec -it cocoindex-code ccc index
257
- docker exec -it cocoindex-code ccc search "authentication logic"
258
- docker exec -it cocoindex-code ccc status
259
- ```
312
+ ### Upgrading from an older image
260
313
 
261
- Or set an alias on your host so it feels native:
314
+ Earlier images used separate `cocoindex-db` and `cocoindex-model-cache`
315
+ volumes; the current image consolidates them into a single `cocoindex-data`
316
+ volume. Before pulling the new image, drop the old container and volumes —
317
+ indexes rebuild on your next `ccc index`, and the embedding model is
318
+ re-populated automatically on first start:
262
319
 
263
320
  ```bash
264
- alias ccc='docker exec -it cocoindex-code ccc'
321
+ docker rm -f cocoindex-code
322
+ docker volume rm cocoindex-db cocoindex-model-cache
265
323
  ```
266
324
 
267
325
  ### Configuration via environment variables
268
326
 
269
- Pass configuration to `docker run` with `-e`:
327
+ Pass configuration to `docker run` with `-e` (for compose, set the same variables under `environment:` in your `docker-compose.yml`):
270
328
 
271
329
  ```bash
272
330
  # Extra extensions (e.g. Typesafe Config, SBT build files)
@@ -275,11 +333,14 @@ Pass configuration to `docker run` with `-e`:
275
333
  # Exclude build artefacts (Scala/SBT example)
276
334
  -e COCOINDEX_CODE_EXCLUDE_PATTERNS='["**/target/**","**/.bloop/**","**/.metals/**"]'
277
335
 
278
- # Swap in a code-optimised embedding model
279
- -e COCOINDEX_CODE_EMBEDDING_MODEL=voyage/voyage-code-3
336
+ # Set an API key
280
337
  -e VOYAGE_API_KEY=your-key
281
338
  ```
282
339
 
340
+ > **Security note:** mounting `$HOME` gives the container read/write access
341
+ > to everything under it. If that's too broad, bind-mount a narrower
342
+ > directory instead (`COCOINDEX_HOST_WORKSPACE=/path/to/code`).
343
+
283
344
  ### Build the image locally
284
345
 
285
346
  ```bash
@@ -291,7 +352,7 @@ docker build -t cocoindex-code:local -f docker/Dockerfile .
291
352
  - **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates.
292
353
  - **Multi-Language Support**: Python, JavaScript/TypeScript, Rust, Go, Java, C/C++, C#, SQL, Shell, and more.
293
354
  - **Embedded**: Portable and just works, no database setup required!
294
- - **Flexible Embeddings**: Local SentenceTransformers by default (free!) or 100+ cloud providers via LiteLLM.
355
+ - **Flexible Embeddings**: Local SentenceTransformers via the `[default]` extra (free, no API key!) or 100+ cloud providers via LiteLLM.
295
356
 
296
357
  ## Configuration
297
358
 
@@ -304,7 +365,7 @@ Shared across all projects. Controls the embedding model and environment variabl
304
365
  ```yaml
305
366
  embedding:
306
367
  provider: sentence-transformers # or "litellm"
307
- model: sentence-transformers/all-MiniLM-L6-v2
368
+ model: Snowflake/snowflake-arctic-embed-xs
308
369
  device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
309
370
  min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
310
371
 
@@ -314,6 +375,8 @@ envs: # extra environment variabl
314
375
 
315
376
  > **Note:** The daemon inherits your shell environment. If an API key (e.g. `OPENAI_API_KEY`) is already set as an environment variable, you don't need to duplicate it in `envs`. The `envs` field is only for values that aren't in your environment.
316
377
 
378
+ > **Custom location:** set `COCOINDEX_CODE_DIR` to place `global_settings.yml` somewhere other than `~/.cocoindex_code/` — useful if you want the file to live alongside your projects (e.g. on a synced folder).
379
+
317
380
  ### Project Settings (`<project>/.cocoindex_code/settings.yml`)
318
381
 
319
382
  Per-project. Controls which files to index.
@@ -376,7 +439,7 @@ See [`src/cocoindex_code/chunking.py`](./src/cocoindex_code/chunking.py) for the
376
439
 
377
440
  ## Embedding Models
378
441
 
379
- By default, a local SentenceTransformers model ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) is used — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
442
+ With the `[default]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
380
443
 
381
444
  > The `envs` entries below are only needed if the key isn't already in your shell environment — the daemon inherits your environment automatically.
382
445
 
@@ -24,7 +24,6 @@ classifiers = [
24
24
  dependencies = [
25
25
  "mcp>=1.0.0",
26
26
  "cocoindex[litellm]==1.0.0a43",
27
- "sentence-transformers>=2.2.0",
28
27
  "sqlite-vec>=0.1.0",
29
28
  "pydantic>=2.0.0",
30
29
  "numpy>=1.24.0",
@@ -33,9 +32,16 @@ dependencies = [
33
32
  "msgspec>=0.19.0",
34
33
  "pathspec>=0.12.1",
35
34
  "pyyaml>=6.0",
35
+ "questionary>=2.0.0",
36
36
  ]
37
37
 
38
38
  [project.optional-dependencies]
39
+ embeddings-local = [
40
+ "cocoindex[sentence-transformers]==1.0.0a43",
41
+ ]
42
+ default = [
43
+ "cocoindex[sentence-transformers]==1.0.0a43",
44
+ ]
39
45
  dev = [
40
46
  "pytest>=7.0.0",
41
47
  "pytest-asyncio>=0.21.0",
@@ -43,6 +49,7 @@ dev = [
43
49
  "ruff>=0.1.0",
44
50
  "mypy>=1.0.0",
45
51
  "prek>=0.1.0",
52
+ "cocoindex[sentence-transformers]==1.0.0a43",
46
53
  ]
47
54
 
48
55
  [project.scripts]
@@ -76,6 +83,7 @@ dev = [
76
83
  "mypy>=1.0.0",
77
84
  "prek>=0.1.0",
78
85
  "types-pyyaml>=6.0.12.20250915",
86
+ "cocoindex[sentence-transformers]==1.0.0a43",
79
87
  ]
80
88
 
81
89
  [tool.uv]
@@ -97,5 +105,8 @@ explicit_package_bases = true
97
105
  testpaths = ["tests"]
98
106
  python_files = ["test_*.py"]
99
107
  python_functions = ["test_*"]
100
- addopts = "-v --tb=short"
108
+ addopts = "-v --tb=short -m 'not docker_e2e'"
101
109
  asyncio_mode = "auto"
110
+ markers = [
111
+ "docker_e2e: requires Docker; builds the image and runs containerized E2E tests. Run with: pytest -m docker_e2e",
112
+ ]
@@ -0,0 +1,60 @@
1
+ """Daemon filesystem paths and connection helpers.
2
+
3
+ Lightweight module with no cocoindex dependency so that the CLI client
4
+ can import these without pulling in the full daemon stack.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ from .settings import user_settings_dir
14
+
15
+
16
+ def daemon_runtime_dir() -> Path:
17
+ """Return the directory that holds daemon runtime artifacts.
18
+
19
+ Holds ``daemon.sock``, ``daemon.pid``, ``daemon.log``. Kept separate from
20
+ the user-settings dir so that (e.g. in Docker) the socket can live on the
21
+ container's native filesystem while ``global_settings.yml`` lives on a
22
+ bind mount.
23
+
24
+ Override with ``COCOINDEX_CODE_RUNTIME_DIR``. Defaults to
25
+ :func:`user_settings_dir` for backward compatibility — non-Docker users
26
+ see identical behavior to before the split.
27
+ """
28
+ override = os.environ.get("COCOINDEX_CODE_RUNTIME_DIR")
29
+ if override:
30
+ return Path(override)
31
+ return user_settings_dir()
32
+
33
+
34
+ def connection_family() -> str:
35
+ """Return the multiprocessing connection family for this platform."""
36
+ return "AF_PIPE" if sys.platform == "win32" else "AF_UNIX"
37
+
38
+
39
+ def daemon_socket_path() -> str:
40
+ """Return the daemon socket/pipe address."""
41
+ if sys.platform == "win32":
42
+ import hashlib
43
+
44
+ # Hash the runtime dir so COCOINDEX_CODE_RUNTIME_DIR (or the
45
+ # COCOINDEX_CODE_DIR fallback) overrides produce unique pipe names,
46
+ # preventing conflicts between different daemon instances (tests,
47
+ # users, etc.)
48
+ dir_hash = hashlib.md5(str(daemon_runtime_dir()).encode()).hexdigest()[:12]
49
+ return rf"\\.\pipe\cocoindex_code_{dir_hash}"
50
+ return str(daemon_runtime_dir() / "daemon.sock")
51
+
52
+
53
+ def daemon_pid_path() -> Path:
54
+ """Return the path for the daemon's PID file."""
55
+ return daemon_runtime_dir() / "daemon.pid"
56
+
57
+
58
+ def daemon_log_path() -> Path:
59
+ """Return the path for the daemon's log file."""
60
+ return daemon_runtime_dir() / "daemon.log"
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.2.22'
22
- __version_tuple__ = version_tuple = (0, 2, 22)
21
+ __version__ = version = '0.2.24'
22
+ __version_tuple__ = version_tuple = (0, 2, 24)
23
23
 
24
24
  __commit_id__ = commit_id = None