cocoindex-code 0.2.11__tar.gz → 0.2.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/PKG-INFO +18 -12
  2. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/README.md +10 -9
  3. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/pyproject.toml +10 -2
  4. cocoindex_code-0.2.23/src/cocoindex_code/_daemon_paths.py +44 -0
  5. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/_version.py +2 -2
  6. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/cli.py +165 -12
  7. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/client.py +10 -6
  8. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/daemon.py +18 -51
  9. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/indexer.py +3 -2
  10. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/settings.py +57 -11
  11. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/shared.py +37 -1
  12. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/.gitignore +0 -0
  13. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/LICENSE +0 -0
  14. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/__init__.py +0 -0
  15. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/__main__.py +0 -0
  16. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/chunking.py +0 -0
  17. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/litellm_embedder.py +0 -0
  18. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/project.py +0 -0
  19. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/protocol.py +0 -0
  20. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/query.py +0 -0
  21. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/schema.py +0 -0
  22. {cocoindex_code-0.2.11 → cocoindex_code-0.2.23}/src/cocoindex_code/server.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex-code
3
- Version: 0.2.11
3
+ Version: 0.2.23
4
4
  Summary: MCP server for indexing and querying codebases using CocoIndex
5
5
  Project-URL: Homepage, https://github.com/cocoindex-io/cocoindex-code
6
6
  Project-URL: Repository, https://github.com/cocoindex-io/cocoindex-code
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.12
17
17
  Classifier: Programming Language :: Python :: 3.13
18
18
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
19
  Requires-Python: >=3.11
20
- Requires-Dist: cocoindex[litellm]==1.0.0a38
20
+ Requires-Dist: cocoindex[litellm]==1.0.0a43
21
21
  Requires-Dist: einops>=0.8.2
22
22
  Requires-Dist: mcp>=1.0.0
23
23
  Requires-Dist: msgspec>=0.19.0
@@ -25,16 +25,21 @@ Requires-Dist: numpy>=1.24.0
25
25
  Requires-Dist: pathspec>=0.12.1
26
26
  Requires-Dist: pydantic>=2.0.0
27
27
  Requires-Dist: pyyaml>=6.0
28
- Requires-Dist: sentence-transformers>=2.2.0
28
+ Requires-Dist: questionary>=2.0.0
29
29
  Requires-Dist: sqlite-vec>=0.1.0
30
30
  Requires-Dist: typer>=0.9.0
31
+ Provides-Extra: default
32
+ Requires-Dist: cocoindex[sentence-transformers]==1.0.0a43; extra == 'default'
31
33
  Provides-Extra: dev
34
+ Requires-Dist: cocoindex[sentence-transformers]==1.0.0a43; extra == 'dev'
32
35
  Requires-Dist: mypy>=1.0.0; extra == 'dev'
33
36
  Requires-Dist: prek>=0.1.0; extra == 'dev'
34
37
  Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
35
38
  Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
36
39
  Requires-Dist: pytest>=7.0.0; extra == 'dev'
37
40
  Requires-Dist: ruff>=0.1.0; extra == 'dev'
41
+ Provides-Extra: embeddings-local
42
+ Requires-Dist: cocoindex[sentence-transformers]==1.0.0a43; extra == 'embeddings-local'
38
43
  Description-Content-Type: text/markdown
39
44
 
40
45
  <p align="center">
@@ -85,16 +90,18 @@ A lightweight, effective **(AST-based)** semantic code search tool for your code
85
90
 
86
91
  Using [pipx](https://pipx.pypa.io/stable/installation/):
87
92
  ```bash
88
- pipx install cocoindex-code # first install
89
- pipx upgrade cocoindex-code # upgrade
93
+ pipx install 'cocoindex-code[default]' # batteries included (local embeddings)
94
+ pipx upgrade cocoindex-code # upgrade
90
95
  ```
91
96
 
92
97
  Using [uv](https://docs.astral.sh/uv/getting-started/installation/):
93
98
  ```bash
94
- uv tool install --upgrade cocoindex-code --prerelease explicit --with "cocoindex>=1.0.0a24"
99
+ uv tool install --upgrade 'cocoindex-code[default]' --prerelease explicit --with "cocoindex>=1.0.0a24"
95
100
  ```
96
101
 
97
- The default embedding model runs locally ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) — no API key required, completely free.
102
+ Two install styles:
103
+ - `cocoindex-code[default]` — batteries-included. Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs).
104
+ - `cocoindex-code` — slim. LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers).
98
105
 
99
106
  Next, set up your [coding agent integration](#coding-agent-integration) — or jump to [Manual CLI Usage](#manual-cli-usage) if you prefer direct control.
100
107
 
@@ -314,8 +321,7 @@ Pass configuration to `docker run` with `-e`:
314
321
  # Exclude build artefacts (Scala/SBT example)
315
322
  -e COCOINDEX_CODE_EXCLUDE_PATTERNS='["**/target/**","**/.bloop/**","**/.metals/**"]'
316
323
 
317
- # Swap in a code-optimised embedding model
318
- -e COCOINDEX_CODE_EMBEDDING_MODEL=voyage/voyage-code-3
324
+ # Set an API key
319
325
  -e VOYAGE_API_KEY=your-key
320
326
  ```
321
327
 
@@ -330,7 +336,7 @@ docker build -t cocoindex-code:local -f docker/Dockerfile .
330
336
  - **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates.
331
337
  - **Multi-Language Support**: Python, JavaScript/TypeScript, Rust, Go, Java, C/C++, C#, SQL, Shell, and more.
332
338
  - **Embedded**: Portable and just works, no database setup required!
333
- - **Flexible Embeddings**: Local SentenceTransformers by default (free!) or 100+ cloud providers via LiteLLM.
339
+ - **Flexible Embeddings**: Local SentenceTransformers via the `[default]` extra (free, no API key!) or 100+ cloud providers via LiteLLM.
334
340
 
335
341
  ## Configuration
336
342
 
@@ -343,7 +349,7 @@ Shared across all projects. Controls the embedding model and environment variabl
343
349
  ```yaml
344
350
  embedding:
345
351
  provider: sentence-transformers # or "litellm"
346
- model: sentence-transformers/all-MiniLM-L6-v2
352
+ model: Snowflake/snowflake-arctic-embed-xs
347
353
  device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
348
354
  min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
349
355
 
@@ -415,7 +421,7 @@ See [`src/cocoindex_code/chunking.py`](./src/cocoindex_code/chunking.py) for the
415
421
 
416
422
  ## Embedding Models
417
423
 
418
- By default, a local SentenceTransformers model ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) is used — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
424
+ With the `[default]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
419
425
 
420
426
  > The `envs` entries below are only needed if the key isn't already in your shell environment — the daemon inherits your environment automatically.
421
427
 
@@ -46,16 +46,18 @@ A lightweight, effective **(AST-based)** semantic code search tool for your code
46
46
 
47
47
  Using [pipx](https://pipx.pypa.io/stable/installation/):
48
48
  ```bash
49
- pipx install cocoindex-code # first install
50
- pipx upgrade cocoindex-code # upgrade
49
+ pipx install 'cocoindex-code[default]' # batteries included (local embeddings)
50
+ pipx upgrade cocoindex-code # upgrade
51
51
  ```
52
52
 
53
53
  Using [uv](https://docs.astral.sh/uv/getting-started/installation/):
54
54
  ```bash
55
- uv tool install --upgrade cocoindex-code --prerelease explicit --with "cocoindex>=1.0.0a24"
55
+ uv tool install --upgrade 'cocoindex-code[default]' --prerelease explicit --with "cocoindex>=1.0.0a24"
56
56
  ```
57
57
 
58
- The default embedding model runs locally ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) — no API key required, completely free.
58
+ Two install styles:
59
+ - `cocoindex-code[default]` — batteries-included. Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs).
60
+ - `cocoindex-code` — slim. LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers).
59
61
 
60
62
  Next, set up your [coding agent integration](#coding-agent-integration) — or jump to [Manual CLI Usage](#manual-cli-usage) if you prefer direct control.
61
63
 
@@ -275,8 +277,7 @@ Pass configuration to `docker run` with `-e`:
275
277
  # Exclude build artefacts (Scala/SBT example)
276
278
  -e COCOINDEX_CODE_EXCLUDE_PATTERNS='["**/target/**","**/.bloop/**","**/.metals/**"]'
277
279
 
278
- # Swap in a code-optimised embedding model
279
- -e COCOINDEX_CODE_EMBEDDING_MODEL=voyage/voyage-code-3
280
+ # Set an API key
280
281
  -e VOYAGE_API_KEY=your-key
281
282
  ```
282
283
 
@@ -291,7 +292,7 @@ docker build -t cocoindex-code:local -f docker/Dockerfile .
291
292
  - **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates.
292
293
  - **Multi-Language Support**: Python, JavaScript/TypeScript, Rust, Go, Java, C/C++, C#, SQL, Shell, and more.
293
294
  - **Embedded**: Portable and just works, no database setup required!
294
- - **Flexible Embeddings**: Local SentenceTransformers by default (free!) or 100+ cloud providers via LiteLLM.
295
+ - **Flexible Embeddings**: Local SentenceTransformers via the `[default]` extra (free, no API key!) or 100+ cloud providers via LiteLLM.
295
296
 
296
297
  ## Configuration
297
298
 
@@ -304,7 +305,7 @@ Shared across all projects. Controls the embedding model and environment variabl
304
305
  ```yaml
305
306
  embedding:
306
307
  provider: sentence-transformers # or "litellm"
307
- model: sentence-transformers/all-MiniLM-L6-v2
308
+ model: Snowflake/snowflake-arctic-embed-xs
308
309
  device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
309
310
  min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
310
311
 
@@ -376,7 +377,7 @@ See [`src/cocoindex_code/chunking.py`](./src/cocoindex_code/chunking.py) for the
376
377
 
377
378
  ## Embedding Models
378
379
 
379
- By default, a local SentenceTransformers model ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) is used — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
380
+ With the `[default]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
380
381
 
381
382
  > The `envs` entries below are only needed if the key isn't already in your shell environment — the daemon inherits your environment automatically.
382
383
 
@@ -23,8 +23,7 @@ classifiers = [
23
23
 
24
24
  dependencies = [
25
25
  "mcp>=1.0.0",
26
- "cocoindex[litellm]==1.0.0a38",
27
- "sentence-transformers>=2.2.0",
26
+ "cocoindex[litellm]==1.0.0a43",
28
27
  "sqlite-vec>=0.1.0",
29
28
  "pydantic>=2.0.0",
30
29
  "numpy>=1.24.0",
@@ -33,9 +32,16 @@ dependencies = [
33
32
  "msgspec>=0.19.0",
34
33
  "pathspec>=0.12.1",
35
34
  "pyyaml>=6.0",
35
+ "questionary>=2.0.0",
36
36
  ]
37
37
 
38
38
  [project.optional-dependencies]
39
+ embeddings-local = [
40
+ "cocoindex[sentence-transformers]==1.0.0a43",
41
+ ]
42
+ default = [
43
+ "cocoindex[sentence-transformers]==1.0.0a43",
44
+ ]
39
45
  dev = [
40
46
  "pytest>=7.0.0",
41
47
  "pytest-asyncio>=0.21.0",
@@ -43,6 +49,7 @@ dev = [
43
49
  "ruff>=0.1.0",
44
50
  "mypy>=1.0.0",
45
51
  "prek>=0.1.0",
52
+ "cocoindex[sentence-transformers]==1.0.0a43",
46
53
  ]
47
54
 
48
55
  [project.scripts]
@@ -76,6 +83,7 @@ dev = [
76
83
  "mypy>=1.0.0",
77
84
  "prek>=0.1.0",
78
85
  "types-pyyaml>=6.0.12.20250915",
86
+ "cocoindex[sentence-transformers]==1.0.0a43",
79
87
  ]
80
88
 
81
89
  [tool.uv]
@@ -0,0 +1,44 @@
1
+ """Daemon filesystem paths and connection helpers.
2
+
3
+ Lightweight module with no cocoindex dependency so that the CLI client
4
+ can import these without pulling in the full daemon stack.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ from .settings import user_settings_dir
13
+
14
+
15
def daemon_dir() -> Path:
    """Return the directory that holds the daemon's files.

    This is the same directory as the user settings directory
    (``~/.cocoindex_code/``).
    """
    settings_dir = user_settings_dir()
    return settings_dir
18
+
19
+
20
def connection_family() -> str:
    """Return the ``multiprocessing.connection`` family name for this platform.

    Returns:
        ``"AF_PIPE"`` on Windows (``sys.platform == "win32"``) and
        ``"AF_UNIX"`` everywhere else.
    """
    if sys.platform == "win32":
        return "AF_PIPE"
    return "AF_UNIX"
23
+
24
+
25
def daemon_socket_path() -> str:
    """Return the daemon socket/pipe address.

    Returns:
        On Windows, a named-pipe path of the form
        ``\\\\.\\pipe\\cocoindex_code_<hash>`` where ``<hash>`` is the first
        12 hex characters of the MD5 of the daemon directory path. On other
        platforms, the path of ``daemon.sock`` inside the daemon directory.
    """
    if sys.platform != "win32":
        return str(daemon_dir() / "daemon.sock")

    import hashlib

    # Hash the daemon dir so COCOINDEX_CODE_DIR overrides create unique pipe names,
    # preventing conflicts between different daemon instances (tests, users, etc.)
    # The digest is only a short, stable identifier, so it is flagged as
    # non-security use; this keeps md5 usable on FIPS-restricted builds and
    # yields exactly the same hex digest.
    digest = hashlib.md5(str(daemon_dir()).encode(), usedforsecurity=False).hexdigest()
    dir_hash = digest[:12]
    return rf"\\.\pipe\cocoindex_code_{dir_hash}"
35
+
36
+
37
def daemon_pid_path() -> Path:
    """Return the location of the daemon's PID file (``daemon.pid``)."""
    base_dir = daemon_dir()
    return base_dir / "daemon.pid"
40
+
41
+
42
def daemon_log_path() -> Path:
    """Return the location of the daemon's log file (``daemon.log``)."""
    base_dir = daemon_dir()
    return base_dir / "daemon.log"
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.2.11'
22
- __version_tuple__ = version_tuple = (0, 2, 11)
21
+ __version__ = version = '0.2.23'
22
+ __version_tuple__ = version_tuple = (0, 2, 23)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import functools
6
+ import sys
6
7
  from collections.abc import Callable
7
8
  from pathlib import Path
8
9
  from typing import TypeVar
@@ -12,15 +13,16 @@ import typer as _typer
12
13
  from .client import DaemonStartError
13
14
  from .protocol import DoctorCheckResult, IndexingProgress, ProjectStatusResponse, SearchResponse
14
15
  from .settings import (
16
+ DEFAULT_ST_MODEL,
17
+ EmbeddingSettings,
15
18
  cocoindex_db_path,
16
19
  default_project_settings,
17
- default_user_settings,
18
20
  find_parent_with_marker,
19
21
  find_project_root,
20
22
  project_settings_path,
21
23
  resolve_db_dir,
24
+ save_initial_user_settings,
22
25
  save_project_settings,
23
- save_user_settings,
24
26
  target_sqlite_db_path,
25
27
  user_settings_path,
26
28
  )
@@ -282,19 +284,173 @@ def remove_from_gitignore(project_root: Path) -> None:
282
284
  # ---------------------------------------------------------------------------
283
285
 
284
286
 
287
+ _LITELLM_MODELS_URL = "https://docs.litellm.ai/docs/embedding/supported_embedding"
288
+
289
+
290
def _resolve_embedding_choice(
    litellm_model_flag: str | None,
    st_installed: bool,
    tty: bool,
) -> EmbeddingSettings:
    """Resolve the embedding settings to write during first-time setup.

    Decision order:

    1. ``litellm_model_flag`` given: use LiteLLM with that model, no prompts.
    2. Not a TTY: use the default sentence-transformers model when the
       package is installed, otherwise print an error and exit.
    3. TTY: prompt for the provider (only when sentence-transformers is
       installed; otherwise ``litellm`` is the only option) and then for the
       model name.

    Args:
        litellm_model_flag: Value of ``--litellm-model``, or ``None``.
        st_installed: Whether sentence-transformers is available.
        tty: Whether prompts may be shown.

    Returns:
        The chosen ``EmbeddingSettings``.

    Raises:
        typer.Exit: With code 1 when no choice can be made non-interactively,
            when a prompt is cancelled, or when the model name is empty or
            consists only of whitespace.
    """
    if litellm_model_flag is not None:
        return EmbeddingSettings(provider="litellm", model=litellm_model_flag)

    if not tty:
        if st_installed:
            return EmbeddingSettings(provider="sentence-transformers", model=DEFAULT_ST_MODEL)
        _typer.echo(
            "Error: sentence-transformers is not installed and stdin is not a TTY.\n"
            "Either install the extra (`pip install cocoindex-code[embeddings-local]`)\n"
            "or pass `--litellm-model MODEL` to select a LiteLLM model.",
            err=True,
        )
        raise _typer.Exit(code=1)

    # Interactive
    import questionary

    if st_installed:
        provider = questionary.select(
            "Embedding provider",
            choices=[
                questionary.Choice(
                    title="sentence-transformers (local, free)",
                    value="sentence-transformers",
                ),
                questionary.Choice(
                    title="litellm (cloud, 100+ providers)",
                    value="litellm",
                ),
            ],
        ).ask()
    else:
        _typer.echo(
            "sentence-transformers is not installed — only `litellm` is available.\n"
            "To enable local embeddings, install `cocoindex-code[embeddings-local]`."
        )
        provider = "litellm"

    if provider is None:  # user cancelled (Ctrl-C / Esc)
        raise _typer.Exit(code=1)

    if provider == "sentence-transformers":
        model = questionary.text("Model name", default=DEFAULT_ST_MODEL).ask()
    elif provider == "litellm":
        _typer.echo(f"See supported LiteLLM embedding models: {_LITELLM_MODELS_URL}")
        model = questionary.text("Model name").ask()
    else:
        _typer.echo(f"Error: unknown provider {provider!r}", err=True)
        raise _typer.Exit(code=1)

    # Strip before the emptiness check so that a whitespace-only answer is
    # rejected instead of being saved as an empty model name.
    model = model.strip() if model else ""
    if not model:  # None (cancelled), empty, or whitespace-only
        raise _typer.Exit(code=1)

    return EmbeddingSettings(provider=provider, model=model)
350
+
351
+
352
def _ok_fail_tag(ok: bool) -> str:
    """Return a bold, colored status tag: green `[OK]` or red `[FAIL]`."""
    import click as _click

    label, color = ("[OK]", "green") if ok else ("[FAIL]", "red")
    return _click.style(label, fg=color, bold=True)
359
+
360
+
361
def _run_init_model_check(settings_path: Path) -> None:
    """Run the embedding-model check through the daemon and print the outcome.

    Calls ``client.doctor(project_root=None)`` while showing a transient
    spinner on stderr. Any exception raised during that call is converted
    into a single failed "Model Check" result, so failures are always printed
    in the same shape as real doctor results. When at least one result is not
    OK, a hint pointing at *settings_path* is written to stderr.

    Args:
        settings_path: Path of the user settings file named in the hint.
    """
    from rich.console import Console as _Console
    from rich.live import Live as _Live
    from rich.spinner import Spinner as _Spinner

    from . import client as _client

    stderr_console = _Console(stderr=True)
    checks: list[DoctorCheckResult] = []
    try:
        with _Live(
            _Spinner("dots", "Testing embedding model..."),
            console=stderr_console,
            transient=True,
        ):
            checks = _client.doctor(project_root=None)
    except Exception as exc:
        # Render any failure (daemon start-up or otherwise) as a synthetic
        # failed result instead of propagating it.
        synthetic = DoctorCheckResult(
            name="Model Check",
            ok=False,
            details=[],
            errors=[f"{type(exc).__name__}: {exc}"],
        )
        checks = [synthetic]

    any_failed = False
    for check in checks:
        if check.name != "done":  # "done" entries are skipped, not printed
            _print_doctor_result(check)
            if not check.ok:
                any_failed = True

    if not any_failed:
        return

    _typer.echo(
        f"You can edit {settings_path} to change the model or add API keys\n"
        "under `envs:`. Then run `ccc doctor` to verify.",
        err=True,
    )
408
+
409
+
410
def _setup_user_settings_interactive(litellm_model_flag: str | None) -> None:
    """Create the global settings file and test the chosen embedding model.

    Resolves the embedding choice (prompting when stdin is a TTY), writes the
    initial user settings file, then runs the model check and prints its
    result.

    Args:
        litellm_model_flag: Value of ``--litellm-model``, or ``None`` to
            choose via prompts or defaults.
    """
    from .shared import is_sentence_transformers_installed

    has_local_embeddings = is_sentence_transformers_installed()
    stdin_is_tty = sys.stdin.isatty()
    embedding = _resolve_embedding_choice(
        litellm_model_flag=litellm_model_flag,
        st_installed=has_local_embeddings,
        tty=stdin_is_tty,
    )

    settings_file = save_initial_user_settings(embedding)
    _typer.echo()
    _typer.echo(f"Created user settings: {settings_file}")

    _typer.echo()
    _typer.echo(f"Testing embedding model: {embedding.provider} / {embedding.model}")
    _run_init_model_check(settings_file)
    _typer.echo()
428
+
429
+
285
430
  @app.command()
286
431
  def init(
432
+ litellm_model: str | None = _typer.Option(
433
+ None,
434
+ "--litellm-model",
435
+ help="Use the given LiteLLM model and skip provider/model prompts.",
436
+ ),
287
437
  force: bool = _typer.Option(False, "-f", "--force", help="Skip parent directory warning"),
288
438
  ) -> None:
289
439
  """Initialize a project for cocoindex-code."""
290
440
  cwd = Path.cwd().resolve()
291
441
  settings_file = project_settings_path(cwd)
292
442
 
293
- # Always ensure user settings exist
294
443
  user_path = user_settings_path()
295
- if not user_path.is_file():
296
- save_user_settings(default_user_settings())
297
- _typer.echo(f"Created user settings: {user_path}")
444
+ if user_path.is_file():
445
+ if litellm_model is not None:
446
+ _typer.echo(
447
+ f"Error: global settings already exist at {user_path}.\n"
448
+ "Edit that file or remove it before passing `--litellm-model`.",
449
+ err=True,
450
+ )
451
+ raise _typer.Exit(code=1)
452
+ else:
453
+ _setup_user_settings_interactive(litellm_model)
298
454
 
299
455
  # Check if already initialized
300
456
  if settings_file.is_file():
@@ -489,10 +645,7 @@ def _print_doctor_result(result: DoctorCheckResult) -> None:
489
645
 
490
646
  if result.name == "done":
491
647
  return
492
- if result.ok:
493
- tag = _click.style("[OK]", fg="green", bold=True)
494
- else:
495
- tag = _click.style("[FAIL]", fg="red", bold=True)
648
+ tag = _ok_fail_tag(result.ok)
496
649
  _typer.echo(f"\n {tag} {result.name}")
497
650
  for line in result.details:
498
651
  _typer.echo(f" {line}")
@@ -599,7 +752,7 @@ def doctor() -> None:
599
752
 
600
753
  # --- 8. Log files ---
601
754
  _print_section("Log Files")
602
- from .daemon import daemon_log_path as _daemon_log_path
755
+ from ._daemon_paths import daemon_log_path as _daemon_log_path
603
756
 
604
757
  _typer.echo(f" Daemon logs: {_daemon_log_path()}")
605
758
  _typer.echo(" Check logs above for further troubleshooting.")
@@ -675,8 +828,8 @@ def daemon_restart() -> None:
675
828
  @daemon_app.command("stop")
676
829
  def daemon_stop() -> None:
677
830
  """Stop the daemon."""
831
+ from ._daemon_paths import daemon_pid_path
678
832
  from .client import is_daemon_running, stop_daemon
679
- from .daemon import daemon_pid_path
680
833
 
681
834
  pid_path = daemon_pid_path()
682
835
  if not pid_path.exists() and not is_daemon_running():
@@ -17,8 +17,14 @@ from collections.abc import Callable
17
17
  from multiprocessing.connection import Client, Connection
18
18
  from pathlib import Path
19
19
 
20
+ from ._daemon_paths import (
21
+ connection_family,
22
+ daemon_dir,
23
+ daemon_log_path,
24
+ daemon_pid_path,
25
+ daemon_socket_path,
26
+ )
20
27
  from ._version import __version__
21
- from .daemon import _connection_family, daemon_log_path, daemon_pid_path, daemon_socket_path
22
28
  from .protocol import (
23
29
  DaemonEnvRequest,
24
30
  DaemonEnvResponse,
@@ -105,7 +111,7 @@ def _raw_connect_and_handshake() -> Connection:
105
111
  if sys.platform != "win32" and not os.path.exists(sock):
106
112
  raise ConnectionRefusedError(f"Daemon socket not found: {sock}")
107
113
  try:
108
- conn = Client(sock, family=_connection_family())
114
+ conn = Client(sock, family=connection_family())
109
115
  except (ConnectionRefusedError, FileNotFoundError, OSError) as e:
110
116
  raise ConnectionRefusedError(f"Cannot connect to daemon: {e}") from e
111
117
 
@@ -329,7 +335,7 @@ def is_daemon_running() -> bool:
329
335
  """Check if the daemon is running."""
330
336
  if sys.platform == "win32":
331
337
  try:
332
- conn = Client(daemon_socket_path(), family=_connection_family())
338
+ conn = Client(daemon_socket_path(), family=connection_family())
333
339
  conn.close()
334
340
  return True
335
341
  except (ConnectionRefusedError, OSError):
@@ -343,8 +349,6 @@ def start_daemon() -> subprocess.Popen[bytes]:
343
349
  Returns the ``Popen`` object so callers can detect early process death
344
350
  (via ``proc.poll()``) instead of waiting for a full timeout.
345
351
  """
346
- from .daemon import daemon_dir, daemon_log_path
347
-
348
352
  daemon_dir().mkdir(parents=True, exist_ok=True)
349
353
  log_path = daemon_log_path()
350
354
 
@@ -518,7 +522,7 @@ def _wait_for_daemon(
518
522
 
519
523
  if sys.platform == "win32":
520
524
  try:
521
- conn = Client(sock_path, family=_connection_family())
525
+ conn = Client(sock_path, family=connection_family())
522
526
  conn.close()
523
527
  return
524
528
  except (ConnectionRefusedError, OSError):
@@ -15,6 +15,13 @@ from multiprocessing.connection import Connection, Listener
15
15
  from pathlib import Path
16
16
  from typing import Any
17
17
 
18
+ from ._daemon_paths import (
19
+ connection_family,
20
+ daemon_dir,
21
+ daemon_log_path,
22
+ daemon_pid_path,
23
+ daemon_socket_path,
24
+ )
18
25
  from ._version import __version__
19
26
  from .chunking import ChunkerFn as _ChunkerFn
20
27
  from .project import Project
@@ -53,9 +60,8 @@ from .settings import (
53
60
  load_project_settings,
54
61
  load_user_settings,
55
62
  target_sqlite_db_path,
56
- user_settings_dir,
57
63
  )
58
- from .shared import Embedder, create_embedder
64
+ from .shared import Embedder, check_embedding, create_embedder
59
65
 
60
66
  logger = logging.getLogger(__name__)
61
67
 
@@ -79,43 +85,6 @@ def _resolve_chunker_registry(mappings: list[ChunkerMapping]) -> dict[str, _Chun
79
85
  return registry
80
86
 
81
87
 
82
- # ---------------------------------------------------------------------------
83
- # Daemon paths
84
- # ---------------------------------------------------------------------------
85
-
86
-
87
- def daemon_dir() -> Path:
88
- """Return the daemon directory (``~/.cocoindex_code/``)."""
89
- return user_settings_dir()
90
-
91
-
92
- def _connection_family() -> str:
93
- """Return the multiprocessing connection family for this platform."""
94
- return "AF_PIPE" if sys.platform == "win32" else "AF_UNIX"
95
-
96
-
97
- def daemon_socket_path() -> str:
98
- """Return the daemon socket/pipe address."""
99
- if sys.platform == "win32":
100
- import hashlib
101
-
102
- # Hash the daemon dir so COCOINDEX_CODE_DIR overrides create unique pipe names,
103
- # preventing conflicts between different daemon instances (tests, users, etc.)
104
- dir_hash = hashlib.md5(str(daemon_dir()).encode()).hexdigest()[:12]
105
- return rf"\\.\pipe\cocoindex_code_{dir_hash}"
106
- return str(daemon_dir() / "daemon.sock")
107
-
108
-
109
- def daemon_pid_path() -> Path:
110
- """Return the path for the daemon's PID file."""
111
- return daemon_dir() / "daemon.pid"
112
-
113
-
114
- def daemon_log_path() -> Path:
115
- """Return the path for the daemon's log file."""
116
- return daemon_dir() / "daemon.log"
117
-
118
-
119
88
  # ---------------------------------------------------------------------------
120
89
  # Project Registry
121
90
  # ---------------------------------------------------------------------------
@@ -293,22 +262,20 @@ async def _handle_doctor(
293
262
 
294
263
  async def _check_model(embedder: Embedder) -> DoctorCheckResult:
295
264
  """Test the embedding model by embedding a short string."""
296
- try:
297
- vec = await embedder.embed("hello world")
298
- dim = len(vec)
265
+ result = await check_embedding(embedder)
266
+ if result.error is None:
299
267
  return DoctorCheckResult(
300
268
  name="Model Check",
301
269
  ok=True,
302
- details=[f"Embedding dimension: {dim}"],
270
+ details=[f"Embedding dimension: {result.dim}"],
303
271
  errors=[],
304
272
  )
305
- except Exception as e:
306
- return DoctorCheckResult(
307
- name="Model Check",
308
- ok=False,
309
- details=[],
310
- errors=[str(e)],
311
- )
273
+ return DoctorCheckResult(
274
+ name="Model Check",
275
+ ok=False,
276
+ details=[],
277
+ errors=[result.error],
278
+ )
312
279
 
313
280
 
314
281
  async def _check_file_walk(project_root_str: str) -> DoctorCheckResult:
@@ -540,7 +507,7 @@ def run_daemon() -> None:
540
507
  except Exception:
541
508
  pass
542
509
 
543
- listener = Listener(sock_path, family=_connection_family())
510
+ listener = Listener(sock_path, family=connection_family())
544
511
  logger.info("Listening on %s", sock_path)
545
512
 
546
513
  loop = asyncio.new_event_loop()
@@ -224,5 +224,6 @@ async def indexer_main() -> None:
224
224
  path_matcher=matcher,
225
225
  )
226
226
 
227
- with coco.component_subpath(coco.Symbol("process_file")):
228
- await coco.mount_each(process_file, files.items(), table)
227
+ await coco.mount_each(
228
+ coco.component_subpath(coco.Symbol("process_file")), process_file, files.items(), table
229
+ )
@@ -126,11 +126,14 @@ class ProjectSettings:
126
126
  # ---------------------------------------------------------------------------
127
127
 
128
128
 
129
+ DEFAULT_ST_MODEL = "Snowflake/snowflake-arctic-embed-xs"
130
+
131
+
129
132
  def default_user_settings() -> UserSettings:
130
133
  return UserSettings(
131
134
  embedding=EmbeddingSettings(
132
135
  provider="sentence-transformers",
133
- model="sentence-transformers/all-MiniLM-L6-v2",
136
+ model=DEFAULT_ST_MODEL,
134
137
  )
135
138
  )
136
139
 
@@ -344,17 +347,20 @@ def load_gitignore_spec(project_root: Path) -> GitIgnoreSpec | None:
344
347
  # ---------------------------------------------------------------------------
345
348
 
346
349
 
347
- def _user_settings_to_dict(settings: UserSettings) -> dict[str, Any]:
348
- d: dict[str, Any] = {}
349
- emb: dict[str, Any] = {
350
- "provider": settings.embedding.provider,
351
- "model": settings.embedding.model,
350
+ def _embedding_settings_to_dict(embedding: EmbeddingSettings) -> dict[str, Any]:
351
+ d: dict[str, Any] = {
352
+ "provider": embedding.provider,
353
+ "model": embedding.model,
352
354
  }
353
- if settings.embedding.device is not None:
354
- emb["device"] = settings.embedding.device
355
- if settings.embedding.min_interval_ms is not None:
356
- emb["min_interval_ms"] = settings.embedding.min_interval_ms
357
- d["embedding"] = emb
355
+ if embedding.device is not None:
356
+ d["device"] = embedding.device
357
+ if embedding.min_interval_ms is not None:
358
+ d["min_interval_ms"] = embedding.min_interval_ms
359
+ return d
360
+
361
+
362
+ def _user_settings_to_dict(settings: UserSettings) -> dict[str, Any]:
363
+ d: dict[str, Any] = {"embedding": _embedding_settings_to_dict(settings.embedding)}
358
364
  if settings.envs:
359
365
  d["envs"] = dict(settings.envs)
360
366
  return d
@@ -436,6 +442,46 @@ def save_user_settings(settings: UserSettings) -> Path:
436
442
  return path
437
443
 
438
444
 
445
+ _INITIAL_HEADER = (
446
+ "# CocoIndex Code — global settings.\n"
447
+ "# After editing this file, run `ccc doctor` to verify your configuration.\n"
448
+ "\n"
449
+ )
450
+
451
+ _INITIAL_ENVS_COMMENT = (
452
+ "\n"
453
+ "# Environment variables to inject into the daemon running in the background.\n"
454
+ "# Uncomment and fill in keys for the LiteLLM providers you plan to use.\n"
455
+ "#\n"
456
+ "# envs:\n"
457
+ "# OPENAI_API_KEY: ...\n"
458
+ "# GEMINI_API_KEY: ...\n"
459
+ "# ANTHROPIC_API_KEY: ...\n"
460
+ "# VOYAGE_API_KEY: ...\n"
461
+ )
462
+
463
+
464
def save_initial_user_settings(embedding: EmbeddingSettings) -> Path:
    """Write the initial global_settings.yml with comment hints and env examples.

    Only used by `ccc init` on first-time setup. Emits only the `embedding:`
    block from the input; the `envs:` section is a commented-out template.
    Subsequent programmatic writes use `save_user_settings` and do not
    preserve comments.

    Args:
        embedding: Embedding settings to serialize under ``embedding:``.

    Returns:
        The path of the settings file that was written.
    """
    emb_block = _yaml.safe_dump(
        {"embedding": _embedding_settings_to_dict(embedding)},
        default_flow_style=False,
        sort_keys=False,
    )
    content = _INITIAL_HEADER + emb_block + _INITIAL_ENVS_COMMENT

    path = user_settings_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # The header comment contains a non-ASCII character (an em dash), so the
    # encoding is pinned to UTF-8 rather than left to the platform locale.
    path.write_text(content, encoding="utf-8")
    return path
483
+
484
+
439
485
  def load_project_settings(project_root: Path) -> ProjectSettings:
440
486
  """Read ``$PROJECT_ROOT/.cocoindex_code/settings.yml``.
441
487
 
@@ -2,10 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import importlib.util
5
6
  import logging
6
7
  import pathlib
7
8
  from dataclasses import dataclass
8
- from typing import TYPE_CHECKING, Annotated, Union
9
+ from typing import TYPE_CHECKING, Annotated, NamedTuple, Union
9
10
 
10
11
  import cocoindex as coco
11
12
  import numpy as np
@@ -41,6 +42,41 @@ embedder: Embedder | None = None
41
42
  query_prompt_name: str | None = None
42
43
 
43
44
 
45
def is_sentence_transformers_installed() -> bool:
    """Report whether the `sentence_transformers` package can be located.

    The lookup goes through `importlib.util.find_spec` instead of an actual
    `import`, so the check does not trigger the slow, torch-loading import.
    """
    spec = importlib.util.find_spec("sentence_transformers")
    return spec is not None
52
+
53
+
54
class EmbeddingCheckResult(NamedTuple):
    """Result of one test embedding call made by `check_embedding`.

    On success ``dim`` holds the vector length and ``error`` is ``None``;
    on failure ``dim`` is ``None`` and ``error`` holds a short message.
    """

    # Length of the returned embedding vector, or None when the call failed.
    dim: int | None
    # One-line "<ExceptionType>: <message>" text, or None on success.
    error: str | None


async def check_embedding(embedder: Embedder) -> EmbeddingCheckResult:
    """Embed a short fixed string with *embedder* and report the outcome.

    Exceptions raised by the embed call are caught and reported through the
    ``error`` field rather than propagated. The error text is reduced to its
    first line and capped at 500 characters (plus an ellipsis when cut).

    Args:
        embedder: Object exposing an awaitable ``embed(text)`` method.

    Returns:
        An `EmbeddingCheckResult` with either ``dim`` or ``error`` set.
    """
    try:
        dimension = len(await embedder.embed("hello world"))
    except Exception as exc:
        summary = f"{type(exc).__name__}: {exc}".splitlines()[0]
        # Keep the message short enough to print on one line.
        summary = summary if len(summary) <= 500 else summary[:500] + "…"
        return EmbeddingCheckResult(dim=None, error=summary)
    else:
        return EmbeddingCheckResult(dim=dimension, error=None)
78
+
79
+
44
80
  def create_embedder(settings: EmbeddingSettings) -> Embedder:
45
81
  """Create and return an embedder instance based on settings.
46
82
 
File without changes