cocoindex-code 0.2.22__tar.gz → 0.2.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/PKG-INFO +17 -11
  2. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/README.md +10 -9
  3. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/pyproject.toml +9 -1
  4. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/_version.py +2 -2
  5. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/cli.py +163 -10
  6. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/daemon.py +10 -12
  7. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/settings.py +57 -11
  8. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/shared.py +37 -1
  9. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/.gitignore +0 -0
  10. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/LICENSE +0 -0
  11. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/__init__.py +0 -0
  12. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/__main__.py +0 -0
  13. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/_daemon_paths.py +0 -0
  14. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/chunking.py +0 -0
  15. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/client.py +0 -0
  16. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/indexer.py +0 -0
  17. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/litellm_embedder.py +0 -0
  18. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/project.py +0 -0
  19. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/protocol.py +0 -0
  20. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/query.py +0 -0
  21. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/schema.py +0 -0
  22. {cocoindex_code-0.2.22 → cocoindex_code-0.2.23}/src/cocoindex_code/server.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex-code
3
- Version: 0.2.22
3
+ Version: 0.2.23
4
4
  Summary: MCP server for indexing and querying codebases using CocoIndex
5
5
  Project-URL: Homepage, https://github.com/cocoindex-io/cocoindex-code
6
6
  Project-URL: Repository, https://github.com/cocoindex-io/cocoindex-code
@@ -25,16 +25,21 @@ Requires-Dist: numpy>=1.24.0
25
25
  Requires-Dist: pathspec>=0.12.1
26
26
  Requires-Dist: pydantic>=2.0.0
27
27
  Requires-Dist: pyyaml>=6.0
28
- Requires-Dist: sentence-transformers>=2.2.0
28
+ Requires-Dist: questionary>=2.0.0
29
29
  Requires-Dist: sqlite-vec>=0.1.0
30
30
  Requires-Dist: typer>=0.9.0
31
+ Provides-Extra: default
32
+ Requires-Dist: cocoindex[sentence-transformers]==1.0.0a43; extra == 'default'
31
33
  Provides-Extra: dev
34
+ Requires-Dist: cocoindex[sentence-transformers]==1.0.0a43; extra == 'dev'
32
35
  Requires-Dist: mypy>=1.0.0; extra == 'dev'
33
36
  Requires-Dist: prek>=0.1.0; extra == 'dev'
34
37
  Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
35
38
  Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
36
39
  Requires-Dist: pytest>=7.0.0; extra == 'dev'
37
40
  Requires-Dist: ruff>=0.1.0; extra == 'dev'
41
+ Provides-Extra: embeddings-local
42
+ Requires-Dist: cocoindex[sentence-transformers]==1.0.0a43; extra == 'embeddings-local'
38
43
  Description-Content-Type: text/markdown
39
44
 
40
45
  <p align="center">
@@ -85,16 +90,18 @@ A lightweight, effective **(AST-based)** semantic code search tool for your code
85
90
 
86
91
  Using [pipx](https://pipx.pypa.io/stable/installation/):
87
92
  ```bash
88
- pipx install cocoindex-code # first install
89
- pipx upgrade cocoindex-code # upgrade
93
+ pipx install 'cocoindex-code[default]' # batteries included (local embeddings)
94
+ pipx upgrade cocoindex-code # upgrade
90
95
  ```
91
96
 
92
97
  Using [uv](https://docs.astral.sh/uv/getting-started/installation/):
93
98
  ```bash
94
- uv tool install --upgrade cocoindex-code --prerelease explicit --with "cocoindex>=1.0.0a24"
99
+ uv tool install --upgrade 'cocoindex-code[default]' --prerelease explicit --with "cocoindex>=1.0.0a24"
95
100
  ```
96
101
 
97
- The default embedding model runs locally ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) — no API key required, completely free.
102
+ Two install styles:
103
+ - `cocoindex-code[default]` — batteries-included. Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs).
104
+ - `cocoindex-code` — slim. LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers).
98
105
 
99
106
  Next, set up your [coding agent integration](#coding-agent-integration) — or jump to [Manual CLI Usage](#manual-cli-usage) if you prefer direct control.
100
107
 
@@ -314,8 +321,7 @@ Pass configuration to `docker run` with `-e`:
314
321
  # Exclude build artefacts (Scala/SBT example)
315
322
  -e COCOINDEX_CODE_EXCLUDE_PATTERNS='["**/target/**","**/.bloop/**","**/.metals/**"]'
316
323
 
317
- # Swap in a code-optimised embedding model
318
- -e COCOINDEX_CODE_EMBEDDING_MODEL=voyage/voyage-code-3
324
+ # Set an API key
319
325
  -e VOYAGE_API_KEY=your-key
320
326
  ```
321
327
 
@@ -330,7 +336,7 @@ docker build -t cocoindex-code:local -f docker/Dockerfile .
330
336
  - **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates.
331
337
  - **Multi-Language Support**: Python, JavaScript/TypeScript, Rust, Go, Java, C/C++, C#, SQL, Shell, and more.
332
338
  - **Embedded**: Portable and just works, no database setup required!
333
- - **Flexible Embeddings**: Local SentenceTransformers by default (free!) or 100+ cloud providers via LiteLLM.
339
+ - **Flexible Embeddings**: Local SentenceTransformers via the `[default]` extra (free, no API key!) or 100+ cloud providers via LiteLLM.
334
340
 
335
341
  ## Configuration
336
342
 
@@ -343,7 +349,7 @@ Shared across all projects. Controls the embedding model and environment variabl
343
349
  ```yaml
344
350
  embedding:
345
351
  provider: sentence-transformers # or "litellm"
346
- model: sentence-transformers/all-MiniLM-L6-v2
352
+ model: Snowflake/snowflake-arctic-embed-xs
347
353
  device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
348
354
  min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
349
355
 
@@ -415,7 +421,7 @@ See [`src/cocoindex_code/chunking.py`](./src/cocoindex_code/chunking.py) for the
415
421
 
416
422
  ## Embedding Models
417
423
 
418
- By default, a local SentenceTransformers model ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) is used — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
424
+ With the `[default]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
419
425
 
420
426
  > The `envs` entries below are only needed if the key isn't already in your shell environment — the daemon inherits your environment automatically.
421
427
 
@@ -46,16 +46,18 @@ A lightweight, effective **(AST-based)** semantic code search tool for your code
46
46
 
47
47
  Using [pipx](https://pipx.pypa.io/stable/installation/):
48
48
  ```bash
49
- pipx install cocoindex-code # first install
50
- pipx upgrade cocoindex-code # upgrade
49
+ pipx install 'cocoindex-code[default]' # batteries included (local embeddings)
50
+ pipx upgrade cocoindex-code # upgrade
51
51
  ```
52
52
 
53
53
  Using [uv](https://docs.astral.sh/uv/getting-started/installation/):
54
54
  ```bash
55
- uv tool install --upgrade cocoindex-code --prerelease explicit --with "cocoindex>=1.0.0a24"
55
+ uv tool install --upgrade 'cocoindex-code[default]' --prerelease explicit --with "cocoindex>=1.0.0a24"
56
56
  ```
57
57
 
58
- The default embedding model runs locally ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) — no API key required, completely free.
58
+ Two install styles:
59
+ - `cocoindex-code[default]` — batteries-included. Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs).
60
+ - `cocoindex-code` — slim. LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers).
59
61
 
60
62
  Next, set up your [coding agent integration](#coding-agent-integration) — or jump to [Manual CLI Usage](#manual-cli-usage) if you prefer direct control.
61
63
 
@@ -275,8 +277,7 @@ Pass configuration to `docker run` with `-e`:
275
277
  # Exclude build artefacts (Scala/SBT example)
276
278
  -e COCOINDEX_CODE_EXCLUDE_PATTERNS='["**/target/**","**/.bloop/**","**/.metals/**"]'
277
279
 
278
- # Swap in a code-optimised embedding model
279
- -e COCOINDEX_CODE_EMBEDDING_MODEL=voyage/voyage-code-3
280
+ # Set an API key
280
281
  -e VOYAGE_API_KEY=your-key
281
282
  ```
282
283
 
@@ -291,7 +292,7 @@ docker build -t cocoindex-code:local -f docker/Dockerfile .
291
292
  - **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates.
292
293
  - **Multi-Language Support**: Python, JavaScript/TypeScript, Rust, Go, Java, C/C++, C#, SQL, Shell, and more.
293
294
  - **Embedded**: Portable and just works, no database setup required!
294
- - **Flexible Embeddings**: Local SentenceTransformers by default (free!) or 100+ cloud providers via LiteLLM.
295
+ - **Flexible Embeddings**: Local SentenceTransformers via the `[default]` extra (free, no API key!) or 100+ cloud providers via LiteLLM.
295
296
 
296
297
  ## Configuration
297
298
 
@@ -304,7 +305,7 @@ Shared across all projects. Controls the embedding model and environment variabl
304
305
  ```yaml
305
306
  embedding:
306
307
  provider: sentence-transformers # or "litellm"
307
- model: sentence-transformers/all-MiniLM-L6-v2
308
+ model: Snowflake/snowflake-arctic-embed-xs
308
309
  device: mps # optional: cpu, cuda, mps (auto-detected if omitted)
309
310
  min_interval_ms: 300 # optional: pace LiteLLM embedding requests to reduce 429s; defaults to 5 for LiteLLM
310
311
 
@@ -376,7 +377,7 @@ See [`src/cocoindex_code/chunking.py`](./src/cocoindex_code/chunking.py) for the
376
377
 
377
378
  ## Embedding Models
378
379
 
379
- By default, a local SentenceTransformers model ([sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)) is used — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
380
+ With the `[default]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
380
381
 
381
382
  > The `envs` entries below are only needed if the key isn't already in your shell environment — the daemon inherits your environment automatically.
382
383
 
@@ -24,7 +24,6 @@ classifiers = [
24
24
  dependencies = [
25
25
  "mcp>=1.0.0",
26
26
  "cocoindex[litellm]==1.0.0a43",
27
- "sentence-transformers>=2.2.0",
28
27
  "sqlite-vec>=0.1.0",
29
28
  "pydantic>=2.0.0",
30
29
  "numpy>=1.24.0",
@@ -33,9 +32,16 @@ dependencies = [
33
32
  "msgspec>=0.19.0",
34
33
  "pathspec>=0.12.1",
35
34
  "pyyaml>=6.0",
35
+ "questionary>=2.0.0",
36
36
  ]
37
37
 
38
38
  [project.optional-dependencies]
39
+ embeddings-local = [
40
+ "cocoindex[sentence-transformers]==1.0.0a43",
41
+ ]
42
+ default = [
43
+ "cocoindex[sentence-transformers]==1.0.0a43",
44
+ ]
39
45
  dev = [
40
46
  "pytest>=7.0.0",
41
47
  "pytest-asyncio>=0.21.0",
@@ -43,6 +49,7 @@ dev = [
43
49
  "ruff>=0.1.0",
44
50
  "mypy>=1.0.0",
45
51
  "prek>=0.1.0",
52
+ "cocoindex[sentence-transformers]==1.0.0a43",
46
53
  ]
47
54
 
48
55
  [project.scripts]
@@ -76,6 +83,7 @@ dev = [
76
83
  "mypy>=1.0.0",
77
84
  "prek>=0.1.0",
78
85
  "types-pyyaml>=6.0.12.20250915",
86
+ "cocoindex[sentence-transformers]==1.0.0a43",
79
87
  ]
80
88
 
81
89
  [tool.uv]
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.2.22'
22
- __version_tuple__ = version_tuple = (0, 2, 22)
21
+ __version__ = version = '0.2.23'
22
+ __version_tuple__ = version_tuple = (0, 2, 23)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import functools
6
+ import sys
6
7
  from collections.abc import Callable
7
8
  from pathlib import Path
8
9
  from typing import TypeVar
@@ -12,15 +13,16 @@ import typer as _typer
12
13
  from .client import DaemonStartError
13
14
  from .protocol import DoctorCheckResult, IndexingProgress, ProjectStatusResponse, SearchResponse
14
15
  from .settings import (
16
+ DEFAULT_ST_MODEL,
17
+ EmbeddingSettings,
15
18
  cocoindex_db_path,
16
19
  default_project_settings,
17
- default_user_settings,
18
20
  find_parent_with_marker,
19
21
  find_project_root,
20
22
  project_settings_path,
21
23
  resolve_db_dir,
24
+ save_initial_user_settings,
22
25
  save_project_settings,
23
- save_user_settings,
24
26
  target_sqlite_db_path,
25
27
  user_settings_path,
26
28
  )
@@ -282,19 +284,173 @@ def remove_from_gitignore(project_root: Path) -> None:
282
284
  # ---------------------------------------------------------------------------
283
285
 
284
286
 
287
+ _LITELLM_MODELS_URL = "https://docs.litellm.ai/docs/embedding/supported_embedding"
288
+
289
+
290
+ def _resolve_embedding_choice(
291
+ litellm_model_flag: str | None,
292
+ st_installed: bool,
293
+ tty: bool,
294
+ ) -> EmbeddingSettings:
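295
+ """Resolve the embedding settings for `ccc init`: an explicit `--litellm-model` flag wins; without a TTY, use the default local model if sentence-transformers is installed, otherwise exit with an error; with a TTY, prompt for provider and model."""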
296
+ if litellm_model_flag is not None:
297
+ return EmbeddingSettings(provider="litellm", model=litellm_model_flag)
298
+
299
+ if not tty:
300
+ if st_installed:
301
+ return EmbeddingSettings(provider="sentence-transformers", model=DEFAULT_ST_MODEL)
302
+ _typer.echo(
303
+ "Error: sentence-transformers is not installed and stdin is not a TTY.\n"
304
+ "Either install the extra (`pip install cocoindex-code[embeddings-local]`)\n"
305
+ "or pass `--litellm-model MODEL` to select a LiteLLM model.",
306
+ err=True,
307
+ )
308
+ raise _typer.Exit(code=1)
309
+
310
+ # Interactive
311
+ import questionary
312
+
313
+ if st_installed:
314
+ provider = questionary.select(
315
+ "Embedding provider",
316
+ choices=[
317
+ questionary.Choice(
318
+ title="sentence-transformers (local, free)",
319
+ value="sentence-transformers",
320
+ ),
321
+ questionary.Choice(
322
+ title="litellm (cloud, 100+ providers)",
323
+ value="litellm",
324
+ ),
325
+ ],
326
+ ).ask()
327
+ else:
328
+ _typer.echo(
329
+ "sentence-transformers is not installed — only `litellm` is available.\n"
330
+ "To enable local embeddings, install `cocoindex-code[embeddings-local]`."
331
+ )
332
+ provider = "litellm"
333
+
334
+ if provider is None: # user cancelled (Ctrl-C / Esc)
335
+ raise _typer.Exit(code=1)
336
+
337
+ if provider == "sentence-transformers":
338
+ model = questionary.text("Model name", default=DEFAULT_ST_MODEL).ask()
339
+ elif provider == "litellm":
340
+ _typer.echo(f"See supported LiteLLM embedding models: {_LITELLM_MODELS_URL}")
341
+ model = questionary.text("Model name").ask()
342
+ else:
343
+ _typer.echo(f"Error: unknown provider {provider!r}", err=True)
344
+ raise _typer.Exit(code=1)
345
+
346
+ if not model: # None (cancelled) or empty string
347
+ raise _typer.Exit(code=1)
348
+
349
+ return EmbeddingSettings(provider=provider, model=model.strip())
350
+
351
+
352
+ def _ok_fail_tag(ok: bool) -> str:
353
+ """Return a colored `[OK]` or `[FAIL]` tag string."""
354
+ import click as _click
355
+
356
+ if ok:
357
+ return _click.style("[OK]", fg="green", bold=True)
358
+ return _click.style("[FAIL]", fg="red", bold=True)
359
+
360
+
361
+ def _run_init_model_check(settings_path: Path) -> None:
362
+ """Ask the daemon to test the embedding model; print results and a hint on failure.
363
+
364
+ Drives the check via `DoctorRequest(project_root=None)`. The daemon loads
365
+ the model once and stays running, so the user's next `ccc index` starts
366
+ warm. Both DaemonStartError and generic exceptions are rendered as a
367
+ synthetic failed DoctorCheckResult — uniform failure-output shape.
368
+ """
369
+ from rich.console import Console as _Console
370
+ from rich.live import Live as _Live
371
+ from rich.spinner import Spinner as _Spinner
372
+
373
+ from . import client as _client
374
+
375
+ err_console = _Console(stderr=True)
376
+ results: list[DoctorCheckResult] = []
377
+ try:
378
+ with _Live(
379
+ _Spinner("dots", "Testing embedding model..."),
380
+ console=err_console,
381
+ transient=True,
382
+ ):
383
+ results = _client.doctor(project_root=None)
384
+ except Exception as e:
385
+ results = [
386
+ DoctorCheckResult(
387
+ name="Model Check",
388
+ ok=False,
389
+ details=[],
390
+ errors=[f"{type(e).__name__}: {e}"],
391
+ )
392
+ ]
393
+
394
+ failed = False
395
+ for r in results:
396
+ if r.name == "done":
397
+ continue
398
+ _print_doctor_result(r)
399
+ if not r.ok:
400
+ failed = True
401
+
402
+ if failed:
403
+ _typer.echo(
404
+ f"You can edit {settings_path} to change the model or add API keys\n"
405
+ "under `envs:`. Then run `ccc doctor` to verify.",
406
+ err=True,
407
+ )
408
+
409
+
410
+ def _setup_user_settings_interactive(litellm_model_flag: str | None) -> None:
411
+ """Interactive global-settings setup — only runs when settings are missing."""
412
+ from .shared import is_sentence_transformers_installed
413
+
414
+ embedding = _resolve_embedding_choice(
415
+ litellm_model_flag=litellm_model_flag,
416
+ st_installed=is_sentence_transformers_installed(),
417
+ tty=sys.stdin.isatty(),
418
+ )
419
+
420
+ path = save_initial_user_settings(embedding)
421
+ _typer.echo()
422
+ _typer.echo(f"Created user settings: {path}")
423
+
424
+ _typer.echo()
425
+ _typer.echo(f"Testing embedding model: {embedding.provider} / {embedding.model}")
426
+ _run_init_model_check(path)
427
+ _typer.echo()
428
+
429
+
285
430
  @app.command()
286
431
  def init(
432
+ litellm_model: str | None = _typer.Option(
433
+ None,
434
+ "--litellm-model",
435
+ help="Use the given LiteLLM model and skip provider/model prompts.",
436
+ ),
287
437
  force: bool = _typer.Option(False, "-f", "--force", help="Skip parent directory warning"),
288
438
  ) -> None:
289
439
  """Initialize a project for cocoindex-code."""
290
440
  cwd = Path.cwd().resolve()
291
441
  settings_file = project_settings_path(cwd)
292
442
 
293
- # Always ensure user settings exist
294
443
  user_path = user_settings_path()
295
- if not user_path.is_file():
296
- save_user_settings(default_user_settings())
297
- _typer.echo(f"Created user settings: {user_path}")
444
+ if user_path.is_file():
445
+ if litellm_model is not None:
446
+ _typer.echo(
447
+ f"Error: global settings already exist at {user_path}.\n"
448
+ "Edit that file or remove it before passing `--litellm-model`.",
449
+ err=True,
450
+ )
451
+ raise _typer.Exit(code=1)
452
+ else:
453
+ _setup_user_settings_interactive(litellm_model)
298
454
 
299
455
  # Check if already initialized
300
456
  if settings_file.is_file():
@@ -489,10 +645,7 @@ def _print_doctor_result(result: DoctorCheckResult) -> None:
489
645
 
490
646
  if result.name == "done":
491
647
  return
492
- if result.ok:
493
- tag = _click.style("[OK]", fg="green", bold=True)
494
- else:
495
- tag = _click.style("[FAIL]", fg="red", bold=True)
648
+ tag = _ok_fail_tag(result.ok)
496
649
  _typer.echo(f"\n {tag} {result.name}")
497
650
  for line in result.details:
498
651
  _typer.echo(f" {line}")
@@ -61,7 +61,7 @@ from .settings import (
61
61
  load_user_settings,
62
62
  target_sqlite_db_path,
63
63
  )
64
- from .shared import Embedder, create_embedder
64
+ from .shared import Embedder, check_embedding, create_embedder
65
65
 
66
66
  logger = logging.getLogger(__name__)
67
67
 
@@ -262,22 +262,20 @@ async def _handle_doctor(
262
262
 
263
263
  async def _check_model(embedder: Embedder) -> DoctorCheckResult:
264
264
  """Test the embedding model by embedding a short string."""
265
- try:
266
- vec = await embedder.embed("hello world")
267
- dim = len(vec)
265
+ result = await check_embedding(embedder)
266
+ if result.error is None:
268
267
  return DoctorCheckResult(
269
268
  name="Model Check",
270
269
  ok=True,
271
- details=[f"Embedding dimension: {dim}"],
270
+ details=[f"Embedding dimension: {result.dim}"],
272
271
  errors=[],
273
272
  )
274
- except Exception as e:
275
- return DoctorCheckResult(
276
- name="Model Check",
277
- ok=False,
278
- details=[],
279
- errors=[str(e)],
280
- )
273
+ return DoctorCheckResult(
274
+ name="Model Check",
275
+ ok=False,
276
+ details=[],
277
+ errors=[result.error],
278
+ )
281
279
 
282
280
 
283
281
  async def _check_file_walk(project_root_str: str) -> DoctorCheckResult:
@@ -126,11 +126,14 @@ class ProjectSettings:
126
126
  # ---------------------------------------------------------------------------
127
127
 
128
128
 
129
+ DEFAULT_ST_MODEL = "Snowflake/snowflake-arctic-embed-xs"
130
+
131
+
129
132
  def default_user_settings() -> UserSettings:
130
133
  return UserSettings(
131
134
  embedding=EmbeddingSettings(
132
135
  provider="sentence-transformers",
133
- model="sentence-transformers/all-MiniLM-L6-v2",
136
+ model=DEFAULT_ST_MODEL,
134
137
  )
135
138
  )
136
139
 
@@ -344,17 +347,20 @@ def load_gitignore_spec(project_root: Path) -> GitIgnoreSpec | None:
344
347
  # ---------------------------------------------------------------------------
345
348
 
346
349
 
347
- def _user_settings_to_dict(settings: UserSettings) -> dict[str, Any]:
348
- d: dict[str, Any] = {}
349
- emb: dict[str, Any] = {
350
- "provider": settings.embedding.provider,
351
- "model": settings.embedding.model,
350
+ def _embedding_settings_to_dict(embedding: EmbeddingSettings) -> dict[str, Any]:
351
+ d: dict[str, Any] = {
352
+ "provider": embedding.provider,
353
+ "model": embedding.model,
352
354
  }
353
- if settings.embedding.device is not None:
354
- emb["device"] = settings.embedding.device
355
- if settings.embedding.min_interval_ms is not None:
356
- emb["min_interval_ms"] = settings.embedding.min_interval_ms
357
- d["embedding"] = emb
355
+ if embedding.device is not None:
356
+ d["device"] = embedding.device
357
+ if embedding.min_interval_ms is not None:
358
+ d["min_interval_ms"] = embedding.min_interval_ms
359
+ return d
360
+
361
+
362
+ def _user_settings_to_dict(settings: UserSettings) -> dict[str, Any]:
363
+ d: dict[str, Any] = {"embedding": _embedding_settings_to_dict(settings.embedding)}
358
364
  if settings.envs:
359
365
  d["envs"] = dict(settings.envs)
360
366
  return d
@@ -436,6 +442,46 @@ def save_user_settings(settings: UserSettings) -> Path:
436
442
  return path
437
443
 
438
444
 
445
+ _INITIAL_HEADER = (
446
+ "# CocoIndex Code — global settings.\n"
447
+ "# After editing this file, run `ccc doctor` to verify your configuration.\n"
448
+ "\n"
449
+ )
450
+
451
+ _INITIAL_ENVS_COMMENT = (
452
+ "\n"
453
+ "# Environment variables to inject into the daemon running in the background.\n"
454
+ "# Uncomment and fill in keys for the LiteLLM providers you plan to use.\n"
455
+ "#\n"
456
+ "# envs:\n"
457
+ "# OPENAI_API_KEY: ...\n"
458
+ "# GEMINI_API_KEY: ...\n"
459
+ "# ANTHROPIC_API_KEY: ...\n"
460
+ "# VOYAGE_API_KEY: ...\n"
461
+ )
462
+
463
+
464
+ def save_initial_user_settings(embedding: EmbeddingSettings) -> Path:
465
+ """Write the initial global_settings.yml with comment hints and env examples.
466
+
467
+ Only used by `ccc init` on first-time setup. Emits only the `embedding:`
468
+ block from the input; the `envs:` section is a commented-out template.
469
+ Subsequent programmatic writes use `save_user_settings` and do not
470
+ preserve comments.
471
+ """
472
+ emb_block = _yaml.safe_dump(
473
+ {"embedding": _embedding_settings_to_dict(embedding)},
474
+ default_flow_style=False,
475
+ sort_keys=False,
476
+ )
477
+ content = _INITIAL_HEADER + emb_block + _INITIAL_ENVS_COMMENT
478
+
479
+ path = user_settings_path()
480
+ path.parent.mkdir(parents=True, exist_ok=True)
481
+ path.write_text(content)
482
+ return path
483
+
484
+
439
485
  def load_project_settings(project_root: Path) -> ProjectSettings:
440
486
  """Read ``$PROJECT_ROOT/.cocoindex_code/settings.yml``.
441
487
 
@@ -2,10 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import importlib.util
5
6
  import logging
6
7
  import pathlib
7
8
  from dataclasses import dataclass
8
- from typing import TYPE_CHECKING, Annotated, Union
9
+ from typing import TYPE_CHECKING, Annotated, NamedTuple, Union
9
10
 
10
11
  import cocoindex as coco
11
12
  import numpy as np
@@ -41,6 +42,41 @@ embedder: Embedder | None = None
41
42
  query_prompt_name: str | None = None
42
43
 
43
44
 
45
+ def is_sentence_transformers_installed() -> bool:
46
+ """Return True if the `sentence_transformers` package can be located on the import path.
47
+
48
+ Uses `find_spec` rather than `import` to avoid triggering the slow,
49
+ torch-loading import as a side effect of the check.
50
+ """
51
+ return importlib.util.find_spec("sentence_transformers") is not None
52
+
53
+
54
+ class EmbeddingCheckResult(NamedTuple):
55
+ """Outcome of a single embed-test call. See `check_embedding`.
56
+
57
+ Exactly one of ``dim`` / ``error`` is set: ``error is None`` means success.
58
+ """
59
+
60
+ dim: int | None
61
+ error: str | None
62
+
63
+
64
+ async def check_embedding(embedder: Embedder) -> EmbeddingCheckResult:
65
+ """Run a single embed call against *embedder* and report dim or error.
66
+
67
+ Never raises. Used by both the daemon's doctor path (`daemon._check_model`)
68
+ and, through the daemon's doctor request, the CLI's init flow (`cli._run_init_model_check`).
69
+ """
70
+ try:
71
+ vec = await embedder.embed("hello world")
72
+ return EmbeddingCheckResult(dim=len(vec), error=None)
73
+ except Exception as e:
74
+ msg = f"{type(e).__name__}: {e}".splitlines()[0]
75
+ if len(msg) > 500:
76
+ msg = msg[:500] + "…"
77
+ return EmbeddingCheckResult(dim=None, error=msg)
78
+
79
+
44
80
  def create_embedder(settings: EmbeddingSettings) -> Embedder:
45
81
  """Create and return an embedder instance based on settings.
46
82
 
File without changes