cocoindex-code 0.2.29__tar.gz → 0.2.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/PKG-INFO +3 -3
  2. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/README.md +2 -2
  3. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/_version.py +2 -2
  4. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/daemon.py +1 -1
  5. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/embedder_params.py +6 -5
  6. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/settings.py +1 -1
  7. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/shared.py +21 -2
  8. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/.gitignore +0 -0
  9. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/LICENSE +0 -0
  10. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/pyproject.toml +0 -0
  11. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/__init__.py +0 -0
  12. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/__main__.py +0 -0
  13. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/_daemon_paths.py +0 -0
  14. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/chunking.py +0 -0
  15. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/cli.py +0 -0
  16. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/client.py +0 -0
  17. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/embedder_defaults.py +0 -0
  18. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/indexer.py +0 -0
  19. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/litellm_embedder.py +0 -0
  20. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/project.py +0 -0
  21. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/protocol.py +0 -0
  22. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/query.py +0 -0
  23. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/schema.py +0 -0
  24. {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/server.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex-code
3
- Version: 0.2.29
3
+ Version: 0.2.30
4
4
  Summary: MCP server for indexing and querying codebases using CocoIndex
5
5
  Project-URL: Homepage, https://github.com/cocoindex-io/cocoindex-code
6
6
  Project-URL: Repository, https://github.com/cocoindex-io/cocoindex-code
@@ -441,7 +441,7 @@ embedding:
441
441
  # `ccc init` auto-populates these for known models (e.g. Cohere, Voyage, Nvidia NIM,
442
442
  # nomic-ai code-retrieval models, Snowflake arctic-embed).
443
443
  # indexing_params:
444
- # input_type: search_document # litellm: input_type, dimensions
444
+ # input_type: search_document # litellm: input_type
445
445
  # query_params:
446
446
  # input_type: search_query # sentence-transformers: prompt_name
447
447
 
@@ -471,7 +471,7 @@ embedding:
471
471
 
472
472
  OpenAI embeddings (`text-embedding-3-*`, `text-embedding-ada-002`) are intentionally not in the list: they're symmetric and have no equivalent knob.
473
473
 
474
- **Accepted keys:** `prompt_name` (sentence-transformers), `input_type` and `dimensions` (litellm). Other keys are rejected at daemon startup with a clear error.
474
+ **Accepted keys:** `prompt_name` (sentence-transformers) and `input_type` (litellm). Other keys are rejected at daemon startup with a clear error. Note: `dimensions` is intentionally not exposed here — output dimension must be identical for indexing and query, so it's a model-wide setting rather than a per-side knob.
475
475
 
476
476
  **Doctor checks both sides.** `ccc doctor` exercises the model once with `indexing_params` and once with `query_params`, reporting each as a separate `Model Check (indexing)` / `Model Check (query)` entry — so a misconfiguration on one side is diagnosable without hiding behind the other.
477
477
 
@@ -397,7 +397,7 @@ embedding:
397
397
  # `ccc init` auto-populates these for known models (e.g. Cohere, Voyage, Nvidia NIM,
398
398
  # nomic-ai code-retrieval models, Snowflake arctic-embed).
399
399
  # indexing_params:
400
- # input_type: search_document # litellm: input_type, dimensions
400
+ # input_type: search_document # litellm: input_type
401
401
  # query_params:
402
402
  # input_type: search_query # sentence-transformers: prompt_name
403
403
 
@@ -427,7 +427,7 @@ embedding:
427
427
 
428
428
  OpenAI embeddings (`text-embedding-3-*`, `text-embedding-ada-002`) are intentionally not in the list: they're symmetric and have no equivalent knob.
429
429
 
430
- **Accepted keys:** `prompt_name` (sentence-transformers), `input_type` and `dimensions` (litellm). Other keys are rejected at daemon startup with a clear error.
430
+ **Accepted keys:** `prompt_name` (sentence-transformers) and `input_type` (litellm). Other keys are rejected at daemon startup with a clear error. Note: `dimensions` is intentionally not exposed here — output dimension must be identical for indexing and query, so it's a model-wide setting rather than a per-side knob.
431
431
 
432
432
  **Doctor checks both sides.** `ccc doctor` exercises the model once with `indexing_params` and once with `query_params`, reporting each as a separate `Model Check (indexing)` / `Model Check (query)` entry — so a misconfiguration on one side is diagnosable without hiding behind the other.
433
433
 
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.2.29'
22
- __version_tuple__ = version_tuple = (0, 2, 29)
21
+ __version__ = version = '0.2.30'
22
+ __version_tuple__ = version_tuple = (0, 2, 30)
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -586,7 +586,7 @@ def run_daemon() -> None:
586
586
  handshake_warnings.append(
587
587
  _build_backward_compat_warning(user_settings, user_settings_path())
588
588
  )
589
- embedder = create_embedder(user_settings.embedding)
589
+ embedder = create_embedder(user_settings.embedding, indexing_params=indexing_params)
590
590
  else:
591
591
  settings_env_keys = []
592
592
  embedder = None
@@ -21,13 +21,14 @@ __all__ = [
21
21
 
22
22
 
23
23
  # Accepted kwargs per provider. Intentionally minimal — we only expose knobs
24
- # that users have reason to tune. ``normalize_embeddings`` (sentence-
25
- # transformers) and ``encoding_format`` (litellm) are deliberately excluded
26
- # because other code assumes unit vectors (query._l2_to_score) and float
27
- # payloads (litellm_embedder hardcodes encoding_format="float").
24
+ # that users have reason to tune AND that make sense per-side (indexing vs
25
+ # query). Excluded keys:
26
+ # - ``normalize_embeddings`` (sentence-transformers): query._l2_to_score
27
+ # assumes unit vectors.
28
+ # - ``encoding_format`` (litellm): litellm_embedder hardcodes "float".
28
29
  _ACCEPTED_KWARGS: dict[str, frozenset[str]] = {
29
30
  "sentence-transformers": frozenset({"prompt_name"}),
30
- "litellm": frozenset({"input_type", "dimensions"}),
31
+ "litellm": frozenset({"input_type"}),
31
32
  }
32
33
 
33
34
 
@@ -544,7 +544,7 @@ _PARAMS_COMMENT_BY_PROVIDER: dict[str, str] = {
544
544
  "litellm": (
545
545
  " #\n"
546
546
  " # Extra kwargs passed to the embedder. Supported keys:\n"
547
- " # input_type, dimensions\n"
547
+ " # input_type\n"
548
548
  " # indexing_params: {}\n"
549
549
  " # query_params: {}\n"
550
550
  ),
@@ -76,8 +76,26 @@ async def check_embedding(
76
76
  return EmbeddingCheckResult(dim=None, error=msg)
77
77
 
78
78
 
79
- def create_embedder(settings: EmbeddingSettings) -> Embedder:
80
- """Create and return an embedder instance based on settings."""
79
+ def create_embedder(
80
+ settings: EmbeddingSettings,
81
+ indexing_params: dict[str, Any] | None = None,
82
+ ) -> Embedder:
83
+ """Create and return an embedder instance based on settings.
84
+
85
+ For LiteLLM embedders, *indexing_params* (e.g. ``{"input_type": "passage"}``)
86
+ are passed to the constructor as default kwargs forwarded into every
87
+ ``litellm.aembedding`` call — including paths that don't go through
88
+ :data:`INDEXING_EMBED_PARAMS` (e.g. the dimension probe in ``_get_dim``,
89
+ or any helper that calls ``embed()`` with no per-side kwargs). Per-call
90
+ overrides (the ``query_params`` spread at query time) still take effect
91
+ because :meth:`LiteLLMEmbedder._embed` overlays kwargs on top of the
92
+ constructor's ``self._kwargs``.
93
+
94
+ *indexing_params* is ignored for sentence-transformers — its constructor
95
+ doesn't accept arbitrary kwargs; ``prompt_name`` is a per-call argument
96
+ only and the indexing default is supplied at the call site via
97
+ :data:`INDEXING_EMBED_PARAMS`.
98
+ """
81
99
  if settings.provider == "sentence-transformers":
82
100
  from cocoindex.ops.sentence_transformers import SentenceTransformerEmbedder
83
101
 
@@ -103,6 +121,7 @@ def create_embedder(settings: EmbeddingSettings) -> Embedder:
103
121
  instance = PacedLiteLLMEmbedder(
104
122
  settings.model,
105
123
  min_interval_ms=min_interval_ms,
124
+ **(dict(indexing_params) if indexing_params else {}),
106
125
  )
107
126
  logger.info(
108
127
  "Embedding model (LiteLLM): %s | min_interval_ms: %s",
File without changes