cocoindex-code 0.2.29__tar.gz → 0.2.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/PKG-INFO +3 -3
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/README.md +2 -2
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/_version.py +2 -2
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/daemon.py +1 -1
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/embedder_params.py +6 -5
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/settings.py +1 -1
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/shared.py +21 -2
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/.gitignore +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/LICENSE +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/pyproject.toml +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/__init__.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/__main__.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/_daemon_paths.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/chunking.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/cli.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/client.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/embedder_defaults.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/indexer.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/litellm_embedder.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/project.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/protocol.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/query.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/schema.py +0 -0
- {cocoindex_code-0.2.29 → cocoindex_code-0.2.30}/src/cocoindex_code/server.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cocoindex-code
|
|
3
|
-
Version: 0.2.29
|
|
3
|
+
Version: 0.2.30
|
|
4
4
|
Summary: MCP server for indexing and querying codebases using CocoIndex
|
|
5
5
|
Project-URL: Homepage, https://github.com/cocoindex-io/cocoindex-code
|
|
6
6
|
Project-URL: Repository, https://github.com/cocoindex-io/cocoindex-code
|
|
@@ -441,7 +441,7 @@ embedding:
|
|
|
441
441
|
# `ccc init` auto-populates these for known models (e.g. Cohere, Voyage, Nvidia NIM,
|
|
442
442
|
# nomic-ai code-retrieval models, Snowflake arctic-embed).
|
|
443
443
|
# indexing_params:
|
|
444
|
-
# input_type: search_document # litellm: input_type
|
|
444
|
+
# input_type: search_document # litellm: input_type
|
|
445
445
|
# query_params:
|
|
446
446
|
# input_type: search_query # sentence-transformers: prompt_name
|
|
447
447
|
|
|
@@ -471,7 +471,7 @@ embedding:
|
|
|
471
471
|
|
|
472
472
|
OpenAI embeddings (`text-embedding-3-*`, `text-embedding-ada-002`) are intentionally not in the list: they're symmetric and have no equivalent knob.
|
|
473
473
|
|
|
474
|
-
**Accepted keys:** `prompt_name` (sentence-transformers)
|
|
474
|
+
**Accepted keys:** `prompt_name` (sentence-transformers) and `input_type` (litellm). Other keys are rejected at daemon startup with a clear error. Note: `dimensions` is intentionally not exposed here — output dimension must be identical for indexing and query, so it's a model-wide setting rather than a per-side knob.
|
|
475
475
|
|
|
476
476
|
**Doctor checks both sides.** `ccc doctor` exercises the model once with `indexing_params` and once with `query_params`, reporting each as a separate `Model Check (indexing)` / `Model Check (query)` entry — so a misconfiguration on one side is diagnosable without hiding behind the other.
|
|
477
477
|
|
|
@@ -397,7 +397,7 @@ embedding:
|
|
|
397
397
|
# `ccc init` auto-populates these for known models (e.g. Cohere, Voyage, Nvidia NIM,
|
|
398
398
|
# nomic-ai code-retrieval models, Snowflake arctic-embed).
|
|
399
399
|
# indexing_params:
|
|
400
|
-
# input_type: search_document # litellm: input_type
|
|
400
|
+
# input_type: search_document # litellm: input_type
|
|
401
401
|
# query_params:
|
|
402
402
|
# input_type: search_query # sentence-transformers: prompt_name
|
|
403
403
|
|
|
@@ -427,7 +427,7 @@ embedding:
|
|
|
427
427
|
|
|
428
428
|
OpenAI embeddings (`text-embedding-3-*`, `text-embedding-ada-002`) are intentionally not in the list: they're symmetric and have no equivalent knob.
|
|
429
429
|
|
|
430
|
-
**Accepted keys:** `prompt_name` (sentence-transformers)
|
|
430
|
+
**Accepted keys:** `prompt_name` (sentence-transformers) and `input_type` (litellm). Other keys are rejected at daemon startup with a clear error. Note: `dimensions` is intentionally not exposed here — output dimension must be identical for indexing and query, so it's a model-wide setting rather than a per-side knob.
|
|
431
431
|
|
|
432
432
|
**Doctor checks both sides.** `ccc doctor` exercises the model once with `indexing_params` and once with `query_params`, reporting each as a separate `Model Check (indexing)` / `Model Check (query)` entry — so a misconfiguration on one side is diagnosable without hiding behind the other.
|
|
433
433
|
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.2.29'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 2, 29)
|
|
21
|
+
__version__ = version = '0.2.30'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 2, 30)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -586,7 +586,7 @@ def run_daemon() -> None:
|
|
|
586
586
|
handshake_warnings.append(
|
|
587
587
|
_build_backward_compat_warning(user_settings, user_settings_path())
|
|
588
588
|
)
|
|
589
|
-
embedder = create_embedder(user_settings.embedding)
|
|
589
|
+
embedder = create_embedder(user_settings.embedding, indexing_params=indexing_params)
|
|
590
590
|
else:
|
|
591
591
|
settings_env_keys = []
|
|
592
592
|
embedder = None
|
|
@@ -21,13 +21,14 @@ __all__ = [
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
# Accepted kwargs per provider. Intentionally minimal — we only expose knobs
|
|
24
|
-
# that users have reason to tune
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
#
|
|
24
|
+
# that users have reason to tune AND that make sense per-side (indexing vs
|
|
25
|
+
# query). Excluded keys:
|
|
26
|
+
# - ``normalize_embeddings`` (sentence-transformers): query._l2_to_score
|
|
27
|
+
# assumes unit vectors.
|
|
28
|
+
# - ``encoding_format`` (litellm): litellm_embedder hardcodes "float".
|
|
28
29
|
_ACCEPTED_KWARGS: dict[str, frozenset[str]] = {
|
|
29
30
|
"sentence-transformers": frozenset({"prompt_name"}),
|
|
30
|
-
"litellm": frozenset({"input_type"
|
|
31
|
+
"litellm": frozenset({"input_type"}),
|
|
31
32
|
}
|
|
32
33
|
|
|
33
34
|
|
|
@@ -544,7 +544,7 @@ _PARAMS_COMMENT_BY_PROVIDER: dict[str, str] = {
|
|
|
544
544
|
"litellm": (
|
|
545
545
|
" #\n"
|
|
546
546
|
" # Extra kwargs passed to the embedder. Supported keys:\n"
|
|
547
|
-
" # input_type
|
|
547
|
+
" # input_type\n"
|
|
548
548
|
" # indexing_params: {}\n"
|
|
549
549
|
" # query_params: {}\n"
|
|
550
550
|
),
|
|
@@ -76,8 +76,26 @@ async def check_embedding(
|
|
|
76
76
|
return EmbeddingCheckResult(dim=None, error=msg)
|
|
77
77
|
|
|
78
78
|
|
|
79
|
-
def create_embedder(
|
|
80
|
-
|
|
79
|
+
def create_embedder(
|
|
80
|
+
settings: EmbeddingSettings,
|
|
81
|
+
indexing_params: dict[str, Any] | None = None,
|
|
82
|
+
) -> Embedder:
|
|
83
|
+
"""Create and return an embedder instance based on settings.
|
|
84
|
+
|
|
85
|
+
For LiteLLM embedders, *indexing_params* (e.g. ``{"input_type": "passage"}``)
|
|
86
|
+
are passed to the constructor as default kwargs forwarded into every
|
|
87
|
+
``litellm.aembedding`` call — including paths that don't go through
|
|
88
|
+
:data:`INDEXING_EMBED_PARAMS` (e.g. the dimension probe in ``_get_dim``,
|
|
89
|
+
or any helper that calls ``embed()`` with no per-side kwargs). Per-call
|
|
90
|
+
overrides (the ``query_params`` spread at query time) still take effect
|
|
91
|
+
because :meth:`LiteLLMEmbedder._embed` overlays kwargs on top of the
|
|
92
|
+
constructor's ``self._kwargs``.
|
|
93
|
+
|
|
94
|
+
*indexing_params* is ignored for sentence-transformers — its constructor
|
|
95
|
+
doesn't accept arbitrary kwargs; ``prompt_name`` is a per-call argument
|
|
96
|
+
only and the indexing default is supplied at the call site via
|
|
97
|
+
:data:`INDEXING_EMBED_PARAMS`.
|
|
98
|
+
"""
|
|
81
99
|
if settings.provider == "sentence-transformers":
|
|
82
100
|
from cocoindex.ops.sentence_transformers import SentenceTransformerEmbedder
|
|
83
101
|
|
|
@@ -103,6 +121,7 @@ def create_embedder(settings: EmbeddingSettings) -> Embedder:
|
|
|
103
121
|
instance = PacedLiteLLMEmbedder(
|
|
104
122
|
settings.model,
|
|
105
123
|
min_interval_ms=min_interval_ms,
|
|
124
|
+
**(dict(indexing_params) if indexing_params else {}),
|
|
106
125
|
)
|
|
107
126
|
logger.info(
|
|
108
127
|
"Embedding model (LiteLLM): %s | min_interval_ms: %s",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|